001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.parser; 018 019import static org.apache.juneau.commons.utils.IoUtils.*; 020import static org.apache.juneau.commons.utils.StringUtils.*; 021import static org.apache.juneau.commons.utils.ThrowableUtils.*; 022import static org.apache.juneau.commons.utils.Utils.*; 023 024import java.io.*; 025import java.nio.charset.*; 026 027import org.apache.juneau.*; 028import org.apache.juneau.commons.utils.*; 029 030/** 031 * A wrapper around an object that a parser reads its input from. 032 * 033 * <p> 034 * For character-based parsers, the input object can be any of the following: 035 * <ul> 036 * <li>{@link Reader} 037 * <li>{@link CharSequence} 038 * <li>{@link InputStream} 039 * <li><code><jk>byte</jk>[]</code> 040 * <li>{@link File} 041 * <li><code><jk>null</jk></code> 042 * </ul> 043 * 044 * <p> 045 * For stream-based parsers, the input object can be any of the following: 046 * <ul> 047 * <li>{@link InputStream} 048 * <li><code><jk>byte</jk>[]</code> 049 * <li>{@link File} 050 * <li>{@link String} - Hex-encoded bytes. (not BASE-64!) 051 * <li><code><jk>null</jk></code> 052 * </ul> 053 * 054 * <p> 055 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but 056 * streams and readers created from other types (e.g. Files) WILL be automatically closed. 057 * 058 * <h5 class='section'>See Also:</h5><ul> 059 * <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/SerializersAndParsers">Serializers and Parsers</a> 060 * </ul> 061 */ 062@SuppressWarnings("resource") 063public class ParserPipe implements Closeable { 064 065 private final Object input; 066 final boolean debug, strict, autoCloseStreams, unbuffered; 067 private final Charset charset; 068 069 private String inputString; 070 private InputStream inputStream; 071 private Reader reader; 072 private ParserReader parserReader; 073 private boolean doClose; 074 private BinaryFormat binaryFormat; 075 private Positionable positionable; 076 077 /** 078 * Shortcut constructor, typically for straight string input. 079 * 080 * <p> 081 * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code> 082 * 083 * @param input The input object. 084 */ 085 public ParserPipe(Object input) { 086 this(input, false, false, false, false, null, null); 087 } 088 089 /** 090 * Constructor for stream-based parsers. 091 * 092 * @param input The parser input object. 093 * @param debug 094 * If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()} 095 * method. 096 * This allows the contents of the pipe to be accessed when a problem occurs. 097 * @param autoCloseStreams 098 * Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input. 099 * @param unbuffered 100 * If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed 101 * multiple times. 102 * <br>Otherwise, we read character data into a reusable buffer. 103 * @param binaryFormat The binary format of input strings when converted to bytes. 104 */ 105 public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) { 106 this.input = input; 107 this.debug = debug; 108 this.strict = false; 109 this.autoCloseStreams = autoCloseStreams; 110 this.unbuffered = unbuffered; 111 this.charset = null; 112 if (input instanceof CharSequence input2) 113 this.inputString = input2.toString(); 114 this.binaryFormat = binaryFormat; 115 } 116 117 /** 118 * Constructor for reader-based parsers. 119 * 120 * @param input The parser input object. 121 * @param debug 122 * If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()} 123 * method. 124 * This allows the contents of the pipe to be accessed when a problem occurs. 125 * @param strict 126 * If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)} 127 * and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}. 128 * Otherwise, sets them to {@link CodingErrorAction#REPLACE}. 129 * @param autoCloseStreams 130 * Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input. 131 * @param unbuffered 132 * If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed 133 * multiple times. 134 * <br>Otherwise, we read character data into a reusable buffer. 135 * @param fileCharset 136 * The charset to expect when reading from {@link File Files}. 137 * @param streamCharset 138 * The charset to expect when reading from {@link InputStream InputStreams}. 139 */ 140 public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) { 141 boolean isFile = input instanceof File; 142 this.input = input; 143 this.debug = debug; 144 this.strict = strict; 145 this.autoCloseStreams = autoCloseStreams; 146 this.unbuffered = unbuffered; 147 Charset cs = isFile ? fileCharset : streamCharset; 148 if (cs == null) 149 cs = (isFile ? Charset.defaultCharset() : UTF8); 150 this.charset = cs; 151 if (input instanceof CharSequence cs2) 152 this.inputString = cs2.toString(); 153 this.binaryFormat = null; 154 } 155 156 /** 157 * Returns the contents of this pipe as a string. 158 * 159 * @return The contents of this pipe as a string. 160 * @throws IOException If thrown from inner reader. 161 */ 162 public String asString() throws IOException { 163 if (inputString == null) 164 inputString = read(getReader()); 165 return inputString; 166 } 167 168 @Override /* Overridden from Closeable */ 169 public void close() { 170 try { 171 if (doClose) 172 IoUtils.close(reader, inputStream); 173 } catch (IOException e) { 174 throw bex(e); 175 } 176 } 177 178 /** 179 * Returns the contents of this pipe as a buffered reader. 180 * 181 * <p> 182 * If the reader passed into this pipe is already a buffered reader, that reader will be returned. 183 * 184 * @return The contents of this pipe as a buffered reader. 185 * @throws IOException Thrown by underlying stream. 186 */ 187 public Reader getBufferedReader() throws IOException { return toBufferedReader(getReader()); } 188 189 /** 190 * Returns the input to this parser as a plain string. 191 * 192 * <p> 193 * This method only returns a value if {@link org.apache.juneau.Context.Builder#debug()} is enabled. 194 * 195 * @return The input as a string, or <jk>null</jk> if debug mode not enabled. 196 */ 197 public String getInputAsString() { return inputString; } 198 199 /** 200 * Wraps the specified input object inside an input stream. 201 * 202 * <p> 203 * Subclasses can override this method to implement their own input streams. 204 * 205 * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null. 206 * @throws IOException If object could not be converted to an input stream. 207 */ 208 public InputStream getInputStream() throws IOException { 209 if (input == null) 210 return null; 211 212 if (input instanceof InputStream input2) { 213 if (debug) { 214 var b = readBytes(input2); 215 inputString = toHex(b); 216 inputStream = new ByteArrayInputStream(b); 217 } else { 218 inputStream = input2; 219 doClose = autoCloseStreams; 220 } 221 } else if (input instanceof byte[]) { 222 if (debug) 223 inputString = toHex((byte[])input); 224 inputStream = new ByteArrayInputStream((byte[])input); 225 doClose = false; 226 } else if (input instanceof String input2) { 227 inputString = input2; 228 inputStream = new ByteArrayInputStream(convertFromString(input2)); 229 doClose = false; 230 } else if (input instanceof File input2) { 231 if (debug) { 232 var b = readBytes(input2); 233 inputString = toHex(b); 234 inputStream = new ByteArrayInputStream(b); 235 } else { 236 inputStream = new FileInputStream(input2); 237 doClose = true; 238 } 239 } else { 240 throw ioex("Cannot convert object of type {0} to an InputStream.", cn(input)); 241 } 242 243 return inputStream; 244 } 245 246 /** 247 * Converts this pipe into a {@link ParserReader}. 248 * 249 * @return The converted pipe. 250 * @throws IOException Thrown by underlying stream. 251 */ 252 public ParserReader getParserReader() throws IOException { 253 if (input == null) 254 return null; 255 if (input instanceof ParserReader input2) 256 parserReader = input2; 257 else 258 parserReader = new ParserReader(this); 259 return parserReader; 260 } 261 262 /** 263 * Wraps the specified input object inside a reader. 264 * 265 * <p> 266 * Subclasses can override this method to implement their own readers. 267 * 268 * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null. 269 * @throws IOException If object could not be converted to a reader. 270 */ 271 public Reader getReader() throws IOException { 272 if (input == null) 273 return null; 274 275 if (input instanceof Reader input2) { 276 if (debug) { 277 inputString = read(input2); 278 reader = new StringReader(inputString); 279 } else { 280 reader = input2; 281 doClose = autoCloseStreams; 282 } 283 } else if (input instanceof CharSequence input2) { 284 inputString = input2.toString(); 285 reader = new ParserReader(this); 286 doClose = false; 287 } else if (input instanceof InputStream || input instanceof byte[]) { 288 doClose = input instanceof InputStream && autoCloseStreams; 289 InputStream is = (input instanceof InputStream input2 ? input2 : new ByteArrayInputStream((byte[])input)); 290 CharsetDecoder cd = charset.newDecoder(); 291 if (strict) { 292 cd.onMalformedInput(CodingErrorAction.REPORT); 293 cd.onUnmappableCharacter(CodingErrorAction.REPORT); 294 } else { 295 cd.onMalformedInput(CodingErrorAction.REPLACE); 296 cd.onUnmappableCharacter(CodingErrorAction.REPLACE); 297 } 298 reader = new InputStreamReader(is, cd); 299 if (debug) { 300 inputString = read(reader); 301 reader = new StringReader(inputString); 302 } 303 } else if (input instanceof File input2) { 304 CharsetDecoder cd = charset.newDecoder(); 305 if (strict) { 306 cd.onMalformedInput(CodingErrorAction.REPORT); 307 cd.onUnmappableCharacter(CodingErrorAction.REPORT); 308 } else { 309 cd.onMalformedInput(CodingErrorAction.REPLACE); 310 cd.onUnmappableCharacter(CodingErrorAction.REPLACE); 311 } 312 reader = new InputStreamReader(new FileInputStream(input2), cd); 313 if (debug) { 314 inputString = read(reader); 315 reader = new StringReader(inputString); 316 } 317 doClose = true; 318 } else { 319 throw ioex("Cannot convert object of type {0} to an InputStream.", cn(input)); 320 } 321 322 return reader; 323 } 324 325 /** 326 * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}. 327 * 328 * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}. 329 */ 330 public boolean isString() { return nn(inputString); } 331 332 /** 333 * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe. 334 * 335 * <p> 336 * Used for gathering the failure position when {@link ParseException} is thrown. 337 * 338 * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe. 339 */ 340 public void setPositionable(Positionable positionable) { this.positionable = positionable; } 341 342 private byte[] convertFromString(String in) { 343 return switch (binaryFormat) { 344 case BASE64 -> base64Decode(in); 345 case HEX -> fromHex(in); 346 case SPACED_HEX -> fromSpacedHex(in); 347 default -> new byte[0]; 348 }; 349 } 350 351 Position getPosition() { 352 if (positionable == null) 353 return Position.UNKNOWN; 354 Position p = positionable.getPosition(); 355 if (p == null) 356 return Position.UNKNOWN; 357 return p; 358 } 359}