001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.parser; 014 015import static org.apache.juneau.common.internal.IOUtils.*; 016import static org.apache.juneau.common.internal.StringUtils.*; 017import static org.apache.juneau.internal.ClassUtils.*; 018 019import java.io.*; 020import java.nio.charset.*; 021 022import org.apache.juneau.*; 023import org.apache.juneau.common.internal.*; 024 025/** 026 * A wrapper around an object that a parser reads its input from. 027 * 028 * <p> 029 * For character-based parsers, the input object can be any of the following: 030 * <ul> 031 * <li>{@link Reader} 032 * <li>{@link CharSequence} 033 * <li>{@link InputStream} 034 * <li><code><jk>byte</jk>[]</code> 035 * <li>{@link File} 036 * <li><code><jk>null</jk></code> 037 * </ul> 038 * 039 * <p> 040 * For stream-based parsers, the input object can be any of the following: 041 * <ul> 042 * <li>{@link InputStream} 043 * <li><code><jk>byte</jk>[]</code> 044 * <li>{@link File} 045 * <li>{@link String} - Hex-encoded bytes. (not BASE-64!) 046 * <li><code><jk>null</jk></code> 047 * </ul> 048 * 049 * <p> 050 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but 051 * streams and readers created from other types (e.g. Files) WILL be automatically closed. 052 * 053 * <h5 class='section'>See Also:</h5><ul> 054 * <li class='link'><a class="doclink" href="../../../../index.html#jm.SerializersAndParsers">Serializers and Parsers</a> 055 * </ul> 056 */ 057public final class ParserPipe implements Closeable { 058 059 private final Object input; 060 final boolean debug, strict, autoCloseStreams, unbuffered; 061 private final Charset charset; 062 063 private String inputString; 064 private InputStream inputStream; 065 private Reader reader; 066 private ParserReader parserReader; 067 private boolean doClose; 068 private BinaryFormat binaryFormat; 069 private Positionable positionable; 070 071 /** 072 * Constructor for reader-based parsers. 073 * 074 * @param input The parser input object. 075 * @param debug 076 * If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()} 077 * method. 078 * This allows the contents of the pipe to be accessed when a problem occurs. 079 * @param strict 080 * If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)} 081 * and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}. 082 * Otherwise, sets them to {@link CodingErrorAction#REPLACE}. 083 * @param autoCloseStreams 084 * Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input. 085 * @param unbuffered 086 * If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed 087 * multiple times. 088 * <br>Otherwise, we read character data into a reusable buffer. 089 * @param fileCharset 090 * The charset to expect when reading from {@link File Files}. 091 * @param streamCharset 092 * The charset to expect when reading from {@link InputStream InputStreams}. 093 */ 094 public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) { 095 boolean isFile = input instanceof File; 096 this.input = input; 097 this.debug = debug; 098 this.strict = strict; 099 this.autoCloseStreams = autoCloseStreams; 100 this.unbuffered = unbuffered; 101 Charset cs = isFile ? fileCharset : streamCharset; 102 if (cs == null) 103 cs = (isFile ? Charset.defaultCharset() : UTF8); 104 this.charset = cs; 105 if (input instanceof CharSequence) 106 this.inputString = input.toString(); 107 this.binaryFormat = null; 108 } 109 110 /** 111 * Constructor for stream-based parsers. 112 * 113 * @param input The parser input object. 114 * @param debug 115 * If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()} 116 * method. 117 * This allows the contents of the pipe to be accessed when a problem occurs. 118 * @param autoCloseStreams 119 * Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input. 120 * @param unbuffered 121 * If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed 122 * multiple times. 123 * <br>Otherwise, we read character data into a reusable buffer. 124 * @param binaryFormat The binary format of input strings when converted to bytes. 125 */ 126 public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) { 127 this.input = input; 128 this.debug = debug; 129 this.strict = false; 130 this.autoCloseStreams = autoCloseStreams; 131 this.unbuffered = unbuffered; 132 this.charset = null; 133 if (input instanceof CharSequence) 134 this.inputString = input.toString(); 135 this.binaryFormat = binaryFormat; 136 } 137 138 /** 139 * Shortcut constructor, typically for straight string input. 140 * 141 * <p> 142 * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code> 143 * 144 * @param input The input object. 145 */ 146 public ParserPipe(Object input) { 147 this(input, false, false, false, false, null, null); 148 } 149 150 /** 151 * Wraps the specified input object inside an input stream. 152 * 153 * <p> 154 * Subclasses can override this method to implement their own input streams. 155 * 156 * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null. 157 * @throws IOException If object could not be converted to an input stream. 158 */ 159 public InputStream getInputStream() throws IOException { 160 if (input == null) 161 return null; 162 163 if (input instanceof InputStream) { 164 if (debug) { 165 byte[] b = readBytes((InputStream)input); 166 inputString = toHex(b); 167 inputStream = new ByteArrayInputStream(b); 168 } else { 169 inputStream = (InputStream)input; 170 doClose = autoCloseStreams; 171 } 172 } else if (input instanceof byte[]) { 173 if (debug) 174 inputString = toHex((byte[])input); 175 inputStream = new ByteArrayInputStream((byte[])input); 176 doClose = false; 177 } else if (input instanceof String) { 178 inputString = (String)input; 179 inputStream = new ByteArrayInputStream(convertFromString((String)input)); 180 doClose = false; 181 } else if (input instanceof File) { 182 if (debug) { 183 byte[] b = readBytes((File)input); 184 inputString = toHex(b); 185 inputStream = new ByteArrayInputStream(b); 186 } else { 187 inputStream = new FileInputStream((File)input); 188 doClose = true; 189 } 190 } else { 191 throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream."); 192 } 193 194 return inputStream; 195 } 196 197 private byte[] convertFromString(String in) { 198 switch(binaryFormat) { 199 case BASE64: return base64Decode(in); 200 case HEX: return fromHex(in); 201 case SPACED_HEX: return fromSpacedHex(in); 202 default: return new byte[0]; 203 } 204 } 205 206 /** 207 * Wraps the specified input object inside a reader. 208 * 209 * <p> 210 * Subclasses can override this method to implement their own readers. 211 * 212 * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null. 213 * @throws IOException If object could not be converted to a reader. 214 */ 215 public Reader getReader() throws IOException { 216 if (input == null) 217 return null; 218 219 if (input instanceof Reader) { 220 if (debug) { 221 inputString = read((Reader)input); 222 reader = new StringReader(inputString); 223 } else { 224 reader = (Reader)input; 225 doClose = autoCloseStreams; 226 } 227 } else if (input instanceof CharSequence) { 228 inputString = input.toString(); 229 reader = new ParserReader(this); 230 doClose = false; 231 } else if (input instanceof InputStream || input instanceof byte[]) { 232 doClose = input instanceof InputStream && autoCloseStreams; 233 InputStream is = ( 234 input instanceof InputStream 235 ? (InputStream)input 236 : new ByteArrayInputStream((byte[])input) 237 ); 238 CharsetDecoder cd = charset.newDecoder(); 239 if (strict) { 240 cd.onMalformedInput(CodingErrorAction.REPORT); 241 cd.onUnmappableCharacter(CodingErrorAction.REPORT); 242 } else { 243 cd.onMalformedInput(CodingErrorAction.REPLACE); 244 cd.onUnmappableCharacter(CodingErrorAction.REPLACE); 245 } 246 reader = new InputStreamReader(is, cd); 247 if (debug) { 248 inputString = read(reader); 249 reader = new StringReader(inputString); 250 } 251 } else if (input instanceof File) { 252 CharsetDecoder cd = charset.newDecoder(); 253 if (strict) { 254 cd.onMalformedInput(CodingErrorAction.REPORT); 255 cd.onUnmappableCharacter(CodingErrorAction.REPORT); 256 } else { 257 cd.onMalformedInput(CodingErrorAction.REPLACE); 258 cd.onUnmappableCharacter(CodingErrorAction.REPLACE); 259 } 260 reader = new InputStreamReader(new FileInputStream((File)input), cd); 261 if (debug) { 262 inputString = read(reader); 263 reader = new StringReader(inputString); 264 } 265 doClose = true; 266 } else { 267 throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream."); 268 } 269 270 return reader; 271 } 272 273 /** 274 * Returns the contents of this pipe as a buffered reader. 275 * 276 * <p> 277 * If the reader passed into this pipe is already a buffered reader, that reader will be returned. 278 * 279 * @return The contents of this pipe as a buffered reader. 280 * @throws IOException Thrown by underlying stream. 281 */ 282 public Reader getBufferedReader() throws IOException { 283 return toBufferedReader(getReader()); 284 } 285 286 /** 287 * Returns the input to this parser as a plain string. 288 * 289 * <p> 290 * This method only returns a value if {@link org.apache.juneau.Context.Builder#debug()} is enabled. 291 * 292 * @return The input as a string, or <jk>null</jk> if debug mode not enabled. 293 */ 294 public String getInputAsString() { 295 return inputString; 296 } 297 298 /** 299 * Returns the contents of this pipe as a string. 300 * 301 * @return The contents of this pipe as a string. 302 * @throws IOException If thrown from inner reader. 303 */ 304 public String asString() throws IOException { 305 if (inputString == null) 306 inputString = read(getReader()); 307 return inputString; 308 } 309 310 /** 311 * Converts this pipe into a {@link ParserReader}. 312 * 313 * @return The converted pipe. 314 * @throws IOException Thrown by underlying stream. 315 */ 316 public ParserReader getParserReader() throws IOException { 317 if (input == null) 318 return null; 319 if (input instanceof ParserReader) 320 parserReader = (ParserReader)input; 321 else 322 parserReader = new ParserReader(this); 323 return parserReader; 324 } 325 326 /** 327 * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}. 328 * 329 * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}. 330 */ 331 public boolean isString() { 332 return inputString != null; 333 } 334 335 /** 336 * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe. 337 * 338 * <p> 339 * Used for gathering the failure position when {@link ParseException} is thrown. 340 * 341 * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe. 342 */ 343 public void setPositionable(Positionable positionable) { 344 this.positionable = positionable; 345 } 346 347 Position getPosition() { 348 if (positionable == null) 349 return Position.UNKNOWN; 350 Position p = positionable.getPosition(); 351 if (p == null) 352 return Position.UNKNOWN; 353 return p; 354 } 355 356 @Override /* Closeable */ 357 public void close() { 358 try { 359 if (doClose) 360 IOUtils.close(reader, inputStream); 361 } catch (IOException e) { 362 throw new BeanRuntimeException(e); 363 } 364 } 365}