001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.parser;
014
015import static org.apache.juneau.common.internal.IOUtils.*;
016import static org.apache.juneau.common.internal.StringUtils.*;
017import static org.apache.juneau.internal.ClassUtils.*;
018
019import java.io.*;
020import java.nio.charset.*;
021
022import org.apache.juneau.*;
023import org.apache.juneau.common.internal.*;
024
025/**
026 * A wrapper around an object that a parser reads its input from.
027 *
028 * <p>
029 * For character-based parsers, the input object can be any of the following:
030 * <ul>
031 *    <li>{@link Reader}
032 *    <li>{@link CharSequence}
033 *    <li>{@link InputStream}
034 *    <li><code><jk>byte</jk>[]</code>
035 *    <li>{@link File}
036 *    <li><code><jk>null</jk></code>
037 * </ul>
038 *
039 * <p>
040 * For stream-based parsers, the input object can be any of the following:
041 * <ul>
042 *    <li>{@link InputStream}
043 *    <li><code><jk>byte</jk>[]</code>
044 *    <li>{@link File}
045 *    <li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
046 *    <li><code><jk>null</jk></code>
047 * </ul>
048 *
049 * <p>
050 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but
051 * streams and readers created from other types (e.g. Files) WILL be automatically closed.
052 *
053 * <h5 class='section'>See Also:</h5><ul>
054 *    <li class='link'><a class="doclink" href="../../../../index.html#jm.SerializersAndParsers">Serializers and Parsers</a>
055 * </ul>
056 */
057public final class ParserPipe implements Closeable {
058
059   private final Object input;
060   final boolean debug, strict, autoCloseStreams, unbuffered;
061   private final Charset charset;
062
063   private String inputString;
064   private InputStream inputStream;
065   private Reader reader;
066   private ParserReader parserReader;
067   private boolean doClose;
068   private BinaryFormat binaryFormat;
069   private Positionable positionable;
070
071   /**
072    * Constructor for reader-based parsers.
073    *
074    * @param input The parser input object.
075    * @param debug
076    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
077    *    method.
078    *    This allows the contents of the pipe to be accessed when a problem occurs.
079    * @param strict
080    *    If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
081    *    and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
082    *    Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
083    * @param autoCloseStreams
084    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
085    * @param unbuffered
086    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
087    *    multiple times.
088    *    <br>Otherwise, we read character data into a reusable buffer.
089    * @param fileCharset
090    *    The charset to expect when reading from {@link File Files}.
091    * @param streamCharset
092    *    The charset to expect when reading from {@link InputStream InputStreams}.
093    */
094   public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) {
095      boolean isFile = input instanceof File;
096      this.input = input;
097      this.debug = debug;
098      this.strict = strict;
099      this.autoCloseStreams = autoCloseStreams;
100      this.unbuffered = unbuffered;
101      Charset cs = isFile ? fileCharset : streamCharset;
102      if (cs == null)
103         cs = (isFile ? Charset.defaultCharset() : UTF8);
104      this.charset = cs;
105      if (input instanceof CharSequence)
106         this.inputString = input.toString();
107      this.binaryFormat = null;
108   }
109
110   /**
111    * Constructor for stream-based parsers.
112    *
113    * @param input The parser input object.
114    * @param debug
115    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
116    *    method.
117    *    This allows the contents of the pipe to be accessed when a problem occurs.
118    * @param autoCloseStreams
119    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
120    * @param unbuffered
121    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
122    *    multiple times.
123    *    <br>Otherwise, we read character data into a reusable buffer.
124    * @param binaryFormat The binary format of input strings when converted to bytes.
125    */
126   public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
127      this.input = input;
128      this.debug = debug;
129      this.strict = false;
130      this.autoCloseStreams = autoCloseStreams;
131      this.unbuffered = unbuffered;
132      this.charset = null;
133      if (input instanceof CharSequence)
134         this.inputString = input.toString();
135      this.binaryFormat = binaryFormat;
136   }
137
138   /**
139    * Shortcut constructor, typically for straight string input.
140    *
141    * <p>
142    * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
143    *
144    * @param input The input object.
145    */
146   public ParserPipe(Object input) {
147      this(input, false, false, false, false, null, null);
148   }
149
150   /**
151    * Wraps the specified input object inside an input stream.
152    *
153    * <p>
154    * Subclasses can override this method to implement their own input streams.
155    *
156    * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
157    * @throws IOException If object could not be converted to an input stream.
158    */
159   public InputStream getInputStream() throws IOException {
160      if (input == null)
161         return null;
162
163      if (input instanceof InputStream) {
164         if (debug) {
165            byte[] b = readBytes((InputStream)input);
166            inputString = toHex(b);
167            inputStream = new ByteArrayInputStream(b);
168         } else {
169            inputStream = (InputStream)input;
170            doClose = autoCloseStreams;
171         }
172      } else if (input instanceof byte[]) {
173         if (debug)
174            inputString = toHex((byte[])input);
175         inputStream = new ByteArrayInputStream((byte[])input);
176         doClose = false;
177      } else if (input instanceof String) {
178         inputString = (String)input;
179         inputStream = new ByteArrayInputStream(convertFromString((String)input));
180         doClose = false;
181      } else if (input instanceof File) {
182         if (debug) {
183            byte[] b = readBytes((File)input);
184            inputString = toHex(b);
185            inputStream = new ByteArrayInputStream(b);
186         } else {
187            inputStream = new FileInputStream((File)input);
188            doClose = true;
189         }
190      } else {
191         throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream.");
192      }
193
194      return inputStream;
195   }
196
197   private byte[] convertFromString(String in) {
198      switch(binaryFormat) {
199         case BASE64: return base64Decode(in);
200         case HEX: return fromHex(in);
201         case SPACED_HEX: return fromSpacedHex(in);
202         default: return new byte[0];
203      }
204   }
205
206   /**
207    * Wraps the specified input object inside a reader.
208    *
209    * <p>
210    * Subclasses can override this method to implement their own readers.
211    *
212    * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
213    * @throws IOException If object could not be converted to a reader.
214    */
215   public Reader getReader() throws IOException {
216      if (input == null)
217         return null;
218
219      if (input instanceof Reader) {
220         if (debug) {
221            inputString = read((Reader)input);
222            reader = new StringReader(inputString);
223         } else {
224            reader = (Reader)input;
225            doClose = autoCloseStreams;
226         }
227      } else if (input instanceof CharSequence) {
228         inputString = input.toString();
229         reader = new ParserReader(this);
230         doClose = false;
231      } else if (input instanceof InputStream || input instanceof byte[]) {
232         doClose = input instanceof InputStream && autoCloseStreams;
233         InputStream is = (
234            input instanceof InputStream
235            ? (InputStream)input
236            : new ByteArrayInputStream((byte[])input)
237         );
238         CharsetDecoder cd = charset.newDecoder();
239         if (strict) {
240            cd.onMalformedInput(CodingErrorAction.REPORT);
241            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
242         } else {
243            cd.onMalformedInput(CodingErrorAction.REPLACE);
244            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
245         }
246         reader = new InputStreamReader(is, cd);
247         if (debug) {
248            inputString = read(reader);
249            reader = new StringReader(inputString);
250         }
251      } else if (input instanceof File) {
252         CharsetDecoder cd = charset.newDecoder();
253         if (strict) {
254            cd.onMalformedInput(CodingErrorAction.REPORT);
255            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
256         } else {
257            cd.onMalformedInput(CodingErrorAction.REPLACE);
258            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
259         }
260         reader = new InputStreamReader(new FileInputStream((File)input), cd);
261         if (debug) {
262            inputString = read(reader);
263            reader = new StringReader(inputString);
264         }
265         doClose = true;
266      } else {
267         throw new IOException("Cannot convert object of type "+className(input)+" to an InputStream.");
268      }
269
270      return reader;
271   }
272
273   /**
274    * Returns the contents of this pipe as a buffered reader.
275    *
276    * <p>
277    * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
278    *
279    * @return The contents of this pipe as a buffered reader.
280    * @throws IOException Thrown by underlying stream.
281    */
282   public Reader getBufferedReader() throws IOException {
283      return toBufferedReader(getReader());
284   }
285
286   /**
287    * Returns the input to this parser as a plain string.
288    *
289    * <p>
290    * This method only returns a value if {@link org.apache.juneau.Context.Builder#debug()} is enabled.
291    *
292    * @return The input as a string, or <jk>null</jk> if debug mode not enabled.
293    */
294   public String getInputAsString() {
295      return inputString;
296   }
297
298   /**
299    * Returns the contents of this pipe as a string.
300    *
301    * @return The contents of this pipe as a string.
302    * @throws IOException If thrown from inner reader.
303    */
304   public String asString() throws IOException {
305      if (inputString == null)
306         inputString = read(getReader());
307      return inputString;
308   }
309
310   /**
311    * Converts this pipe into a {@link ParserReader}.
312    *
313    * @return The converted pipe.
314    * @throws IOException Thrown by underlying stream.
315    */
316   public ParserReader getParserReader() throws IOException {
317      if (input == null)
318         return null;
319      if (input instanceof ParserReader)
320         parserReader = (ParserReader)input;
321      else
322         parserReader = new ParserReader(this);
323      return parserReader;
324   }
325
326   /**
327    * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
328    *
329    * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
330    */
331   public boolean isString() {
332      return inputString != null;
333   }
334
335   /**
336    * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
337    *
338    * <p>
339    * Used for gathering the failure position when {@link ParseException} is thrown.
340    *
341    * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
342    */
343   public void setPositionable(Positionable positionable) {
344      this.positionable = positionable;
345   }
346
347   Position getPosition() {
348      if (positionable == null)
349         return Position.UNKNOWN;
350      Position p = positionable.getPosition();
351      if (p == null)
352         return Position.UNKNOWN;
353      return p;
354   }
355
356   @Override /* Closeable */
357   public void close() {
358      try {
359         if (doClose)
360            IOUtils.close(reader, inputStream);
361      } catch (IOException e) {
362         throw new BeanRuntimeException(e);
363      }
364   }
365}