001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.parser;
018
019import static org.apache.juneau.commons.utils.IoUtils.*;
020import static org.apache.juneau.commons.utils.StringUtils.*;
021import static org.apache.juneau.commons.utils.ThrowableUtils.*;
022import static org.apache.juneau.commons.utils.Utils.*;
023
024import java.io.*;
025import java.nio.charset.*;
026
027import org.apache.juneau.*;
028import org.apache.juneau.commons.utils.*;
029
030/**
031 * A wrapper around an object that a parser reads its input from.
032 *
033 * <p>
034 * For character-based parsers, the input object can be any of the following:
035 * <ul>
036 *    <li>{@link Reader}
037 *    <li>{@link CharSequence}
038 *    <li>{@link InputStream}
039 *    <li><code><jk>byte</jk>[]</code>
040 *    <li>{@link File}
041 *    <li><code><jk>null</jk></code>
042 * </ul>
043 *
044 * <p>
045 * For stream-based parsers, the input object can be any of the following:
046 * <ul>
047 *    <li>{@link InputStream}
048 *    <li><code><jk>byte</jk>[]</code>
049 *    <li>{@link File}
050 *    <li>{@link String} - Hex-encoded bytes.  (not BASE-64!)
051 *    <li><code><jk>null</jk></code>
052 * </ul>
053 *
054 * <p>
055 * Note that Readers and InputStreams will NOT be automatically closed when {@link #close()} is called, but
056 * streams and readers created from other types (e.g. Files) WILL be automatically closed.
057 *
058 * <h5 class='section'>See Also:</h5><ul>
059 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/SerializersAndParsers">Serializers and Parsers</a>
060 * </ul>
061 */
062@SuppressWarnings("resource")
063public class ParserPipe implements Closeable {
064
065   private final Object input;
066   final boolean debug, strict, autoCloseStreams, unbuffered;
067   private final Charset charset;
068
069   private String inputString;
070   private InputStream inputStream;
071   private Reader reader;
072   private ParserReader parserReader;
073   private boolean doClose;
074   private BinaryFormat binaryFormat;
075   private Positionable positionable;
076
077   /**
078    * Shortcut constructor, typically for straight string input.
079    *
080    * <p>
081    * Equivalent to calling <code><jk>new</jk> ParserPipe(input, <jk>false</jk>, <jk>false</jk>, <jk>null</jk>, <jk>null</jk>);</code>
082    *
083    * @param input The input object.
084    */
085   public ParserPipe(Object input) {
086      this(input, false, false, false, false, null, null);
087   }
088
089   /**
090    * Constructor for stream-based parsers.
091    *
092    * @param input The parser input object.
093    * @param debug
094    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
095    *    method.
096    *    This allows the contents of the pipe to be accessed when a problem occurs.
097    * @param autoCloseStreams
098    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
099    * @param unbuffered
100    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
101    *    multiple times.
102    *    <br>Otherwise, we read character data into a reusable buffer.
103    * @param binaryFormat The binary format of input strings when converted to bytes.
104    */
105   public ParserPipe(Object input, boolean debug, boolean autoCloseStreams, boolean unbuffered, BinaryFormat binaryFormat) {
106      this.input = input;
107      this.debug = debug;
108      this.strict = false;
109      this.autoCloseStreams = autoCloseStreams;
110      this.unbuffered = unbuffered;
111      this.charset = null;
112      if (input instanceof CharSequence input2)
113         this.inputString = input2.toString();
114      this.binaryFormat = binaryFormat;
115   }
116
117   /**
118    * Constructor for reader-based parsers.
119    *
120    * @param input The parser input object.
121    * @param debug
122    *    If <jk>true</jk>, the input contents will be copied locally and accessible via the {@link #getInputAsString()}
123    *    method.
124    *    This allows the contents of the pipe to be accessed when a problem occurs.
125    * @param strict
126    *    If <jk>true</jk>, sets {@link CodingErrorAction#REPORT} on {@link CharsetDecoder#onMalformedInput(CodingErrorAction)}
127    *    and {@link CharsetDecoder#onUnmappableCharacter(CodingErrorAction)}.
128    *    Otherwise, sets them to {@link CodingErrorAction#REPLACE}.
129    * @param autoCloseStreams
130    *    Automatically close {@link InputStream InputStreams} and {@link Reader Readers} when passed in as input.
131    * @param unbuffered
132    *    If <jk>true</jk>, we read one character at a time from underlying readers when the readers are expected to be parsed
133    *    multiple times.
134    *    <br>Otherwise, we read character data into a reusable buffer.
135    * @param fileCharset
136    *    The charset to expect when reading from {@link File Files}.
137    * @param streamCharset
138    *    The charset to expect when reading from {@link InputStream InputStreams}.
139    */
140   public ParserPipe(Object input, boolean debug, boolean strict, boolean autoCloseStreams, boolean unbuffered, Charset streamCharset, Charset fileCharset) {
141      boolean isFile = input instanceof File;
142      this.input = input;
143      this.debug = debug;
144      this.strict = strict;
145      this.autoCloseStreams = autoCloseStreams;
146      this.unbuffered = unbuffered;
147      Charset cs = isFile ? fileCharset : streamCharset;
148      if (cs == null)
149         cs = (isFile ? Charset.defaultCharset() : UTF8);
150      this.charset = cs;
151      if (input instanceof CharSequence cs2)
152         this.inputString = cs2.toString();
153      this.binaryFormat = null;
154   }
155
156   /**
157    * Returns the contents of this pipe as a string.
158    *
159    * @return The contents of this pipe as a string.
160    * @throws IOException If thrown from inner reader.
161    */
162   public String asString() throws IOException {
163      if (inputString == null)
164         inputString = read(getReader());
165      return inputString;
166   }
167
168   @Override /* Overridden from Closeable */
169   public void close() {
170      try {
171         if (doClose)
172            IoUtils.close(reader, inputStream);
173      } catch (IOException e) {
174         throw bex(e);
175      }
176   }
177
178   /**
179    * Returns the contents of this pipe as a buffered reader.
180    *
181    * <p>
182    * If the reader passed into this pipe is already a buffered reader, that reader will be returned.
183    *
184    * @return The contents of this pipe as a buffered reader.
185    * @throws IOException Thrown by underlying stream.
186    */
187   public Reader getBufferedReader() throws IOException { return toBufferedReader(getReader()); }
188
189   /**
190    * Returns the input to this parser as a plain string.
191    *
192    * <p>
193    * This method only returns a value if {@link org.apache.juneau.Context.Builder#debug()} is enabled.
194    *
195    * @return The input as a string, or <jk>null</jk> if debug mode not enabled.
196    */
197   public String getInputAsString() { return inputString; }
198
199   /**
200    * Wraps the specified input object inside an input stream.
201    *
202    * <p>
203    * Subclasses can override this method to implement their own input streams.
204    *
205    * @return The input object wrapped in an input stream, or <jk>null</jk> if the object is null.
206    * @throws IOException If object could not be converted to an input stream.
207    */
208   public InputStream getInputStream() throws IOException {
209      if (input == null)
210         return null;
211
212      if (input instanceof InputStream input2) {
213         if (debug) {
214            var b = readBytes(input2);
215            inputString = toHex(b);
216            inputStream = new ByteArrayInputStream(b);
217         } else {
218            inputStream = input2;
219            doClose = autoCloseStreams;
220         }
221      } else if (input instanceof byte[]) {
222         if (debug)
223            inputString = toHex((byte[])input);
224         inputStream = new ByteArrayInputStream((byte[])input);
225         doClose = false;
226      } else if (input instanceof String input2) {
227         inputString = input2;
228         inputStream = new ByteArrayInputStream(convertFromString(input2));
229         doClose = false;
230      } else if (input instanceof File input2) {
231         if (debug) {
232            var b = readBytes(input2);
233            inputString = toHex(b);
234            inputStream = new ByteArrayInputStream(b);
235         } else {
236            inputStream = new FileInputStream(input2);
237            doClose = true;
238         }
239      } else {
240         throw ioex("Cannot convert object of type {0} to an InputStream.", cn(input));
241      }
242
243      return inputStream;
244   }
245
246   /**
247    * Converts this pipe into a {@link ParserReader}.
248    *
249    * @return The converted pipe.
250    * @throws IOException Thrown by underlying stream.
251    */
252   public ParserReader getParserReader() throws IOException {
253      if (input == null)
254         return null;
255      if (input instanceof ParserReader input2)
256         parserReader = input2;
257      else
258         parserReader = new ParserReader(this);
259      return parserReader;
260   }
261
262   /**
263    * Wraps the specified input object inside a reader.
264    *
265    * <p>
266    * Subclasses can override this method to implement their own readers.
267    *
268    * @return The input object wrapped in a Reader, or <jk>null</jk> if the object is null.
269    * @throws IOException If object could not be converted to a reader.
270    */
271   public Reader getReader() throws IOException {
272      if (input == null)
273         return null;
274
275      if (input instanceof Reader input2) {
276         if (debug) {
277            inputString = read(input2);
278            reader = new StringReader(inputString);
279         } else {
280            reader = input2;
281            doClose = autoCloseStreams;
282         }
283      } else if (input instanceof CharSequence input2) {
284         inputString = input2.toString();
285         reader = new ParserReader(this);
286         doClose = false;
287      } else if (input instanceof InputStream || input instanceof byte[]) {
288         doClose = input instanceof InputStream && autoCloseStreams;
289         InputStream is = (input instanceof InputStream input2 ? input2 : new ByteArrayInputStream((byte[])input));
290         CharsetDecoder cd = charset.newDecoder();
291         if (strict) {
292            cd.onMalformedInput(CodingErrorAction.REPORT);
293            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
294         } else {
295            cd.onMalformedInput(CodingErrorAction.REPLACE);
296            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
297         }
298         reader = new InputStreamReader(is, cd);
299         if (debug) {
300            inputString = read(reader);
301            reader = new StringReader(inputString);
302         }
303      } else if (input instanceof File input2) {
304         CharsetDecoder cd = charset.newDecoder();
305         if (strict) {
306            cd.onMalformedInput(CodingErrorAction.REPORT);
307            cd.onUnmappableCharacter(CodingErrorAction.REPORT);
308         } else {
309            cd.onMalformedInput(CodingErrorAction.REPLACE);
310            cd.onUnmappableCharacter(CodingErrorAction.REPLACE);
311         }
312         reader = new InputStreamReader(new FileInputStream(input2), cd);
313         if (debug) {
314            inputString = read(reader);
315            reader = new StringReader(inputString);
316         }
317         doClose = true;
318      } else {
319         throw ioex("Cannot convert object of type {0} to an InputStream.", cn(input));
320      }
321
322      return reader;
323   }
324
325   /**
326    * Returns <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
327    *
328    * @return <jk>true</jk> if the contents passed into this pipe was a {@link CharSequence}.
329    */
330   public boolean isString() { return nn(inputString); }
331
332   /**
333    * Sets the ParserReader/ParserInputStream/XmlReader constructed from this pipe.
334    *
335    * <p>
336    * Used for gathering the failure position when {@link ParseException} is thrown.
337    *
338    * @param positionable The ParserReader/ParserInputStream/XmlReader constructed from this pipe.
339    */
340   public void setPositionable(Positionable positionable) { this.positionable = positionable; }
341
342   private byte[] convertFromString(String in) {
343      return switch (binaryFormat) {
344         case BASE64 -> base64Decode(in);
345         case HEX -> fromHex(in);
346         case SPACED_HEX -> fromSpacedHex(in);
347         default -> new byte[0];
348      };
349   }
350
351   Position getPosition() {
352      if (positionable == null)
353         return Position.UNKNOWN;
354      Position p = positionable.getPosition();
355      if (p == null)
356         return Position.UNKNOWN;
357      return p;
358   }
359}