/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.juneau.uon;

import static org.apache.juneau.commons.utils.ThrowableUtils.*;

import java.io.*;

import org.apache.juneau.parser.*;

/**
 * Same functionality as {@link ParserReader} except automatically decodes <c>%xx</c> escape sequences.
 *
 * <p>
 * Escape sequences are assumed to be encoded UTF-8. Extended Unicode (code points above U+FFFF) is supported
 * and is emitted as a surrogate pair.
 *
 * <p>
 * If decoding is enabled, the following character replacements occur so that boundaries are not lost:
 * <ul>
 *     <li><js>'&amp;'</js> -&gt; the control character U+0001
 *     <li><js>'='</js> -&gt; the control character U+0002
 * </ul>
 * In addition, <js>'+'</js> is decoded to a space.
 *
 * <h5 class='section'>See Also:</h5><ul>
 *     <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/UonBasics">UON Basics</a>
 * </ul>
 */
@SuppressWarnings("resource")
public class UonReader extends ParserReader {

    /** Upper bound on the size of the buffer holding raw (still encoded) input. */
    private static final int MAX_BUFF_SIZE = 1024;

    /**
     * Lower bound on the size of the raw input buffer.
     *
     * <p>
     * The longest escape handled here is a 4-byte UTF-8 sequence written as four <c>%xx</c> triplets (12 chars).
     * Keeping the buffer at least this large guarantees that an escape that is only partially buffered always
     * leaves room to request more input, so end-of-input can be detected.
     */
    private static final int MIN_BUFF_SIZE = 12;

    /**
     * Converts a single hexadecimal digit to its numeric value.
     *
     * @param c The character to convert (<c>0-9</c>, <c>a-f</c> or <c>A-F</c>).
     * @return The value in the range 0-15.
     * @throws IOException If the character is not a hexadecimal digit.
     */
    private static int fromHexChar(int c) throws IOException {
        if (c >= '0' && c <= '9')
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return 10 + c - 'a';
        if (c >= 'A' && c <= 'F')
            return 10 + c - 'A';
        // Pass the character itself so the message shows it rather than its numeric code.
        throw ioex("Invalid hex character ''{0}'' found in escape pattern.", (char)c);
    }

    private final boolean decodeChars;

    // Raw input read from the parent reader that has not been decoded yet.
    private final char[] buff;

    // Writable properties.
    // buff[iCurrent..iEnd) is the part of the raw buffer that still has to be decoded.
    private int iCurrent, iEnd;

    /**
     * Constructor.
     *
     * @param pipe The parser input.
     * @param decodeChars Whether the input is URL-encoded.
     * @throws IOException Thrown by underlying stream.
     */
    public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException {
        super(pipe);
        this.decodeChars = decodeChars;
        var size = MAX_BUFF_SIZE;
        if (pipe.isString()) {
            // Short string inputs get a smaller buffer, but never smaller than one complete escape.
            size = Math.max(MIN_BUFF_SIZE, Math.min(pipe.getInputAsString().length(), MAX_BUFF_SIZE));
        }
        this.buff = new char[size];
    }

    /**
     * Reads characters into the specified array, decoding URL-encoding on the fly when decoding is enabled.
     *
     * <p>
     * When decoding is disabled this simply delegates to the parent reader.
     * Otherwise raw input is pulled into an internal buffer and decoded: <js>'+'</js> becomes a space,
     * <js>'&amp;'</js> and <js>'='</js> become the control characters U+0001 and U+0002, and <c>%xx</c>
     * sequences are decoded as UTF-8.
     *
     * <p>
     * An escape that is not completely buffered yet is left in the buffer for the next call.
     * A code point above U+FFFF needs two free positions in <c>cbuf</c>; if only one is left, decoding stops
     * before that escape.  Both situations can produce a return value smaller than <c>len</c>.
     *
     * @param cbuf Destination buffer.
     * @param off Offset at which to start storing characters.
     * @param len Maximum number of characters to store.
     * @return The number of characters stored, or <c>-1</c> if the end of the input was reached.
     * @throws IOException If the input contains a malformed or truncated escape sequence, or if thrown by
     *     the underlying stream.
     */
    @Override /* Overridden from Reader */
    public int read(char[] cbuf, int off, int len) throws IOException {

        if (! decodeChars)
            return super.read(cbuf, off, len);

        // Copy any remainder to the beginning of the buffer.
        var remainder = iEnd - iCurrent;
        if (remainder > 0)
            System.arraycopy(buff, iCurrent, buff, 0, remainder);
        iCurrent = 0;

        var x = super.read(buff, remainder, buff.length - remainder);
        var eof = x == -1;

        // Update the end marker before the end-of-input check so that a later call does not
        // mistake already-consumed buffer contents for pending input.
        iEnd = remainder + (eof ? 0 : x);
        if (eof && remainder == 0)
            return -1;

        var i = 0;
        while (i < len && iCurrent < iEnd) {
            var c = buff[iCurrent++];
            if (c == '+') {
                cbuf[off + i++] = ' ';
            } else if (c == '&') {
                cbuf[off + i++] = '\u0001';
            } else if (c == '=') {
                cbuf[off + i++] = '\u0002';
            } else if (c != '%') {
                cbuf[off + i++] = c;
            } else {
                var iMark = iCurrent - 1;  // Position of the '%' so the escape can be re-read later.

                var cx = readCodePoint(eof);

                // Stop if the escape is not fully buffered yet, or if the decoded character needs
                // two chars (surrogate pair) and only one position is left in cbuf.
                if (cx == -1 || Character.charCount(cx) > len - i) {
                    iCurrent = iMark;
                    return i;
                }

                i += Character.toChars(cx, cbuf, off + i);
            }
        }
        return i;
    }

    @Override /* Overridden from ParserReader */
    public UonReader unread() throws IOException {
        super.unread();
        return this;
    }

    /**
     * Decodes the escape sequence whose leading <js>'%'</js> has just been consumed from the buffer.
     *
     * @param eof Whether the parent reader has reported end-of-input, i.e. no more raw characters will arrive.
     * @return
     *     The decoded code point, or <c>-1</c> if the escape is not completely buffered yet.
     *     In the latter case the caller is responsible for rewinding <c>iCurrent</c>.
     * @throws IOException If the escape is malformed, or is truncated by the end of the input.
     */
    private int readCodePoint(boolean eof) throws IOException {

        // The two hex digits of the first byte must be available.
        if (iCurrent + 2 > iEnd) {
            if (eof)
                throw ioex("Incomplete trailing escape pattern");
            return -1;
        }

        var b0 = readEncodedByte();
        int cx;

        if (b0 < 128) {
            // 0xxxxxxx
            return b0;

        } else if (b0 < 192) {
            // 10xxxxxx
            throw ioex("Invalid hex value for first escape pattern in UTF-8 sequence: {0}", b0);

        } else if (b0 < 224) {
            // 110xxxxx 10xxxxxx
            // 11000000(192) - 11011111(223)
            cx = readUTF8(b0 - 192, 1);

        } else if (b0 < 240) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            // 11100000(224) - 11101111(239)
            cx = readUTF8(b0 - 224, 2);

        } else if (b0 < 248) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            // 11110000(240) - 11110111(247)
            cx = readUTF8(b0 - 240, 3);

        } else
            throw ioex("Invalid hex value for first escape pattern in UTF-8 sequence: {0}", b0);

        // Continuation bytes are missing and no more input will arrive.
        if (cx == -1 && eof)
            throw ioex("Incomplete trailing escape pattern");

        // A 4-byte sequence can encode values beyond the last Unicode code point.
        if (cx > Character.MAX_CODE_POINT)
            throw ioex("Invalid code point U+{0} in UTF-8 escape sequence.", Integer.toHexString(cx));

        return cx;
    }

    /**
     * Reads two hex digits from the buffer and combines them into a byte value.
     *
     * @return The byte value in the range 0-255.
     * @throws IOException If fewer than two characters are buffered or a character is not a hex digit.
     */
    private int readEncodedByte() throws IOException {
        if (iEnd <= iCurrent + 1)
            throw ioex("Incomplete trailing escape pattern");
        int h = buff[iCurrent++];
        int l = buff[iCurrent++];
        h = fromHexChar(h);
        l = fromHexChar(l);
        return (h << 4) + l;
    }

    /**
     * Reads a complete <c>%xx</c> triplet from the buffer.
     *
     * @return The byte value in the range 0-255.
     * @throws IOException If the next character is not <js>'%'</js> or the hex digits are invalid.
     */
    private int readHex() throws IOException {
        var c = buff[iCurrent++];
        if (c != '%')
            throw ioex("Did not find expected '%' character in UTF-8 sequence.");
        return readEncodedByte();
    }

    /**
     * Reads the continuation bytes of a multi-byte UTF-8 sequence and appends their payload bits.
     *
     * @param n The payload bits taken from the first byte of the sequence.
     * @param numBytes The number of continuation bytes (<c>%xx</c> triplets) to read.
     * @return The combined value, or <c>-1</c> if the buffer does not hold all continuation triplets yet.
     * @throws IOException If a triplet is malformed or is not a UTF-8 continuation byte.
     */
    private int readUTF8(int n, int numBytes) throws IOException {
        if (iCurrent + numBytes * 3 > iEnd)
            return -1;
        for (var i = 0; i < numBytes; i++) {
            var b = readHex();
            // Continuation bytes must have the form 10xxxxxx.
            if ((b & 0xC0) != 0x80)
                throw ioex("Invalid hex value for continuation escape pattern in UTF-8 sequence: {0}", b);
            n = (n << 6) | (b & 0x3F);
        }
        return n;
    }
}