001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.uon; 014 015import java.io.*; 016 017import org.apache.juneau.parser.*; 018 019/** 020 * Same functionality as {@link ParserReader} except automatically decoded <c>%xx</c> escape sequences. 021 * 022 * <p> 023 * Escape sequences are assumed to be encoded UTF-8. Extended Unicode (>\u10000) is supported. 024 * 025 * <p> 026 * If decoding is enabled, the following character replacements occur so that boundaries are not lost: 027 * <ul> 028 * <li><js>'&'</js> -> <js>'\u0001'</js> 029 * <li><js>'='</js> -> <js>'\u0002'</js> 030 * </ul> 031 * 032 * <h5 class='section'>See Also:</h5><ul> 033 * <li class='link'><a class="doclink" href="../../../../index.html#jm.UonDetails">UON Details</a> 034 035 * </ul> 036 */ 037public final class UonReader extends ParserReader { 038 039 private final boolean decodeChars; 040 private final char[] buff; 041 042 // Writable properties. 043 private int iCurrent, iEnd; 044 045 046 /** 047 * Constructor. 048 * 049 * @param pipe The parser input. 050 * @param decodeChars Whether the input is URL-encoded. 051 * @throws IOException Thrown by underlying stream. 052 */ 053 public UonReader(ParserPipe pipe, boolean decodeChars) throws IOException { 054 super(pipe); 055 this.decodeChars = decodeChars; 056 if (pipe.isString()) { 057 String in = pipe.getInputAsString(); 058 this.buff = new char[in.length() < 1024 ? in.length() : 1024]; 059 } else { 060 this.buff = new char[1024]; 061 } 062 } 063 064 @Override /* Reader */ 065 public int read(char[] cbuf, int off, int len) throws IOException { 066 067 if (! decodeChars) 068 return super.read(cbuf, off, len); 069 070 // Copy any remainder to the beginning of the buffer. 071 int remainder = iEnd - iCurrent; 072 if (remainder > 0) 073 System.arraycopy(buff, iCurrent, buff, 0, remainder); 074 iCurrent = 0; 075 076 int expected = buff.length - remainder; 077 078 int x = super.read(buff, remainder, expected); 079 if (x == -1 && remainder == 0) 080 return -1; 081 082 iEnd = remainder + (x == -1 ? 0 : x); 083 084 int i = 0; 085 while (i < len) { 086 if (iCurrent >= iEnd) 087 return i; 088 char c = buff[iCurrent++]; 089 if (c == '+') { 090 cbuf[off + i++] = ' '; 091 } else if (c == '&') { 092 cbuf[off + i++] = '\u0001'; 093 } else if (c == '=') { 094 cbuf[off + i++] = '\u0002'; 095 } else if (c != '%') { 096 cbuf[off + i++] = c; 097 } else { 098 int iMark = iCurrent-1; // Keep track of current position. 099 100 // Stop if there aren't at least two more characters following '%' in the buffer, 101 // or there aren't at least two more positions open in cbuf to handle double-char chars. 102 if (iMark+2 >= iEnd || i+2 > len) { 103 iCurrent--; 104 return i; 105 } 106 107 int b0 = readEncodedByte(); 108 int cx; 109 110 // 0xxxxxxx 111 if (b0 < 128) { 112 cx = b0; 113 114 // 10xxxxxx 115 } else if (b0 < 192) { 116 throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: "+b0); 117 118 // 110xxxxx 10xxxxxx 119 // 11000000(192) - 11011111(223) 120 } else if (b0 < 224) { 121 cx = readUTF8(b0-192, 1); 122 if (cx == -1) { 123 iCurrent = iMark; 124 return i; 125 } 126 127 // 1110xxxx 10xxxxxx 10xxxxxx 128 // 11100000(224) - 11101111(239) 129 } else if (b0 < 240) { 130 cx = readUTF8(b0-224, 2); 131 if (cx == -1) { 132 iCurrent = iMark; 133 return i; 134 } 135 136 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 137 // 11110000(240) - 11110111(247) 138 } else if (b0 < 248) { 139 cx = readUTF8(b0-240, 3); 140 if (cx == -1) { 141 iCurrent = iMark; 142 return i; 143 } 144 145 } else 146 throw new IOException("Invalid hex value for first escape pattern in UTF-8 sequence: "+b0); 147 148 if (cx < 0x10000) 149 cbuf[off + i++] = (char)cx; 150 else { 151 cx -= 0x10000; 152 cbuf[off + i++] = (char)(0xd800 + (cx >> 10)); 153 cbuf[off + i++] = (char)(0xdc00 + (cx & 0x3ff)); 154 } 155 } 156 } 157 return i; 158 } 159 160 private int readUTF8(int n, final int numBytes) throws IOException { 161 if (iCurrent + numBytes*3 > iEnd) 162 return -1; 163 for (int i = 0; i < numBytes; i++) { 164 n <<= 6; 165 n += readHex()-128; 166 } 167 return n; 168 } 169 170 private int readHex() throws IOException { 171 int c = buff[iCurrent++]; 172 if (c != '%') 173 throw new IOException("Did not find expected '%' character in UTF-8 sequence."); 174 return readEncodedByte(); 175 } 176 177 private int readEncodedByte() throws IOException { 178 if (iEnd <= iCurrent + 1) 179 throw new IOException("Incomplete trailing escape pattern"); 180 int h = buff[iCurrent++]; 181 int l = buff[iCurrent++]; 182 h = fromHexChar(h); 183 l = fromHexChar(l); 184 return (h << 4) + l; 185 } 186 187 private static int fromHexChar(int c) throws IOException { 188 if (c >= '0' && c <= '9') 189 return c - '0'; 190 if (c >= 'a' && c <= 'f') 191 return 10 + c - 'a'; 192 if (c >= 'A' && c <= 'F') 193 return 10 + c - 'A'; 194 throw new IOException("Invalid hex character '"+c+"' found in escape pattern."); 195 } 196 197 @Override /* ParserReader */ 198 public UonReader unread() throws IOException { 199 super.unread(); 200 return this; 201 } 202}