001// *************************************************************************************************************************** 002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file * 003// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * 004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance * 005// * with the License. You may obtain a copy of the License at * 006// * * 007// * http://www.apache.org/licenses/LICENSE-2.0 * 008// * * 009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an * 010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * 011// * specific language governing permissions and limitations under the License. * 012// *************************************************************************************************************************** 013package org.apache.juneau.html; 014 015import static javax.xml.stream.XMLStreamConstants.*; 016import static org.apache.juneau.common.internal.StringUtils.*; 017import static org.apache.juneau.html.HtmlTag.*; 018import static org.apache.juneau.internal.CollectionUtils.*; 019 020import java.io.IOException; 021import java.lang.reflect.*; 022import java.nio.charset.*; 023import java.util.*; 024import java.util.function.*; 025 026import javax.xml.stream.*; 027 028import org.apache.juneau.*; 029import org.apache.juneau.collections.*; 030import org.apache.juneau.html.annotation.*; 031import org.apache.juneau.httppart.*; 032import org.apache.juneau.internal.*; 033import org.apache.juneau.parser.*; 034import org.apache.juneau.swap.*; 035import org.apache.juneau.xml.*; 036 037/** 038 * ContextSession object that lives for the duration of a single use of {@link HtmlParser}. 039 * 040 * <h5 class='section'>Notes:</h5><ul> 041 * <li class='warn'>This class is not thread safe and is typically discarded after one use. 042 * </ul> 043 * 044 * <h5 class='section'>See Also:</h5><ul> 045 * <li class='link'><a class="doclink" href="../../../../index.html#jm.HtmlDetails">HTML Details</a> 046 047 * </ul> 048 */ 049@SuppressWarnings({ "unchecked", "rawtypes" }) 050public final class HtmlParserSession extends XmlParserSession { 051 052 //------------------------------------------------------------------------------------------------------------------- 053 // Static 054 //------------------------------------------------------------------------------------------------------------------- 055 056 private static final Set<String> whitespaceElements = set("br","bs","sp","ff"); 057 058 /** 059 * Creates a new builder for this object. 060 * 061 * @param ctx The context creating this session. 062 * @return A new builder. 063 */ 064 public static Builder create(HtmlParser ctx) { 065 return new Builder(ctx); 066 } 067 068 //------------------------------------------------------------------------------------------------------------------- 069 // Builder 070 //------------------------------------------------------------------------------------------------------------------- 071 072 /** 073 * Builder class. 074 */ 075 @FluentSetters 076 public static class Builder extends XmlParserSession.Builder { 077 078 HtmlParser ctx; 079 080 /** 081 * Constructor 082 * 083 * @param ctx The context creating this session. 084 */ 085 protected Builder(HtmlParser ctx) { 086 super(ctx); 087 this.ctx = ctx; 088 } 089 090 @Override 091 public HtmlParserSession build() { 092 return new HtmlParserSession(this); 093 } 094 095 // <FluentSetters> 096 097 @Override /* GENERATED - org.apache.juneau.ContextSession.Builder */ 098 public <T> Builder apply(Class<T> type, Consumer<T> apply) { 099 super.apply(type, apply); 100 return this; 101 } 102 103 @Override /* GENERATED - org.apache.juneau.ContextSession.Builder */ 104 public Builder debug(Boolean value) { 105 super.debug(value); 106 return this; 107 } 108 109 @Override /* GENERATED - org.apache.juneau.ContextSession.Builder */ 110 public Builder properties(Map<String,Object> value) { 111 super.properties(value); 112 return this; 113 } 114 115 @Override /* GENERATED - org.apache.juneau.ContextSession.Builder */ 116 public Builder property(String key, Object value) { 117 super.property(key, value); 118 return this; 119 } 120 121 @Override /* GENERATED - org.apache.juneau.ContextSession.Builder */ 122 public Builder unmodifiable() { 123 super.unmodifiable(); 124 return this; 125 } 126 127 @Override /* GENERATED - org.apache.juneau.BeanSession.Builder */ 128 public Builder locale(Locale value) { 129 super.locale(value); 130 return this; 131 } 132 133 @Override /* GENERATED - org.apache.juneau.BeanSession.Builder */ 134 public Builder localeDefault(Locale value) { 135 super.localeDefault(value); 136 return this; 137 } 138 139 @Override /* GENERATED - org.apache.juneau.BeanSession.Builder */ 140 public Builder mediaType(MediaType value) { 141 super.mediaType(value); 142 return this; 143 } 144 145 @Override /* GENERATED - org.apache.juneau.BeanSession.Builder */ 146 public Builder mediaTypeDefault(MediaType value) { 147 super.mediaTypeDefault(value); 148 return this; 149 } 150 151 @Override /* GENERATED - org.apache.juneau.BeanSession.Builder */ 152 public Builder timeZone(TimeZone value) { 153 super.timeZone(value); 154 return this; 155 } 156 157 @Override /* GENERATED - org.apache.juneau.BeanSession.Builder */ 158 public Builder timeZoneDefault(TimeZone value) { 159 super.timeZoneDefault(value); 160 return this; 161 } 162 163 @Override /* GENERATED - org.apache.juneau.parser.ParserSession.Builder */ 164 public Builder javaMethod(Method value) { 165 super.javaMethod(value); 166 return this; 167 } 168 169 @Override /* GENERATED - org.apache.juneau.parser.ParserSession.Builder */ 170 public Builder outer(Object value) { 171 super.outer(value); 172 return this; 173 } 174 175 @Override /* GENERATED - org.apache.juneau.parser.ParserSession.Builder */ 176 public Builder schema(HttpPartSchema value) { 177 super.schema(value); 178 return this; 179 } 180 181 @Override /* GENERATED - org.apache.juneau.parser.ParserSession.Builder */ 182 public Builder schemaDefault(HttpPartSchema value) { 183 super.schemaDefault(value); 184 return this; 185 } 186 187 @Override /* GENERATED - org.apache.juneau.parser.ReaderParserSession.Builder */ 188 public Builder fileCharset(Charset value) { 189 super.fileCharset(value); 190 return this; 191 } 192 193 @Override /* GENERATED - org.apache.juneau.parser.ReaderParserSession.Builder */ 194 public Builder streamCharset(Charset value) { 195 super.streamCharset(value); 196 return this; 197 } 198 199 // </FluentSetters> 200 } 201 202 //------------------------------------------------------------------------------------------------------------------- 203 // Instance 204 //------------------------------------------------------------------------------------------------------------------- 205 206 private final HtmlParser ctx; 207 208 /** 209 * Constructor. 210 * 211 * @param builder The builder for this object. 212 */ 213 protected HtmlParserSession(Builder builder) { 214 super(builder); 215 ctx = builder.ctx; 216 } 217 218 @Override /* ParserSession */ 219 protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException { 220 try { 221 return parseAnything(type, getXmlReader(pipe), getOuter(), true, null); 222 } catch (XMLStreamException e) { 223 throw new ParseException(e); 224 } 225 } 226 227 @Override /* ReaderParserSession */ 228 protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) 229 throws Exception { 230 return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), 231 (ClassMeta<V>)getClassMeta(valueType), null); 232 } 233 234 @Override /* ReaderParserSession */ 235 protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) 236 throws Exception { 237 return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null); 238 } 239 240 /* 241 * Reads anything starting at the current event. 242 * <p> 243 * Precondition: Must be pointing at outer START_ELEMENT. 244 * Postcondition: Pointing at outer END_ELEMENT. 245 */ 246 private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 247 248 if (eType == null) 249 eType = (ClassMeta<T>)object(); 250 ObjectSwap<T,Object> swap = (ObjectSwap<T,Object>)eType.getSwap(this); 251 BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this); 252 ClassMeta<?> sType = null; 253 if (builder != null) 254 sType = builder.getBuilderClassMeta(this); 255 else if (swap != null) 256 sType = swap.getSwapClassMeta(this); 257 else 258 sType = eType; 259 260 if (sType.isOptional()) 261 return (T)optional(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta)); 262 263 setCurrentClass(sType); 264 265 int event = r.getEventType(); 266 if (event != START_ELEMENT) 267 throw new ParseException(this, "parseAnything must be called on outer start element."); 268 269 if (! isRoot) 270 event = r.next(); 271 boolean isEmpty = (event == END_ELEMENT); 272 273 // Skip until we find a start element, end document, or non-empty text. 274 if (! isEmpty) 275 event = skipWs(r); 276 277 if (event == END_DOCUMENT) 278 throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType); 279 280 // Handle @Html(asXml=true) beans. 281 HtmlClassMeta hcm = getHtmlClassMeta(sType); 282 if (hcm.getFormat() == HtmlFormat.XML) 283 return super.parseAnything(eType, null, r, outer, false, pMeta); 284 285 Object o = null; 286 287 boolean isValid = true; 288 HtmlTag tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 289 290 // If it's not a known tag, then parse it as XML. 291 // Allows us to parse stuff like "<div/>" into HTML5 beans. 292 if (tag == null && event != CHARACTERS) 293 return super.parseAnything(eType, null, r, outer, false, pMeta); 294 295 if (tag == HTML) 296 tag = skipToData(r); 297 298 if (isEmpty) { 299 o = ""; 300 } else if (tag == null || tag.isOneOf(BR,BS,FF,SP)) { 301 String text = parseText(r); 302 if (sType.isObject() || sType.isCharSequence()) 303 o = text; 304 else if (sType.isChar()) 305 o = parseCharacter(text); 306 else if (sType.isBoolean()) 307 o = Boolean.parseBoolean(text); 308 else if (sType.isNumber()) 309 o = parseNumber(text, (Class<? extends Number>)eType.getInnerClass()); 310 else if (sType.canCreateNewInstanceFromString(outer)) 311 o = sType.newInstanceFromString(outer, text); 312 else 313 isValid = false; 314 315 } else if (tag == STRING || (tag == A && pMeta != null && getHtmlBeanPropertyMeta(pMeta).getLink() != null)) { 316 String text = getElementText(r); 317 if (sType.isObject() || sType.isCharSequence()) 318 o = text; 319 else if (sType.isChar()) 320 o = parseCharacter(text); 321 else if (sType.canCreateNewInstanceFromString(outer)) 322 o = sType.newInstanceFromString(outer, text); 323 else 324 isValid = false; 325 skipTag(r, tag == STRING ? xSTRING : xA); 326 327 } else if (tag == NUMBER) { 328 String text = getElementText(r); 329 if (sType.isObject()) 330 o = parseNumber(text, Number.class); 331 else if (sType.isNumber()) 332 o = parseNumber(text, (Class<? extends Number>)sType.getInnerClass()); 333 else 334 isValid = false; 335 skipTag(r, xNUMBER); 336 337 } else if (tag == BOOLEAN) { 338 String text = getElementText(r); 339 if (sType.isObject() || sType.isBoolean()) 340 o = Boolean.parseBoolean(text); 341 else 342 isValid = false; 343 skipTag(r, xBOOLEAN); 344 345 } else if (tag == P) { 346 String text = getElementText(r); 347 if (! "No Results".equals(text)) 348 isValid = false; 349 skipTag(r, xP); 350 351 } else if (tag == NULL) { 352 skipTag(r, NULL); 353 skipTag(r, xNULL); 354 355 } else if (tag == A) { 356 o = parseAnchor(r, eType); 357 skipTag(r, xA); 358 359 } else if (tag == TABLE) { 360 361 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object"); 362 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 363 364 if (cm != null) { 365 sType = eType = cm; 366 typeName = sType.isCollectionOrArray() ? "array" : "object"; 367 } else if (! "array".equals(typeName)) { 368 // Type name could be a subtype name. 369 typeName = sType.isCollectionOrArray() ? "array" : "object"; 370 } 371 372 if (typeName.equals("object")) { 373 if (sType.isObject()) { 374 o = parseIntoMap(r, newGenericMap(sType), sType.getKeyType(), sType.getValueType(), 375 pMeta); 376 } else if (sType.isMap()) { 377 o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) 378 : newGenericMap(sType)), sType.getKeyType(), sType.getValueType(), pMeta); 379 } else if (builder != null) { 380 BeanMap m = toBeanMap(builder.create(this, eType)); 381 o = builder.build(this, parseIntoBean(r, m).getBean(), eType); 382 } else if (sType.canCreateNewBean(outer)) { 383 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 384 o = parseIntoBean(r, m).getBean(); 385 } else if (sType.getProxyInvocationHandler() != null) { 386 BeanMap m = newBeanMap(outer, sType.getInnerClass()); 387 o = parseIntoBean(r, m).getBean(); 388 } else { 389 isValid = false; 390 } 391 skipTag(r, xTABLE); 392 393 } else if (typeName.equals("array")) { 394 if (sType.isObject()) 395 o = parseTableIntoCollection(r, (Collection)new JsonList(this), sType, pMeta); 396 else if (sType.isCollection()) 397 o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 398 ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta); 399 else if (sType.isArray() || sType.isArgs()) { 400 ArrayList l = (ArrayList)parseTableIntoCollection(r, list(), sType, pMeta); 401 o = toArray(sType, l); 402 } 403 else 404 isValid = false; 405 skipTag(r, xTABLE); 406 407 } else { 408 isValid = false; 409 } 410 411 } else if (tag == UL) { 412 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array"); 413 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 414 if (cm != null) 415 sType = eType = cm; 416 417 if (sType.isObject()) 418 o = parseIntoCollection(r, new JsonList(this), sType, pMeta); 419 else if (sType.isCollection() || sType.isObject()) 420 o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) 421 ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta); 422 else if (sType.isArray() || sType.isArgs()) 423 o = toArray(sType, parseIntoCollection(r, list(), sType, pMeta)); 424 else 425 isValid = false; 426 skipTag(r, xUL); 427 428 } 429 430 if (! isValid) 431 throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType); 432 433 if (swap != null && o != null) 434 o = unswap(swap, o, eType); 435 436 if (outer != null) 437 setParent(eType, o, outer); 438 439 skipWs(r); 440 return (T)o; 441 } 442 443 /* 444 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links. 445 */ 446 private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException { 447 while (true) { 448 int event = r.next(); 449 if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) { 450 r.nextTag(); 451 event = r.getEventType(); 452 boolean isEmpty = (event == END_ELEMENT); 453 // Skip until we find a start element, end document, or non-empty text. 454 if (! isEmpty) 455 event = skipWs(r); 456 if (event == END_DOCUMENT) 457 throw new ParseException(this, "Unexpected end of stream looking for data."); 458 return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 459 } 460 } 461 } 462 463 private static String getAttribute(XmlReader r, String name, String def) { 464 for (int i = 0; i < r.getAttributeCount(); i++) 465 if (r.getAttributeLocalName(i).equals(name)) 466 return r.getAttributeValue(i); 467 return def; 468 } 469 470 /* 471 * Reads an anchor tag and converts it into a bean. 472 */ 473 private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) 474 throws IOException, ParseException, XMLStreamException { 475 String href = r.getAttributeValue(null, "href"); 476 String name = getElementText(r); 477 if (beanType.hasAnnotation(HtmlLink.class)) { 478 Value<String> uriProperty = Value.empty(), nameProperty = Value.empty(); 479 beanType.forEachAnnotation(HtmlLink.class, x -> isNotEmpty(x.uriProperty()), x -> uriProperty.set(x.uriProperty())); 480 beanType.forEachAnnotation(HtmlLink.class, x -> isNotEmpty(x.nameProperty()), x -> nameProperty.set(x.nameProperty())); 481 BeanMap<T> m = newBeanMap(beanType.getInnerClass()); 482 m.put(uriProperty.orElse(""), href); 483 m.put(nameProperty.orElse(""), name); 484 return m.getBean(); 485 } 486 return convertToType(href, beanType); 487 } 488 489 private static Map<String,String> getAttributes(XmlReader r) { 490 Map<String,String> m = new TreeMap<>() ; 491 for (int i = 0; i < r.getAttributeCount(); i++) 492 m.put(r.getAttributeLocalName(i), r.getAttributeValue(i)); 493 return m; 494 } 495 496 /* 497 * Reads contents of <table> element. 498 * Precondition: Must be pointing at <table> event. 499 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 500 */ 501 private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, 502 ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 503 while (true) { 504 HtmlTag tag = nextTag(r, TR, xTABLE); 505 if (tag == xTABLE) 506 break; 507 tag = nextTag(r, TD, TH); 508 // Skip over the column headers. 509 if (tag == TH) { 510 skipTag(r); 511 r.nextTag(); 512 skipTag(r); 513 } else { 514 K key = parseAnything(keyType, r, m, false, pMeta); 515 nextTag(r, TD); 516 V value = parseAnything(valueType, r, m, false, pMeta); 517 setName(valueType, value, key); 518 m.put(key, value); 519 } 520 tag = nextTag(r, xTD, xTR); 521 if (tag == xTD) 522 nextTag(r, xTR); 523 } 524 525 return m; 526 } 527 528 /* 529 * Reads contents of <ul> element. 530 * Precondition: Must be pointing at event following <ul> event. 531 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 532 */ 533 private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, 534 ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 535 int argIndex = 0; 536 while (true) { 537 HtmlTag tag = nextTag(r, LI, xUL, xLI); 538 if (tag == xLI) 539 tag = nextTag(r, LI, xUL, xLI); 540 if (tag == xUL) 541 break; 542 ClassMeta<?> elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 543 l.add((E)parseAnything(elementType, r, l, false, pMeta)); 544 } 545 return l; 546 } 547 548 /* 549 * Reads contents of <ul> element. 550 * Precondition: Must be pointing at event following <ul> event. 551 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 552 */ 553 private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, 554 ClassMeta<E> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 555 556 HtmlTag tag = nextTag(r, TR); 557 List<String> keys = list(); 558 while (true) { 559 tag = nextTag(r, TH, xTR); 560 if (tag == xTR) 561 break; 562 keys.add(getElementText(r)); 563 } 564 565 int argIndex = 0; 566 567 while (true) { 568 r.nextTag(); 569 tag = HtmlTag.forEvent(this, r); 570 if (tag == xTABLE) 571 break; 572 573 ClassMeta elementType = null; 574 String beanType = getAttribute(r, getBeanTypePropertyName(type), null); 575 if (beanType != null) 576 elementType = getClassMeta(beanType, pMeta, null); 577 if (elementType == null) 578 elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 579 if (elementType == null) 580 elementType = object(); 581 582 BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this); 583 584 if (builder != null || elementType.canCreateNewBean(l)) { 585 BeanMap m = 586 builder != null 587 ? toBeanMap(builder.create(this, elementType)) 588 : newBeanMap(l, elementType.getInnerClass()) 589 ; 590 for (int i = 0; i < keys.size(); i++) { 591 tag = nextTag(r, xTD, TD, NULL); 592 if (tag == xTD) 593 tag = nextTag(r, TD, NULL); 594 if (tag == NULL) { 595 m = null; 596 nextTag(r, xNULL); 597 break; 598 } 599 String key = keys.get(i); 600 BeanMapEntry e = m.getProperty(key); 601 if (e == null) { 602 //onUnknownProperty(key, m, -1, -1); 603 parseAnything(object(), r, l, false, null); 604 } else { 605 BeanPropertyMeta bpm = e.getMeta(); 606 ClassMeta<?> cm = bpm.getClassMeta(); 607 Object value = parseAnything(cm, r, m.getBean(false), false, bpm); 608 setName(cm, value, key); 609 bpm.set(m, key, value); 610 } 611 } 612 l.add( 613 m == null 614 ? null 615 : builder != null 616 ? builder.build(this, m.getBean(), elementType) 617 : (E)m.getBean() 618 ); 619 } else { 620 String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType())); 621 Map m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) 622 : newGenericMap(elementType)); 623 for (int i = 0; i < keys.size(); i++) { 624 tag = nextTag(r, TD, NULL); 625 if (tag == NULL) { 626 m = null; 627 nextTag(r, xNULL); 628 break; 629 } 630 String key = keys.get(i); 631 if (m != null) { 632 ClassMeta<?> kt = elementType.getKeyType(), vt = elementType.getValueType(); 633 Object value = parseAnything(vt, r, l, false, pMeta); 634 setName(vt, value, key); 635 m.put(convertToType(key, kt), value); 636 } 637 } 638 if (m != null && c != null) { 639 JsonMap m2 = (m instanceof JsonMap ? (JsonMap)m : new JsonMap(m).session(this)); 640 m2.put(getBeanTypePropertyName(type.getElementType()), c); 641 l.add((E)cast(m2, pMeta, elementType)); 642 } else { 643 if (m instanceof JsonMap) 644 l.add((E)convertToType(m, elementType)); 645 else 646 l.add((E)m); 647 } 648 } 649 nextTag(r, xTR); 650 } 651 return l; 652 } 653 654 /* 655 * Reads contents of <table> element. 656 * Precondition: Must be pointing at event following <table> event. 657 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 658 */ 659 private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException { 660 while (true) { 661 HtmlTag tag = nextTag(r, TR, xTABLE); 662 if (tag == xTABLE) 663 break; 664 tag = nextTag(r, TD, TH); 665 // Skip over the column headers. 666 if (tag == TH) { 667 skipTag(r); 668 r.nextTag(); 669 skipTag(r); 670 } else { 671 String key = getElementText(r); 672 nextTag(r, TD); 673 BeanPropertyMeta pMeta = m.getPropertyMeta(key); 674 if (pMeta == null) { 675 onUnknownProperty(key, m, parseAnything(object(), r, null, false, null)); 676 } else { 677 ClassMeta<?> cm = pMeta.getClassMeta(); 678 Object value = parseAnything(cm, r, m.getBean(false), false, pMeta); 679 setName(cm, value, key); 680 try { 681 pMeta.set(m, key, value); 682 } catch (BeanRuntimeException e) { 683 onBeanSetterException(pMeta, e); 684 throw e; 685 } 686 } 687 } 688 HtmlTag t = nextTag(r, xTD, xTR); 689 if (t == xTD) 690 nextTag(r, xTR); 691 } 692 return m; 693 } 694 695 /* 696 * Reads the next tag. Advances past anything that's not a start or end tag. Throws an exception if 697 * it's not one of the expected tags. 698 * Precondition: Must be pointing before the event we want to parse. 699 * Postcondition: Pointing at the tag just parsed. 700 */ 701 private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 702 int et = r.next(); 703 704 while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT) 705 et = r.next(); 706 707 if (et == END_DOCUMENT) 708 throw new ParseException(this, "Unexpected end of document."); 709 710 HtmlTag tag = HtmlTag.forEvent(this, r); 711 if (expected.length == 0) 712 return tag; 713 for (HtmlTag t : expected) 714 if (t == tag) 715 return tag; 716 717 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 718 } 719 720 /* 721 * Skips over the current element and advances to the next element. 722 * <p> 723 * Precondition: Pointing to opening tag. 724 * Postcondition: Pointing to next opening tag. 725 * 726 * @param r The stream being read from. 727 * @throws XMLStreamException 728 */ 729 private void skipTag(XmlReader r) throws ParseException, XMLStreamException { 730 int et = r.getEventType(); 731 732 if (et != START_ELEMENT) 733 throw new ParseException(this, 734 "skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.", 735 XmlUtils.toReadableEvent(r) 736 ); 737 738 String n = r.getLocalName(); 739 740 int depth = 0; 741 while (true) { 742 et = r.next(); 743 if (et == START_ELEMENT) { 744 String n2 = r.getLocalName(); 745 if (n.equals(n2)) 746 depth++; 747 } else if (et == END_ELEMENT) { 748 String n2 = r.getLocalName(); 749 if (n.equals(n2)) 750 depth--; 751 if (depth < 0) 752 return; 753 } 754 } 755 } 756 757 private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 758 HtmlTag tag = HtmlTag.forEvent(this, r); 759 if (tag.isOneOf(expected)) 760 r.next(); 761 else 762 throw new ParseException(this, 763 "Unexpected tag: ''{0}''. Expected one of the following: {1}", 764 tag, expected); 765 } 766 767 private static int skipWs(XmlReader r) throws XMLStreamException { 768 int event = r.getEventType(); 769 while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace()) 770 event = r.next(); 771 return event; 772 } 773 774 /** 775 * Parses CHARACTERS data. 776 * 777 * <p> 778 * Precondition: Pointing to event immediately following opening tag. 779 * Postcondition: Pointing to closing tag. 780 * 781 * @param r The stream being read from. 782 * @return The parsed string. 783 * @throws XMLStreamException Thrown by underlying XML stream. 784 */ 785 @Override /* XmlParserSession */ 786 protected final String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException { 787 788 StringBuilder sb = getStringBuilder(); 789 790 int et = r.getEventType(); 791 if (et == END_ELEMENT) 792 return ""; 793 794 int depth = 0; 795 796 String characters = null; 797 798 while (true) { 799 if (et == START_ELEMENT) { 800 if (characters != null) { 801 if (sb.length() == 0) 802 characters = trimStart(characters); 803 sb.append(characters); 804 characters = null; 805 } 806 HtmlTag tag = HtmlTag.forEvent(this, r); 807 if (tag == BR) { 808 sb.append('\n'); 809 r.nextTag(); 810 } else if (tag == BS) { 811 sb.append('\b'); 812 r.nextTag(); 813 } else if (tag == SP) { 814 et = r.next(); 815 if (et == CHARACTERS) { 816 String s = r.getText(); 817 if (s.length() > 0) { 818 char c = r.getText().charAt(0); 819 if (c == '\u2003') 820 c = '\t'; 821 sb.append(c); 822 } 823 r.nextTag(); 824 } 825 } else if (tag == FF) { 826 sb.append('\f'); 827 r.nextTag(); 828 } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) { 829 et = r.next(); 830 if (et == CHARACTERS) { 831 sb.append(r.getText()); 832 r.nextTag(); 833 } 834 } else { 835 sb.append('<').append(r.getLocalName()); 836 for (int i = 0; i < r.getAttributeCount(); i++) 837 sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\''); 838 sb.append('>'); 839 depth++; 840 } 841 } else if (et == END_ELEMENT) { 842 if (characters != null) { 843 if (sb.length() == 0) 844 characters = trimStart(characters); 845 if (depth == 0) 846 characters = trimEnd(characters); 847 sb.append(characters); 848 characters = null; 849 } 850 if (depth == 0) 851 break; 852 sb.append('<').append(r.getLocalName()).append('>'); 853 depth--; 854 } else if (et == CHARACTERS) { 855 characters = r.getText(); 856 } 857 et = r.next(); 858 } 859 860 String s = trim(sb.toString()); 861 returnStringBuilder(sb); 862 return s; 863 } 864 865 /** 866 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag. 867 * 868 * <p> 869 * Precondition: Pointing to opening tag. 870 * Postcondition: Pointing to closing tag. 871 * 872 * @param r The stream being read from. 873 * @return The parsed string. 874 * @throws XMLStreamException Thrown by underlying XML stream. 875 * @throws ParseException Malformed input encountered. 876 */ 877 @Override /* XmlParserSession */ 878 protected final String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException { 879 r.next(); 880 return parseText(r); 881 } 882 883 @Override /* XmlParserSession */ 884 protected final boolean isWhitespaceElement(XmlReader r) { 885 String s = r.getLocalName(); 886 return whitespaceElements.contains(s); 887 } 888 889 @Override /* XmlParserSession */ 890 protected final String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException { 891 892 HtmlTag tag = HtmlTag.forEvent(this, r); 893 int et = r.next(); 894 if (tag == BR) { 895 return "\n"; 896 } else if (tag == BS) { 897 return "\b"; 898 } else if (tag == FF) { 899 return "\f"; 900 } else if (tag == SP) { 901 if (et == CHARACTERS) { 902 String s = r.getText(); 903 if (s.charAt(0) == '\u2003') 904 s = "\t"; 905 r.next(); 906 return decodeString(s); 907 } 908 return ""; 909 } else { 910 throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag); 911 } 912 } 913 914 //----------------------------------------------------------------------------------------------------------------- 915 // Extended metadata 916 //----------------------------------------------------------------------------------------------------------------- 917 918 /** 919 * Returns the language-specific metadata on the specified class. 920 * 921 * @param cm The class to return the metadata on. 922 * @return The metadata. 923 */ 924 protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) { 925 return ctx.getHtmlClassMeta(cm); 926 } 927 928 /** 929 * Returns the language-specific metadata on the specified bean property. 930 * 931 * @param bpm The bean property to return the metadata on. 932 * @return The metadata. 933 */ 934 protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) { 935 return ctx.getHtmlBeanPropertyMeta(bpm); 936 } 937}