001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.html; 018 019import static javax.xml.stream.XMLStreamConstants.*; 020import static org.apache.juneau.commons.utils.AssertionUtils.*; 021import static org.apache.juneau.commons.utils.CollectionUtils.*; 022import static org.apache.juneau.commons.utils.StringUtils.*; 023import static org.apache.juneau.commons.utils.Utils.*; 024import static org.apache.juneau.html.HtmlTag.*; 025 026import java.io.*; 027import java.lang.reflect.*; 028import java.nio.charset.*; 029import java.util.*; 030import java.util.function.*; 031 032import javax.xml.stream.*; 033 034import org.apache.juneau.*; 035import org.apache.juneau.collections.*; 036import org.apache.juneau.commons.lang.*; 037import org.apache.juneau.commons.reflect.*; 038import org.apache.juneau.html.annotation.*; 039import org.apache.juneau.httppart.*; 040import org.apache.juneau.parser.*; 041import org.apache.juneau.swap.*; 042import org.apache.juneau.xml.*; 043 044/** 045 * ContextSession object that lives for the duration of a single use of {@link HtmlParser}. 046 * 047 * <h5 class='section'>Notes:</h5><ul> 048 * <li class='warn'>This class is not thread safe and is typically discarded after one use. 049 * </ul> 050 * 051 * <h5 class='section'>See Also:</h5><ul> 052 * <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/HtmlBasics">HTML Basics</a> 053 054 * </ul> 055 */ 056@SuppressWarnings({ "unchecked", "rawtypes" }) 057public class HtmlParserSession extends XmlParserSession { 058 /** 059 * Builder class. 060 */ 061 public static class Builder extends XmlParserSession.Builder { 062 063 private HtmlParser ctx; 064 065 /** 066 * Constructor 067 * 068 * @param ctx The context creating this session. 069 * <br>Cannot be <jk>null</jk>. 070 */ 071 protected Builder(HtmlParser ctx) { 072 super(assertArgNotNull("ctx", ctx)); 073 this.ctx = ctx; 074 } 075 076 @Override /* Overridden from Builder */ 077 public <T> Builder apply(Class<T> type, Consumer<T> apply) { 078 super.apply(type, apply); 079 return this; 080 } 081 082 @Override 083 public HtmlParserSession build() { 084 return new HtmlParserSession(this); 085 } 086 087 @Override /* Overridden from Builder */ 088 public Builder debug(Boolean value) { 089 super.debug(value); 090 return this; 091 } 092 093 @Override /* Overridden from Builder */ 094 public Builder fileCharset(Charset value) { 095 super.fileCharset(value); 096 return this; 097 } 098 099 @Override /* Overridden from Builder */ 100 public Builder javaMethod(Method value) { 101 super.javaMethod(value); 102 return this; 103 } 104 105 @Override /* Overridden from Builder */ 106 public Builder locale(Locale value) { 107 super.locale(value); 108 return this; 109 } 110 111 @Override /* Overridden from Builder */ 112 public Builder mediaType(MediaType value) { 113 super.mediaType(value); 114 return this; 115 } 116 117 @Override /* Overridden from Builder */ 118 public Builder mediaTypeDefault(MediaType value) { 119 super.mediaTypeDefault(value); 120 return this; 121 } 122 123 @Override /* Overridden from Builder */ 124 public Builder outer(Object value) { 125 super.outer(value); 126 return this; 127 } 128 129 @Override /* Overridden from Builder */ 130 public Builder properties(Map<String,Object> value) { 131 super.properties(value); 132 return this; 133 } 134 135 @Override /* Overridden from Builder */ 136 public Builder property(String key, Object value) { 137 super.property(key, value); 138 return this; 139 } 140 141 @Override /* Overridden from Builder */ 142 public Builder schema(HttpPartSchema value) { 143 super.schema(value); 144 return this; 145 } 146 147 @Override /* Overridden from Builder */ 148 public Builder schemaDefault(HttpPartSchema value) { 149 super.schemaDefault(value); 150 return this; 151 } 152 153 @Override /* Overridden from Builder */ 154 public Builder streamCharset(Charset value) { 155 super.streamCharset(value); 156 return this; 157 } 158 159 @Override /* Overridden from Builder */ 160 public Builder timeZone(TimeZone value) { 161 super.timeZone(value); 162 return this; 163 } 164 165 @Override /* Overridden from Builder */ 166 public Builder timeZoneDefault(TimeZone value) { 167 super.timeZoneDefault(value); 168 return this; 169 } 170 171 @Override /* Overridden from Builder */ 172 public Builder unmodifiable() { 173 super.unmodifiable(); 174 return this; 175 } 176 } 177 178 private static final Set<String> whitespaceElements = set("br", "bs", "sp", "ff"); 179 180 /** 181 * Creates a new builder for this object. 182 * 183 * @param ctx The context creating this session. 184 * <br>Cannot be <jk>null</jk>. 185 * @return A new builder. 186 */ 187 public static Builder create(HtmlParser ctx) { 188 return new Builder(assertArgNotNull("ctx", ctx)); 189 } 190 191 private static String getAttribute(XmlReader r, String name, String def) { 192 for (var i = 0; i < r.getAttributeCount(); i++) 193 if (r.getAttributeLocalName(i).equals(name)) 194 return r.getAttributeValue(i); 195 return def; 196 } 197 198 private static Map<String,String> getAttributes(XmlReader r) { 199 var m = new TreeMap<String,String>(); 200 for (var i = 0; i < r.getAttributeCount(); i++) 201 m.put(r.getAttributeLocalName(i), r.getAttributeValue(i)); 202 return m; 203 } 204 205 private static int skipWs(XmlReader r) throws XMLStreamException { 206 int event = r.getEventType(); 207 while (event != START_ELEMENT && event != END_ELEMENT && event != END_DOCUMENT && r.isWhiteSpace()) 208 event = r.next(); 209 return event; 210 } 211 212 private final HtmlParser ctx; 213 214 /** 215 * Constructor. 216 * 217 * @param builder The builder for this object. 218 */ 219 protected HtmlParserSession(Builder builder) { 220 super(builder); 221 ctx = builder.ctx; 222 } 223 224 /* 225 * Reads the next tag. Advances past anything that's not a start or end tag. Throws an exception if 226 * it's not one of the expected tags. 227 * Precondition: Must be pointing before the event we want to parse. 228 * Postcondition: Pointing at the tag just parsed. 229 */ 230 private HtmlTag nextTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 231 int et = r.next(); 232 233 while (et != START_ELEMENT && et != END_ELEMENT && et != END_DOCUMENT) 234 et = r.next(); 235 236 if (et == END_DOCUMENT) 237 throw new ParseException(this, "Unexpected end of document."); 238 239 var tag = HtmlTag.forEvent(this, r); 240 if (expected.length == 0) 241 return tag; 242 for (var t : expected) 243 if (t == tag) 244 return tag; 245 246 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 247 } 248 249 /* 250 * Reads an anchor tag and converts it into a bean. 251 */ 252 private <T> T parseAnchor(XmlReader r, ClassMeta<T> beanType) throws IOException, ParseException, XMLStreamException { 253 String href = r.getAttributeValue(null, "href"); 254 String name = getElementText(r); 255 if (nn(beanType) && getAnnotationProvider().has(HtmlLink.class, beanType)) { 256 var uriProperty = Value.<String>empty(); 257 var nameProperty = Value.<String>empty(); 258 beanType.forEachAnnotation(HtmlLink.class, x -> ne(x.uriProperty()), x -> uriProperty.set(x.uriProperty())); 259 beanType.forEachAnnotation(HtmlLink.class, x -> ne(x.nameProperty()), x -> nameProperty.set(x.nameProperty())); 260 BeanMap<T> m = newBeanMap(beanType.inner()); 261 m.put(uriProperty.orElse(""), href); 262 m.put(nameProperty.orElse(""), name); 263 return m.getBean(); 264 } 265 return convertToType(href, beanType); 266 } 267 268 /* 269 * Reads anything starting at the current event. 270 * <p> 271 * Precondition: Must be pointing at outer START_ELEMENT. 272 * Postcondition: Pointing at outer END_ELEMENT. 273 */ 274 private <T> T parseAnything(ClassMeta<T> eType, XmlReader r, Object outer, boolean isRoot, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 275 276 if (eType == null) 277 eType = (ClassMeta<T>)object(); 278 var swap = (ObjectSwap<T,Object>)eType.getSwap(this); 279 var builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this); 280 var sType = (ClassMeta<?>)null; 281 if (nn(builder)) 282 sType = builder.getBuilderClassMeta(this); 283 else if (nn(swap)) 284 sType = swap.getSwapClassMeta(this); 285 else 286 sType = eType; 287 288 if (sType.isOptional()) 289 return (T)opt(parseAnything(eType.getElementType(), r, outer, isRoot, pMeta)); 290 291 setCurrentClass(sType); 292 293 int event = r.getEventType(); 294 if (event != START_ELEMENT) 295 throw new ParseException(this, "parseAnything must be called on outer start element."); 296 297 if (! isRoot) 298 event = r.next(); 299 var isEmpty = (event == END_ELEMENT); 300 301 // Skip until we find a start element, end document, or non-empty text. 302 if (! isEmpty) 303 event = skipWs(r); 304 305 if (event == END_DOCUMENT) 306 throw new ParseException(this, "Unexpected end of stream in parseAnything for type ''{0}''", eType); 307 308 // Handle @Html(asXml=true) beans. 309 var hcm = getHtmlClassMeta(sType); 310 if (hcm.getFormat() == HtmlFormat.XML) 311 return super.parseAnything(eType, null, r, outer, false, pMeta); 312 313 var o = (Object)null; 314 315 var isValid = true; 316 var tag = (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 317 318 // If it's not a known tag, then parse it as XML. 319 // Allows us to parse stuff like "<div/>" into HTML5 beans. 320 if (tag == null && event != CHARACTERS) 321 return super.parseAnything(eType, null, r, outer, false, pMeta); 322 323 if (tag == HTML) 324 tag = skipToData(r); 325 326 if (isEmpty) { 327 o = ""; 328 } else if (tag == null || tag.isOneOf(BR, BS, FF, SP)) { 329 String text = parseText(r); 330 if (sType.isObject() || sType.isCharSequence()) 331 o = text; 332 else if (sType.isChar()) 333 o = parseCharacter(text); 334 else if (sType.isBoolean()) 335 o = Boolean.parseBoolean(text); 336 else if (sType.isNumber()) 337 o = parseNumber(text, (Class<? extends Number>)eType.inner()); 338 else if (sType.canCreateNewInstanceFromString(outer)) 339 o = sType.newInstanceFromString(outer, text); 340 else 341 isValid = false; 342 343 } else if (tag == STRING || (tag == A && nn(pMeta) && nn(getHtmlBeanPropertyMeta(pMeta).getLink()))) { 344 String text = getElementText(r); 345 if (sType.isObject() || sType.isCharSequence()) 346 o = text; 347 else if (sType.isChar()) 348 o = parseCharacter(text); 349 else if (sType.canCreateNewInstanceFromString(outer)) 350 o = sType.newInstanceFromString(outer, text); 351 else 352 isValid = false; 353 skipTag(r, tag == STRING ? xSTRING : xA); 354 355 } else if (tag == NUMBER) { 356 String text = getElementText(r); 357 if (sType.isObject()) 358 o = parseNumber(text, Number.class); 359 else if (sType.isNumber()) 360 o = parseNumber(text, (Class<? extends Number>)sType.inner()); 361 else 362 isValid = false; 363 skipTag(r, xNUMBER); 364 365 } else if (tag == BOOLEAN) { 366 String text = getElementText(r); 367 if (sType.isObject() || sType.isBoolean()) 368 o = Boolean.parseBoolean(text); 369 else 370 isValid = false; 371 skipTag(r, xBOOLEAN); 372 373 } else if (tag == P) { 374 String text = getElementText(r); 375 if (! "No Results".equals(text)) 376 isValid = false; 377 skipTag(r, xP); 378 379 } else if (tag == NULL) { 380 skipTag(r, NULL); 381 skipTag(r, xNULL); 382 383 } else if (tag == A) { 384 o = parseAnchor(r, swap == null ? eType : null); 385 skipTag(r, xA); 386 387 } else if (tag == TABLE) { 388 389 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "object"); 390 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 391 392 if (nn(cm)) { 393 sType = eType = cm; 394 typeName = sType.isCollectionOrArray() ? "array" : "object"; 395 } else if (! "array".equals(typeName)) { 396 // Type name could be a subtype name. 397 typeName = sType.isCollectionOrArray() ? "array" : "object"; 398 } 399 400 if (typeName.equals("object")) { 401 if (sType.isObject()) { 402 o = parseIntoMap(r, newGenericMap(sType), sType.getKeyType(), sType.getValueType(), pMeta); 403 } else if (sType.isMap()) { 404 o = parseIntoMap(r, (Map)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) : newGenericMap(sType)), sType.getKeyType(), sType.getValueType(), pMeta); 405 } else if (nn(builder)) { 406 BeanMap m = toBeanMap(builder.create(this, eType)); 407 o = builder.build(this, parseIntoBean(r, m).getBean(), eType); 408 } else if (sType.canCreateNewBean(outer)) { 409 BeanMap m = newBeanMap(outer, sType.inner()); 410 o = parseIntoBean(r, m).getBean(); 411 } else if (nn(sType.getProxyInvocationHandler())) { 412 BeanMap m = newBeanMap(outer, sType.inner()); 413 o = parseIntoBean(r, m).getBean(); 414 } else { 415 isValid = false; 416 } 417 skipTag(r, xTABLE); 418 419 } else if (typeName.equals("array")) { 420 if (sType.isObject()) 421 o = parseTableIntoCollection(r, (Collection)new JsonList(this), sType, pMeta); 422 else if (sType.isCollection()) 423 o = parseTableIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta); 424 else if (sType.isArray() || sType.isArgs()) { 425 var l = (ArrayList)parseTableIntoCollection(r, list(), sType, pMeta); 426 o = toArray(sType, l); 427 } else 428 isValid = false; 429 skipTag(r, xTABLE); 430 431 } else { 432 isValid = false; 433 } 434 435 } else if (tag == UL) { 436 String typeName = getAttribute(r, getBeanTypePropertyName(eType), "array"); 437 ClassMeta cm = getClassMeta(typeName, pMeta, eType); 438 if (nn(cm)) 439 sType = eType = cm; 440 441 if (sType.isObject()) 442 o = parseIntoCollection(r, new JsonList(this), sType, pMeta); 443 else if (sType.isCollection() || sType.isObject()) 444 o = parseIntoCollection(r, (Collection)(sType.canCreateNewInstance(outer) ? sType.newInstance(outer) : new JsonList(this)), sType, pMeta); 445 else if (sType.isArray() || sType.isArgs()) 446 o = toArray(sType, parseIntoCollection(r, list(), sType, pMeta)); 447 else 448 isValid = false; 449 skipTag(r, xUL); 450 451 } 452 453 if (! isValid) 454 throw new ParseException(this, "Unexpected tag ''{0}'' for type ''{1}''", tag, eType); 455 456 if (nn(swap) && nn(o)) 457 o = unswap(swap, o, eType); 458 459 if (nn(outer)) 460 setParent(eType, o, outer); 461 462 skipWs(r); 463 return (T)o; 464 } 465 466 /* 467 * Reads contents of <table> element. 468 * Precondition: Must be pointing at event following <table> event. 469 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 470 */ 471 private <T> BeanMap<T> parseIntoBean(XmlReader r, BeanMap<T> m) throws IOException, ParseException, ExecutableException, XMLStreamException { 472 while (true) { 473 HtmlTag tag = nextTag(r, TR, xTABLE); 474 if (tag == xTABLE) 475 break; 476 tag = nextTag(r, TD, TH); 477 // Skip over the column headers. 478 if (tag == TH) { 479 skipTag(r); 480 r.nextTag(); 481 skipTag(r); 482 } else { 483 String key = getElementText(r); 484 nextTag(r, TD); 485 var pMeta = m.getPropertyMeta(key); 486 if (pMeta == null) { 487 onUnknownProperty(key, m, parseAnything(object(), r, null, false, null)); 488 } else { 489 var cm = pMeta.getClassMeta(); 490 Object value = parseAnything(cm, r, m.getBean(false), false, pMeta); 491 setName(cm, value, key); 492 try { 493 pMeta.set(m, key, value); 494 } catch (BeanRuntimeException e) { 495 onBeanSetterException(pMeta, e); 496 throw e; 497 } 498 } 499 } 500 HtmlTag t = nextTag(r, xTD, xTR); 501 if (t == xTD) 502 nextTag(r, xTR); 503 } 504 return m; 505 } 506 507 /* 508 * Reads contents of <ul> element. 509 * Precondition: Must be pointing at event following <ul> event. 510 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 511 */ 512 private <E> Collection<E> parseIntoCollection(XmlReader r, Collection<E> l, ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException, XMLStreamException { 513 int argIndex = 0; 514 while (true) { 515 HtmlTag tag = nextTag(r, LI, xUL, xLI); 516 if (tag == xLI) 517 tag = nextTag(r, LI, xUL, xLI); 518 if (tag == xUL) 519 break; 520 var elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 521 l.add((E)parseAnything(elementType, r, l, false, pMeta)); 522 } 523 return l; 524 } 525 526 /* 527 * Reads contents of <table> element. 528 * Precondition: Must be pointing at <table> event. 529 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 530 */ 531 private <K,V> Map<K,V> parseIntoMap(XmlReader r, Map<K,V> m, ClassMeta<K> keyType, ClassMeta<V> valueType, BeanPropertyMeta pMeta) 532 throws IOException, ParseException, ExecutableException, XMLStreamException { 533 while (true) { 534 HtmlTag tag = nextTag(r, TR, xTABLE); 535 if (tag == xTABLE) 536 break; 537 tag = nextTag(r, TD, TH); 538 // Skip over the column headers. 539 if (tag == TH) { 540 skipTag(r); 541 r.nextTag(); 542 skipTag(r); 543 } else { 544 K key = parseAnything(keyType, r, m, false, pMeta); 545 nextTag(r, TD); 546 V value = parseAnything(valueType, r, m, false, pMeta); 547 setName(valueType, value, key); 548 m.put(key, value); 549 } 550 tag = nextTag(r, xTD, xTR); 551 if (tag == xTD) 552 nextTag(r, xTR); 553 } 554 555 return m; 556 } 557 558 /* 559 * Reads contents of <ul> element. 560 * Precondition: Must be pointing at event following <ul> event. 561 * Postcondition: Pointing at next START_ELEMENT or END_DOCUMENT event. 562 */ 563 private <E> Collection<E> parseTableIntoCollection(XmlReader r, Collection<E> l, ClassMeta<E> type, BeanPropertyMeta pMeta) 564 throws IOException, ParseException, ExecutableException, XMLStreamException { 565 566 HtmlTag tag = nextTag(r, TR); 567 List<String> keys = list(); 568 while (true) { 569 tag = nextTag(r, TH, xTR); 570 if (tag == xTR) 571 break; 572 keys.add(getElementText(r)); 573 } 574 575 int argIndex = 0; 576 577 while (true) { 578 r.nextTag(); 579 tag = HtmlTag.forEvent(this, r); 580 if (tag == xTABLE) 581 break; 582 583 var elementType = (ClassMeta)null; 584 String beanType = getAttribute(r, getBeanTypePropertyName(type), null); 585 if (nn(beanType)) 586 elementType = getClassMeta(beanType, pMeta, null); 587 if (elementType == null) 588 elementType = type.isArgs() ? type.getArg(argIndex++) : type.getElementType(); 589 if (elementType == null) 590 elementType = object(); 591 592 BuilderSwap<E,Object> builder = elementType.getBuilderSwap(this); 593 594 if (nn(builder) || elementType.canCreateNewBean(l)) { 595 // @formatter:off 596 BeanMap m = 597 nn(builder) 598 ? toBeanMap(builder.create(this, elementType)) 599 : newBeanMap(l, elementType.inner()) 600 ; 601 // @formatter:on 602 for (var key : keys) { 603 tag = nextTag(r, xTD, TD, NULL); 604 if (tag == xTD) 605 tag = nextTag(r, TD, NULL); 606 if (tag == NULL) { 607 m = null; 608 nextTag(r, xNULL); 609 break; 610 } 611 BeanMapEntry e = m.getProperty(key); 612 if (e == null) { 613 parseAnything(object(), r, l, false, null); 614 } else { 615 BeanPropertyMeta bpm = e.getMeta(); 616 var cm = bpm.getClassMeta(); 617 Object value = parseAnything(cm, r, m.getBean(false), false, bpm); 618 setName(cm, value, key); 619 bpm.set(m, key, value); 620 } 621 } 622 // @formatter:off 623 l.add( 624 m == null 625 ? null 626 : nn(builder) 627 ? builder.build(this, m.getBean(), elementType) 628 : (E)m.getBean() 629 ); 630 // @formatter:on 631 } else { 632 String c = getAttributes(r).get(getBeanTypePropertyName(type.getElementType())); 633 var m = (Map)(elementType.isMap() && elementType.canCreateNewInstance(l) ? elementType.newInstance(l) : newGenericMap(elementType)); 634 for (var key : keys) { 635 tag = nextTag(r, TD, NULL); 636 if (tag == NULL) { 637 m = null; 638 nextTag(r, xNULL); 639 break; 640 } 641 if (nn(m)) { 642 var kt = elementType.getKeyType(); 643 var vt = elementType.getValueType(); 644 Object value = parseAnything(vt, r, l, false, pMeta); 645 setName(vt, value, key); 646 m.put(convertToType(key, kt), value); 647 } 648 } 649 if (nn(m) && nn(c)) { 650 var m2 = (m instanceof JsonMap ? (JsonMap)m : new JsonMap(m).session(this)); 651 m2.put(getBeanTypePropertyName(type.getElementType()), c); 652 l.add((E)cast(m2, pMeta, elementType)); 653 } else { 654 if (m instanceof JsonMap m2) 655 l.add((E)convertToType(m2, elementType)); 656 else 657 l.add((E)m); 658 } 659 } 660 nextTag(r, xTR); 661 } 662 return l; 663 } 664 665 /* 666 * Skips over the current element and advances to the next element. 667 * <p> 668 * Precondition: Pointing to opening tag. 669 * Postcondition: Pointing to next opening tag. 670 * 671 * @param r The stream being read from. 672 * @throws XMLStreamException 673 */ 674 private void skipTag(XmlReader r) throws ParseException, XMLStreamException { 675 int et = r.getEventType(); 676 677 if (et != START_ELEMENT) 678 throw new ParseException(this, "skipToNextTag() call on invalid event ''{0}''. Must only be called on START_ELEMENT events.", XmlUtils.toReadableEvent(r)); 679 680 String n = r.getLocalName(); 681 682 int depth = 0; 683 while (true) { 684 et = r.next(); 685 if (et == START_ELEMENT) { 686 String n2 = r.getLocalName(); 687 if (n.equals(n2)) 688 depth++; 689 } else if (et == END_ELEMENT) { 690 String n2 = r.getLocalName(); 691 if (n.equals(n2)) 692 depth--; 693 if (depth < 0) 694 return; 695 } 696 } 697 } 698 699 private void skipTag(XmlReader r, HtmlTag...expected) throws ParseException, XMLStreamException { 700 var tag = HtmlTag.forEvent(this, r); 701 if (tag.isOneOf(expected)) 702 r.next(); 703 else 704 throw new ParseException(this, "Unexpected tag: ''{0}''. Expected one of the following: {1}", tag, expected); 705 } 706 707 /* 708 * For parsing output from HtmlDocSerializer, this skips over the head, title, and links. 709 */ 710 private HtmlTag skipToData(XmlReader r) throws ParseException, XMLStreamException { 711 while (true) { 712 var event = r.next(); 713 if (event == START_ELEMENT && "div".equals(r.getLocalName()) && "data".equals(r.getAttributeValue(null, "id"))) { 714 r.nextTag(); 715 event = r.getEventType(); 716 var isEmpty = (event == END_ELEMENT); 717 // Skip until we find a start element, end document, or non-empty text. 718 if (! isEmpty) 719 event = skipWs(r); 720 if (event == END_DOCUMENT) 721 throw new ParseException(this, "Unexpected end of stream looking for data."); 722 return (event == CHARACTERS ? null : HtmlTag.forString(r.getName().getLocalPart(), false)); 723 } 724 } 725 } 726 727 @Override /* Overridden from ParserSession */ 728 protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException { 729 try { 730 return parseAnything(type, getXmlReader(pipe), getOuter(), true, null); 731 } catch (XMLStreamException e) { 732 throw new ParseException(e); 733 } 734 } 735 736 @Override /* Overridden from ReaderParserSession */ 737 protected <E> Collection<E> doParseIntoCollection(ParserPipe pipe, Collection<E> c, Type elementType) throws Exception { 738 return parseIntoCollection(getXmlReader(pipe), c, getClassMeta(elementType), null); 739 } 740 741 @Override /* Overridden from ReaderParserSession */ 742 protected <K,V> Map<K,V> doParseIntoMap(ParserPipe pipe, Map<K,V> m, Type keyType, Type valueType) throws Exception { 743 return parseIntoMap(getXmlReader(pipe), m, (ClassMeta<K>)getClassMeta(keyType), (ClassMeta<V>)getClassMeta(valueType), null); 744 } 745 746 /** 747 * Identical to {@link #parseText(XmlReader)} except assumes the current event is the opening tag. 748 * 749 * <p> 750 * Precondition: Pointing to opening tag. 751 * Postcondition: Pointing to closing tag. 752 * 753 * @param r The stream being read from. 754 * @return The parsed string. 755 * @throws XMLStreamException Thrown by underlying XML stream. 756 * @throws ParseException Malformed input encountered. 757 */ 758 @Override /* Overridden from XmlParserSession */ 759 protected String getElementText(XmlReader r) throws IOException, XMLStreamException, ParseException { 760 r.next(); 761 return parseText(r); 762 } 763 764 /** 765 * Returns the language-specific metadata on the specified bean property. 766 * 767 * @param bpm The bean property to return the metadata on. 768 * @return The metadata. 769 */ 770 protected HtmlBeanPropertyMeta getHtmlBeanPropertyMeta(BeanPropertyMeta bpm) { 771 return ctx.getHtmlBeanPropertyMeta(bpm); 772 } 773 774 /** 775 * Returns the language-specific metadata on the specified class. 776 * 777 * @param cm The class to return the metadata on. 778 * @return The metadata. 779 */ 780 protected HtmlClassMeta getHtmlClassMeta(ClassMeta<?> cm) { 781 return ctx.getHtmlClassMeta(cm); 782 } 783 784 @Override /* Overridden from XmlParserSession */ 785 protected boolean isWhitespaceElement(XmlReader r) { 786 String s = r.getLocalName(); 787 return whitespaceElements.contains(s); 788 } 789 790 /** 791 * Parses CHARACTERS data. 792 * 793 * <p> 794 * Precondition: Pointing to event immediately following opening tag. 795 * Postcondition: Pointing to closing tag. 796 * 797 * @param r The stream being read from. 798 * @return The parsed string. 799 * @throws XMLStreamException Thrown by underlying XML stream. 800 */ 801 @Override /* Overridden from XmlParserSession */ 802 protected String parseText(XmlReader r) throws IOException, ParseException, XMLStreamException { 803 804 StringBuilder sb = getStringBuilder(); 805 806 int et = r.getEventType(); 807 if (et == END_ELEMENT) 808 return ""; 809 810 int depth = 0; 811 812 var characters = (String)null; 813 814 while (true) { 815 if (et == START_ELEMENT) { 816 if (nn(characters)) { 817 if (sb.length() == 0) 818 characters = trimStart(characters); 819 sb.append(characters); 820 characters = null; 821 } 822 var tag = HtmlTag.forEvent(this, r); 823 if (tag == BR) { 824 sb.append('\n'); 825 r.nextTag(); 826 } else if (tag == BS) { 827 sb.append('\b'); 828 r.nextTag(); 829 } else if (tag == SP) { 830 et = r.next(); 831 if (et == CHARACTERS) { 832 String s = r.getText(); 833 if (ne(s)) { 834 var c = r.getText().charAt(0); 835 if (c == '\u2003') 836 c = '\t'; 837 sb.append(c); 838 } 839 r.nextTag(); 840 } 841 } else if (tag == FF) { 842 sb.append('\f'); 843 r.nextTag(); 844 } else if (tag.isOneOf(STRING, NUMBER, BOOLEAN)) { 845 et = r.next(); 846 if (et == CHARACTERS) { 847 sb.append(r.getText()); 848 r.nextTag(); 849 } 850 } else { 851 sb.append('<').append(r.getLocalName()); 852 for (var i = 0; i < r.getAttributeCount(); i++) 853 sb.append(' ').append(r.getAttributeName(i)).append('=').append('\'').append(r.getAttributeValue(i)).append('\''); 854 sb.append('>'); 855 depth++; 856 } 857 } else if (et == END_ELEMENT) { 858 if (nn(characters)) { 859 if (sb.length() == 0) 860 characters = trimStart(characters); 861 if (depth == 0) 862 characters = trimEnd(characters); 863 sb.append(characters); 864 characters = null; 865 } 866 if (depth == 0) 867 break; 868 sb.append('<').append(r.getLocalName()).append('>'); 869 depth--; 870 } else if (et == CHARACTERS) { 871 characters = r.getText(); 872 } 873 et = r.next(); 874 } 875 876 String s = trim(sb.toString()); 877 returnStringBuilder(sb); 878 return s; 879 } 880 881 @Override /* Overridden from XmlParserSession */ 882 protected String parseWhitespaceElement(XmlReader r) throws IOException, ParseException, XMLStreamException { 883 884 var tag = HtmlTag.forEvent(this, r); 885 int et = r.next(); 886 if (tag == BR) { 887 return "\n"; 888 } else if (tag == BS) { 889 return "\b"; 890 } else if (tag == FF) { 891 return "\f"; 892 } else if (tag == SP) { 893 if (et == CHARACTERS) { 894 String s = r.getText(); 895 if (s.charAt(0) == '\u2003') 896 s = "\t"; 897 r.next(); 898 return decodeString(s); 899 } 900 return ""; 901 } else { 902 throw new ParseException(this, "Invalid tag found in parseWhitespaceElement(): ''{0}''", tag); 903 } 904 } 905}