/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.apache.camel.Exchange;
import org.apache.camel.InvalidPayloadException;
import org.apache.camel.converter.jaxp.StaxConverter;
import org.apache.camel.spi.NamespaceAware;
import org.apache.camel.util.IOHelper;
import org.apache.camel.util.ObjectHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * An expression that reads the message body as an XML stream and evaluates to an
 * {@link Iterator} over the elements matching a slash-separated path.
 * <p/>
 * The single character <tt>mode</tt> selects how each matched element is rendered:
 * <ul>
 *   <li><tt>i</tt> (injected, the default) - the element with the in-scope namespace
 *       declarations added to its start tag</li>
 *   <li><tt>w</tt> (wrapped) - the element wrapped in the text of its ancestor elements</li>
 *   <li><tt>u</tt> (unwrapped) - the content between the element's start and end tag</li>
 *   <li><tt>t</tt> (text) - the concatenated text found between the tags of the element</li>
 * </ul>
 * When <tt>group</tt> is greater than one, that many matched tokens are combined
 * into a single returned value.
 */
public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware {
    protected final String path;
    protected char mode;
    protected int group;
    protected Map<String, String> nsmap;

    /**
     * Creates an expression returning one matched token at a time.
     *
     * @param path the slash-separated element path, must not be empty
     * @param mode the extraction mode character
     */
    public XMLTokenExpressionIterator(String path, char mode) {
        this(path, mode, 1);
    }

    /**
     * Creates an expression returning the matched tokens in groups.
     *
     * @param path  the slash-separated element path, must not be empty
     * @param mode  the extraction mode character
     * @param group the number of tokens to combine; values below 2 are treated as 1
     */
    public XMLTokenExpressionIterator(String path, char mode, int group) {
        ObjectHelper.notEmpty(path, "path");
        this.path = path;
        this.mode = mode;
        this.group = group > 1 ? group : 1;
    }

    /**
     * Sets the prefix to namespace URI mapping used to resolve the prefixes in the path.
     */
    @Override
    public void setNamespaces(Map<String, String> nsmap) {
        this.nsmap = nsmap;
    }

    public void setMode(char mode) {
        this.mode = mode;
    }

    /**
     * Sets the mode from the first character of the given string.
     *
     * @param mode the mode; a <tt>null</tt> or empty value resets the mode to 0,
     *             which the iterator treats as the default mode
     */
    public void setMode(String mode) {
        // guard against the empty string, for which charAt(0) would throw
        this.mode = mode != null && mode.length() > 0 ? mode.charAt(0) : 0;
    }

    public int getGroup() {
        return group;
    }

    /**
     * Sets the group size. The value is stored as given; the iterator itself
     * falls back to 1 for non-positive values.
     */
    public void setGroup(int group) {
        this.group = group;
    }

    /**
     * Creates the token iterator over the given stream.
     *
     * @param in      the stream providing the XML content
     * @param charset the charset name used to decode the stream, or <tt>null</tt>
     *                to use the platform default charset
     * @return the iterator over the matched tokens
     * @throws XMLStreamException           if the XML reader cannot be set up or the first token cannot be read
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException {
        Reader reader;
        if (charset == null) {
            reader = new InputStreamReader(in);
        } else {
            reader = new InputStreamReader(in, charset);
        }
        return new XMLTokenIterator(path, nsmap, mode, group, reader);
    }

    /**
     * Creates the token iterator over the given reader.
     *
     * @param in the reader providing the XML content
     * @return the iterator over the matched tokens
     * @throws XMLStreamException if the XML reader cannot be set up or the first token cannot be read
     */
    protected Iterator<?> createIterator(Reader in) throws XMLStreamException {
        return new XMLTokenIterator(path, nsmap, mode, group, in);
    }

    @Override
    public boolean matches(Exchange exchange) {
        // as a predicate we must close the stream, as we do not return an iterator that can be used
        // afterwards to iterate the input stream
        Object value = doEvaluate(exchange, true);
        return ObjectHelper.evaluateValuePredicate(value);
    }

    @Override
    public Object evaluate(Exchange exchange) {
        // as we return an iterator to access the input stream, we should not close it
        return doEvaluate(exchange, false);
    }

    /**
     * Strategy to evaluate the exchange
     *
     * @param exchange    the exchange
     * @param closeStream whether to close the stream before returning from this method.
     * @return the evaluated value, or <tt>null</tt> if the evaluation failed, in which
     *         case the failure is set as the exception on the exchange
     */
    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
        InputStream in = null;
        try {
            in = exchange.getIn().getMandatoryBody(InputStream.class);
            String charset = IOHelper.getCharsetName(exchange);
            return createIterator(in, charset);
        } catch (InvalidPayloadException e) {
            exchange.setException(e);
            // must close input stream
            IOHelper.close(in);
            return null;
        } catch (XMLStreamException e) {
            exchange.setException(e);
            // must close input stream
            IOHelper.close(in);
            return null;
        } catch (UnsupportedEncodingException e) {
            exchange.setException(e);
            // must close input stream
            IOHelper.close(in);
            return null;
        } finally {
            if (closeStream) {
                IOHelper.close(in);
            }
        }
    }


    /**
     * Iterator that pulls events from an {@link XMLStreamReader} and returns the text
     * of each element matching the path, rendered according to the mode.
     * <p/>
     * The first token is read eagerly in the constructor and each call to
     * {@link #next()} reads one token ahead.
     */
    static class XMLTokenIterator implements Iterator<Object>, Closeable {
        private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class);
        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")");

        // the parsed path; an entry is null where the path had an empty segment (as produced by "//")
        private AttributedQName[] splitpath;
        // the position in splitpath that is currently being matched
        private int index;
        private char mode;
        private int group;
        private RecordableReader in;
        private XMLStreamReader reader;
        // the names of the currently open elements
        private List<QName> path;
        // the namespace bindings in scope for each open element (injected mode only)
        private List<Map<String, String>> namespaces;
        // the recorded text and the names of the ancestor start tags (wrapped mode only)
        private List<String> segments;
        private List<QName> segmentlog;
        // the tokens collected so far for the current group (group > 1 only)
        private List<String> tokens;
        // an event code that has been read ahead and is to be returned by the next readNext()
        private int code;
        // the character offset up to which the input text has been taken
        private int consumed;
        private boolean backtrack;
        private int trackdepth = -1;
        private int depth;
        // whether the reader has passed the second location compliance test
        private boolean compliant;

        private Object nextToken;

        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset)
            throws XMLStreamException, UnsupportedEncodingException {
            // woodstox's getLocation().getCharacterOffset() does not return the offset correctly for InputStream, so use Reader instead.
            this(path, nsmap, mode, 1, new InputStreamReader(in, charset));
        }

        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset)
            throws XMLStreamException, UnsupportedEncodingException {
            // woodstox's getLocation().getCharacterOffset() does not return the offset correctly for InputStream, so use Reader instead.
            // the group must be passed on, otherwise it would silently fall back to 1
            this(path, nsmap, mode, group, new InputStreamReader(in, charset));
        }

        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException {
            this(path, nsmap, mode, 1, in);
        }

        /**
         * Creates the iterator and reads the first token.
         *
         * @param path  the slash-separated element path; its first character is skipped
         * @param nsmap the prefix to namespace URI mapping for the path, may be <tt>null</tt>
         * @param mode  the extraction mode character; 0 selects the injected mode
         * @param group the number of tokens to combine; non-positive values are treated as 1
         * @param in    the reader providing the XML content
         * @throws XMLStreamException if the reader fails the location test or the first token cannot be read
         */
        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException {
            final String[] sl = path.substring(1).split("/");
            this.splitpath = new AttributedQName[sl.length];
            for (int i = 0; i < sl.length; i++) {
                String s = sl[i];
                // an empty segment leaves its entry null
                if (s.length() > 0) {
                    int d = s.indexOf(':');
                    String pfx = d > 0 ? s.substring(0, d) : "";
                    this.splitpath[i] =
                        new AttributedQName(
                            "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx);
                }
            }

            this.mode = mode != 0 ? mode : 'i';
            this.group = group > 0 ? group : 1;
            this.in = new RecordableReader(in);
            this.reader = new StaxConverter().createXMLStreamReader(this.in);

            LOG.trace("reader.class: {}", reader.getClass());
            // perform the first offset compliance test
            int coff = reader.getLocation().getCharacterOffset();
            if (coff != 0) {
                LOG.error("XMLStreamReader {} not supporting Location", reader.getClass().getName());
                throw new XMLStreamException("reader not supporting Location");
            }

            this.path = new ArrayList<QName>();

            // wrapped mode needs the segments and the injected mode needs the namespaces
            if (this.mode == 'w') {
                this.segments = new ArrayList<String>();
                this.segmentlog = new ArrayList<QName>();
            } else if (this.mode == 'i') {
                this.namespaces = new ArrayList<Map<String, String>>();
            }
            // when grouping the tokens, allocate the storage to temporarily store tokens.
            if (this.group > 1) {
                this.tokens = new ArrayList<String>();
            }
            this.nextToken = getNextToken();
        }

        /**
         * Whether the current path position is a null entry, i.e. an empty path segment
         * (presumably standing for descendant-or-self).
         */
        private boolean isDoS() {
            return splitpath[index] == null;
        }

        /**
         * Returns the path entry to match next, stepping over a null entry.
         */
        private AttributedQName current() {
            return splitpath[index + (isDoS() ? 1 : 0)];
        }

        /**
         * Returns the path entry preceding the current position, or <tt>null</tt> at the
         * first position or when that entry is a null entry.
         */
        private AttributedQName ancestor() {
            return index == 0 ? null : splitpath[index - 1];
        }

        /**
         * Advances the path position by one entry, or by two when standing on a null entry.
         */
        private void down() {
            if (isDoS()) {
                index++;
            }
            index++;
        }

        private void up() {
            index--;
        }

        /**
         * Whether the entry returned by {@link #current()} is the last entry of the path.
         */
        private boolean isBottom() {
            return index == splitpath.length - (isDoS() ? 2 : 1);
        }

        private boolean isTop() {
            return index == 0;
        }

        /**
         * Returns the pending read-ahead event code if there is one, clearing it,
         * or otherwise the next event from the reader.
         */
        private int readNext() throws XMLStreamException {
            int c = code;
            if (c > 0) {
                code = 0;
            } else {
                c = reader.next();
            }
            return c;
        }

        /**
         * Returns the input text between the previously consumed offset and the
         * reader's current character offset.
         */
        private String getCurrentText() {
            int pos = reader.getLocation().getCharacterOffset();
            String txt = in.getText(pos - consumed);
            consumed = pos;
            // keep recording
            in.record();
            return txt;
        }

        private void pushName(QName name) {
            path.add(name);
        }

        private QName popName() {
            return path.remove(path.size() - 1);
        }

        private void pushSegment(QName qname, String token) {
            segments.add(token);
            segmentlog.add(qname);
        }

        private String popSegment() {
            return segments.remove(segments.size() - 1);
        }

        private QName peekLog() {
            return segmentlog.get(segmentlog.size() - 1);
        }

        private QName popLog() {
            return segmentlog.remove(segmentlog.size() - 1);
        }

        /**
         * Pushes the namespace bindings of the current element, which are the bindings
         * of the enclosing element overlaid with the ones declared on this element.
         */
        private void pushNamespaces(XMLStreamReader reader) {
            Map<String, String> m = new HashMap<String, String>();
            if (namespaces.size() > 0) {
                m.putAll(namespaces.get(namespaces.size() - 1));
            }
            for (int i = 0; i < reader.getNamespaceCount(); i++) {
                m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i));
            }
            namespaces.add(m);
        }

        private void popNamespaces() {
            namespaces.remove(namespaces.size() - 1);
        }

        private Map<String, String> getCurrentNamespaceBindings() {
            return namespaces.get(namespaces.size() - 1);
        }

        /**
         * Reads the events up to the end of the current element.
         *
         * @param incl <tt>true</tt> to also read the event following the end element and
         *             keep it as the pending event; <tt>false</tt> to keep the end element
         *             itself as the pending event
         */
        private void readCurrent(boolean incl) throws XMLStreamException {
            int d = depth;
            // read until the depth drops below the depth at entry
            while (d <= depth) {
                int event = reader.next();
                if (event == XMLStreamConstants.START_ELEMENT) {
                    depth++;
                } else if (event == XMLStreamConstants.END_ELEMENT) {
                    depth--;
                }
            }
            // either look ahead to the next token or stay at the end element token
            if (incl) {
                code = reader.next();
            } else {
                code = reader.getEventType();
                if (code == XMLStreamConstants.END_ELEMENT) {
                    // revert the depth count to avoid double counting the up event
                    depth++;
                }
            }
        }

        /**
         * Reads the matched element to its end and returns its text rendered according to the mode.
         */
        private String getCurrentToken() throws XMLStreamException {
            readCurrent(true);
            popName();

            String token = createContextualToken(getCurrentText());
            if (mode == 'i') {
                popNamespaces();
            }

            return token;
        }

        /**
         * Renders the raw element text according to the mode.
         * <ul>
         *   <li><tt>w</tt> with a group of 1: the recorded ancestor segments, the token, and
         *       an end tag for each element still on the path, innermost first</li>
         *   <li><tt>i</tt>: the token with the in-scope namespace bindings that are not
         *       already declared on its start tag added to the start tag</li>
         *   <li><tt>u</tt>: the text between the first <tt>&gt;</tt> and the last <tt>&lt;/</tt>,
         *       or the empty string if these are not both found</li>
         *   <li><tt>t</tt>: the concatenation of the text between each <tt>&gt;</tt> and the
         *       following <tt>&lt;</tt></li>
         *   <li>otherwise: the token unchanged</li>
         * </ul>
         */
        private String createContextualToken(String token) {
            StringBuilder sb = new StringBuilder();
            if (mode == 'w' && group == 1) {
                for (int i = 0; i < segments.size(); i++) {
                    sb.append(segments.get(i));
                }
                sb.append(token);
                for (int i = path.size() - 1; i >= 0; i--) {
                    QName q = path.get(i);
                    sb.append("</").append(makeName(q)).append(">");
                }

            } else if (mode == 'i') {
                final String stag = token.substring(0, token.indexOf('>') + 1);
                // the prefixes already declared on the start tag
                Set<String> skip = new HashSet<String>();
                Matcher matcher = NAMESPACE_PATTERN.matcher(stag);
                // reuse the quote character of the first declaration found, if any
                char quote = 0;
                while (matcher.find()) {
                    String prefix = matcher.group(1);
                    if (prefix.length() > 0) {
                        prefix = prefix.substring(1);
                    }
                    skip.add(prefix);
                    if (quote == 0) {
                        quote = matcher.group(2).charAt(0);
                    }
                }
                if (quote == 0) {
                    quote = '"';
                }
                // insert the declarations before the closing ">" or "/>" of the start tag
                boolean empty = stag.endsWith("/>");
                sb.append(token.substring(0, stag.length() - (empty ? 2 : 1)));
                for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) {
                    if (!skip.contains(e.getKey())) {
                        sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:")
                            .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote);
                    }
                }
                sb.append(token.substring(stag.length() - (empty ? 2 : 1)));
            } else if (mode == 'u') {
                int bp = token.indexOf(">");
                int ep = token.lastIndexOf("</");
                if (bp > 0 && ep > 0) {
                    sb.append(token.substring(bp + 1, ep));
                }
            } else if (mode == 't') {
                int bp = 0;
                for (;;) {
                    int ep = token.indexOf('>', bp);
                    bp = token.indexOf('<', ep);
                    if (bp < 0) {
                        break;
                    }
                    sb.append(token.substring(ep + 1, bp));
                }
            } else {
                return token;
            }

            return sb.toString();
        }

        /**
         * Combines the collected tokens into one value and clears the collection.
         * In wrapped mode the tokens are wrapped in the ancestor segments and end tags;
         * in the other modes they are wrapped in a <tt>group</tt> element.
         */
        private String getGroupedToken() {
            StringBuilder sb = new StringBuilder();
            if (mode == 'w') {
                // for wrapped
                for (int i = 0; i < segments.size(); i++) {
                    sb.append(segments.get(i));
                }
                for (String s : tokens) {
                    sb.append(s);
                }
                for (int i = path.size() - 1; i >= 0; i--) {
                    QName q = path.get(i);
                    sb.append("</").append(makeName(q)).append(">");
                }
            } else {
                // for injected, unwrapped, text
                sb.append("<group>");
                for (String s : tokens) {
                    sb.append(s);
                }
                sb.append("</group>");
            }
            tokens.clear();
            return sb.toString();
        }

        /**
         * Reads events until the next token (or group of tokens) is complete.
         *
         * @return the next token, or <tt>null</tt> when the end of the document is reached
         *         and no collected tokens are left to return
         */
        private String getNextToken() throws XMLStreamException {
            int xcode = 0;
            while (xcode != XMLStreamConstants.END_DOCUMENT) {
                xcode = readNext();

                switch (xcode) {
                case XMLStreamConstants.START_ELEMENT:
                    depth++;
                    QName name = reader.getName();
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("se={}; depth={}; trackdepth={}", new Object[]{name, depth, trackdepth});
                    }

                    String token = getCurrentText();
                    // perform the second compliance test
                    if (!compliant) {
                        // NOTE(review): text beginning with a start tag here is taken to mean that the
                        // reported offset is not the start of the element - confirm against the reader used
                        if (token != null && token.startsWith("<") && !token.startsWith("<?")) {
                            LOG.error("XMLStreamReader {} not supporting Location", reader.getClass().getName());
                            throw new XMLStreamException("reader not supporting Location");
                        }
                        compliant = true;
                    }

                    LOG.trace("token={}", token);
                    if (!backtrack && mode == 'w') {
                        pushSegment(name, token);
                    }
                    pushName(name);
                    if (mode == 'i') {
                        pushNamespaces(reader);
                    }
                    backtrack = false;
                    if (current().matches(name)) {
                        // mark the position of the match in the segments list
                        if (isBottom()) {
                            // final match
                            token = getCurrentToken();
                            backtrack = true;
                            trackdepth = depth;
                            if (group > 1) {
                                tokens.add(token);
                                if (group == tokens.size()) {
                                    return getGroupedToken();
                                }
                            } else {
                                return token;
                            }
                        } else {
                            // intermediary match
                            down();
                        }
                    } else if (isDoS()) {
                        // continue
                    } else {
                        // skip
                        readCurrent(false);
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    if ((backtrack || (trackdepth > 0 && depth == trackdepth))
                        && (mode == 'w' && group > 1 && tokens.size() > 0)) {
                        // flush the left over using the current context
                        // the end element is kept pending so that it is processed on the next call
                        code = XMLStreamConstants.END_ELEMENT;
                        return getGroupedToken();
                    }

                    depth--;
                    QName endname = reader.getName();
                    LOG.trace("ee={}", endname);

                    popName();
                    if (mode == 'i') {
                        popNamespaces();
                    }

                    int pc = 0;
                    if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) {
                        // reactive backtrack if not backtracking and update the track depth
                        backtrack = true;
                        trackdepth--;
                        if (mode == 'w') {
                            while (!endname.equals(peekLog())) {
                                pc++;
                                popLog();
                            }
                        }
                    }

                    if (backtrack) {
                        if (mode == 'w') {
                            for (int i = 0; i < pc; i++) {
                                popSegment();
                            }
                        }

                        if ((ancestor() == null && !isTop())
                            || (ancestor() != null && ancestor().matches(endname))) {
                            up();
                        }
                    }
                    break;
                case XMLStreamConstants.END_DOCUMENT:
                    LOG.trace("depth={}", depth);
                    if (group > 1 && tokens.size() > 0) {
                        // flush the left over before really going EoD
                        code = XMLStreamConstants.END_DOCUMENT;
                        return getGroupedToken();
                    }
                    break;
                default:
                    break;
                }
            }
            return null;
        }

        /**
         * Returns the name as <tt>prefix:localPart</tt>, or just the local part if the prefix is empty.
         */
        private static String makeName(QName qname) {
            String pfx = qname.getPrefix();
            return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart();
        }

        @Override
        public boolean hasNext() {
            return nextToken != null;
        }

        /**
         * Returns the token read ahead and reads the following one.
         *
         * @return the next token, or <tt>null</tt> if there is none left
         * @throws RuntimeException wrapping the {@link XMLStreamException} if reading the
         *                          following token fails; the iteration then ends
         */
        @Override
        public Object next() {
            Object o = nextToken;
            try {
                nextToken = getNextToken();
            } catch (XMLStreamException e) {
                nextToken = null;
                throw new RuntimeException(e);
            }
            return o;
        }

        @Override
        public void remove() {
            // noop
        }

        /**
         * Closes the XML stream reader and the underlying reader.
         *
         * @throws IOException if closing the XML stream reader fails
         */
        @Override
        public void close() throws IOException {
            try {
                reader.close();
            } catch (XMLStreamException e) {
                throw new IOException(e);
            } finally {
                // XMLStreamReader.close() does not close the underlying input source
                IOHelper.close(in);
            }
        }
    }

    /**
     * A {@link QName} used as a path entry that can match other names using wildcards:
     * a namespace URI of <tt>*</tt> matches any namespace, and <tt>*</tt> and <tt>?</tt>
     * in the local part match any sequence of characters and any single character.
     */
    static class AttributedQName extends QName {
        private static final long serialVersionUID = 9878370226894144L;
        // the pattern for the local part, or null if it contains no wildcard
        private Pattern lcpattern;
        // whether any namespace is matched
        private boolean nsany;

        public AttributedQName(String localPart) {
            super(localPart);
            checkWildcard("", localPart);
        }

        public AttributedQName(String namespaceURI, String localPart, String prefix) {
            super(namespaceURI, localPart, prefix);
            checkWildcard(namespaceURI, localPart);
        }

        public AttributedQName(String namespaceURI, String localPart) {
            super(namespaceURI, localPart);
            checkWildcard(namespaceURI, localPart);
        }

        /**
         * Whether the given name is matched by this name.
         *
         * @param qname the name to test
         * @return <tt>true</tt> if both the namespace URI and the local part match
         */
        public boolean matches(QName qname) {
            return (nsany || getNamespaceURI().equals(qname.getNamespaceURI()))
                && (lcpattern != null
                ? lcpattern.matcher(qname.getLocalPart()).matches()
                : getLocalPart().equals(qname.getLocalPart()));
        }

        /**
         * Records whether the namespace is the wildcard and, if the local part contains
         * a wildcard, compiles it into a regular expression.
         */
        private void checkWildcard(String nsa, String lcp) {
            nsany = "*".equals(nsa);
            boolean wc = lcp.indexOf('?') >= 0 || lcp.indexOf('*') >= 0;
            if (wc) {
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < lcp.length(); i++) {
                    char c = lcp.charAt(i);
                    switch (c) {
                    case '.':
                        // a dot is a literal character in a name
                        sb.append("\\.");
                        break;
                    case '*':
                        sb.append(".*");
                        break;
                    case '?':
                        sb.append('.');
                        break;
                    default:
                        sb.append(c);
                        break;
                    }
                }
                lcpattern = Pattern.compile(sb.toString());
            }
        }
    }
}