/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.camel.Exchange;
import org.apache.camel.InvalidPayloadException;
import org.apache.camel.language.simple.SimpleLanguage;
import org.apache.camel.util.CollectionStringBuffer;
import org.apache.camel.util.IOHelper;
import org.apache.camel.util.ObjectHelper;
import org.apache.camel.util.StringHelper;

/**
 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token,
 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
 * <p/>
 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
 * to access the message body.
 * <p/>
 * Can be used to split big XML files.
 * <p/>
 * This implementation supports inheriting namespaces from a parent/root tag.
 */
public class TokenXMLExpressionIterator extends ExpressionAdapter {
    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
    private static final String OPTION_WRAP_TOKEN = "<*>";
    private static final String NAMESPACE_SEPERATOR = " ";

    protected final String tagToken;
    protected final String inheritNamespaceToken;

    /**
     * Creates the expression.
     *
     * @param tagToken              the XML tag to split on, with or without the surrounding {@code <} and {@code >};
     *                              must not be empty
     * @param inheritNamespaceToken optional parent tag whose namespace declarations are copied onto each token,
     *                              or {@code <*>} to wrap each token in its enclosing elements; may be <tt>null</tt>
     */
    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
        StringHelper.notEmpty(tagToken, "tagToken");
        this.tagToken = tagToken;
        // namespace token is optional
        this.inheritNamespaceToken = inheritNamespaceToken;
    }

    /**
     * Creates and initializes the iterator that walks the given stream.
     * <p/>
     * Both tokens may contain simple language functions, which are evaluated against the exchange first.
     * The evaluated tag token is then checked with the same {@code StringHelper.notEmpty} validation the
     * constructor applies to the configured token, and both tokens are normalized to start with
     * {@code <} and end with {@code >}.
     *
     * @param exchange the exchange
     * @param in       the stream to read the XML from
     * @param charset  the charset name handed to the {@link Scanner}
     * @return the initialized iterator
     */
    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
        String tag = tagToken;
        if (SimpleLanguage.hasSimpleFunction(tag)) {
            tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class);
        }
        // the constructor only validated the configured token; an expression may still evaluate to null/empty
        StringHelper.notEmpty(tag, "tagToken");

        String inherit = inheritNamespaceToken;
        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
        }

        // must be XML tokens
        tag = toXmlToken(tag);
        if (inherit != null) {
            inherit = toXmlToken(inherit);
        }

        XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset);
        iterator.init();
        return iterator;
    }

    /**
     * Ensures the token starts with {@code <} and ends with {@code >}, adding whichever is missing.
     */
    private static String toXmlToken(String token) {
        String answer = token;
        if (!answer.startsWith("<")) {
            answer = "<" + answer;
        }
        if (!answer.endsWith(">")) {
            answer = answer + ">";
        }
        return answer;
    }

    @Override
    public boolean matches(Exchange exchange) {
        // as a predicate we must close the stream, as we do not return an iterator that can be used
        // afterwards to iterate the input stream
        Object value = doEvaluate(exchange, true);
        return ObjectHelper.evaluateValuePredicate(value);
    }

    @Override
    public Object evaluate(Exchange exchange) {
        // as we return an iterator to access the input stream, we should not close it
        return doEvaluate(exchange, false);
    }

    /**
     * Strategy to evaluate the exchange
     * <p/>
     * The stream is always closed when no iterator could be created (invalid payload, or a failure
     * while creating the iterator), because in that case nobody else holds a reference that could close it.
     *
     * @param exchange    the exchange
     * @param closeStream whether to close the stream before returning from this method.
     * @return the evaluated value, or <tt>null</tt> if the body could not be converted to an
     *         {@link InputStream} (the exception is then set on the exchange)
     */
    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
        InputStream in = null;
        boolean created = false;
        try {
            in = exchange.getIn().getMandatoryBody(InputStream.class);
            // we may read from a file, and want to support custom charset defined on the exchange
            String charset = IOHelper.getCharsetName(exchange);
            Object answer = createIterator(exchange, in, charset);
            created = true;
            return answer;
        } catch (InvalidPayloadException e) {
            exchange.setException(e);
            return null;
        } finally {
            // single close point: on request, or whenever we failed to hand out an iterator
            if (closeStream || !created) {
                IOHelper.close(in);
            }
        }
    }

    /**
     * Iterator to walk the input stream
     */
    static class XMLTokenIterator implements Iterator<Object>, Closeable {
        final String tagToken;
        final InputStream in;
        final String charset;
        Scanner scanner;
        Object image;

        private final Pattern tagTokenPattern;
        private final String inheritNamespaceToken;
        private final boolean wrapToken;
        private Pattern inheritNamespaceTokenPattern;
        private String[] rootTokenNamespaces;
        private String wrapHead;
        private String wrapTail;

        /**
         * @param tagToken              the tag to split on, including the leading {@code <} and trailing {@code >}
         * @param inheritNamespaceToken the parent tag (including {@code <} and {@code >}), {@code <*>} for
         *                              wrap mode, or <tt>null</tt>
         * @param in                    the stream to read from
         * @param charset               the charset name handed to the {@link Scanner}
         */
        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
            this.tagToken = tagToken;
            this.charset = charset;

            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
            this.tagTokenPattern =
                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE,
                    SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)),
                    Pattern.MULTILINE | Pattern.DOTALL);

            this.inheritNamespaceToken = inheritNamespaceToken;
            if (OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
                // wrap mode: record what is read so the text in front of the first token can be recovered
                this.wrapToken = true;
                this.in = new RecordableInputStream(in, charset);
            } else {
                this.wrapToken = false;
                this.in = in;
                if (inheritNamespaceToken != null) {
                    // the inherit namespace token may itself have a namespace prefix
                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
                    this.inheritNamespaceTokenPattern =
                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
                            SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)),
                            Pattern.MULTILINE | Pattern.DOTALL);
                }
            }
        }

        /**
         * Creates the scanner and pre-loads the first token into {@link #image}.
         */
        void init() {
            // use a scanner with the default delimiter
            this.scanner = new Scanner(in, charset);
            this.image = scanner.hasNext() ? (String) next(true) : null;
        }

        /**
         * Finds the next token in the stream.
         *
         * @param first whether this is the first lookup, in which case the inherited namespaces
         *              (or the wrap head/tail in wrap mode) are captured as well
         * @return the next token, or <tt>null</tt> if there are no more
         */
        String getNext(boolean first) {
            // initialize inherited namespaces on first
            if (first && inheritNamespaceToken != null && !wrapToken) {
                rootTokenNamespaces = getNamespacesFromNamespaceTokenSplitter(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
            }

            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
            if (next == null) {
                return null;
            }
            if (first && wrapToken) {
                // everything recorded in front of the first match is the head; the tail closes what the head left open
                MatchResult mres = scanner.match();
                wrapHead = ((RecordableInputStream) in).getText(mres.start());
                wrapTail = buildXMLTail(wrapHead);
            }

            // build answer accordingly to whether namespaces should be inherited or not
            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
                String head = StringHelper.before(next, ">");
                boolean empty = false;
                if (head.endsWith("/")) {
                    // self-closing tag: take the slash off so the namespaces can be appended in front of it
                    head = head.substring(0, head.length() - 1);
                    empty = true;
                }
                StringBuilder sb = new StringBuilder();
                // append root namespaces to local start token
                // grab the text
                String tail = StringHelper.after(next, ">");
                // build result with inherited namespaces and skip the prefixes that are declared within the child itself.
                next = sb.append(head).append(getMissingInherritNamespaces(head)).append(empty ? "/>" : ">").append(tail).toString();
            } else if (wrapToken) {
                // wrap the token
                StringBuilder sb = new StringBuilder();
                next = sb.append(wrapHead).append(next).append(wrapTail).toString();
            }

            return next;
        }

        /**
         * Returns the root namespace declarations that the given start tag text does not declare itself,
         * joined with the namespace separator.
         */
        private String getMissingInherritNamespaces(final String text) {
            final StringBuilder sb = new StringBuilder();
            if (text != null) {
                boolean first = true;
                final String[] containedNamespaces = getNamespacesFromNamespaceTokenSplitter(text);
                for (final String rn : rootTokenNamespaces) {
                    boolean nsExists = false;
                    for (final String cn : containedNamespaces) {
                        if (rn.equals(cn)) {
                            nsExists = true;
                            // already existing namespace in child were found we need a separator, so we set first = false
                            first = false;
                            break;
                        }
                    }
                    if (!nsExists) {
                        sb.append(first ? rn : NAMESPACE_SEPERATOR + rn);
                        first = false;
                    }
                }
            }
            return sb.toString();
        }

        /**
         * Extracts the namespace declarations from the text and splits them on the namespace separator;
         * returns an empty array when the text is <tt>null</tt> or declares none.
         */
        private String[] getNamespacesFromNamespaceTokenSplitter(final String text) {
            final String namespaces = getNamespacesFromNamespaceToken(text);
            return namespaces == null ? new String[0] : namespaces.split(NAMESPACE_SEPERATOR);
        }

        /**
         * Extracts the namespace declarations from the text as one string, each declaration prefixed
         * with a space; returns <tt>null</tt> when the text is <tt>null</tt> or declares none.
         */
        private String getNamespacesFromNamespaceToken(String text) {
            if (text == null) {
                return null;
            }

            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
            Map<String, String> namespaces = new LinkedHashMap<>();
            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
            while (matcher.find()) {
                String prefix = matcher.group(1);
                String url = matcher.group(2);
                if (ObjectHelper.isEmpty(prefix)) {
                    prefix = "_DEFAULT_";
                } else {
                    // skip leading :
                    prefix = prefix.substring(1);
                }
                namespaces.put(prefix, url);
            }

            // did we find any namespaces
            if (namespaces.isEmpty()) {
                return null;
            }

            // build namespace String
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
                String key = entry.getKey();
                // note the value is already quoted
                String value = entry.getValue();
                if ("_DEFAULT_".equals(key)) {
                    sb.append(" xmlns=").append(value);
                } else {
                    sb.append(" xmlns:").append(key).append("=").append(value);
                }
            }

            return sb.toString();
        }

        @Override
        public boolean hasNext() {
            return image != null;
        }

        @Override
        public Object next() {
            return next(false);
        }

        /**
         * Returns the pre-loaded token and loads the one after it into {@link #image}.
         */
        Object next(boolean first) {
            Object answer = image;
            // calculate next
            if (scanner.hasNext()) {
                image = getNext(first);
            } else {
                image = null;
            }

            if (answer == null) {
                // first time the image may be null
                answer = image;
            }
            return answer;
        }

        @Override
        public void remove() {
            // noop
        }

        @Override
        public void close() throws IOException {
            if (scanner != null) {
                scanner.close();
            } else {
                // init() was never called, so no scanner wraps the stream yet
                in.close();
            }
        }

    }

    /**
     * Builds the closing tags for every element the given XML fragment opens but does not close,
     * innermost first.
     * <p/>
     * Processing instructions, comments, CDATA sections and other {@code <!...>} declarations are skipped.
     * The fragment is expected to be the leading part of a well-formed document; a truncated tag at the end
     * or a closing tag without a matching start tag is ignored instead of failing.
     *
     * @param xmlhead the XML text in front of the first token
     * @return the concatenated closing tags, empty if nothing is left open
     */
    private static String buildXMLTail(String xmlhead) {
        // names of the elements that are currently open, outermost first
        List<String> tags = new ArrayList<>();
        final int length = xmlhead.length();
        int p = 0;
        while (p < length) {
            p = xmlhead.indexOf('<', p);
            if (p < 0 || p + 1 >= length) {
                // no more markup, or a dangling '<' as the very last character
                break;
            }
            final char nc = xmlhead.charAt(p + 1);
            if (nc == '?') {
                // processing instruction or xml declaration
                p++;
            } else if (nc == '!') {
                // comment, CDATA or DOCTYPE: none of them opens an element
                p = skipDeclaration(xmlhead, p);
            } else if (nc == '/') {
                p++;
                if (!tags.isEmpty()) {
                    tags.remove(tags.size() - 1);
                }
            } else {
                final int ep = xmlhead.indexOf('>', p);
                if (ep < 0) {
                    // start tag is cut off
                    break;
                }
                if (xmlhead.charAt(ep - 1) == '/') {
                    // self-closing element leaves nothing open
                    p++;
                    continue;
                }
                // the element name ends at the first whitespace (space, tab or line break) or at the '>'
                int nameEnd = p + 1;
                while (nameEnd < ep && !Character.isWhitespace(xmlhead.charAt(nameEnd))) {
                    nameEnd++;
                }
                tags.add(xmlhead.substring(p + 1, nameEnd));
                p = ep;
            }
        }
        StringBuilder sb = new StringBuilder();
        for (int i = tags.size() - 1; i >= 0; i--) {
            sb.append("</").append(tags.get(i)).append(">");
        }
        return sb.toString();
    }

    /**
     * Returns the position at which scanning should continue after the {@code <!} construct starting at
     * the given position. Comments and CDATA sections are skipped as a whole, as their content may contain
     * {@code <} characters; an unterminated one consumes the rest of the text.
     */
    private static int skipDeclaration(String xml, int start) {
        if (xml.startsWith("<!--", start)) {
            final int end = xml.indexOf("-->", start + 4);
            return end < 0 ? xml.length() : end + 3;
        }
        if (xml.startsWith("<![CDATA[", start)) {
            final int end = xml.indexOf("]]>", start + 9);
            return end < 0 ? xml.length() : end + 3;
        }
        // DOCTYPE and friends: step over the '<' and keep scanning
        return start + 1;
    }
}