001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.camel.support;
018
019 import java.io.InputStream;
020 import java.util.Iterator;
021 import java.util.LinkedHashMap;
022 import java.util.Map;
023 import java.util.Scanner;
024 import java.util.regex.Matcher;
025 import java.util.regex.Pattern;
026
027 import org.apache.camel.util.ObjectHelper;
028
029 /**
030 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
031 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token.
032 * <p/>
033 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
034 * to access the message body.
035 * <p/>
036 * Can be used to split big XML files.
037 * <p/>
038 * This implementation supports inheriting namespaces from a parent/root tag.
039 */
040 public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {
041
042 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
043 private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>";
044 protected final String inheritNamespaceToken;
045
046 public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) {
047 super(startToken, endToken, true);
048 // namespace token is optional
049 this.inheritNamespaceToken = inheritNamespaceToken;
050
051 // must be XML tokens
052 if (!startToken.startsWith("<") || !startToken.endsWith(">")) {
053 throw new IllegalArgumentException("Start token must be a valid XML token, was: " + startToken);
054 }
055 if (!endToken.startsWith("<") || !endToken.endsWith(">")) {
056 throw new IllegalArgumentException("End token must be a valid XML token, was: " + endToken);
057 }
058 if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
059 throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
060 }
061 }
062
063 @Override
064 protected Iterator<?> createIterator(InputStream in, String charset) {
065 XMLTokenPairIterator iterator = new XMLTokenPairIterator(startToken, endToken, inheritNamespaceToken, in, charset);
066 iterator.init();
067 return iterator;
068 }
069
070 /**
071 * Iterator to walk the input stream
072 */
073 static class XMLTokenPairIterator extends TokenPairIterator {
074
075 private final Pattern startTokenPattern;
076 private final String scanEndToken;
077 private final String inheritNamespaceToken;
078 private Pattern inheritNamespaceTokenPattern;
079 private String rootTokenNamespaces;
080
081 XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) {
082 super(startToken, endToken, true, in, charset);
083
084 // remove any ending > as we need to support attributes on the tags, so we need to use a reg exp pattern
085 String token = startToken.substring(0, startToken.length() - 1) + SCAN_TOKEN_REGEX;
086 this.startTokenPattern = Pattern.compile(token);
087 this.scanEndToken = endToken.substring(0, endToken.length() - 1) + SCAN_TOKEN_REGEX;
088 this.inheritNamespaceToken = inheritNamespaceToken;
089 if (inheritNamespaceToken != null) {
090 token = inheritNamespaceToken.substring(0, inheritNamespaceToken.length() - 1) + SCAN_TOKEN_REGEX;
091 // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
092 this.inheritNamespaceTokenPattern = Pattern.compile(token, Pattern.MULTILINE | Pattern.DOTALL);
093 }
094 }
095
096 @Override
097 void init() {
098 // use scan end token as delimiter which supports attributes/namespaces
099 this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
100 // this iterator will do look ahead as we may have data
101 // after the last end token, which the scanner would find
102 // so we need to be one step ahead of the scanner
103 this.image = scanner.hasNext() ? (String) next(true) : null;
104 }
105
106 @Override
107 String getNext(boolean first) {
108 String next = scanner.next();
109 if (next == null) {
110 return null;
111 }
112
113 // initialize inherited namespaces on first
114 if (first && inheritNamespaceToken != null) {
115 rootTokenNamespaces = getNamespacesFromNamespaceToken(next);
116 }
117
118 // make sure next is positioned at start token as we can have leading data
119 // or we reached EOL and there is no more start tags
120 Matcher matcher = startTokenPattern.matcher(next);
121 if (!matcher.find()) {
122 return null;
123 } else {
124 int index = matcher.start();
125 next = next.substring(index);
126 }
127
128 // build answer accordingly to whether namespaces should be inherited or not
129 StringBuilder sb = new StringBuilder();
130 if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
131 // append root namespaces to local start token
132 String tag = ObjectHelper.before(next, ">");
133 // grab the text
134 String text = ObjectHelper.after(next, ">");
135 // build result with inherited namespaces
136 next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endToken).toString();
137 } else {
138 next = sb.append(next).append(endToken).toString();
139 }
140
141 return next;
142 }
143
144 private String getNamespacesFromNamespaceToken(String text) {
145 if (text == null) {
146 return null;
147 }
148
149 // grab the namespace tag
150 Matcher mat = inheritNamespaceTokenPattern.matcher(text);
151 if (mat.find()) {
152 text = mat.group(0);
153 } else {
154 // cannot find namespace tag
155 return null;
156 }
157
158 // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
159 Map<String, String> namespaces = new LinkedHashMap<String, String>();
160 Matcher matcher = NAMESPACE_PATTERN.matcher(text);
161 while (matcher.find()) {
162 String prefix = matcher.group(1);
163 String url = matcher.group(2);
164 if (ObjectHelper.isEmpty(prefix)) {
165 prefix = "_DEFAULT_";
166 } else {
167 // skip leading :
168 prefix = prefix.substring(1);
169 }
170 namespaces.put(prefix, url);
171 }
172
173 // did we find any namespaces
174 if (namespaces.isEmpty()) {
175 return null;
176 }
177
178 // build namespace String
179 StringBuilder sb = new StringBuilder();
180 for (Map.Entry<String, String> entry : namespaces.entrySet()) {
181 String key = entry.getKey();
182 String value = entry.getValue();
183 if ("_DEFAULT_".equals(key)) {
184 sb.append(" xmlns=\"").append(value).append("\"");
185 } else {
186 sb.append(" xmlns:").append(key).append("=\"").append(value).append("\"");
187 }
188 }
189
190 return sb.toString();
191 }
192 }
193
194 }