001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.text.MessageFormat;
023import java.util.ArrayList;
024import java.util.Iterator;
025import java.util.LinkedHashMap;
026import java.util.List;
027import java.util.Map;
028import java.util.Scanner;
029import java.util.regex.MatchResult;
030import java.util.regex.Matcher;
031import java.util.regex.Pattern;
032
033import org.apache.camel.Exchange;
034import org.apache.camel.InvalidPayloadException;
035import org.apache.camel.language.simple.SimpleLanguage;
036import org.apache.camel.util.CollectionStringBuffer;
037import org.apache.camel.util.IOHelper;
038import org.apache.camel.util.ObjectHelper;
039import org.apache.camel.util.StringHelper;
040
041/**
042 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
043 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token,
044 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
045 * <p/>
046 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
047 * to access the message body.
048 * <p/>
049 * Can be used to split big XML files.
050 * <p/>
051 * This implementation supports inheriting namespaces from a parent/root tag.
052 */
053public class TokenXMLExpressionIterator extends ExpressionAdapter {
054    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
055    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
056    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
057    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
058    private static final String OPTION_WRAP_TOKEN = "<*>";
059    private static final String NAMESPACE_SEPERATOR = " ";
060
061    protected final String tagToken;
062    protected final String inheritNamespaceToken;
063
064    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
065        StringHelper.notEmpty(tagToken, "tagToken");
066        this.tagToken = tagToken;
067        // namespace token is optional
068        this.inheritNamespaceToken = inheritNamespaceToken;
069    }
070
071    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
072        String tag = tagToken;
073        if (SimpleLanguage.hasSimpleFunction(tag)) {
074            tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class);
075        }
076        String inherit = inheritNamespaceToken;
077        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
078            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
079        }
080
081        // must be XML tokens
082        if (!tag.startsWith("<")) {
083            tag = "<" + tag;
084        }
085        if (!tag.endsWith(">")) {
086            tag = tag + ">";
087        }
088
089        if (inherit != null) {
090            if (!inherit.startsWith("<")) {
091                inherit = "<" + inherit;
092            }
093            if (!inherit.endsWith(">")) {
094                inherit = inherit + ">";
095            }
096        }
097
098        // must be XML tokens
099        if (!tag.startsWith("<") || !tag.endsWith(">")) {
100            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag);
101        }
102        if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) {
103            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit);
104        }
105
106        XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset);
107        iterator.init();
108        return iterator;
109    }
110
111    @Override
112    public boolean matches(Exchange exchange) {
113        // as a predicate we must close the stream, as we do not return an iterator that can be used
114        // afterwards to iterate the input stream
115        Object value = doEvaluate(exchange, true);
116        return ObjectHelper.evaluateValuePredicate(value);
117    }
118
119    @Override
120    public Object evaluate(Exchange exchange) {
121        // as we return an iterator to access the input stream, we should not close it
122        return doEvaluate(exchange, false);
123    }
124
125    /**
126     * Strategy to evaluate the exchange
127     *
128     * @param exchange   the exchange
129     * @param closeStream whether to close the stream before returning from this method.
130     * @return the evaluated value
131     */
132    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
133        InputStream in = null;
134        try {
135            in = exchange.getIn().getMandatoryBody(InputStream.class);
136            // we may read from a file, and want to support custom charset defined on the exchange
137            String charset = IOHelper.getCharsetName(exchange);
138            return createIterator(exchange, in, charset);
139        } catch (InvalidPayloadException e) {
140            exchange.setException(e);
141            // must close input stream
142            IOHelper.close(in);
143            return null;
144        } finally {
145            if (closeStream) {
146                IOHelper.close(in);
147            }
148        }
149    }
150    
151    /**
152     * Iterator to walk the input stream
153     */
154    static class XMLTokenIterator implements Iterator<Object>, Closeable {
155        final String tagToken;
156        final InputStream in;
157        final String charset;
158        Scanner scanner;
159        Object image;
160
161        private final Pattern tagTokenPattern;
162        private final String inheritNamespaceToken;
163        private final boolean wrapToken;
164        private Pattern inheritNamespaceTokenPattern;
165        private String[] rootTokenNamespaces;
166        private String wrapHead;
167        private String wrapTail;
168
169        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
170            this.tagToken = tagToken;
171            this.charset = charset;
172          
173            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
174            this.tagTokenPattern = 
175                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 
176                                                     SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 
177                                                     Pattern.MULTILINE | Pattern.DOTALL);
178            
179            this.inheritNamespaceToken = inheritNamespaceToken;
180            if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
181                this.wrapToken = true;
182                this.in = new RecordableInputStream(in, charset);
183            } else {
184                this.wrapToken = false;
185                this.in = in;
186                if (inheritNamespaceToken != null) {
187                    // the inherit namespace token may itself have a namespace prefix
188                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
189                    this.inheritNamespaceTokenPattern = 
190                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
191                                                             SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
192                                                             Pattern.MULTILINE | Pattern.DOTALL);
193                }
194            }
195        }
196
197        void init() {
198            // use a scanner with the default delimiter
199            this.scanner = new Scanner(in, charset);
200            this.image = scanner.hasNext() ? (String) next(true) : null;
201        }
202
203        String getNext(boolean first) {
204            // initialize inherited namespaces on first
205            if (first && inheritNamespaceToken != null && !wrapToken) {
206                rootTokenNamespaces = getNamespacesFromNamespaceTokenSplitter(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
207            }
208
209            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
210            if (next == null) {
211                return null;
212            }
213            if (first && wrapToken) {
214                MatchResult mres = scanner.match();
215                wrapHead = ((RecordableInputStream)in).getText(mres.start());
216                wrapTail = buildXMLTail(wrapHead);
217            }
218
219            // build answer accordingly to whether namespaces should be inherited or not
220            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
221                String head = StringHelper.before(next, ">");
222                boolean empty = false;
223                if (head.endsWith("/")) {
224                    head = head.substring(0, head.length() - 1);
225                    empty = true;
226                }
227                StringBuilder sb = new StringBuilder();
228                // append root namespaces to local start token
229                // grab the text
230                String tail = StringHelper.after(next, ">");
231                // build result with inherited namespaces and skip the prefixes that are declared within the child itself.
232                next = sb.append(head).append(getMissingInherritNamespaces(head)).append(empty ? "/>" : ">").append(tail).toString();
233            } else if (wrapToken) {
234                // wrap the token
235                StringBuilder sb = new StringBuilder();
236                next = sb.append(wrapHead).append(next).append(wrapTail).toString();
237            }
238            
239            return next;
240        }
241        
242        private String getMissingInherritNamespaces(final String text) {
243            final StringBuilder sb = new StringBuilder();
244            if (text != null) {
245                boolean first = true;
246                final String[] containedNamespaces = getNamespacesFromNamespaceTokenSplitter(text);
247                for (final String rn : rootTokenNamespaces) {
248                    boolean nsExists = false;
249                    for (final String cn : containedNamespaces) {
250                        if (rn.equals(cn)) {
251                            nsExists = true;
252                            // already existing namespace in child were found we need a separator, so we set first = false
253                            if (first) {
254                                first = false;
255                            }
256                            break;
257                        }
258                    }
259                    if (!nsExists) {
260                        sb.append(first ? rn : NAMESPACE_SEPERATOR + rn);
261                        if (first) {
262                            first = false;
263                        }
264                    }
265                }
266            }
267            return sb.toString();
268        }
269
270        private String[] getNamespacesFromNamespaceTokenSplitter(final String text) {
271            final String namespaces = getNamespacesFromNamespaceToken(text);
272            return namespaces == null ? new String[0] : namespaces.split(NAMESPACE_SEPERATOR);
273        }
274
275        private String getNamespacesFromNamespaceToken(String text) {
276            if (text == null) {
277                return null;
278            }
279
280            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
281            Map<String, String> namespaces = new LinkedHashMap<>();
282            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
283            while (matcher.find()) {
284                String prefix = matcher.group(1);
285                String url = matcher.group(2);
286                if (ObjectHelper.isEmpty(prefix)) {
287                    prefix = "_DEFAULT_";
288                } else {
289                    // skip leading :
290                    prefix = prefix.substring(1);
291                }
292                namespaces.put(prefix, url);
293            }
294
295            // did we find any namespaces
296            if (namespaces.isEmpty()) {
297                return null;
298            }
299
300            // build namespace String
301            StringBuilder sb = new StringBuilder();
302            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
303                String key = entry.getKey();
304                // note the value is already quoted
305                String value = entry.getValue();
306                if ("_DEFAULT_".equals(key)) {
307                    sb.append(" xmlns=").append(value);
308                } else {
309                    sb.append(" xmlns:").append(key).append("=").append(value);
310                }
311            }
312
313            return sb.toString();
314        }
315        
316        @Override
317        public boolean hasNext() {
318            return image != null;
319        }
320
321        @Override
322        public Object next() {
323            return next(false);
324        }
325
326        Object next(boolean first) {
327            Object answer = image;
328            // calculate next
329            if (scanner.hasNext()) {
330                image = getNext(first);
331            } else {
332                image = null;
333            }
334
335            if (answer == null) {
336                // first time the image may be null
337                answer = image;
338            }
339            return answer;
340        }
341
342        @Override
343        public void remove() {
344            // noop
345        }
346
347        @Override
348        public void close() throws IOException {
349            scanner.close();
350        }
351
352    }
353
354    private static String buildXMLTail(String xmlhead) {
355        // assume the input text is a portion of a well-formed xml
356        List<String> tags = new ArrayList<>();
357        int p = 0;
358        while (p < xmlhead.length()) {
359            p = xmlhead.indexOf('<', p);
360            if (p < 0) {
361                break;
362            }
363            int nc = xmlhead.charAt(p + 1); 
364            if (nc == '?') {
365                p++;
366                continue;
367            } else if (nc == '/') {
368                p++;
369                tags.remove(tags.size() - 1);
370            } else {
371                final int ep = xmlhead.indexOf('>', p);
372                if (xmlhead.charAt(ep - 1) == '/') {
373                    p++;
374                    continue;
375                }
376                final int sp = xmlhead.substring(p, ep).indexOf(' ');
377                tags.add(xmlhead.substring(p + 1, sp > 0 ? p + sp : ep));
378                p = ep;
379            }
380        }
381        StringBuilder sb = new StringBuilder();
382        for (int i = tags.size() - 1; i >= 0; i--) {
383            sb.append("</").append(tags.get(i)).append(">");
384        }
385        return sb.toString();
386    }
387}