001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.InputStreamReader;
023import java.io.Reader;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.HashSet;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.xml.namespace.QName;
037import javax.xml.stream.XMLStreamConstants;
038import javax.xml.stream.XMLStreamException;
039import javax.xml.stream.XMLStreamReader;
040
041import org.apache.camel.Exchange;
042import org.apache.camel.InvalidPayloadException;
043import org.apache.camel.converter.jaxp.StaxConverter;
044import org.apache.camel.spi.NamespaceAware;
045import org.apache.camel.util.IOHelper;
046import org.apache.camel.util.ObjectHelper;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
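/**
 * An {@link ExpressionAdapter} that reads the message body as an XML stream and evaluates to an
 * iterator over the XML tokens selected by a '/'-separated element path.
 * <p>
 * The extraction mode is a single character: 'i' (injected namespaces, used when no mode is set),
 * 'w' (wrapped), 'u' (unwrapped) or 't' (text). Tokens may optionally be emitted in groups.
 */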
053public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware {
054    protected final String path;
055    protected char mode;
056    protected int group;
057    protected Map<String, String> nsmap;
058
059    public XMLTokenExpressionIterator(String path, char mode) {
060        this(path, mode, 1);
061    }
062
063    public XMLTokenExpressionIterator(String path, char mode, int group) {
064        ObjectHelper.notEmpty(path, "path");
065        this.path = path;
066        this.mode = mode;
067        this.group = group > 1 ? group : 1;
068    }
069
070    @Override
071    public void setNamespaces(Map<String, String> nsmap) {
072        this.nsmap = nsmap;
073    }
074
075    public void setMode(char mode) {
076        this.mode = mode;
077    }
078
079    public void setMode(String mode) {
080        this.mode = mode != null ? mode.charAt(0) : 0;
081    }
082    
083    public int getGroup() {
084        return group;
085    }
086
087    public void setGroup(int group) {
088        this.group = group;
089    }
090
091    protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException {
092        Reader reader;
093        if (charset == null) {
094            reader = new InputStreamReader(in);
095        } else {
096            reader = new InputStreamReader(in, charset);
097        }
098        XMLTokenIterator iterator = new XMLTokenIterator(path, nsmap, mode, group, reader);
099        return iterator;
100    }
101
102    protected Iterator<?> createIterator(Reader in) throws XMLStreamException {
103        XMLTokenIterator iterator = new XMLTokenIterator(path, nsmap, mode, group, in);
104        return iterator;
105    }
106
107    @Override
108    public boolean matches(Exchange exchange) {
109        // as a predicate we must close the stream, as we do not return an iterator that can be used
110        // afterwards to iterate the input stream
111        Object value = doEvaluate(exchange, true);
112        return ObjectHelper.evaluateValuePredicate(value);
113    }
114
115    @Override
116    public Object evaluate(Exchange exchange) {
117        // as we return an iterator to access the input stream, we should not close it
118        return doEvaluate(exchange, false);
119    }
120
121    /**
122     * Strategy to evaluate the exchange
123     *
124     * @param exchange   the exchange
125     * @param closeStream whether to close the stream before returning from this method.
126     * @return the evaluated value
127     */
128    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
129        InputStream in = null;
130        try {
131            in = exchange.getIn().getMandatoryBody(InputStream.class);
132            String charset = IOHelper.getCharsetName(exchange);
133            return createIterator(in, charset);
134        } catch (InvalidPayloadException e) {
135            exchange.setException(e);
136            // must close input stream
137            IOHelper.close(in);
138            return null;
139        } catch (XMLStreamException e) {
140            exchange.setException(e);
141            // must close input stream
142            IOHelper.close(in);
143            return null;
144        } catch (UnsupportedEncodingException e) {
145            exchange.setException(e);
146            // must close input stream
147            IOHelper.close(in);
148            return null;
149        } finally {
150            if (closeStream) {
151                IOHelper.close(in);
152            }
153        }
154    }
155    
156
157    static class XMLTokenIterator implements Iterator<Object>, Closeable {
158        private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class);
159        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")");
160
161        private AttributedQName[] splitpath;
162        private int index;
163        private char mode;
164        private int group;
165        private RecordableReader in;
166        private XMLStreamReader reader;
167        private List<QName> path;
168        private List<Map<String, String>> namespaces;
169        private List<String> segments;
170        private List<QName> segmentlog;
171        private List<String> tokens;
172        private int code;
173        private int consumed;
174        private boolean backtrack;
175        private int trackdepth = -1;
176        private int depth;
177        private boolean compliant;
178
179        private Object nextToken;
180        
181        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 
182            throws XMLStreamException, UnsupportedEncodingException {
183            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
184            this(path, nsmap, mode, 1, new InputStreamReader(in, charset));
185        }
186
187        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 
188            throws XMLStreamException, UnsupportedEncodingException {
189            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
190            this(path, nsmap, mode, new InputStreamReader(in, charset));
191        }
192
193        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException {
194            this(path, nsmap, mode, 1, in);
195        }
196
197        public XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException {
198            final String[] sl = path.substring(1).split("/");
199            this.splitpath = new AttributedQName[sl.length];
200            for (int i = 0; i < sl.length; i++) {
201                String s = sl[i];
202                if (s.length() > 0) {
203                    int d = s.indexOf(':');
204                    String pfx = d > 0 ? s.substring(0, d) : "";
205                    this.splitpath[i] = 
206                        new AttributedQName(
207                            "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx);
208                }
209            }
210            
211            this.mode = mode != 0 ? mode : 'i';
212            this.group = group > 0 ? group : 1;
213            this.in = new RecordableReader(in);
214            this.reader = new StaxConverter().createXMLStreamReader(this.in);
215
216            LOG.trace("reader.class: {}", reader.getClass());
217            // perform the first offset compliance test
218            int coff = reader.getLocation().getCharacterOffset();
219            if (coff != 0) {
220                LOG.error("XMLStreamReader {} not supporting Location");
221                throw new XMLStreamException("reader not supporting Location");
222            }
223
224            this.path = new ArrayList<QName>();
225            
226            // wrapped mode needs the segments and the injected mode needs the namespaces
227            if (this.mode == 'w') {
228                this.segments = new ArrayList<String>();
229                this.segmentlog = new ArrayList<QName>();
230            } else if (this.mode == 'i') {
231                this.namespaces = new ArrayList<Map<String, String>>();
232            }
233            // when grouping the tokens, allocate the storage to temporarily store tokens. 
234            if (this.group > 1) {
235                this.tokens = new ArrayList<String>();
236            }       
237            this.nextToken = getNextToken();
238        }
239        
240        private boolean isDoS() {
241            return splitpath[index] == null;
242        }
243        
244        private AttributedQName current() {
245            return splitpath[index + (isDoS() ? 1 : 0)];
246        }
247        
248        private AttributedQName ancestor() {
249            return index == 0 ? null : splitpath[index - 1];
250        }
251
252        private void down() {
253            if (isDoS()) {
254                index++;
255            }
256            index++;
257        }
258        
259        private void up() {
260            index--;
261        }
262        
263        private boolean isBottom() {
264            return index == splitpath.length - (isDoS() ? 2 : 1);
265        }
266        
267        private boolean isTop() {
268            return index == 0;
269        }
270        
271        private int readNext() throws XMLStreamException {
272            int c = code;
273            if (c > 0) {
274                code = 0;
275            } else {
276                c = reader.next();
277            }
278            return c;
279        }
280        
281        private String getCurrenText() {
282            int pos = reader.getLocation().getCharacterOffset();
283            String txt = in.getText(pos - consumed);
284            consumed = pos;
285            // keep recording
286            in.record();
287            return txt;
288        }
289
290        private void pushName(QName name) {
291            path.add(name);
292        }
293
294        private QName popName() {
295            return path.remove(path.size() - 1);
296        }
297
298        private void pushSegment(QName qname, String token) {
299            segments.add(token);
300            segmentlog.add(qname);
301        }
302
303        private String popSegment() {
304            return segments.remove(segments.size() - 1);
305        }
306        
307        private QName peekLog() {
308            return segmentlog.get(segmentlog.size() - 1);
309        }
310        
311        private QName popLog() {
312            return segmentlog.remove(segmentlog.size() - 1);
313        }
314
315        private void pushNamespaces(XMLStreamReader reader) {
316            Map<String, String> m = new HashMap<String, String>();
317            if (namespaces.size() > 0) {
318                m.putAll(namespaces.get(namespaces.size() - 1));
319            }
320            for (int i = 0; i < reader.getNamespaceCount(); i++) {
321                m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i));
322            }
323            namespaces.add(m);
324        }
325
326        private void popNamespaces() {
327            namespaces.remove(namespaces.size() - 1);
328        }
329
330        private Map<String, String> getCurrentNamespaceBindings() {
331            return namespaces.get(namespaces.size() - 1);
332        }
333
334        private void readCurrent(boolean incl) throws XMLStreamException {
335            int d = depth;
336            while (d <= depth) {
337                int code = reader.next();
338                if (code == XMLStreamConstants.START_ELEMENT) {
339                    depth++;
340                } else if (code == XMLStreamConstants.END_ELEMENT) {
341                    depth--;
342                }
343            }
344            // either look ahead to the next token or stay at the end element token
345            if (incl) {
346                code = reader.next();
347            } else {
348                code = reader.getEventType();
349                if (code == XMLStreamConstants.END_ELEMENT) {
350                    // revert the depth count to avoid double counting the up event
351                    depth++;
352                }
353            }
354        }
355
356        private String getCurrentToken() throws XMLStreamException {
357            readCurrent(true);
358            popName();
359            
360            String token = createContextualToken(getCurrenText());
361            if (mode == 'i') {
362                popNamespaces();
363            }
364            
365            return token;
366        }
367
368        private String createContextualToken(String token) {
369            StringBuilder sb = new StringBuilder();
370            if (mode == 'w' && group == 1) {
371                for (int i = 0; i < segments.size(); i++) {
372                    sb.append(segments.get(i));
373                }
374                sb.append(token);
375                for (int i = path.size() - 1; i >= 0; i--) {
376                    QName q = path.get(i);
377                    sb.append("</").append(makeName(q)).append(">");
378                }
379
380            } else if (mode == 'i') {
381                final String stag = token.substring(0, token.indexOf('>') + 1);
382                Set<String> skip = new HashSet<String>();
383                Matcher matcher = NAMESPACE_PATTERN.matcher(stag);
384                char quote = 0;
385                while (matcher.find()) {
386                    String prefix = matcher.group(1);
387                    if (prefix.length() > 0) {
388                        prefix = prefix.substring(1);
389                    }
390                    skip.add(prefix);
391                    if (quote == 0) {
392                        quote = matcher.group(2).charAt(0);
393                    }
394                }
395                if (quote == 0) {
396                    quote = '"';
397                }
398                boolean empty = stag.endsWith("/>"); 
399                sb.append(token.substring(0, stag.length() - (empty ? 2 : 1)));
400                for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) {
401                    if (!skip.contains(e.getKey())) {
402                        sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:")
403                            .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote);
404                    }
405                }
406                sb.append(token.substring(stag.length() - (empty ? 2 : 1)));
407            } else if (mode == 'u') {
408                int bp = token.indexOf(">");
409                int ep = token.lastIndexOf("</");
410                if (bp > 0 && ep > 0) {
411                    sb.append(token.substring(bp + 1, ep));
412                }
413            } else if (mode == 't') {
414                int bp = 0;
415                for (;;) {
416                    int ep = token.indexOf('>', bp);
417                    bp = token.indexOf('<', ep);
418                    if (bp < 0) {
419                        break;
420                    }
421                    sb.append(token.substring(ep + 1, bp));
422                }
423            } else {
424                return token;
425            }
426
427            return sb.toString();
428        }
429
430        private String getGroupedToken() {
431            StringBuilder sb = new StringBuilder();
432            if (mode == 'w') {
433                 // for wrapped
434                for (int i = 0; i < segments.size(); i++) {
435                    sb.append(segments.get(i));
436                }
437                for (String s : tokens) {
438                    sb.append(s);
439                }
440                for (int i = path.size() - 1; i >= 0; i--) {
441                    QName q = path.get(i);
442                    sb.append("</").append(makeName(q)).append(">");
443                }
444            } else {
445                // for injected, unwrapped, text
446                sb.append("<group>");
447                for (String s : tokens) {
448                    sb.append(s);
449                }
450                sb.append("</group>");
451            }
452            tokens.clear();
453            return sb.toString();
454        }
455        
456        private String getNextToken() throws XMLStreamException {
457            int xcode = 0;
458            while (xcode != XMLStreamConstants.END_DOCUMENT) {
459                xcode = readNext();
460
461                switch (xcode) {
462                case XMLStreamConstants.START_ELEMENT:
463                    depth++;
464                    QName name = reader.getName();
465                    if (LOG.isTraceEnabled()) {
466                        LOG.trace("se={}; depth={}; trackdepth={}", new Object[]{name, depth, trackdepth});
467                    }
468                    
469                    String token = getCurrenText();
470                    // perform the second compliance test
471                    if (!compliant) {
472                        if (token != null && token.startsWith("<") && !token.startsWith("<?")) {
473                            LOG.error("XMLStreamReader {} not supporting Location");
474                            throw new XMLStreamException("reader not supporting Location");
475                        }
476                        compliant = true;
477                    }
478
479                    LOG.trace("token={}", token);
480                    if (!backtrack && mode == 'w') {
481                        pushSegment(name, token);
482                    }
483                    pushName(name);
484                    if (mode == 'i') {
485                        pushNamespaces(reader);
486                    }
487                    backtrack = false;
488                    if (current().matches(name)) {
489                        // mark the position of the match in the segments list
490                        if (isBottom()) {
491                            // final match
492                            token = getCurrentToken();
493                            backtrack = true;
494                            trackdepth = depth;
495                            if (group > 1) {
496                                tokens.add(token);
497                                if (group == tokens.size()) {
498                                    return getGroupedToken();
499                                }
500                            } else {
501                                return token;    
502                            }
503                        } else {
504                            // intermediary match
505                            down();
506                        }
507                    } else if (isDoS()) {
508                        // continue
509                    } else {
510                        // skip
511                        readCurrent(false);
512                    }
513                    break;
514                case XMLStreamConstants.END_ELEMENT:
515                    if ((backtrack || (trackdepth > 0 && depth == trackdepth))
516                        && (mode == 'w' && group > 1 && tokens.size() > 0)) {
517                        // flush the left over using the current context
518                        code = XMLStreamConstants.END_ELEMENT;
519                        return getGroupedToken();
520                    }
521
522                    depth--;
523                    QName endname = reader.getName();
524                    LOG.trace("ee={}", endname);
525                    
526                    popName();
527                    if (mode == 'i') {
528                        popNamespaces();
529                    }
530                    
531                    int pc = 0;
532                    if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) {
533                        // reactive backtrack if not backtracking and update the track depth
534                        backtrack = true;
535                        trackdepth--;
536                        if (mode == 'w') {
537                            while (!endname.equals(peekLog())) {
538                                pc++;
539                                popLog();
540                            }
541                        }
542                    }
543
544                    if (backtrack) {
545                        if (mode == 'w') {
546                            for (int i = 0; i < pc; i++) {
547                                popSegment();
548                            }
549                        }
550
551                        if ((ancestor() == null && !isTop())
552                            || (ancestor() != null && ancestor().matches(endname))) {
553                            up();
554                        }
555                    }
556                    break;
557                case XMLStreamConstants.END_DOCUMENT:
558                    LOG.trace("depth={}", depth);
559                    if (group > 1 && tokens.size() > 0) {
560                        // flush the left over before really going EoD
561                        code = XMLStreamConstants.END_DOCUMENT;
562                        return getGroupedToken();
563                    }
564                    break;
565                default:
566                    break;
567                }
568            }
569            return null;
570        }
571
572        private static String makeName(QName qname) {
573            String pfx = qname.getPrefix();
574            return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart();
575        }
576
577        @Override
578        public boolean hasNext() {
579            return nextToken != null;
580        }
581
582        @Override
583        public Object next() {
584            Object o = nextToken;
585            try {
586                nextToken = getNextToken();
587            } catch (XMLStreamException e) {
588                nextToken = null;
589                throw new RuntimeException(e);
590            }
591            return o;
592        }
593
594        @Override
595        public void remove() {
596            // noop
597        }
598
599        @Override
600        public void close() throws IOException {
601            try {
602                reader.close();
603            } catch (XMLStreamException e) {
604                throw new IOException(e);
605            }
606        }
607    }
608
609    static class AttributedQName extends QName {
610        private static final long serialVersionUID = 9878370226894144L;
611        private Pattern lcpattern;
612        private boolean nsany;
613        
614        public AttributedQName(String localPart) {
615            super(localPart);
616            checkWildcard("", localPart);
617        }
618
619        public AttributedQName(String namespaceURI, String localPart, String prefix) {
620            super(namespaceURI, localPart, prefix);
621            checkWildcard(namespaceURI, localPart);
622        }
623
624        public AttributedQName(String namespaceURI, String localPart) {
625            super(namespaceURI, localPart);
626            checkWildcard(namespaceURI, localPart);
627        }
628
629        public boolean matches(QName qname) {
630            return (nsany || getNamespaceURI().equals(qname.getNamespaceURI()))
631                && (lcpattern != null 
632                ? lcpattern.matcher(qname.getLocalPart()).matches() 
633                : getLocalPart().equals(qname.getLocalPart()));
634        }
635        
636        private void checkWildcard(String nsa, String lcp) {
637            nsany = "*".equals(nsa);
638            boolean wc = false;
639            for (int i = 0; i < lcp.length(); i++) {
640                char c = lcp.charAt(i);
641                if (c == '?' || c == '*') {
642                    wc = true;
643                    break;
644                }
645            }
646            if (wc) {
647                StringBuilder sb = new StringBuilder();
648                for (int i = 0; i < lcp.length(); i++) {
649                    char c = lcp.charAt(i);
650                    switch (c) {
651                    case '.':
652                        sb.append("\\.");
653                        break;
654                    case '*':
655                        sb.append(".*");
656                        break;
657                    case '?':
658                        sb.append('.');
659                        break;
660                    default:
661                        sb.append(c);
662                        break;
663                    }
664                }
665                lcpattern = Pattern.compile(sb.toString());
666            }
667        }
668    }
669}