001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.StringReader;
027import java.nio.CharBuffer;
028import java.nio.channels.Channels;
029import java.nio.channels.ReadableByteChannel;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.IllegalCharsetNameException;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.InputMismatchException;
035import java.util.Iterator;
036import java.util.LinkedHashMap;
037import java.util.Map;
038import java.util.Map.Entry;
039import java.util.NoSuchElementException;
040import java.util.Objects;
041import java.util.regex.Matcher;
042import java.util.regex.Pattern;
043
/**
 * A small tokenizer modelled on {@link java.util.Scanner}: it reads characters from a {@link Readable} and
 * splits them into tokens separated by a delimiter regular expression.
 * <p>
 * When no delimiter pattern is supplied (the pattern argument is {@code null}), runs of whitespace
 * ({@code \s+}) separate the tokens. Compiled delimiter patterns are kept in a small cache that is shared by
 * all instances; access to that cache is synchronized. An individual scanner keeps unsynchronized mutable
 * parsing state and must not be shared between threads without external locking.
 * <p>
 * An {@link IOException} raised while reading is treated as end of input; it is remembered and reported by
 * {@link #close()}.
 */
public final class Scanner implements Iterator<String>, Closeable {

    /** The eldest cached pattern is evicted as soon as the cache reaches this many entries. */
    private static final int CACHE_EVICTION_SIZE = 7;

    /** Insertion-ordered cache of user supplied delimiter patterns. Guarded by its own monitor. */
    private static final Map<String, Pattern> CACHE = new LinkedHashMap<String, Pattern>() {
        @Override
        protected boolean removeEldestEntry(Entry<String, Pattern> eldest) {
            return size() >= CACHE_EVICTION_SIZE;
        }
    };

    /** Default delimiter. Precompiled so that it never competes for a slot in the bounded cache. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    /** Matches a whole region, line terminators included; used to extract the token text. */
    private static final Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*");

    private static final int BUFFER_SIZE = 1024;

    private final Readable source;
    private final Pattern delimPattern;
    private final Matcher matcher;
    // The buffer is always kept with position 0 outside of readMore(), because the matcher
    // addresses it as a CharSequence whose indexes are relative to the buffer position.
    private CharBuffer buf;
    // Index in buf of the next character that has not been consumed yet.
    private int position;
    private boolean inputExhausted;
    private boolean needInput;
    // True when the delimiters in front of the current token were already skipped by next().
    private boolean skipped;
    // Position remembered by hasNext() while it looks ahead; -1 when no look-ahead is active.
    private int savedPosition = -1;
    private boolean closed;
    private IOException lastIOException;

    /**
     * Creates a scanner that decodes the given stream.
     *
     * @param  source                   the bytes to scan, must not be {@code null}
     * @param  charsetName              the charset used for decoding, or {@code null} for the platform default
     * @param  pattern                  the delimiter regular expression, or {@code null} for whitespace
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(InputStream source, String charsetName, String pattern) {
        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
    }

    /**
     * Creates a scanner that reads and decodes the given file.
     *
     * @param  source                   the file to scan, must not be {@code null}
     * @param  charsetName              the charset used for decoding, or {@code null} for the platform default
     * @param  pattern                  the delimiter regular expression, or {@code null} for whitespace
     * @throws FileNotFoundException    if the file cannot be opened for reading
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
        this(openChannel(source, charsetName, pattern), charsetName, pattern);
    }

    /**
     * Creates a scanner over the characters of the given string.
     *
     * @param source  the text to scan, must not be {@code null}
     * @param pattern the delimiter regular expression, or {@code null} for whitespace
     */
    public Scanner(String source, String pattern) {
        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
    }

    /**
     * Creates a scanner that decodes the bytes read from the given channel.
     *
     * @param  source                   the channel to scan, must not be {@code null}
     * @param  charsetName              the charset used for decoding, or {@code null} for the platform default
     * @param  pattern                  the delimiter regular expression, or {@code null} for whitespace
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
    }

    /**
     * Creates a scanner over an arbitrary character source.
     *
     * @param source  the characters to scan, must not be {@code null}
     * @param pattern the delimiter regular expression, or {@code null} for whitespace
     */
    public Scanner(Readable source, String pattern) {
        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
    }

    private Scanner(Readable source, Pattern pattern) {
        this.source = source;
        this.delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN;
        this.buf = CharBuffer.allocate(BUFFER_SIZE);
        // Start with an empty readable window; readMore() widens it as data arrives.
        this.buf.limit(0);
        this.matcher = delimPattern.matcher(buf);
        this.matcher.useTransparentBounds(true);
        this.matcher.useAnchoringBounds(false);
    }

    /**
     * Opens the file and makes sure that the charset name and the delimiter expression are usable. If either
     * of them is rejected the freshly opened stream is closed again, so a failing constructor call does not
     * leak a file handle.
     */
    private static ReadableByteChannel openChannel(File source, String charsetName, String pattern)
            throws FileNotFoundException {
        FileInputStream in = new FileInputStream(Objects.requireNonNull(source, "source"));
        try {
            toDecoder(charsetName);
            cachePattern(pattern);
        } catch (RuntimeException e) {
            try {
                in.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        return in.getChannel();
    }

    private static CharsetDecoder toDecoder(String charsetName) {
        try {
            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
            return cs.newDecoder();
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Tells whether another token is available, reading from the source as far as needed to decide. The
     * scanner position is left unchanged.
     *
     * @return                       {@code true} if {@link #next()} would return a token
     * @throws IllegalStateException if the scanner has been closed
     */
    @Override
    public boolean hasNext() {
        checkClosed();
        saveState();
        while (!inputExhausted) {
            if (hasTokenInBuffer()) {
                revertState();
                return true;
            }
            readMore();
        }
        boolean result = hasTokenInBuffer();
        revertState();
        return result;
    }

    /**
     * Returns the next token, reading from the source as far as needed to complete it.
     *
     * @return                        the text between the surrounding delimiters (may be empty)
     * @throws NoSuchElementException if the input holds no further token
     * @throws IllegalStateException  if the scanner has been closed
     */
    @Override
    public String next() {
        checkClosed();
        while (true) {
            String token = getCompleteTokenInBuffer();
            if (token != null) {
                skipped = false;
                return token;
            }
            if (needInput) {
                readMore();
            } else {
                throwFor();
            }
        }
    }

    private void saveState() {
        savedPosition = position;
    }

    private void revertState() {
        position = savedPosition;
        savedPosition = -1;
        skipped = false;
    }

    /**
     * Appends the next chunk of the source behind the buffered characters. Reaching the end of the source,
     * or failing to read from it, marks the input as exhausted.
     */
    private void readMore() {
        if (buf.limit() == buf.capacity()) {
            expandBuffer();
        }
        // Expose the free tail of the buffer to the source, then restore the readable window.
        int p = buf.position();
        buf.position(buf.limit());
        buf.limit(buf.capacity());
        int n;
        try {
            n = source.read(buf);
        } catch (IOException ioe) {
            lastIOException = ioe;
            n = -1;
        }
        if (n == -1) {
            inputExhausted = true;
            needInput = false;
        } else if (n > 0) {
            needInput = false;
        }
        buf.limit(buf.position());
        buf.position(p);
    }

    /**
     * Makes room in a full buffer: consumed characters are discarded when there are any, otherwise the
     * buffer capacity is doubled.
     */
    private void expandBuffer() {
        // Characters from the look-ahead start onwards must survive while hasNext() is running.
        int offset = savedPosition == -1 ? position : savedPosition;
        buf.position(offset);
        if (offset > 0) {
            buf.compact();
            translateSavedIndexes(offset);
            position -= offset;
            buf.flip();
            return;
        }
        // Nothing can be dropped (offset is 0, so no index needs shifting): grow instead.
        CharBuffer newBuf = CharBuffer.allocate(buf.capacity() * 2);
        newBuf.put(buf);
        newBuf.flip();
        buf = newBuf;
        matcher.reset(buf);
    }

    private void translateSavedIndexes(int offset) {
        if (savedPosition != -1) {
            savedPosition -= offset;
        }
    }

    private void throwFor() {
        skipped = false;
        if (inputExhausted && position == buf.limit()) {
            throw new NoSuchElementException();
        } else {
            throw new InputMismatchException();
        }
    }

    /**
     * Skips delimiters at the current position and reports whether token characters follow in the buffer.
     */
    private boolean hasTokenInBuffer() {
        matcher.usePattern(delimPattern);
        matcher.region(position, buf.limit());
        if (matcher.lookingAt()) {
            if (matcher.hitEnd() && !inputExhausted) {
                // The delimiter match touched the end of the buffered data, so further input could still
                // lengthen it (for example "\r" followed later by "\n" with the delimiter "\r\n|\r").
                // Keep the position and wait for more input, otherwise hasNext() could announce a token
                // that next() is unable to deliver.
                needInput = true;
                return false;
            }
            position = matcher.end();
        }
        return position != buf.limit();
    }

    /**
     * Tries to cut the next token out of the buffered characters.
     *
     * @return the token, or {@code null} when none is available yet; in that case {@code needInput} tells
     *         whether reading more input may help
     */
    private String getCompleteTokenInBuffer() {
        matcher.usePattern(delimPattern);
        if (!skipped) {
            // Skip the delimiters in front of the token, once per token.
            matcher.region(position, buf.limit());
            if (matcher.lookingAt()) {
                if (matcher.hitEnd() && !inputExhausted) {
                    // More input could extend the delimiter.
                    needInput = true;
                    return null;
                }
                skipped = true;
                position = matcher.end();
            }
        }
        if (position == buf.limit()) {
            // Nothing left in the buffer: ask for more input unless the source is drained.
            if (!inputExhausted) {
                needInput = true;
            }
            return null;
        }
        // Look for the delimiter that terminates the token.
        matcher.region(position, buf.limit());
        boolean foundNextDelim = matcher.find();
        if (foundNextDelim && matcher.end() == position) {
            // Zero-length match at the current position: continue with the following match.
            foundNextDelim = matcher.find();
        }
        if (foundNextDelim) {
            if (matcher.requireEnd() && !inputExhausted) {
                // More input could invalidate this delimiter match.
                needInput = true;
                return null;
            }
            return extractToken(matcher.start());
        }
        if (inputExhausted) {
            // No terminating delimiter will ever arrive: the rest of the input is the last token.
            return extractToken(buf.limit());
        }
        needInput = true;
        return null;
    }

    /**
     * Returns the characters between the current position and {@code tokenEnd} and moves the position
     * behind them.
     */
    private String extractToken(int tokenEnd) {
        matcher.usePattern(FIND_ANY_PATTERN);
        matcher.region(position, tokenEnd);
        if (matcher.matches()) {
            String token = matcher.group();
            position = matcher.end();
            return token;
        }
        return null;
    }

    private void checkClosed() {
        if (closed) {
            throw new IllegalStateException();
        }
    }

    /**
     * Closes the scanner and, if it is {@link Closeable}, the underlying source.
     * <p>
     * A failure that happened while reading or while closing the source is reported by the first call only;
     * closing an already closed scanner has no effect.
     *
     * @throws IOException if reading from the source failed earlier or if closing it fails. When both
     *                     happened, the close failure is thrown and carries the read failure as a suppressed
     *                     exception.
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            if (source instanceof Closeable) {
                try {
                    ((Closeable) source).close();
                } catch (IOException e) {
                    if (lastIOException != null && lastIOException != e) {
                        e.addSuppressed(lastIOException);
                    }
                    lastIOException = e;
                }
            }
        }
        IOException pending = lastIOException;
        if (pending != null) {
            // Report the failure once so that repeated close() calls stay silent.
            lastIOException = null;
            throw pending;
        }
    }

    /**
     * Returns the compiled form of the given expression, compiling it at most once while it stays cached.
     *
     * @return the compiled pattern, or {@code null} if {@code pattern} is {@code null}
     */
    private static Pattern cachePattern(String pattern) {
        if (pattern == null) {
            return null;
        }
        // The cache is a plain LinkedHashMap shared by all scanners, so every access is serialized.
        synchronized (CACHE) {
            return CACHE.computeIfAbsent(pattern, Pattern::compile);
        }
    }

}