001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.List;
020import java.util.concurrent.CopyOnWriteArrayList;
021
022import org.apache.camel.language.simple.types.SimpleToken;
023import org.apache.camel.language.simple.types.SimpleTokenType;
024import org.apache.camel.language.simple.types.TokenType;
025import org.apache.camel.util.ObjectHelper;
026
027/**
028 * Tokenizer to create {@link SimpleToken} from the input.
029 */
030public final class SimpleTokenizer {
031
032    // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
034
035    static {
036        // add known tokens
037        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
038        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
039        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
040        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
041        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
042        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
043        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
044        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
045        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
046        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
047        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
048        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
049        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
050
051        // binary operators
052        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
053        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~"));
054        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
055        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
056        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
057        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
058        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
059        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
060        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
061        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
062        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
063        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
064        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
065        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
066        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
067        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
068        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
069
070        // unary operators
071        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
072        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
073
074        // logical operators
075        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
076        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
077        // TODO: @deprecated logical operators, to be removed in Camel 3.0
078        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
079        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
080    }
081
082    private SimpleTokenizer() {
083        // static methods
084    }
085
086
087    /**
088     * @see SimpleLanguage#changeFunctionStartToken(String...)
089     */
090    public static void changeFunctionStartToken(String... startToken) {
091        for (SimpleTokenType type : KNOWN_TOKENS) {
092            if (type.getType() == TokenType.functionStart) {
093                KNOWN_TOKENS.remove(type);
094            }
095        }
096
097        // add in start of list as its a more common token to be used
098        for (String token : startToken) {
099            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
100        }
101    }
102
103    /**
104     * @see SimpleLanguage#changeFunctionEndToken(String...)
105     */
106    public static void changeFunctionEndToken(String... endToken) {
107        for (SimpleTokenType type : KNOWN_TOKENS) {
108            if (type.getType() == TokenType.functionEnd) {
109                KNOWN_TOKENS.remove(type);
110            }
111        }
112
113        // add in start of list as its a more common token to be used
114        for (String token : endToken) {
115            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionEnd, token));
116        }
117    }
118
119    /**
120     * Create the next token
121     *
122     * @param expression  the input expression
123     * @param index       the current index
124     * @param allowEscape whether to allow escapes
125     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
126     * @return the created token, will always return a token
127     */
128    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
129        return doNextToken(expression, index, allowEscape, filter);
130    }
131
132    /**
133     * Create the next token
134     *
135     * @param expression  the input expression
136     * @param index       the current index
137     * @param allowEscape whether to allow escapes
138     * @return the created token, will always return a token
139     */
140    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
141        return doNextToken(expression, index, allowEscape);
142    }
143
144    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
145
146        boolean numericAllowed = acceptType(TokenType.numericValue, filters);
147        if (numericAllowed) {
148            // is it a numeric value
149            StringBuilder sb = new StringBuilder();
150            boolean digit = true;
151            while (digit && index < expression.length()) {
152                digit = Character.isDigit(expression.charAt(index));
153                if (digit) {
154                    char ch = expression.charAt(index);
155                    sb.append(ch);
156                    index++;
157                    continue;
158                }
159                // is it a dot or comma as part of a floating point number
160                boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
161                if (decimalSeparator && sb.length() > 0) {
162                    char ch = expression.charAt(index);
163                    sb.append(ch);
164                    index++;
165                    // assume its still a digit
166                    digit = true;
167                    continue;
168                }
169            }
170            if (sb.length() > 0) {
171                return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
172            }
173        }
174
175        boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
176        if (escapeAllowed) {
177            StringBuilder sb = new StringBuilder();
178            char ch = expression.charAt(index);
179            boolean escaped = '\\' == ch;
180            if (escaped && index < expression.length() - 1) {
181                // grab next character to escape
182                char next = expression.charAt(++index);
183                // special for new line, tabs and carriage return
184                boolean special = false;
185                if ('n' == next) {
186                    sb.append("\n");
187                    special = true;
188                } else if ('t' == next) {
189                    sb.append("\t");
190                    special = true;
191                } else if ('r' == next) {
192                    sb.append("\r");
193                    special = true;
194                } else {
195                    // not special just a regular character
196                    sb.append(ch);
197                }
198
199                // force 2 as length if special
200                return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
201            }
202        }
203
204        // it could be any of the known tokens
205        String text = expression.substring(index);
206        for (SimpleTokenType token : KNOWN_TOKENS) {
207            if (acceptType(token.getType(), filters)) {
208                if (acceptToken(token, text, expression, index)) {
209                    return new SimpleToken(token, index);
210                }
211            }
212        }
213
214        // fallback and create a character token
215        char ch = expression.charAt(index);
216        SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
217        return token;
218    }
219
220    private static boolean acceptType(TokenType type, TokenType... filters) {
221        if (filters == null || filters.length == 0) {
222            return true;
223        }
224        for (TokenType filter : filters) {
225            if (type == filter) {
226                return true;
227            }
228        }
229        return false;
230    }
231
232    private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
233        if (token.isUnary() && text.startsWith(token.getValue())) {
234            SimpleTokenType functionEndToken = getFunctionEndToken();
235            if (functionEndToken != null) {
236                int endLen = functionEndToken.getValue().length();
237
238                // special check for unary as the previous must be a function end, and the next a whitespace
239                // to ensure unary operators is only applied on functions as intended
240                int len = token.getValue().length();
241
242                String previous = "";
243                if (index - endLen >= 0) {
244                    previous = expression.substring(index - endLen, index);
245                }
246                String after = text.substring(len);
247                boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
248                boolean functionEnd = previous.equals(functionEndToken.getValue());
249                return functionEnd && whiteSpace;
250            }
251        }
252
253        return text.startsWith(token.getValue());
254    }
255
256    private static SimpleTokenType getFunctionEndToken() {
257        for (SimpleTokenType token : KNOWN_TOKENS) {
258            if (token.isFunctionEnd()) {
259                return token;
260            }
261        }
262        return null;
263    }
264
265}