001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.List;
020import java.util.concurrent.CopyOnWriteArrayList;
021
022import org.apache.camel.language.simple.types.SimpleToken;
023import org.apache.camel.language.simple.types.SimpleTokenType;
024import org.apache.camel.language.simple.types.TokenType;
025import org.apache.camel.util.ObjectHelper;
026
027/**
028 * Tokenizer to create {@link SimpleToken} from the input.
029 */
030public final class SimpleTokenizer {
031
032    // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
034
035    static {
036        // add known tokens
037        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
038        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
039        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
040        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
041        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
042        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
043        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
044        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
045        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
046        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
047        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
048        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
049        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
050
051        // binary operators
052        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
053        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~"));
054        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
055        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
056        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
057        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
058        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
059        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
060        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
061        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
062        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
063        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
064        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
065        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
066        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
067        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
068        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
069        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "starts with"));
070        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "ends with"));
071
072        // unary operators
073        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
074        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
075
076        // logical operators
077        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
078        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
079        // TODO: @deprecated logical operators, to be removed in Camel 3.0
080        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
081        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
082        
083        //binary operator 
084        // it is added as the last item because unary -- has the priority
085        // if unary not found it is highly possible - operator is run into.
086        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.minusValue, "-"));
087    }
088
089    private SimpleTokenizer() {
090        // static methods
091    }
092
093    /**
094     * Does the expression include a simple function.
095     *
096     * @param expression the expression
097     * @return <tt>true</tt> if one or more simple function is included in the expression
098     */
099    public static boolean hasFunctionStartToken(String expression) {
100        if (expression != null) {
101            for (SimpleTokenType type : KNOWN_TOKENS) {
102                if (type.getType() == TokenType.functionStart) {
103                    if (expression.contains(type.getValue())) {
104                        return true;
105                    }
106                } else {
107                    // function start are always first
108                    return false;
109                }
110            }
111        }
112        return false;
113    }
114
115    /**
116     * @see SimpleLanguage#changeFunctionStartToken(String...)
117     */
118    public static void changeFunctionStartToken(String... startToken) {
119        for (SimpleTokenType type : KNOWN_TOKENS) {
120            if (type.getType() == TokenType.functionStart) {
121                KNOWN_TOKENS.remove(type);
122            }
123        }
124
125        // add in start of list as its a more common token to be used
126        for (String token : startToken) {
127            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
128        }
129    }
130
131    /**
132     * @see SimpleLanguage#changeFunctionEndToken(String...)
133     */
134    public static void changeFunctionEndToken(String... endToken) {
135        for (SimpleTokenType type : KNOWN_TOKENS) {
136            if (type.getType() == TokenType.functionEnd) {
137                KNOWN_TOKENS.remove(type);
138            }
139        }
140
141        // add after the start tokens
142        int pos = 0;
143        for (SimpleTokenType type : KNOWN_TOKENS) {
144            if (type.getType() == TokenType.functionStart) {
145                pos++;
146            }
147        }
148
149        // add after function start of list as its a more common token to be used
150        for (String token : endToken) {
151            KNOWN_TOKENS.add(pos, new SimpleTokenType(TokenType.functionEnd, token));
152        }
153    }
154
155    /**
156     * Create the next token
157     *
158     * @param expression  the input expression
159     * @param index       the current index
160     * @param allowEscape whether to allow escapes
161     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
162     * @return the created token, will always return a token
163     */
164    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
165        return doNextToken(expression, index, allowEscape, filter);
166    }
167
168    /**
169     * Create the next token
170     *
171     * @param expression  the input expression
172     * @param index       the current index
173     * @param allowEscape whether to allow escapes
174     * @return the created token, will always return a token
175     */
176    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
177        return doNextToken(expression, index, allowEscape);
178    }
179
180    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
181
182        boolean numericAllowed = acceptType(TokenType.numericValue, filters);
183        if (numericAllowed) {
184            // is it a numeric value
185            StringBuilder sb = new StringBuilder();
186            boolean digit = true;
187            while (digit && index < expression.length()) {
188                digit = Character.isDigit(expression.charAt(index));
189                if (digit) {
190                    char ch = expression.charAt(index);
191                    sb.append(ch);
192                    index++;
193                    continue;
194                }
195                // is it a dot or comma as part of a floating point number
196                boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
197                if (decimalSeparator && sb.length() > 0) {
198                    char ch = expression.charAt(index);
199                    sb.append(ch);
200                    index++;
201                    // assume its still a digit
202                    digit = true;
203                    continue;
204                }
205            }
206            if (sb.length() > 0) {
207                return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
208            }
209        }
210
211        boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
212        if (escapeAllowed) {
213            StringBuilder sb = new StringBuilder();
214            char ch = expression.charAt(index);
215            boolean escaped = '\\' == ch;
216            if (escaped && index < expression.length() - 1) {
217                // grab next character to escape
218                char next = expression.charAt(++index);
219                // special for new line, tabs and carriage return
220                boolean special = false;
221                if ('n' == next) {
222                    sb.append("\n");
223                    special = true;
224                } else if ('t' == next) {
225                    sb.append("\t");
226                    special = true;
227                } else if ('r' == next) {
228                    sb.append("\r");
229                    special = true;
230                } else if ('}' == next) {
231                    sb.append("}");
232                    special = true;
233                } else {
234                    // not special just a regular character
235                    sb.append(ch);
236                }
237
238                // force 2 as length if special
239                return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
240            }
241        }
242
243        // it could be any of the known tokens
244        String text = expression.substring(index);
245        for (SimpleTokenType token : KNOWN_TOKENS) {
246            if (acceptType(token.getType(), filters)) {
247                if (acceptToken(token, text, expression, index)) {
248                    return new SimpleToken(token, index);
249                }
250            }
251        }
252
253        // fallback and create a character token
254        char ch = expression.charAt(index);
255        SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
256        return token;
257    }
258
259    private static boolean acceptType(TokenType type, TokenType... filters) {
260        if (filters == null || filters.length == 0) {
261            return true;
262        }
263        for (TokenType filter : filters) {
264            if (type == filter) {
265                return true;
266            }
267        }
268        return false;
269    }
270
271    private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
272        if (token.isUnary() && text.startsWith(token.getValue())) {
273            SimpleTokenType functionEndToken = getFunctionEndToken();
274            if (functionEndToken != null) {
275                int endLen = functionEndToken.getValue().length();
276
277                // special check for unary as the previous must be a function end, and the next a whitespace
278                // to ensure unary operators is only applied on functions as intended
279                int len = token.getValue().length();
280
281                String previous = "";
282                if (index - endLen >= 0) {
283                    previous = expression.substring(index - endLen, index);
284                }
285                String after = text.substring(len);
286                boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
287                boolean functionEnd = previous.equals(functionEndToken.getValue());
288                return functionEnd && whiteSpace;
289            }
290        }
291
292        return text.startsWith(token.getValue());
293    }
294
295    private static SimpleTokenType getFunctionEndToken() {
296        for (SimpleTokenType token : KNOWN_TOKENS) {
297            if (token.isFunctionEnd()) {
298                return token;
299            }
300        }
301        return null;
302    }
303
304}