001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.language.simple; 018 019import java.util.List; 020import java.util.concurrent.CopyOnWriteArrayList; 021 022import org.apache.camel.language.simple.types.SimpleToken; 023import org.apache.camel.language.simple.types.SimpleTokenType; 024import org.apache.camel.language.simple.types.TokenType; 025import org.apache.camel.util.ObjectHelper; 026 027/** 028 * Tokenizer to create {@link SimpleToken} from the input. 029 */ 030public final class SimpleTokenizer { 031 032 // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens 033 private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>(); 034 035 static { 036 // add known tokens 037 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${")); 038 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{")); 039 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}")); 040 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " ")); 041 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t")); 042 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n")); 043 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r")); 044 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'")); 045 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\"")); 046 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true")); 047 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false")); 048 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null")); 049 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\")); 050 051 // binary operators 052 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "==")); 053 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~")); 054 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">=")); 055 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<=")); 056 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">")); 057 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<")); 058 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!=")); 059 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is")); 060 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is")); 061 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains")); 062 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains")); 063 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex")); 064 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex")); 065 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in")); 066 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in")); 067 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range")); 068 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range")); 069 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "starts with")); 070 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "ends with")); 071 072 // unary operators 073 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++")); 074 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--")); 075 076 // logical operators 077 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&")); 078 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||")); 079 // TODO: @deprecated logical operators, to be removed in Camel 3.0 080 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and")); 081 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or")); 082 083 //binary operator 084 // it is added as the last item because unary -- has the priority 085 // if unary not found it is highly possible - operator is run into. 086 KNOWN_TOKENS.add(new SimpleTokenType(TokenType.minusValue, "-")); 087 } 088 089 private SimpleTokenizer() { 090 // static methods 091 } 092 093 /** 094 * Does the expression include a simple function. 095 * 096 * @param expression the expression 097 * @return <tt>true</tt> if one or more simple function is included in the expression 098 */ 099 public static boolean hasFunctionStartToken(String expression) { 100 if (expression != null) { 101 for (SimpleTokenType type : KNOWN_TOKENS) { 102 if (type.getType() == TokenType.functionStart) { 103 if (expression.contains(type.getValue())) { 104 return true; 105 } 106 } else { 107 // function start are always first 108 return false; 109 } 110 } 111 } 112 return false; 113 } 114 115 /** 116 * @see SimpleLanguage#changeFunctionStartToken(String...) 117 */ 118 public static void changeFunctionStartToken(String... startToken) { 119 for (SimpleTokenType type : KNOWN_TOKENS) { 120 if (type.getType() == TokenType.functionStart) { 121 KNOWN_TOKENS.remove(type); 122 } 123 } 124 125 // add in start of list as its a more common token to be used 126 for (String token : startToken) { 127 KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token)); 128 } 129 } 130 131 /** 132 * @see SimpleLanguage#changeFunctionEndToken(String...) 133 */ 134 public static void changeFunctionEndToken(String... endToken) { 135 for (SimpleTokenType type : KNOWN_TOKENS) { 136 if (type.getType() == TokenType.functionEnd) { 137 KNOWN_TOKENS.remove(type); 138 } 139 } 140 141 // add after the start tokens 142 int pos = 0; 143 for (SimpleTokenType type : KNOWN_TOKENS) { 144 if (type.getType() == TokenType.functionStart) { 145 pos++; 146 } 147 } 148 149 // add after function start of list as its a more common token to be used 150 for (String token : endToken) { 151 KNOWN_TOKENS.add(pos, new SimpleTokenType(TokenType.functionEnd, token)); 152 } 153 } 154 155 /** 156 * Create the next token 157 * 158 * @param expression the input expression 159 * @param index the current index 160 * @param allowEscape whether to allow escapes 161 * @param filter defines the accepted token types to be returned (character is always used as fallback) 162 * @return the created token, will always return a token 163 */ 164 public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) { 165 return doNextToken(expression, index, allowEscape, filter); 166 } 167 168 /** 169 * Create the next token 170 * 171 * @param expression the input expression 172 * @param index the current index 173 * @param allowEscape whether to allow escapes 174 * @return the created token, will always return a token 175 */ 176 public static SimpleToken nextToken(String expression, int index, boolean allowEscape) { 177 return doNextToken(expression, index, allowEscape); 178 } 179 180 private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) { 181 182 boolean numericAllowed = acceptType(TokenType.numericValue, filters); 183 if (numericAllowed) { 184 // is it a numeric value 185 StringBuilder sb = new StringBuilder(); 186 boolean digit = true; 187 while (digit && index < expression.length()) { 188 digit = Character.isDigit(expression.charAt(index)); 189 if (digit) { 190 char ch = expression.charAt(index); 191 sb.append(ch); 192 index++; 193 continue; 194 } 195 // is it a dot or comma as part of a floating point number 196 boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index); 197 if (decimalSeparator && sb.length() > 0) { 198 char ch = expression.charAt(index); 199 sb.append(ch); 200 index++; 201 // assume its still a digit 202 digit = true; 203 continue; 204 } 205 } 206 if (sb.length() > 0) { 207 return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index); 208 } 209 } 210 211 boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters); 212 if (escapeAllowed) { 213 StringBuilder sb = new StringBuilder(); 214 char ch = expression.charAt(index); 215 boolean escaped = '\\' == ch; 216 if (escaped && index < expression.length() - 1) { 217 // grab next character to escape 218 char next = expression.charAt(++index); 219 // special for new line, tabs and carriage return 220 boolean special = false; 221 if ('n' == next) { 222 sb.append("\n"); 223 special = true; 224 } else if ('t' == next) { 225 sb.append("\t"); 226 special = true; 227 } else if ('r' == next) { 228 sb.append("\r"); 229 special = true; 230 } else if ('}' == next) { 231 sb.append("}"); 232 special = true; 233 } else { 234 // not special just a regular character 235 sb.append(ch); 236 } 237 238 // force 2 as length if special 239 return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1); 240 } 241 } 242 243 // it could be any of the known tokens 244 String text = expression.substring(index); 245 for (SimpleTokenType token : KNOWN_TOKENS) { 246 if (acceptType(token.getType(), filters)) { 247 if (acceptToken(token, text, expression, index)) { 248 return new SimpleToken(token, index); 249 } 250 } 251 } 252 253 // fallback and create a character token 254 char ch = expression.charAt(index); 255 SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index); 256 return token; 257 } 258 259 private static boolean acceptType(TokenType type, TokenType... filters) { 260 if (filters == null || filters.length == 0) { 261 return true; 262 } 263 for (TokenType filter : filters) { 264 if (type == filter) { 265 return true; 266 } 267 } 268 return false; 269 } 270 271 private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) { 272 if (token.isUnary() && text.startsWith(token.getValue())) { 273 SimpleTokenType functionEndToken = getFunctionEndToken(); 274 if (functionEndToken != null) { 275 int endLen = functionEndToken.getValue().length(); 276 277 // special check for unary as the previous must be a function end, and the next a whitespace 278 // to ensure unary operators is only applied on functions as intended 279 int len = token.getValue().length(); 280 281 String previous = ""; 282 if (index - endLen >= 0) { 283 previous = expression.substring(index - endLen, index); 284 } 285 String after = text.substring(len); 286 boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" "); 287 boolean functionEnd = previous.equals(functionEndToken.getValue()); 288 return functionEnd && whiteSpace; 289 } 290 } 291 292 return text.startsWith(token.getValue()); 293 } 294 295 private static SimpleTokenType getFunctionEndToken() { 296 for (SimpleTokenType token : KNOWN_TOKENS) { 297 if (token.isFunctionEnd()) { 298 return token; 299 } 300 } 301 return null; 302 } 303 304}