001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.camel.util;
018
019 import java.util.BitSet;
020
021 /**
022 * Encoder for unsafe URI characters.
023 * <p/>
024 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
025 */
public final class UnsafeUriCharactersEncoder {

    /** Upper-case hex digits used when writing a {@code %XX} escape. */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
                                              'D', 'E', 'F'};

    /** ASCII characters that are replaced by a {@code %XX} escape; indexed by character code. */
    private static final BitSet UNSAFE_CHARACTERS = new BitSet(256);

    static {
        UNSAFE_CHARACTERS.set(' ');
        UNSAFE_CHARACTERS.set('"');
        UNSAFE_CHARACTERS.set('<');
        UNSAFE_CHARACTERS.set('>');
        UNSAFE_CHARACTERS.set('#');
        UNSAFE_CHARACTERS.set('%');
        UNSAFE_CHARACTERS.set('{');
        UNSAFE_CHARACTERS.set('}');
        UNSAFE_CHARACTERS.set('|');
        UNSAFE_CHARACTERS.set('\\');
        UNSAFE_CHARACTERS.set('^');
        UNSAFE_CHARACTERS.set('~');
        UNSAFE_CHARACTERS.set('[');
        UNSAFE_CHARACTERS.set(']');
        UNSAFE_CHARACTERS.set('`');
    }

    private UnsafeUriCharactersEncoder() {
        // util class
    }

    /**
     * Replaces every unsafe ASCII character in the given text with its {@code %XX} escape
     * (upper-case hex digits).
     * <p/>
     * A {@code %} that is directly followed by two hex digits (upper or lower case) is taken to
     * be an existing escape sequence and is copied unchanged; any other {@code %} is written
     * as {@code %25}. Characters outside the ASCII range are copied unchanged.
     *
     * @param s the text to encode, may be <tt>null</tt> or empty
     * @return the encoded text; the very same instance as <tt>s</tt> when it is <tt>null</tt>,
     *         empty or contains no unsafe character at all
     */
    public static String encode(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }

        // First check whether we actually need to encode
        final int length = s.length();
        int firstUnsafe = 0;
        while (firstUnsafe < length && !isUnsafe(s.charAt(firstUnsafe))) {
            firstUnsafe++;
        }
        if (firstUnsafe == length) {
            // nothing to encode so hand back the input as is
            return s;
        }

        // okay there are some unsafe characters so we do need to encode
        // see details at: http://en.wikipedia.org/wiki/Url_encode
        StringBuilder sb = new StringBuilder(length + 16);
        // everything in front of the first unsafe character is known to be safe
        sb.append(s, 0, firstUnsafe);
        for (int i = firstUnsafe; i < length; i++) {
            char ch = s.charAt(i);
            if (!isUnsafe(ch)) {
                sb.append(ch);
            } else if (ch == '%' && isEscapeSequenceAt(s, i)) {
                // its already percent encoded (%XX) so just append as is,
                // the two hex digits following are safe characters and are copied by the next iterations
                sb.append(ch);
            } else {
                // must escape then, as its an unsafe character
                appendEscape(sb, ch);
            }
        }
        return sb.toString();
    }

    /**
     * Whether the character is an ASCII character flagged as unsafe.
     */
    private static boolean isUnsafe(char ch) {
        // just deal with the ascii characters, anything else is never flagged
        return ch < 128 && UNSAFE_CHARACTERS.get(ch);
    }

    /**
     * Whether the two characters following <tt>percentIndex</tt> both exist and are hex digits.
     */
    private static boolean isEscapeSequenceAt(String s, int percentIndex) {
        return percentIndex + 2 < s.length()
            && isHexDigit(s.charAt(percentIndex + 1))
            && isHexDigit(s.charAt(percentIndex + 2));
    }

    /**
     * Appends the {@code %XX} escape of the given character, which must be in the ASCII range.
     */
    private static void appendEscape(StringBuilder sb, char ch) {
        sb.append('%');
        sb.append(HEX_DIGITS[(ch >> 4) & 0x0f]);
        sb.append(HEX_DIGITS[ch & 0x0f]);
    }

    /**
     * Whether the character is a hex digit; both upper and lower case letters are accepted
     * as the hex digits of a percent escape are case-insensitive.
     */
    private static boolean isHexDigit(char ch) {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'A' && ch <= 'F')
            || (ch >= 'a' && ch <= 'f');
    }

}