1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.exoplatform.ecm.utils.text;
18
19 import java.io.UnsupportedEncodingException;
20 import java.util.BitSet;
21
/**
 * Text-related utilities: URL-style escaping of strings and escaping, unescaping, or replacing of
 * characters in names.
 */
25 public class Text {
26
27 private static final String SPECIAL_CHARACTERS = "&#*@\'\"|.\t\r\n$&\\><:";
28
29 public static String escape(String string, char escape, boolean isPath) {
30 return escape(string, escape, isPath, "");
31 }
32
33 /**
34 * Does an URL encoding of the <code>string</code> using the <code>escape</code> character. The
35 * characters that don't need encoding are those defined 'unreserved' in section 2.3 of the 'URI
36 * generic syntax' RFC 2396, but without the escape character. If <code>isPath</code> is
37 * <code>true</code>, additionally the slash '/' is ignored, too.
38 *
39 * @param string
40 * the string to encode.
41 * @param escape
42 * the escape character.
43 * @param isPath
44 * if <code>true</code>, the string is treated as path
45 * @param extraCharacters
46 * the extra characters that will not be encoded.
47 * @return the escaped string
48 * @throws NullPointerException
49 * if <code>string</code> is <code>null</code>.
50 */
51 public static String escape(String string, char escape, boolean isPath, String extraCharacters) {
52 try {
53 BitSet validChars =
54 isPath ? org.exoplatform.services.jcr.util.Text.URISaveEx : org.exoplatform.services.jcr.util.Text.URISave;
55 BitSet extraBitSet = (BitSet)org.exoplatform.services.jcr.util.Text.URISave.clone();
56 for (char c : extraCharacters.toCharArray()) {
57 extraBitSet.set(c);
58 }
59 byte[] bytes = string.getBytes("utf-8");
60 StringBuffer out = new StringBuffer(bytes.length);
61 for (int i = 0; i < bytes.length; i++) {
62 int c = bytes[i] & 0xff;
63 if ((validChars.get(c) || extraBitSet.get(c))&& c != escape) {
64 out.append((char) c);
65 } else {
66 out.append(escape);
67 out.append(org.exoplatform.services.jcr.util.Text.hexTable[(c >> 4) & 0x0f]);
68 out.append(org.exoplatform.services.jcr.util.Text.hexTable[(c) & 0x0f]);
69 }
70 }
71 return out.toString();
72 } catch (UnsupportedEncodingException e) {
73 throw new InternalError(e.toString());
74 }
75 }
76
77 /**
78 * Escapes all illegal JCR name characters of a string. The encoding is loosely modeled after URI
79 * encoding, but only encodes the characters it absolutely needs to in order to make the resulting
80 * string a valid JCR name. Use {@link #unescapeIllegalJcrChars(String)} for decoding. <br> QName
81 * EBNF:<br>
82 * {@code
83 * <xmp> simplename ::= onecharsimplename | twocharsimplename | threeormorecharname
84 * onecharsimplename ::= (* Any Unicode character except: '.', '/', ':', '[', ']', '*', ''', '"',
85 * '|' or any whitespace character *) twocharsimplename ::= '.' onecharsimplename |
86 * onecharsimplename '.' | onecharsimplename onecharsimplename threeormorecharname ::= nonspace
87 * string nonspace string ::= char | string char char ::= nonspace | ' ' nonspace ::= (* Any
88 * Unicode character except: '/', ':', '[', ']', '*', ''', '"', '|' or any whitespace character *)
89 * </xmp>
90 * }
91 *
92 * @param name
93 * the name to escape
94 * @return the escaped name
95 */
96 public static String escapeIllegalJcrChars(String name) {
97 if (name == null || name.length() == 0) {
98 return "";
99 }
100 StringBuffer buffer = new StringBuffer(name.length() * 2);
101 for (int i = 0; i < name.length(); i++) {
102 char ch = name.charAt(i);
103 if (ch == '&' || ch == '#'
104 || ch == '*' || ch == '\'' || ch == '"' || ch == '|'
105 || (ch == '.' && name.length() < 3) || (ch == ' ' && (i == 0 || i == name.length() - 1))
106 || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\\' || ch == '>' || ch == '<') {
107 buffer.append('%');
108 buffer.append(Character.toUpperCase(Character.forDigit(ch / 16, 16)));
109 buffer.append(Character.toUpperCase(Character.forDigit(ch % 16, 16)));
110 } else {
111 buffer.append(ch);
112 }
113 }
114 return buffer.toString();
115 }
116
117 /**
118 * Unescapes previously escaped jcr chars. <br> Please note, that this does not exactly the same
119 * as the url related unescape(String), since it handles the encoding differently.
120 *
121 * @param name the name to unescape
122 * @return the unescaped name
123 */
124 public static String unescapeIllegalJcrChars(String name) {
125 return org.exoplatform.services.jcr.util.Text.unescapeIllegalJcrChars(name);
126 }
127
128 /**
129 * converts all illegal JCR name characters of a string to '-'
130 *
131 * @param name
132 * the name to escape
133 * @return the converted name
134 */
135 public static String convertJcrChars(String name) {
136 if (name == null || name.length() == 0) {
137 return "";
138 }
139 StringBuffer buffer = new StringBuffer(name.length() * 2);
140 for (int i = 0; i < name.length(); i++) {
141 char ch = name.charAt(i);
142 if (SPECIAL_CHARACTERS.indexOf(ch) != -1){
143 buffer.append('-');
144 } else {
145 buffer.append(ch);
146 }
147 }
148 return buffer.toString();
149 }
150
151 }