View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.exoplatform.ecm.utils.text;
18  
19  import java.io.UnsupportedEncodingException;
20  import java.util.BitSet;
21  
22  /**
23   * This Class provides some text related utilities
24   */
25  public class Text {
26    
27    private static final String SPECIAL_CHARACTERS = "&#*@\'\"|.\t\r\n$&\\><:";
28  
29    public static String escape(String string, char escape, boolean isPath) {
30      return escape(string, escape, isPath, "");
31    }
32  
33    /**
34     * Does an URL encoding of the <code>string</code> using the <code>escape</code> character. The
35     * characters that don't need encoding are those defined 'unreserved' in section 2.3 of the 'URI
36     * generic syntax' RFC 2396, but without the escape character. If <code>isPath</code> is
37     * <code>true</code>, additionally the slash '/' is ignored, too.
38     *
39     * @param string
40     *          the string to encode.
41     * @param escape
42     *          the escape character.
43     * @param isPath
44     *          if <code>true</code>, the string is treated as path
45     * @param extraCharacters
46     *          the extra characters that will not be encoded.
47     * @return the escaped string
48     * @throws NullPointerException
49     *           if <code>string</code> is <code>null</code>.
50     */
51    public static String escape(String string, char escape, boolean isPath, String extraCharacters) {
52      try {
53        BitSet validChars = 
54            isPath ? org.exoplatform.services.jcr.util.Text.URISaveEx : org.exoplatform.services.jcr.util.Text.URISave;
55        BitSet extraBitSet = (BitSet)org.exoplatform.services.jcr.util.Text.URISave.clone();
56        for (char c : extraCharacters.toCharArray()) {
57          extraBitSet.set(c);
58        }
59        byte[] bytes = string.getBytes("utf-8");
60        StringBuffer out = new StringBuffer(bytes.length);
61        for (int i = 0; i < bytes.length; i++) {
62          int c = bytes[i] & 0xff;
63          if ((validChars.get(c) || extraBitSet.get(c))&& c != escape) {
64            out.append((char) c);
65          } else {
66            out.append(escape);
67            out.append(org.exoplatform.services.jcr.util.Text.hexTable[(c >> 4) & 0x0f]);
68            out.append(org.exoplatform.services.jcr.util.Text.hexTable[(c) & 0x0f]);
69          }
70        }
71        return out.toString();
72      } catch (UnsupportedEncodingException e) {
73        throw new InternalError(e.toString());
74      }
75    }
76  
77    /**
78     * Escapes all illegal JCR name characters of a string. The encoding is loosely modeled after URI
79     * encoding, but only encodes the characters it absolutely needs to in order to make the resulting
80     * string a valid JCR name. Use {@link #unescapeIllegalJcrChars(String)} for decoding. <br> QName
81     * EBNF:<br>
82     * {@code
83     * <xmp> simplename ::= onecharsimplename | twocharsimplename | threeormorecharname
84     * onecharsimplename ::= (* Any Unicode character except: '.', '/', ':', '[', ']', '*', ''', '"',
85     * '|' or any whitespace character *) twocharsimplename ::= '.' onecharsimplename |
86     * onecharsimplename '.' | onecharsimplename onecharsimplename threeormorecharname ::= nonspace
87     * string nonspace string ::= char | string char char ::= nonspace | ' ' nonspace ::= (* Any
88     * Unicode character except: '/', ':', '[', ']', '*', ''', '"', '|' or any whitespace character *)
89     * </xmp>
90     * }
91     *
92     * @param name
93     *          the name to escape
94     * @return the escaped name
95     */
96    public static String escapeIllegalJcrChars(String name) {
97      if (name == null || name.length() == 0) {
98        return "";
99      }
100     StringBuffer buffer = new StringBuffer(name.length() * 2);
101     for (int i = 0; i < name.length(); i++) {
102       char ch = name.charAt(i);
103       if (ch == '&' || ch == '#'
104         || ch == '*' || ch == '\'' || ch == '"' || ch == '|'
105           || (ch == '.' && name.length() < 3) || (ch == ' ' && (i == 0 || i == name.length() - 1))
106           || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\\' || ch == '>' || ch == '<') {
107         buffer.append('%');
108         buffer.append(Character.toUpperCase(Character.forDigit(ch / 16, 16)));
109         buffer.append(Character.toUpperCase(Character.forDigit(ch % 16, 16)));
110       } else {
111         buffer.append(ch);
112       }
113     }
114     return buffer.toString();
115   }
116 
117   /**
118    * Unescapes previously escaped jcr chars. <br> Please note, that this does not exactly the same
119    * as the url related unescape(String), since it handles the encoding differently.
120    *
121    * @param name the name to unescape
122    * @return the unescaped name
123    */
124   public static String unescapeIllegalJcrChars(String name) {
125     return org.exoplatform.services.jcr.util.Text.unescapeIllegalJcrChars(name);
126   }
127   
128   /**
129    * converts all illegal JCR name characters of a string to '-'
130    *
131    * @param name
132    *          the name to escape
133    * @return the converted name
134    */
135   public static String convertJcrChars(String name) {
136     if (name == null || name.length() == 0) {
137       return "";
138     }
139     StringBuffer buffer = new StringBuffer(name.length() * 2);
140     for (int i = 0; i < name.length(); i++) {
141       char ch = name.charAt(i);
142       if (SPECIAL_CHARACTERS.indexOf(ch) != -1){
143         buffer.append('-');
144       } else {
145         buffer.append(ch);
146       }
147     }
148     return buffer.toString();
149   }
150 
151 }