Text.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.ecm.utils.text;

import java.io.UnsupportedEncodingException;
import java.util.BitSet;

/**
 * Text related utilities: a configurable URL-style escaping of strings and helpers that turn
 * arbitrary text into node names by escaping or replacing problematic characters.
 * <p>
 * Several methods delegate to, or use tables from,
 * <code>org.exoplatform.services.jcr.util.Text</code>.
 */
public class Text {

  /** Characters that {@link #convertJcrChars(String)} replaces with '-'. */
  private static final String SPECIAL_CHARACTERS = "&#*@\'\"|.\t\r\n$\\><:";

  /**
   * Does an URL encoding of the <code>string</code> using the <code>escape</code> character,
   * without any extra unescaped characters. Equivalent to calling
   * {@link #escape(String, char, boolean, String)} with an empty <code>extraCharacters</code>.
   *
   * @param string
   *          the string to encode.
   * @param escape
   *          the escape character.
   * @param isPath
   *          if <code>true</code>, the string is treated as path
   * @return the escaped string
   * @throws NullPointerException
   *           if <code>string</code> is <code>null</code>.
   */
  public static String escape(String string, char escape, boolean isPath) {
    return escape(string, escape, isPath, "");
  }

  /**
   * Does an URL encoding of the <code>string</code> using the <code>escape</code> character. The
   * string is converted to its UTF-8 bytes; every byte that is not allowed is written as the
   * escape character followed by two hexadecimal digits taken from
   * <code>org.exoplatform.services.jcr.util.Text.hexTable</code>.
   * <p>
   * The bytes that are copied unchanged are those contained in
   * <code>org.exoplatform.services.jcr.util.Text.URISave</code> (described by the original
   * documentation as the 'unreserved' characters of section 2.3 of the 'URI generic syntax' RFC
   * 2396), plus those of <code>URISaveEx</code> when <code>isPath</code> is <code>true</code>
   * (described as additionally ignoring the slash '/'), plus the given
   * <code>extraCharacters</code>. The escape character itself is always encoded.
   *
   * @param string
   *          the string to encode.
   * @param escape
   *          the escape character.
   * @param isPath
   *          if <code>true</code>, the string is treated as path
   * @param extraCharacters
   *          the extra characters that will not be encoded. Only ASCII characters are honoured,
   *          because the comparison is done on single UTF-8 bytes; <code>null</code> is treated
   *          like an empty string.
   * @return the escaped string
   * @throws NullPointerException
   *           if <code>string</code> is <code>null</code>.
   */
  public static String escape(String string, char escape, boolean isPath, String extraCharacters) {
    // Build one set holding every byte value that may be copied verbatim.
    BitSet allowed = (BitSet) org.exoplatform.services.jcr.util.Text.URISave.clone();
    if (isPath) {
      allowed.or(org.exoplatform.services.jcr.util.Text.URISaveEx);
    }
    if (extraCharacters != null) {
      for (int i = 0; i < extraCharacters.length(); i++) {
        char extra = extraCharacters.charAt(i);
        // A non-ASCII character never appears as a single UTF-8 byte. Accepting it here would
        // let one byte of an unrelated multi-byte sequence through unescaped and corrupt the output.
        if (extra < 0x80) {
          allowed.set(extra);
        }
      }
    }
    // The escape character must always be encoded, even when listed as allowed.
    allowed.clear(escape);

    byte[] bytes;
    try {
      bytes = string.getBytes("utf-8");
    } catch (UnsupportedEncodingException e) {
      // Every Java platform is required to support UTF-8, so this is not expected to happen.
      InternalError error = new InternalError(e.toString());
      error.initCause(e);
      throw error;
    }

    StringBuilder out = new StringBuilder(bytes.length);
    for (int i = 0; i < bytes.length; i++) {
      int c = bytes[i] & 0xff;
      if (allowed.get(c)) {
        out.append((char) c);
      } else {
        out.append(escape);
        out.append(org.exoplatform.services.jcr.util.Text.hexTable[(c >> 4) & 0x0f]);
        out.append(org.exoplatform.services.jcr.util.Text.hexTable[c & 0x0f]);
      }
    }
    return out.toString();
  }

  /**
   * Escapes selected characters of a name. Each escaped character is written as a percent sign
   * followed by its two-digit upper-case hexadecimal code. Use
   * {@link #unescapeIllegalJcrChars(String)} for decoding.
   * <p>
   * The following characters are escaped:
   * <ul>
   * <li>{@code &}, {@code #}, the asterisk, {@code '}, {@code "}, {@code |}, {@code \},
   * {@code >} and {@code <};</li>
   * <li>tab, carriage return and line feed;</li>
   * <li>{@code .} when the whole name is shorter than three characters;</li>
   * <li>a space when it is the first or the last character of the name.</li>
   * </ul>
   * All other characters, including the percent sign, the slash, the colon and the square
   * brackets, are copied unchanged.
   * <p>
   * NOTE(review): because a literal percent sign is not escaped, an input that already contains
   * a percent sign followed by two hexadecimal digits presumably does not survive an
   * escape/unescape round trip unchanged; confirm against the decoder before relying on it.
   *
   * @param name
   *          the name to escape
   * @return the escaped name, or an empty string if <code>name</code> is <code>null</code> or
   *         empty
   */
  public static String escapeIllegalJcrChars(String name) {
    if (name == null || name.length() == 0) {
      return "";
    }
    int length = name.length();
    StringBuilder buffer = new StringBuilder(length * 2);
    for (int i = 0; i < length; i++) {
      char ch = name.charAt(i);
      if (isEscapedInJcrName(ch, i, length)) {
        buffer.append('%');
        buffer.append(Character.toUpperCase(Character.forDigit(ch / 16, 16)));
        buffer.append(Character.toUpperCase(Character.forDigit(ch % 16, 16)));
      } else {
        buffer.append(ch);
      }
    }
    return buffer.toString();
  }

  /**
   * Tells whether {@link #escapeIllegalJcrChars(String)} has to escape the given character.
   *
   * @param ch
   *          the character to test
   * @param index
   *          the position of the character inside the name
   * @param length
   *          the length of the whole name
   * @return <code>true</code> if the character must be escaped
   */
  private static boolean isEscapedInJcrName(char ch, int index, int length) {
    switch (ch) {
      case '&':
      case '#':
      case '*':
      case '\'':
      case '"':
      case '|':
      case '\t':
      case '\r':
      case '\n':
      case '\\':
      case '>':
      case '<':
        return true;
      case '.':
        // Dots are only escaped in names of one or two characters.
        return length < 3;
      case ' ':
        // Inner spaces are kept; only a leading or trailing space is escaped.
        return index == 0 || index == length - 1;
      default:
        return false;
    }
  }

  /**
   * Unescapes previously escaped jcr chars by delegating to
   * <code>org.exoplatform.services.jcr.util.Text.unescapeIllegalJcrChars(String)</code>. <br>
   * Please note that this does not do exactly the same as the url related unescape(String), since
   * it handles the encoding differently.
   *
   * @param name the name to unescape
   * @return the unescaped name
   */
  public static String unescapeIllegalJcrChars(String name) {
    return org.exoplatform.services.jcr.util.Text.unescapeIllegalJcrChars(name);
  }

  /**
   * Replaces every special character of a string with '-'. The special characters are
   * {@code &}, {@code #}, the asterisk, {@code @}, {@code '}, {@code "}, {@code |}, {@code .},
   * tab, carriage return, line feed, {@code $}, {@code \}, {@code >}, {@code <} and {@code :}.
   * All other characters are copied unchanged, so the result has the same length as the input.
   *
   * @param name
   *          the name to convert
   * @return the converted name, or an empty string if <code>name</code> is <code>null</code> or
   *         empty
   */
  public static String convertJcrChars(String name) {
    if (name == null || name.length() == 0) {
      return "";
    }
    // Replacement is one character for one character, so the input length is the exact capacity.
    StringBuilder buffer = new StringBuilder(name.length());
    for (int i = 0; i < name.length(); i++) {
      char ch = name.charAt(i);
      buffer.append(SPECIAL_CHARACTERS.indexOf(ch) != -1 ? '-' : ch);
    }
    return buffer.toString();
  }

}