// StringCommonUtils.java
/*
* Copyright (C) 2003-2015 eXo Platform SAS.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.exoplatform.commons.utils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.lang.StringUtils;
/**
 * String helpers: GZIP (de)compression of text and encoding/decoding of ASCII
 * special characters as HTML character references.
 * <p>
 * Created by The eXo Platform SAS Author : eXoPlatform exo@exoplatform.com Dec
 * 15, 2015
 */
public class StringCommonUtils {

  /** Size, in bytes, of the buffers used while inflating GZIP data. */
  private static final int BUFFER_SIZE = 4096;

  /**
   * Charset used for every text/bytes conversion, so that compressed data does
   * not depend on the platform default charset of the JVM that produced it.
   */
  private static final String CHARSET_NAME = "UTF-8";

  public static final String EMPTY_STR = "";

  public static final String SEMICOLON = ";";

  public static final String SPACE = " ";

  /** Prefix of a decimal HTML character reference, e.g. {@code &#47;}. */
  public static final String AMP_NUMBER = "&#";

  /** HTML entity for {@code '<'}. */
  public static final String LESS_THAN = "&lt;";

  /** HTML entity for {@code '>'}. */
  public static final String GREATER_THAN = "&gt;";

  /** HTML entity for the double quote. */
  public static final String QUOT = "&quot;";

  /** HTML entity decoded by this class to a plain {@link #SPACE}. */
  public static final String AMP_SPACE = "&nbsp;";

  /** Hexadecimal HTML character reference for {@code '&'}. */
  public static final String AMP_HEX = "&#x26;";

  /** HTML entity for {@code '&'}. */
  public static final String AMP = "&amp;";

  /** Characters left untouched when encoding or inspecting rich (HTML) content. */
  private static final String HTML_IGNORED_CHARS = "&#<>[]/:?\"=.,*$%()\\+@!^*-}{;`~_";

  /** Every token understood by the decoder, in the order the replacements are applied. */
  private static final List<String> TOKENS = new ArrayList<String>();

  /** Maps each entry of {@link #TOKENS} to the text it decodes to. */
  private static final Map<String, String> CHARCODES = new HashMap<String, String>();

  /** Entity tokens that {@link #decodeSpecialCharToHTMLnumberIgnore(String)} leaves encoded. */
  private static final List<String> IGNORE_LESS_THAN_AND_GREATER_THAN = Arrays.asList(LESS_THAN, GREATER_THAN, AMP);

  /*
   * Boundaries of the ASCII ranges that contain special characters. Each pair
   * (upper, lower) is an exclusive range, e.g. between ' '(32) and '0'(48):
   * '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/'
   * See: http://www.ascii.cl/htmlcodes.htm
   */
  // '0', ' ', 'A', '9', 'a', 'Z', DEL(127), 'z', '\''
  private static final int[] CHAR_CODES = new int[] { 48, 32, 65, 57, 97, 90, 127, 122, 39 };

  static {
    // Tokens by HTML (decimal) code. Every special character lies strictly between
    // ' ' and DEL, so there is no need to scan the whole Unicode range.
    for (int code = CHAR_CODES[1] + 1; code < CHAR_CODES[6]; code++) {
      if (isSpecialCode(code)) {
        register(AMP_NUMBER + code + SEMICOLON, String.valueOf((char) code));
      }
    }
    // Tokens by entity code. They come after the numeric ones: the decoder applies
    // the tokens sequentially and callers may rely on that order.
    register(LESS_THAN, "<");
    register(GREATER_THAN, ">");
    register(QUOT, "\"");
    register(AMP_SPACE, SPACE);
    register(AMP_HEX, "&");
    register(AMP, "&");
  }

  /** Adds a token and its decoded text to the decoder tables. */
  private static void register(String token, String decoded) {
    TOKENS.add(token);
    CHARCODES.put(token, decoded);
  }

  /** Tells whether the code lies in one of the special (non alphanumeric) ASCII ranges. */
  private static boolean isSpecialCode(int code) {
    return code > CHAR_CODES[1] && code < CHAR_CODES[0]
        || code > CHAR_CODES[3] && code < CHAR_CODES[2]
        || code > CHAR_CODES[5] && code < CHAR_CODES[4]
        || code > CHAR_CODES[7] && code < CHAR_CODES[6];
  }

  /** Tells whether the string is null, empty or made only of whitespace characters. */
  private static boolean isBlank(String s) {
    if (s == null) {
      return true;
    }
    for (int i = 0; i < s.length(); i++) {
      if (!Character.isWhitespace(s.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Compresses a string with GZIP.
   *
   * @param string the text to compress, encoded as UTF-8; must not be null
   * @return a stream over the compressed bytes
   * @throws IOException if the compression fails
   */
  public static InputStream compress(String string) throws IOException {
    ByteArrayOutputStream os = new ByteArrayOutputStream(string.length());
    GZIPOutputStream gos = new GZIPOutputStream(os);
    try {
      gos.write(string.getBytes(CHARSET_NAME));
    } finally {
      // Closing writes the GZIP trailer and releases the deflater, even when write() failed.
      gos.close();
    }
    return new ByteArrayInputStream(os.toByteArray());
  }

  /**
   * Inflates GZIP data and returns it as text.
   *
   * @param is the stream of GZIP data; it is always closed by this method
   * @return the decompressed content decoded as UTF-8
   * @throws IOException if the stream cannot be read or is not valid GZIP data
   */
  public static String decompress(InputStream is) throws IOException {
    try {
      // The constructor reads the GZIP header and may throw: the outer finally
      // still closes the caller's stream in that case.
      GZIPInputStream gis = new GZIPInputStream(is, BUFFER_SIZE);
      try {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] data = new byte[BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = gis.read(data)) != -1) {
          buffer.write(data, 0, bytesRead);
        }
        return buffer.toString(CHARSET_NAME);
      } finally {
        gis.close();
      }
    } finally {
      is.close();
    }
  }

  /**
   * Encode special character, use for input search.
   * <p>
   * When the term is made only of special characters, all of them are encoded;
   * otherwise a short list of special characters is left untouched.
   *
   * @param s the string input
   * @return the encoded string, or an empty string when the input is blank
   */
  public static String encodeSpecialCharInSearchTerm(String s) {
    String specialChars = "&#<>[]/:?\"'=.,*$%()\\+@!^*-}{;`~_";
    String charIgnore = "&</>!#:?=.,+;~`_";
    if (!isBlank(s)) {
      boolean onlySpecial = true;
      for (int i = 0; i < s.length() && onlySpecial; i++) {
        onlySpecial = specialChars.indexOf(s.charAt(i)) >= 0;
      }
      if (onlySpecial) {
        charIgnore = EMPTY_STR;
      }
    }
    return encodeSpecialCharToHTMLnumber(s, charIgnore, true);
  }

  /**
   * Encode special character, use for input title or name of the object.
   * Runs of spaces are collapsed to a single space before encoding.
   *
   * @param s the string input
   * @return the encoded string, or an empty string when the input is blank
   */
  public static String encodeSpecialCharForSimpleInput(String s) {
    /*
     * remove double space
     */
    if (!isBlank(s)) {
      String doubleSpace = SPACE + SPACE;
      // Each pass strictly shortens the string, so the loop always terminates.
      while (s.indexOf(doubleSpace) >= 0) {
        s = s.replace(doubleSpace, SPACE).trim();
      }
    }
    /*
     * charIgnore: Some special characters we ignore
     */
    String charIgnore = "!#:?=.,+;~`_";
    return encodeSpecialCharToHTMLnumber(s, charIgnore, true);
  }

  /**
   * Encode special character, use for input content of object (only apply for input by CKEditer).
   *
   * @param s the string input
   * @return the encoded string, or an empty string when the input is blank
   */
  public static String encodeSpecialCharInHTML(String s) {
    return encodeSpecialCharToHTMLnumber(s, HTML_IGNORED_CHARS, false);
  }

  /**
   * Tells whether the string contains at least one of the special characters
   * that {@link #encodeSpecialCharInHTML(String)} leaves untouched.
   *
   * @param s the string input
   * @return true when such a character is found, false otherwise or when the input is blank
   */
  public static boolean isContainSpecialCharacter(String s) {
    if (isBlank(s)) {
      return false;
    }
    for (int i = 0; i < s.length(); i++) {
      if (HTML_IGNORED_CHARS.indexOf(s.charAt(i)) >= 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Encode special character to html number. Ex: '/' is encoded to {@code &#47;}
   *
   * @param s the string input
   * @param charIgnore the special characters that must not be encoded; null means none
   * @param isTitle when true, {@code '<'} and {@code '>'} are written as the entities
   *          {@link #LESS_THAN} and {@link #GREATER_THAN} instead of decimal codes
   * @return the encoded string, or an empty string when the input is blank
   */
  public static String encodeSpecialCharToHTMLnumber(String s, String charIgnore, boolean isTitle) {
    if (isBlank(s)) {
      return EMPTY_STR;
    }
    String ignored = (charIgnore == null) ? EMPTY_STR : charIgnore;
    StringBuilder builder = new StringBuilder(s.length());
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (ignored.indexOf(c) >= 0 || !isSpecialCode(c)) {
        builder.append(c);
      } else if (isTitle && c == '<') {
        builder.append(LESS_THAN);
      } else if (isTitle && c == '>') {
        builder.append(GREATER_THAN);
      } else {
        builder.append(AMP_NUMBER).append((int) c).append(SEMICOLON);
      }
    }
    return builder.toString();
  }

  /**
   * Decodes the HTML character references produced by the encoders of this class.
   * <p>
   * The known tokens are applied one after the other (decimal codes first, then
   * entities), each one until it no longer occurs in the string.
   *
   * @param s the string input
   * @param lIgnore the tokens that must stay encoded; null means none
   * @return the decoded string, or the input itself when it is blank
   */
  public static String decodeSpecialCharToHTMLnumber(String s, List<String> lIgnore) {
    if (isBlank(s)) {
      return s;
    }
    for (String token : TOKENS) {
      if (lIgnore != null && lIgnore.contains(token)) {
        continue;
      }
      String decoded = CHARCODES.get(token);
      // A token is always longer than its decoded text, so this loop terminates.
      while (s.indexOf(token) >= 0) {
        s = s.replace(token, decoded);
      }
    }
    return s;
  }

  /**
   * Decodes every known HTML character reference of the string.
   *
   * @param s the string input
   * @return the decoded string, or the input itself when it is blank
   */
  public static String decodeSpecialCharToHTMLnumber(String s) {
    return decodeSpecialCharToHTMLnumber(s, new ArrayList<String>());
  }

  /**
   * Decode special chars to HTML number ignore char {@literal Less than '<' and Greater than '>'}
   * <p>
   * Only the entity tokens {@link #LESS_THAN}, {@link #GREATER_THAN} and {@link #AMP}
   * are left encoded; decimal codes are still decoded.
   *
   * @param str the string input
   * @return the decoded string, or the input itself when it is blank
   */
  public static String decodeSpecialCharToHTMLnumberIgnore(String str) {
    return decodeSpecialCharToHTMLnumber(str, IGNORE_LESS_THAN_AND_GREATER_THAN);
  }
}