// JcrSearchDriver.java

package org.exoplatform.commons.search.driver.jcr;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.exoplatform.commons.api.search.SearchService;
import org.exoplatform.commons.api.search.SearchServiceConnector;
import org.exoplatform.commons.api.search.data.SearchContext;
import org.exoplatform.commons.api.search.data.SearchResult;
import org.exoplatform.commons.search.es.ElasticSearchServiceConnector;
import org.exoplatform.commons.search.service.UnifiedSearchService;
import org.exoplatform.container.xml.InitParams;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;

/**
 * Search driver that forwards a unified-search query to every registered
 * {@link SearchServiceConnector} and gathers the results per search type.
 *
 * <p>Before a query is handed to the connectors it is:
 * <ol>
 *   <li>stripped of the characters configured through the
 *       {@code exo.search.excluded-characters} init-param (each one becomes a space);</li>
 *   <li>split into double-quoted phrases and single terms;</li>
 *   <li>re-assembled, with a fuzzy suffix appended to single terms when fuzzy search is
 *       enabled (see {@link #repeat(String, Collection, String)}).</li>
 * </ol>
 *
 * <p>Fuzzy search is driven by two system properties:
 * {@code unified-search.engine.fuzzy.enable} (treated as {@code true} when unset) and
 * {@code unified-search.engine.fuzzy.similarity} (defaults to {@code 0.5}).
 */
public class JcrSearchDriver extends SearchService {
  private static final Log LOG = ExoLogger.getLogger(JcrSearchDriver.class);

  /** Init-param holding the characters that must be removed from incoming queries. */
  private static final String EXCLUDED_CHARACTERS_PARAM = "exo.search.excluded-characters";

  /** System property switching fuzzy search on or off. */
  private static final String FUZZY_ENABLE_PROPERTY = "unified-search.engine.fuzzy.enable";

  /** System property holding the fuzzy similarity. */
  private static final String FUZZY_SIMILARITY_PROPERTY = "unified-search.engine.fuzzy.similarity";

  /** Similarity used when none is configured or the configured one is unusable. */
  private static final double DEFAULT_FUZZY_SIMILARITY = 0.5;

  /** Suffix substituted for a user-typed '~' tail that is not a recognised manual fuzzy factor. */
  private static final String DEFAULT_FUZZY_SUFFIX = "~0.5";

  /** Format wrapping a phrase in backslash-escaped double quotes. */
  private static final String QUOTED_FORMAT = "\\\"%s\\\"";

  /** Keys of the map produced by {@link #parse(String)}. */
  private static final String QUOTED = "quoted";
  private static final String SINGLE = "single";

  /** A non-empty run of characters enclosed in double quotes; group 1 is the content. */
  private static final Pattern QUOTED_TERM = Pattern.compile("\"([^\"]+)\"");

  private static final Pattern WHITESPACE = Pattern.compile("\\s+");

  /** Any character, then '~', then "0.", then one digit (for instance "word~0.8"). */
  private static final Pattern MANUAL_FUZZY = Pattern.compile(".[~][0]([\\.][0-9])");

  /** Excluded characters, escaped for use inside a regex character class; may be empty. */
  private final String specialCharacters;

  /** Fuzzy suffix appended to single terms for non-Elasticsearch connectors. */
  private final String fuzzySyntax;

  /** Fuzzy suffix appended to single terms for {@link ElasticSearchServiceConnector}. */
  private final String esFuzzySyntax;

  /**
   * Reads the excluded characters from the init-params and computes both fuzzy suffixes once.
   *
   * @param initParams component configuration; when it is {@code null} or does not contain
   *                   {@code exo.search.excluded-characters}, no character is excluded
   */
  public JcrSearchDriver(InitParams initParams) {
    Object configured = initParams == null ? null : initParams.get(EXCLUDED_CHARACTERS_PARAM);
    String excluded = configured == null ? null : configured.toString();
    if (excluded == null) {
      LOG.warn("Init-param '" + EXCLUDED_CHARACTERS_PARAM
          + "' is not set: no character will be excluded from search queries");
      this.specialCharacters = "";
    } else {
      // Escape the characters that are special inside the character class built later on.
      this.specialCharacters = excluded.replace(".", "\\.").replace("-", "\\-");
    }
    this.fuzzySyntax = getFuzzySyntax(false);
    this.esFuzzySyntax = getFuzzySyntax(true);
  }

  /**
   * Runs the query against every enabled connector whose search type was requested.
   *
   * <p>A connector that throws is logged and skipped, so the other connectors still
   * contribute their results.
   *
   * @param context search context, passed through to the connectors
   * @param query   raw user query; {@code null} or blank yields an empty result
   * @param sites   sites to search in, passed through to the connectors
   * @param types   requested search types, or a collection containing {@code "all"};
   *                {@code null} or empty yields an empty result
   * @param offset  passed through to the connectors
   * @param limit   passed through to the connectors
   * @param sort    passed through to the connectors
   * @param order   passed through to the connectors
   * @return results keyed by connector search type; never {@code null}
   */
  @Override
  public Map<String, Collection<SearchResult>> search(SearchContext context, String query, Collection<String> sites, Collection<String> types, int offset, int limit, String sort, String order) {
    Map<String, Collection<SearchResult>> results = new HashMap<String, Collection<SearchResult>>();

    // Guard first: sanitising a null query would otherwise fail with a NullPointerException.
    if (StringUtils.isBlank(query)) {
      return results;
    }
    query = replaceSpecialCharacters(query);
    // The query may consist of excluded characters only, which have now become spaces.
    if (StringUtils.isBlank(query)) {
      return results;
    }
    if (types == null || types.isEmpty()) {
      return results;
    }

    Map<String, List<String>> terms = parse(query); // split into quoted phrases and single terms
    boolean allTypes = types.contains("all");
    List<String> enabledTypes = UnifiedSearchService.getEnabledSearchTypes();

    for (SearchServiceConnector connector : this.getConnectors()) {
      String searchType = connector.getSearchType();
      if (!enabledTypes.contains(searchType)) {
        continue; // ignore disabled types
      }
      if (!allTypes && !types.contains(searchType)) {
        continue; // search requested types only
      }
      LOG.debug("\n[UNIFIED SEARCH]: connector = " + connector.getClass().getSimpleName());
      try {
        // Each single term gets a fuzzy suffix; the suffix differs for Elasticsearch.
        String fuzzy = (connector instanceof ElasticSearchServiceConnector) ? esFuzzySyntax : fuzzySyntax;
        String connectorQuery = repeat(QUOTED_FORMAT, terms.get(QUOTED), " ")
            + " " + repeat("%s" + fuzzy, terms.get(SINGLE), " ");
        results.put(searchType, connector.search(context, connectorQuery, sites, offset, limit, sort, order));
      } catch (Exception e) {
        // Skip this connector and keep searching with the others.
        LOG.error(e.getMessage(), e);
      }
    }
    return results;
  }

  /**
   * Splits a query into double-quoted phrases and whitespace-separated single terms.
   *
   * @param input sanitised query, not {@code null}
   * @return a map with two entries: {@code "quoted"} (phrase contents without the quotes) and
   *         {@code "single"} (remaining terms, stray double quotes removed); both lists may
   *         be empty but are never {@code null}
   */
  private static Map<String, List<String>> parse(String input) {
    Map<String, List<String>> terms = new HashMap<String, List<String>>();

    List<String> quoted = new ArrayList<String>();
    Matcher matcher = QUOTED_TERM.matcher(input);
    while (matcher.find()) {
      quoted.add(matcher.group(1));
    }
    terms.put(QUOTED, quoted);

    // Drop the phrases, then any unbalanced double quote that is left over.
    String remain = matcher.replaceAll("").replace("\"", "").trim();
    List<String> single = new ArrayList<String>();
    if (!remain.isEmpty()) {
      single.addAll(Arrays.asList(WHITESPACE.split(remain)));
    }
    terms.put(SINGLE, single);

    return terms;
  }

  /**
   * Renders every term and joins the results with the delimiter.
   *
   * <p>How a term is rendered:
   * <ul>
   *   <li>fuzzy search disabled: the term with every '~' removed;</li>
   *   <li>the term carries a manual fuzzy factor (see {@link #isFuzzyManual(String)}):
   *       the term unchanged;</li>
   *   <li>the term contains another '~': everything from the first '~' on is replaced
   *       by {@code ~0.5};</li>
   *   <li>otherwise: {@code String.format(format, term)}.</li>
   * </ul>
   *
   * <p>NOTE(review): {@code format} is applied in the last case only, so a quoted phrase is
   * emitted without its quotes in the three other cases. This is kept as is; confirm
   * whether it is intended.
   *
   * @param format    format with a single {@code %s} placeholder for the term
   * @param strArr    terms to render, not {@code null}
   * @param delimiter separator inserted between two rendered terms
   * @return the joined string, empty when there is no term
   */
  private static String repeat(String format, Collection<String> strArr, String delimiter) {
    // Read the system property once instead of on every iteration.
    boolean fuzzyEnabled = isEnableFuzzySearch();
    StringBuilder sb = new StringBuilder();
    String delim = "";
    for (String term : strArr) {
      String rendered;
      if (!fuzzyEnabled) {
        rendered = term.replace("~", "");
      } else if (isFuzzyManual(term)) {
        rendered = term;
      } else {
        int tilde = term.indexOf('~');
        rendered = tilde >= 0
            ? term.substring(0, tilde) + DEFAULT_FUZZY_SUFFIX
            : String.format(format, term);
      }
      sb.append(delim).append(rendered);
      delim = delimiter;
    }
    return sb.toString();
  }

  /**
   * Builds the fuzzy suffix appended to single terms.
   *
   * @param esFuzzyExpression {@code true} to convert the similarity with
   *                          {@code 2 - (int) (similarity * 2)}, which yields 1 or 2 for a
   *                          similarity in [0, 1)
   * @return {@code "~"} followed by the value printed as a double (for instance
   *         {@code "~0.5"} or {@code "~1.0"}), or an empty string when fuzzy search is disabled
   */
  private static String getFuzzySyntax(boolean esFuzzyExpression) {
    if (!isEnableFuzzySearch()) {
      return "";
    }

    double similarity = DEFAULT_FUZZY_SIMILARITY;
    String configured = System.getProperty(FUZZY_SIMILARITY_PROPERTY);
    if (configured != null) {
      try {
        similarity = Double.parseDouble(configured);
      } catch (NumberFormatException ignored) {
        // Unparseable setting: fall back to the default similarity.
        similarity = DEFAULT_FUZZY_SIMILARITY;
      }
    }
    // Written as a negated range test so that NaN is rejected as well.
    if (!(similarity >= 0 && similarity < 1)) {
      similarity = DEFAULT_FUZZY_SIMILARITY;
    }
    if (esFuzzyExpression) {
      similarity = 2 - (int) (similarity * 2);
    }
    return "~" + similarity;
  }

  /**
   * Tells whether the term already carries a fuzzy factor typed by the user, that is some
   * character followed by '~', "0." and one digit.
   *
   * @param input term to inspect, not {@code null}
   * @return {@code true} when such a sequence occurs anywhere in the term
   */
  private static boolean isFuzzyManual(String input) {
    return MANUAL_FUZZY.matcher(input).find();
  }

  /**
   * Tells whether fuzzy search is enabled.
   *
   * @return {@code true} when {@code unified-search.engine.fuzzy.enable} is unset or parses
   *         to {@code true}
   */
  private static boolean isEnableFuzzySearch() {
    String fuzzyEnable = System.getProperty(FUZZY_ENABLE_PROPERTY);
    return fuzzyEnable == null || Boolean.parseBoolean(fuzzyEnable);
  }

  /**
   * Replaces every excluded character of the query with a space.
   *
   * @param query query to sanitise, not {@code null}
   * @return the sanitised query, or the query itself when no character is excluded
   */
  private String replaceSpecialCharacters(String query) {
    if (specialCharacters.isEmpty()) {
      // "[]" is not a valid regular expression; there is nothing to replace anyway.
      return query;
    }
    return query.replaceAll("[" + specialCharacters + "]", " ");
  }
}