// ElasticSearchServiceConnector.java
/*
* Copyright (C) 2003-2015 eXo Platform SAS.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see http://www.gnu.org/licenses/ .
*/
package org.exoplatform.commons.search.es;
import org.apache.commons.lang.StringUtils;
import org.exoplatform.commons.search.es.client.ElasticSearchingClient;
import org.exoplatform.commons.api.search.SearchServiceConnector;
import org.exoplatform.commons.api.search.data.SearchContext;
import org.exoplatform.commons.api.search.data.SearchResult;
import org.exoplatform.container.xml.InitParams;
import org.exoplatform.container.xml.PropertiesParam;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;
import org.exoplatform.services.security.ConversationState;
import org.exoplatform.services.security.IdentityConstants;
import org.exoplatform.services.security.MembershipEntry;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Search connector that sends a query_string search to ElasticSearch through an
 * {@link ElasticSearchingClient} and converts the returned hits into {@link SearchResult}s.
 * <p>
 * The request body is assembled by hand as a JSON string. Every hit is restricted by a
 * permission filter computed from the current {@link ConversationState} and by a filter on
 * the "sites" field. Most steps (query building, field lists, hit conversion) are protected
 * so that sub-classes can customise them.
 * <p>
 * Created by The eXo Platform SAS
 * Author : Thibault Clement
 * tclement@exoplatform.com
 * 7/30/15
 */
public class ElasticSearchServiceConnector extends SearchServiceConnector {

  private static final Log LOG = ExoLogger.getLogger(ElasticSearchServiceConnector.class);

  public static final String HIGHLIGHT_FRAGMENT_SIZE_PARAM_NAME = "highlightFragmentSize";
  public static final int HIGHLIGHT_FRAGMENT_SIZE_DEFAULT_VALUE = 150;
  public static final String HIGHLIGHT_FRAGMENT_NUMBER_PARAM_NAME = "highlightFragmentNumber";
  public static final int HIGHLIGHT_FRAGMENT_NUMBER_DEFAULT_VALUE = 3;

  /**
   * Characters that are escaped before being handed to query_string.
   * '~' is deliberately absent since it is used for the fuzzy search parameter.
   */
  private static final String QUERY_STRING_RESERVED_CHARACTERS = "+-=&|><!(){}[]^\"*?:\\/";

  private final ElasticSearchingClient client;

  //ES connector information
  //Index is optional: if null, search on all the cluster
  private String index;
  //Type is optional: if null, search on all the index
  private String type;
  private List<String> searchFields;
  // Kept public for backward compatibility with existing callers
  public int highlightFragmentSize;
  public int highlightFragmentNumber;

  //SearchResult information
  private String img;
  private String titleElasticFieldName = "title";
  private String updatedDateElasticFieldName = "lastUpdatedDate";
  // Maps the sort keys accepted by search() to the ES field used for sorting
  private final Map<String, String> sortMapping = new HashMap<>();

  /**
   * Builds the connector from the "constructor.params" properties.
   * <p>
   * Properties read here: "index", "type", "titleField" (default "title"),
   * "updatedDateField" (default "lastUpdatedDate"), "searchFields" (comma separated,
   * required), "highlightFragmentSize" and "highlightFragmentNumber".
   *
   * @param initParams the component configuration
   * @param client the client used to send requests to ElasticSearch
   * @throws NullPointerException if the "searchFields" property is missing
   */
  public ElasticSearchServiceConnector(InitParams initParams, ElasticSearchingClient client) {
    super(initParams);
    this.client = client;

    PropertiesParam param = initParams.getPropertiesParam("constructor.params");
    this.index = param.getProperty("index");
    this.type = param.getProperty("type");

    String titleField = param.getProperty("titleField");
    if (StringUtils.isNotBlank(titleField)) {
      this.titleElasticFieldName = titleField;
    }
    String updatedDateField = param.getProperty("updatedDateField");
    if (StringUtils.isNotBlank(updatedDateField)) {
      this.updatedDateElasticFieldName = updatedDateField;
    }

    this.searchFields = parseSearchFields(param.getProperty("searchFields"));

    this.highlightFragmentSize =
        readIntParam(param, HIGHLIGHT_FRAGMENT_SIZE_PARAM_NAME, HIGHLIGHT_FRAGMENT_SIZE_DEFAULT_VALUE);
    this.highlightFragmentNumber =
        readIntParam(param, HIGHLIGHT_FRAGMENT_NUMBER_PARAM_NAME, HIGHLIGHT_FRAGMENT_NUMBER_DEFAULT_VALUE);

    //Indicate in which order element will be displayed
    sortMapping.put("relevancy", "_score");
    // Sort on the configured date field: a hard-coded "lastUpdatedDate" would ignore
    // the "updatedDateField" property.
    sortMapping.put("date", this.updatedDateElasticFieldName);
  }

  /**
   * Splits the comma separated "searchFields" property into a mutable list.
   * Entries are trimmed and empty entries are dropped, so that "a, b" does not
   * produce a field named " b".
   */
  private static List<String> parseSearchFields(String rawFields) {
    Objects.requireNonNull(rawFields, "Property 'searchFields' of 'constructor.params' is required");
    List<String> fields = new ArrayList<>();
    for (String field : rawFields.split(",")) {
      String trimmed = field.trim();
      if (!trimmed.isEmpty()) {
        fields.add(trimmed);
      }
    }
    return fields;
  }

  /**
   * Reads an integer property, falling back to the default value (with a warning)
   * when the property is absent or is not a valid number.
   */
  private int readIntParam(PropertiesParam param, String name, int defaultValue) {
    String rawValue = param.getProperty(name);
    if (rawValue == null) {
      return defaultValue;
    }
    try {
      return Integer.parseInt(rawValue.trim());
    } catch (NumberFormatException e) {
      LOG.warn("Value of param " + name + " of search connector " + this.getClass().getName()
          + " is not a valid number (" + rawValue + "), default value will be used ("
          + defaultValue + ")");
      return defaultValue;
    }
  }

  /**
   * Searches ElasticSearch and returns the matching results.
   *
   * @param context passed through to the hit-building methods
   * @param query the user query; reserved query_string characters are escaped
   * @param sites the sites to restrict the search to
   * @param offset index of the first hit to return
   * @param limit maximum number of hits; no size is sent when negative or Integer.MAX_VALUE
   * @param sort "relevancy" or "date"; anything else sorts on the score
   * @param order sort order, "desc" when blank
   * @return a collection of SearchResult
   */
  @Override
  public Collection<SearchResult> search(SearchContext context, String query, Collection<String> sites,
                                         int offset, int limit, String sort, String order) {
    String esQuery = buildQuery(query, sites, offset, limit, sort, order);
    String jsonResponse = this.client.sendRequest(esQuery, this.index, this.type);
    return buildResult(jsonResponse, context);
  }

  /**
   *
   * Search on ES with additional filter on the search query
   * Different Filter are:
   * - Term Filter (Check if a specific term of a field exist)
   * - Not exist Filter (Check if a term not exist)
   * - Exist Filter (check if a term exist)
   * - Custom Filter (the filter value is used verbatim as the filter JSON)
   *
   * @param context passed through to the hit-building methods
   * @param query the user query; reserved query_string characters are escaped
   * @param filters additional filters, may be null
   * @param sites the sites to restrict the search to
   * @param offset index of the first hit to return
   * @param limit maximum number of hits; no size is sent when negative or Integer.MAX_VALUE
   * @param sort "relevancy" or "date"; anything else sorts on the score
   * @param order sort order, "desc" when blank
   * @return a collection of SearchResult
   */
  public Collection<SearchResult> filteredSearch(SearchContext context, String query, List<ElasticSearchFilter> filters, Collection<String> sites,
                                                 int offset, int limit, String sort, String order) {
    String esQuery = buildFilteredQuery(query, sites, filters, offset, limit, sort, order);
    String jsonResponse = this.client.sendRequest(esQuery, this.index, this.type);
    return buildResult(jsonResponse, context);
  }

  /**
   * Builds the ES request body for a search without additional filters.
   *
   * @return the JSON request body
   */
  protected String buildQuery(String query, Collection<String> sites, int offset, int limit, String sort, String order) {
    return buildFilteredQuery(query, sites, null, offset, limit, sort, order);
  }

  /**
   * Builds the ES request body: paging, sort, source filtering, the query_string query,
   * the permission / sites / additional filters and the highlight section.
   *
   * @param filters additional filters, may be null
   * @return the JSON request body
   */
  protected String buildFilteredQuery(String query, Collection<String> sites, List<ElasticSearchFilter> filters, int offset, int limit, String sort, String order) {
    String escapedQuery = escapeReservedCharacters(query);

    String sortField = sortMapping.get(sort);
    if (StringUtils.isBlank(sortField)) {
      sortField = "_score";
    }
    String sortOrder = StringUtils.isNotBlank(order) ? order : "desc";

    StringBuilder esQuery = new StringBuilder();
    esQuery.append("{\n");
    esQuery.append(" \"from\" : " + offset + ",\n");
    if (limit >= 0 && limit < Integer.MAX_VALUE) {
      esQuery.append(" \"size\" : " + limit + ",\n");
    }

    //Score are always tracked, even with sort
    //https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-sort.html#_track_scores
    esQuery.append(" \"track_scores\": true,\n");
    esQuery.append(" \"sort\" : [\n");
    esQuery.append(" { \"" + sortField + "\" : ");
    esQuery.append("{\"order\" : \"" + sortOrder + "\"}}\n");
    esQuery.append(" ],\n");
    esQuery.append(" \"_source\": [" + getSourceFields() + "],");
    esQuery.append(" \"query\": {\n");
    esQuery.append(" \"bool\" : {\n");
    esQuery.append(" \"must\" : {\n");
    esQuery.append(" \"query_string\" : {\n");
    esQuery.append(" \"fields\" : [" + getFields() + "],\n");
    esQuery.append(" \"query\" : \"" + escapedQuery + "\"\n");
    esQuery.append(" }\n");
    esQuery.append(" },\n");
    esQuery.append(" \"filter\" : {\n");
    esQuery.append(" \"bool\" : {\n");
    esQuery.append(" \"must\" : [\n");

    // First mandatory clause: the hit must be visible to the current user
    esQuery.append(" {\n");
    esQuery.append(" \"bool\" : {\n");
    esQuery.append(" \"should\" : [\n");
    esQuery.append(" " + getPermissionFilter() + "\n");
    esQuery.append(" ]\n");
    esQuery.append(" }\n");
    esQuery.append(" }\n");

    String sitesFilter = getSitesFilter(sites);
    if (StringUtils.isNotBlank(sitesFilter)) {
      esQuery.append(" ,{\n");
      esQuery.append(" \"bool\" : {\n");
      esQuery.append(" \"should\" : \n");
      esQuery.append(" " + sitesFilter + "\n");
      esQuery.append(" }\n");
      esQuery.append(" }");
    }

    // Each additional filter brings its own leading comma
    String additionalFilters = getAdditionalFilters(filters);
    if (StringUtils.isNotBlank(additionalFilters)) {
      esQuery.append(additionalFilters);
    }

    esQuery.append(" \n");
    esQuery.append(" ]\n");
    esQuery.append(" }\n");
    esQuery.append(" }");
    esQuery.append(" }\n");
    esQuery.append(" },\n");
    esQuery.append(" \"highlight\" : {\n");
    esQuery.append(" \"pre_tags\" : [\"<strong>\"],\n");
    esQuery.append(" \"post_tags\" : [\"</strong>\"],\n");
    esQuery.append(" \"fields\" : {\n");
    int fieldCount = this.searchFields.size();
    for (int i = 0; i < fieldCount; i++) {
      esQuery.append(" \"" + searchFields.get(i) + "\" : {\n")
             .append(" \"type\" : \"unified\",\n")
             .append(" \"fragment_size\" : " + this.highlightFragmentSize + ",\n")
             .append(" \"number_of_fragments\" : " + this.highlightFragmentNumber + "}");
      if (i < fieldCount - 1) {
        esQuery.append(",");
      }
      esQuery.append("\n");
    }
    esQuery.append(" }\n");
    esQuery.append(" }\n");
    esQuery.append("}");

    LOG.debug("Search Query request to ES : {} ", esQuery);

    return esQuery.toString();
  }

  /**
   * Escaped reserved characters by ES when using query_string.
   * Only ~ is not escaped since it is used for fuzzy search parameter.
   * The list of reserved characters is documented at
   * https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
   * <p>
   * The returned value is meant to be placed between double quotes in the JSON request
   * body: the escaping backslash is itself written as a JSON escape (two backslashes),
   * and the double quote, the backslash and control characters are additionally
   * JSON-escaped so that they cannot terminate or corrupt the JSON string.
   *
   * @param query The unescaped query string
   * @return The escaped query string, or the query itself when null or empty
   */
  protected String escapeReservedCharacters(String query) {
    if (StringUtils.isEmpty(query)) {
      return query;
    }
    StringBuilder escaped = new StringBuilder(query.length() + 16);
    for (int i = 0; i < query.length(); i++) {
      char current = query.charAt(i);
      if (QUERY_STRING_RESERVED_CHARACTERS.indexOf(current) >= 0) {
        // The query_string escape character, written as a JSON escape sequence
        escaped.append("\\\\");
      }
      appendJsonEscaped(escaped, current);
    }
    return escaped.toString();
  }

  /**
   * Escapes a value so that it can be placed between double quotes in a JSON document.
   *
   * @return the escaped value, or null when the value is null
   */
  private static String escapeJson(String value) {
    if (value == null) {
      return null;
    }
    StringBuilder escaped = new StringBuilder(value.length() + 8);
    for (int i = 0; i < value.length(); i++) {
      appendJsonEscaped(escaped, value.charAt(i));
    }
    return escaped.toString();
  }

  /**
   * Appends one character to a JSON string literal, escaping the double quote,
   * the backslash and the control characters (below 0x20).
   */
  private static void appendJsonEscaped(StringBuilder target, char character) {
    switch (character) {
      case '"':
      case '\\':
        target.append('\\').append(character);
        break;
      case '\b':
        target.append("\\b");
        break;
      case '\f':
        target.append("\\f");
        break;
      case '\n':
        target.append("\\n");
        break;
      case '\r':
        target.append("\\r");
        break;
      case '\t':
        target.append("\\t");
        break;
      default:
        if (character < 0x20) {
          // Remaining control characters have no short form: use the hexadecimal escape
          target.append(String.format("\\u%04x", (int) character));
        } else {
          target.append(character);
        }
    }
  }

  /**
   * Parses the ES response and converts every entry of hits.hits into a SearchResult.
   *
   * @param jsonResponse the raw JSON response
   * @param context passed through to buildHit
   * @return the results, empty when the response is blank, is not a JSON object
   *         or contains no hit
   * @throws ElasticSearchException if the response is not valid JSON
   */
  protected Collection<SearchResult> buildResult(String jsonResponse, SearchContext context) {

    LOG.debug("Search Query response from ES : {} ", jsonResponse);

    Collection<SearchResult> results = new ArrayList<>();
    if (StringUtils.isBlank(jsonResponse)) {
      LOG.warn("Empty response received from ES, no search result can be built");
      return results;
    }

    Object parsed;
    try {
      parsed = new JSONParser().parse(jsonResponse);
    } catch (ParseException e) {
      throw new ElasticSearchException("Unable to parse JSON response", e);
    }
    if (!(parsed instanceof JSONObject)) {
      return results;
    }

    JSONObject jsonResult = (JSONObject) ((JSONObject) parsed).get("hits");
    if (jsonResult == null) {
      return results;
    }
    JSONArray jsonHits = (JSONArray) jsonResult.get("hits");
    if (jsonHits == null) {
      return results;
    }
    for (Object jsonHit : jsonHits) {
      results.add(buildHit((JSONObject) jsonHit, context));
    }
    return results;
  }

  /**
   * Converts one ES hit into a SearchResult. The excerpt is the concatenation of all
   * highlight fragments of the hit, each one prefixed with "... ". The date falls back
   * to the current time when the hit carries no usable date.
   *
   * @param jsonHit one element of hits.hits
   * @param searchContext passed through to the url and detail builders
   * @return the search result
   */
  protected SearchResult buildHit(JSONObject jsonHit, SearchContext searchContext) {
    JSONObject hitSource = (JSONObject) jsonHit.get("_source");
    String title = getTitleFromJsonResult(hitSource);
    String url = getUrlFromJsonResult(hitSource, searchContext);

    Long lastUpdatedDate = getUpdatedDateFromResult(hitSource);
    if (lastUpdatedDate == null) {
      lastUpdatedDate = System.currentTimeMillis();
    }

    // "track_scores" is part of the query so a score is expected, but the value is read
    // as a generic Number (and defaults to 0) so that a missing score, or a score parsed
    // as a Long, cannot fail the whole search.
    Object rawScore = jsonHit.get("_score");
    long relevancy = rawScore instanceof Number ? ((Number) rawScore).longValue() : 0L;

    String detail = buildDetail(jsonHit, searchContext);

    //Get the excerpt
    StringBuilder excerptBuilder = new StringBuilder();
    JSONObject hitHighlight = (JSONObject) jsonHit.get("highlight");
    if (hitHighlight != null) {
      for (Object fragments : hitHighlight.values()) {
        for (Object fragment : (JSONArray) fragments) {
          excerptBuilder.append("... ").append(fragment);
        }
      }
    }
    String excerpt = excerptBuilder.toString();

    LOG.debug("Excerpt extract from ES response : {}", excerpt);

    return new SearchResult(url, title, excerpt, detail, img, lastUpdatedDate, relevancy);
  }

  /**
   * Builds the detail text of a result. This implementation provides none.
   *
   * @return always null here
   */
  protected String buildDetail(JSONObject jsonHit, SearchContext searchContext) {
    return null;
  }

  /**
   * Reads the last updated date from the source of a hit.
   *
   * @param hitSource the "_source" object of the hit
   * @return the date as a long, or null when the field is absent or cannot be parsed
   */
  protected Long getUpdatedDateFromResult(JSONObject hitSource) {
    Object date = hitSource.get(updatedDateElasticFieldName);
    if (date instanceof Long) {
      return (Long) date;
    }
    if (date instanceof Number) {
      return ((Number) date).longValue();
    }
    if (date != null) {
      try {
        return Long.parseLong(date.toString());
      } catch (NumberFormatException ex) {
        LOG.error("Can not parse updatedDate field as Long {}", date);
      }
    }
    return null;
  }

  /**
   * @return the "url" field of the hit source
   */
  protected String getUrlFromJsonResult(JSONObject hitSource, SearchContext context) {
    return (String) hitSource.get("url");
  }

  /**
   * @return the value of the configured title field of the hit source
   */
  protected String getTitleFromJsonResult(JSONObject hitSource) {
    return (String) hitSource.get(titleElasticFieldName);
  }

  /**
   * Renders the additional filters as extra clauses of the enclosing "must" array.
   * Each clause starts with a comma since it always follows the permission clause.
   *
   * @param filters the filters, may be null
   * @return the JSON fragment, empty when there is no filter
   */
  protected String getAdditionalFilters(List<ElasticSearchFilter> filters) {
    if (filters == null) {
      return "";
    }
    StringBuilder filterJSON = new StringBuilder();
    for (ElasticSearchFilter filter : filters) {
      filterJSON.append(" ,\n");
      filterJSON.append(" {\n");
      filterJSON.append(" \"bool\" : {\n");
      filterJSON.append(" \"should\" : [\n");
      filterJSON.append(" " + getFilter(filter) + "\n");
      filterJSON.append(" ]\n");
      filterJSON.append(" }\n");
      filterJSON.append(" }");
    }
    return filterJSON.toString();
  }

  /**
   * Renders one filter according to its type; an unhandled type renders as an empty string.
   */
  private String getFilter(ElasticSearchFilter filter) {
    switch (filter.getType()) {
      case FILTER_BY_TERM:
        return getTermFilter(filter.getField(), filter.getValue());
      case FILTER_EXIST:
        return getExistFilter(filter.getField());
      case FILTER_NOT_EXIST:
        return getNotExistFilter(filter.getField());
      case FILTER_CUSTOM:
        return getCustomFilter(filter.getValue());
      default:
        return "";
    }
  }

  /**
   * Check if a specific term of a field exist
   * Note that this field should be set as not analyzed (index = false)
   * Field and value are JSON-escaped before being inserted.
   *
   * @param field
   * @param value
   * @return a Term Filter
   */
  private String getTermFilter(String field, String value) {
    return "{\n \"term\" : { \"" + escapeJson(field) + "\" : \"" + escapeJson(value) + "\" }\n }";
  }

  /**
   * Check if a specific field not exist.
   * Expressed as bool / must_not, like the sites filter, because the legacy "not"
   * filter is no longer accepted by ElasticSearch 5 and later.
   *
   * @param field
   * @return a not Exist Term Filter
   */
  private String getNotExistFilter(String field) {
    return "{\n" +
        " \"bool\" : {\n" +
        " \"must_not\" : {\n" +
        " \"exists\" : { \"field\" : \"" + escapeJson(field) + "\" }\n" +
        " }\n" +
        " }\n" +
        "}";
  }

  /**
   * Check if a specific field exist
   *
   * @param field
   * @return an Exist Filter
   */
  private String getExistFilter(String field) {
    return "{\n \"exists\" : { \"field\" : \"" + escapeJson(field) + "\" }\n }";
  }

  /**
   * @return the search fields, each one double-quoted, separated by commas
   */
  protected String getFields() {
    List<String> quotedFields = new ArrayList<>(searchFields.size());
    for (String searchField : searchFields) {
      quotedFields.add("\"" + searchField + "\"");
    }
    return StringUtils.join(quotedFields, ",");
  }

  /**
   * Apply the given value directly as the filter
   *
   * @param value
   * @return a Custom Filter
   */
  private String getCustomFilter(String value) {
    return value;
  }

  /**
   * Builds the "should" clauses matching the documents the current user may see:
   * permission equal to the user id, permission equal to IdentityConstants.ANY and,
   * when the user has memberships, permission matching one of them (regexp).
   *
   * @return the comma separated clauses
   * @throws IllegalStateException if there is no current conversation state, identity
   *         or membership collection
   */
  protected String getPermissionFilter() {
    Set<String> membershipSet = getUserMemberships();

    StringBuilder permissionSB = new StringBuilder();
    permissionSB.append("{\n")
                .append(" \"term\" : { \"permissions\" : \"")
                .append(getCurrentUser())
                .append("\" }\n")
                .append("},\n")
                .append("{\n")
                .append(" \"term\" : { \"permissions\" : \"")
                .append(IdentityConstants.ANY)
                .append("\" }\n")
                .append("}");

    if (membershipSet != null && !membershipSet.isEmpty()) {
      permissionSB.append(",\n")
                  .append("{\n")
                  .append(" \"regexp\" : { \"permissions\" : \"")
                  .append(StringUtils.join(membershipSet, "|"))
                  .append("\" }\n")
                  .append("}");
    }
    return permissionSB.toString();
  }

  /**
   * Builds the content of the "should" clause restricting hits to the given sites.
   * Documents without any "sites" field always match; when sites are given, documents
   * whose "sites" field contains one of them match too. Site names are JSON-escaped.
   *
   * @param sitesCollection the sites, may be null or empty
   * @return the JSON fragment
   */
  protected String getSitesFilter(Collection<String> sitesCollection) {
    if (sitesCollection == null || sitesCollection.isEmpty()) {
      return " { \"bool\" : " +
          "{\n" +
          " \"must_not\": {\n" +
          " \"exists\" : { \"field\" : \"sites\" }\n" +
          " }\n" +
          " }\n" +
          "}\n";
    }

    List<String> quotedSites = new ArrayList<>(sitesCollection.size());
    for (String site : sitesCollection) {
      quotedSites.add("\"" + escapeJson(site) + "\"");
    }
    String sitesList = "[" + StringUtils.join(quotedSites, ",") + "]";

    return " [ { \"bool\" : {\n" +
        " \"must_not\": {\n" +
        " \"exists\" : { \"field\" : \"sites\" }\n" +
        " }\n" +
        " }\n" +
        "},\n" +
        "{\n" +
        " \"terms\" : { \n" +
        " \"sites\" : " + sitesList + "\n" +
        " }\n" +
        "} ]";
  }

  /**
   * Returns the current conversation state after checking that it carries an identity.
   *
   * @throws IllegalStateException if the state or its identity is null
   */
  private ConversationState getCheckedConversationState() {
    ConversationState conversationState = ConversationState.getCurrent();
    if (conversationState == null) {
      throw new IllegalStateException("No Identity found: ConversationState.getCurrent() is null");
    }
    if (conversationState.getIdentity() == null) {
      throw new IllegalStateException("No Identity found: ConversationState.getCurrent().getIdentity() is null");
    }
    return conversationState;
  }

  /**
   * @return the id of the user of the current conversation state
   */
  private String getCurrentUser() {
    return getCheckedConversationState().getIdentity().getUserId();
  }

  /**
   * Computes the membership expressions of the current user, to be joined in a regexp.
   *
   * @return the membership expressions
   * @throws IllegalStateException if the identity has no membership collection
   */
  private Set<String> getUserMemberships() {
    ConversationState conversationState = getCheckedConversationState();
    if (conversationState.getIdentity().getMemberships() == null) {
      //This case is not supported
      //The doc says "Any anonymous user automatically becomes a member of the group guests.group when they enter the public pages."
      //http://docs.exoplatform.com/PLF42/sect-Reference_Guide-Portal_Default_Permission_Configuration.html
      throw new IllegalStateException("No Membership found: ConversationState.getCurrent().getIdentity().getMemberships() is null");
    }

    Set<String> entries = new HashSet<>();
    for (MembershipEntry entry : conversationState.getIdentity().getMemberships()) {
      if (MembershipEntry.ANY_TYPE.equals(entry.getMembershipType())) {
        //If it's a wildcard membership, add a point to transform it to regexp
        entries.add(entry.toString().replace("*", ".*"));
      } else {
        //If it's not a wildcard membership, add the membership
        entries.add(entry.toString());
        //Also add a wildcard membership (not as a regexp) in order to match to wildcard permission
        //Ex: membership dev:/pub must match permission dev:/pub and permission *:/pub
        entries.add("*:" + entry.getGroup());
      }
    }
    return entries;
  }

  /**
   * Lists the fields requested in "_source": the url, the title field and the updated
   * date field. The date field is requested because getUpdatedDateFromResult reads it
   * from the hit source; without it every result would fall back to the current time.
   *
   * @return the fields, each one double-quoted, separated by commas
   */
  protected String getSourceFields() {
    List<String> sourceFields = new ArrayList<>();
    sourceFields.add("\"url\"");
    sourceFields.add("\"" + getTitleElasticFieldName() + "\"");
    sourceFields.add("\"" + updatedDateElasticFieldName + "\"");
    return StringUtils.join(sourceFields, ",");
  }

  public String getIndex() {
    return index;
  }

  public void setIndex(String index) {
    this.index = index;
  }

  public String getImg() {
    return img;
  }

  public void setImg(String img) {
    this.img = img;
  }

  public String getTitleElasticFieldName() {
    return titleElasticFieldName;
  }

  public void setTitleElasticFieldName(String titleElasticFieldName) {
    this.titleElasticFieldName = titleElasticFieldName;
  }

  public List<String> getSearchFields() {
    return searchFields;
  }

  public void setSearchFields(List<String> searchFields) {
    this.searchFields = searchFields;
  }

  public String getType() {
    return type;
  }

  public void setType(String type) {
    this.type = type;
  }

  public ElasticSearchingClient getClient() {
    return client;
  }
}