View Javadoc
1   package org.exoplatform.services.wcm.search.connector;
2   
3   import org.apache.commons.io.IOUtils;
4   import org.apache.commons.lang.StringUtils;
5   import org.exoplatform.commons.search.domain.Document;
6   import org.exoplatform.commons.search.index.impl.ElasticIndexingServiceConnector;
7   import org.exoplatform.commons.utils.CommonsUtils;
8   import org.exoplatform.container.xml.InitParams;
9   import org.exoplatform.services.cms.documents.TrashService;
10  import org.exoplatform.services.jcr.RepositoryService;
11  import org.exoplatform.services.jcr.access.AccessControlList;
12  import org.exoplatform.services.jcr.core.ExtendedNode;
13  import org.exoplatform.services.jcr.core.ExtendedSession;
14  import org.exoplatform.services.jcr.core.ManageableRepository;
15  import org.exoplatform.services.jcr.impl.core.NodeImpl;
16  import org.exoplatform.services.jcr.impl.core.query.QueryImpl;
17  import org.exoplatform.services.log.ExoLogger;
18  import org.exoplatform.services.log.Log;
19  import org.exoplatform.services.wcm.core.NodetypeConstant;
20  import org.exoplatform.services.wcm.utils.WCMCoreUtils;
21  
22  import javax.jcr.*;
23  import javax.jcr.nodetype.NodeTypeManager;
24  import javax.jcr.nodetype.PropertyDefinition;
25  import javax.jcr.query.*;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.util.*;
29  import java.util.stream.Collectors;
30  
31  /**
32   * Indexing Connector for Files
33   */
34  public class FileindexingConnector extends ElasticIndexingServiceConnector {
35  
36    private static final Log LOGGER = ExoLogger.getExoLogger(FileindexingConnector.class);
37  
38    public static final String TYPE = "file";
39  
40    private RepositoryService repositoryService;
41  
42    private TrashService trashService;
43  
44    public FileindexingConnector(InitParams initParams) {
45      super(initParams);
46      this.repositoryService = CommonsUtils.getService(RepositoryService.class);
47      this.trashService = CommonsUtils.getService(TrashService.class);
48    }
49  
50    @Override
51    public boolean isNeedIngestPipeline() {
52      return true;
53    }
54  
55    @Override
56    public String getPipelineName() {
57      return "file";
58    }
59  
60    @Override
61    public String getMapping() {
62      StringBuilder mapping = new StringBuilder()
63              .append("{")
64              .append("  \"properties\" : {\n")
65              .append("    \"repository\" : {\"type\" : \"keyword\"},\n")
66              .append("    \"workspace\" : {\"type\" : \"keyword\"},\n")
67              .append("    \"path\" : {\"type\" : \"keyword\"},\n")
68              .append("    \"author\" : {\"type\" : \"keyword\"},\n")
69              .append("    \"permissions\" : {\"type\" : \"keyword\"},\n")
70              .append("    \"createdDate\" : {\"type\" : \"date\", \"format\": \"epoch_millis\"},\n")
71              .append("    \"lastUpdatedDate\" : {\"type\" : \"date\", \"format\": \"epoch_millis\"},\n")
72              .append("    \"fileType\" : {\"type\" : \"keyword\"},\n")
73              .append("    \"fileSize\" : {\"type\" : \"long\"},\n")
74              .append("    \"name\" : {\"type\" : \"text\", \"analyzer\": \"letter_lowercase_asciifolding\"},\n")
75              .append("    \"title\" : {\"type\" : \"text\", \"analyzer\": \"letter_lowercase_asciifolding\"},\n")
76              .append("    \"dc:title\" : {\"type\" : \"text\"},\n")
77              .append("    \"dc:creator\" : {\"type\" : \"text\"},\n")
78              .append("    \"dc:subject\" : {\"type\" : \"text\"},\n")
79              .append("    \"dc:description\" : {\"type\" : \"text\"},\n")
80              .append("    \"dc:publisher\" : {\"type\" : \"text\"},\n")
81              .append("    \"dc:contributor\" : {\"type\" : \"text\"},\n")
82              .append("    \"dc:date\" : {\"type\" : \"date\", \"format\": \"epoch_millis\"},\n")
83              .append("    \"dc:resourceType\" : {\"type\" : \"text\"},\n")
84              .append("    \"dc:format\" : {\"type\" : \"text\"},\n")
85              .append("    \"dc:identifier\" : {\"type\" : \"text\"},\n")
86              .append("    \"dc:source\" : {\"type\" : \"text\"},\n")
87              .append("    \"dc:language\" : {\"type\" : \"text\"},\n")
88              .append("    \"dc:relation\" : {\"type\" : \"text\"},\n")
89              .append("    \"dc:coverage\" : {\"type\" : \"text\"},\n")
90              .append("    \"dc:rights\" : {\"type\" : \"text\"}\n")
91              .append("  }\n")
92              .append("}");
93  
94      return mapping.toString();
95    }
96  
97    @Override
98    public String getAttachmentProcessor() {
99      StringBuilder processors = new StringBuilder()
100             .append("{")
101             .append("  \"description\" : \"File processor\",\n")
102             .append("  \"processors\" : [{\n")
103             .append("    \"attachment\" : {\n")
104             .append("      \"field\" : \"file\",\n")
105             .append("      \"indexed_chars\" : -1,\n")
106             .append("      \"properties\" : [\"content\"]\n")
107             .append("    }\n")
108             .append("  },{\n")
109             .append("    \"remove\" : {\n")
110             .append("      \"field\" : \"file\"\n")
111             .append("    }\n")
112             .append("  }]\n")
113             .append("}");
114 
115     return processors.toString();
116   }
117 
118   @Override
119   public Document create(String id) {
120     if(StringUtils.isEmpty(id)) {
121       return null;
122     }
123 
124     try {
125       ExtendedSession session = (ExtendedSession) WCMCoreUtils.getSystemSessionProvider().getSession("collaboration", repositoryService.getCurrentRepository());
126       Node node = session.getNodeByIdentifier(id);
127 
128       if(node == null || !node.isNodeType(NodetypeConstant.NT_FILE) || trashService.isInTrash(node) || isInContentFolder(node)) {
129         return null;
130       }
131 
132       Map<String, String> fields = new HashMap<>();
133       fields.put("name", node.getName());
134       fields.put("repository", ((ManageableRepository) session.getRepository()).getConfiguration().getName());
135       fields.put("workspace", session.getWorkspace().getName());
136       fields.put("path", node.getPath());
137       if(node.hasProperty(NodetypeConstant.EXO_TITLE)) {
138         fields.put("title", node.getProperty(NodetypeConstant.EXO_TITLE).getString());
139       } else {
140         fields.put("title", node.getName());
141       }
142       if(node.hasProperty(NodetypeConstant.EXO_OWNER)) {
143         fields.put("author", node.getProperty(NodetypeConstant.EXO_OWNER).getString());
144       }
145       if(node.hasProperty("jcr:created")) {
146         fields.put("createdDate", String.valueOf(node.getProperty("jcr:created").getDate().getTimeInMillis()));
147       }
148 
149       Node contentNode = node.getNode(NodetypeConstant.JCR_CONTENT);
150       if(contentNode != null) {
151         if (contentNode.hasProperty(NodetypeConstant.JCR_MIMETYPE)) {
152           fields.put("fileType", contentNode.getProperty(NodetypeConstant.JCR_MIMETYPE).getString());
153         }
154         InputStream fileStream = contentNode.getProperty(NodetypeConstant.JCR_DATA).getStream();
155         byte[] fileBytes = IOUtils.toByteArray(fileStream);
156         fields.put("file", Base64.getEncoder().encodeToString(fileBytes));
157 
158         fields.put("fileSize", String.valueOf(fileBytes.length));
159 
160         // Dublin Core metadata
161         Map<String, String> dublinCoreMetadata = extractDublinCoreMetadata(contentNode);
162         if(dublinCoreMetadata != null) {
163           fields.putAll(dublinCoreMetadata);
164         }
165       }
166 
167       LOGGER.info("ES document generated for file with id={} path=\"{}\"", id, node.getPath());
168       return new Document(TYPE, id, null, new Date(), computePermissions(node), fields);
169     } catch (RepositoryException | IOException e) {
170       LOGGER.error("Error while indexing file " + id, e);
171     }
172 
173     return null;
174   }
175 
176   protected boolean isInContentFolder(Node node) {
177     try {
178       return
179               (  (node.isNodeType("exo:htmlFile") && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent())) ||
180                  (node.isNodeType("exo:cssFile") && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent().getParent())) ||
181                  (node.isNodeType("exo:jsFile") && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent().getParent())) ||
182                  (node.isNodeType("nt:file") && (node.getPath().contains("/medias/images")||node.getPath().contains("/medias/videos")||node.getPath().contains("/medias/audio")) && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent().getParent().getParent()))
183               );
184     } catch (Exception e) {
185       return false;
186     }
187   }
188 
189 
190   @Override
191   public Document update(String id) {
192     return create(id);
193   }
194 
195   @Override
196   public List<String> getAllIds(int offset, int limit) {
197     List<String> allIds = new ArrayList<>();
198     try {
199       Session session = WCMCoreUtils.getSystemSessionProvider().getSession("collaboration", repositoryService.getCurrentRepository());
200       QueryManager queryManager = session.getWorkspace().getQueryManager();
201       Query query = queryManager.createQuery("select * from " + NodetypeConstant.NT_FILE, Query.SQL);
202       QueryImpl queryImpl = (QueryImpl) query;
203       queryImpl.setOffset(offset);
204       queryImpl.setLimit(limit);
205       QueryResult result = queryImpl.execute();
206       NodeIterator nodeIterator = result.getNodes();
207       while(nodeIterator.hasNext()) {
208         NodeImpl node = (NodeImpl) nodeIterator.nextNode();
209         // use node internal identifier to be sure to have an id for all nodes
210         allIds.add(node.getInternalIdentifier());
211       }
212     } catch (RepositoryException e) {
213      throw new RuntimeException("Error while fetching all nt:file nodes", e);
214     }
215 
216     if(Thread.currentThread().isInterrupted()) {
217       throw new RuntimeException("Indexing queue processing interrupted");
218     }
219 
220     LOGGER.info("Fetched {} files to push in indexing queue (offset={}, limit={})", allIds.size(), offset, limit);
221     return allIds;
222   }
223 
224   protected Map<String, String> extractDublinCoreMetadata(Node contentNode) throws RepositoryException {
225     Map<String, String> dcFields = null;
226     if (contentNode.isNodeType(NodetypeConstant.DC_ELEMENT_SET)) {
227       dcFields = new HashMap<>();
228       NodeTypeManager nodeTypeManager = repositoryService.getCurrentRepository().getNodeTypeManager();
229       PropertyDefinition[] dcPropertyDefinitions = nodeTypeManager.getNodeType(NodetypeConstant.DC_ELEMENT_SET).getPropertyDefinitions();
230       for (PropertyDefinition propertyDefinition : dcPropertyDefinitions) {
231         String propertyName = propertyDefinition.getName();
232         if (contentNode.hasProperty(propertyName)) {
233           Property property = contentNode.getProperty(propertyName);
234           if(property != null) {
235             String strValue = null;
236             if (propertyDefinition.isMultiple()) {
237               Value[] values = property.getValues();
238               if(values != null && values.length > 0) {
239                 Value value = values[0];
240                 if (property.getType() == PropertyType.DATE) {
241                   strValue = String.valueOf(value.getDate().toInstant().toEpochMilli());
242                 } else {
243                   strValue = value.getString();
244                 }
245               }
246             } else {
247               if (property.getType() == PropertyType.DATE) {
248                 strValue = String.valueOf(property.getDate().toInstant().toEpochMilli());
249               } else {
250                 strValue = property.getString();
251               }
252             }
253             if(strValue != null) {
254               dcFields.put(propertyName, strValue);
255             }
256           }
257         }
258       }
259     }
260     return dcFields;
261   }
262 
263   private Set<String> computePermissions(Node node) throws RepositoryException {
264     Set<String> permissions = new HashSet<>();
265 
266     AccessControlList acl = ((ExtendedNode) node).getACL();
267     //Add the owner
268     permissions.add(acl.getOwner());
269     //Add permissions
270     if (acl.getPermissionEntries() != null) {
271       permissions.addAll(acl.getPermissionEntries().stream().map(permission -> permission.getIdentity()).collect(Collectors.toSet()));
272     }
273 
274     return permissions;
275   }
276 }