1 package org.exoplatform.services.wcm.search.connector;
2
3 import org.apache.commons.io.IOUtils;
4 import org.apache.commons.lang.StringUtils;
5 import org.exoplatform.commons.search.domain.Document;
6 import org.exoplatform.commons.search.index.impl.ElasticIndexingServiceConnector;
7 import org.exoplatform.commons.utils.CommonsUtils;
8 import org.exoplatform.container.xml.InitParams;
9 import org.exoplatform.services.cms.documents.TrashService;
10 import org.exoplatform.services.jcr.RepositoryService;
11 import org.exoplatform.services.jcr.access.AccessControlList;
12 import org.exoplatform.services.jcr.core.ExtendedNode;
13 import org.exoplatform.services.jcr.core.ExtendedSession;
14 import org.exoplatform.services.jcr.core.ManageableRepository;
15 import org.exoplatform.services.jcr.impl.core.NodeImpl;
16 import org.exoplatform.services.jcr.impl.core.query.QueryImpl;
17 import org.exoplatform.services.log.ExoLogger;
18 import org.exoplatform.services.log.Log;
19 import org.exoplatform.services.wcm.core.NodetypeConstant;
20 import org.exoplatform.services.wcm.utils.WCMCoreUtils;
21
22 import javax.jcr.*;
23 import javax.jcr.nodetype.NodeTypeManager;
24 import javax.jcr.nodetype.PropertyDefinition;
25 import javax.jcr.query.*;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.util.*;
29 import java.util.stream.Collectors;
30
31
32
33
34 public class FileindexingConnector extends ElasticIndexingServiceConnector {
35
36 private static final Log LOGGER = ExoLogger.getExoLogger(FileindexingConnector.class);
37
38 public static final String TYPE = "file";
39
40 private RepositoryService repositoryService;
41
42 private TrashService trashService;
43
44 public FileindexingConnector(InitParams initParams) {
45 super(initParams);
46 this.repositoryService = CommonsUtils.getService(RepositoryService.class);
47 this.trashService = CommonsUtils.getService(TrashService.class);
48 }
49
50 @Override
51 public boolean isNeedIngestPipeline() {
52 return true;
53 }
54
55 @Override
56 public String getPipelineName() {
57 return "file";
58 }
59
60 @Override
61 public String getMapping() {
62 StringBuilder mapping = new StringBuilder()
63 .append("{")
64 .append(" \"properties\" : {\n")
65 .append(" \"repository\" : {\"type\" : \"keyword\"},\n")
66 .append(" \"workspace\" : {\"type\" : \"keyword\"},\n")
67 .append(" \"path\" : {\"type\" : \"keyword\"},\n")
68 .append(" \"author\" : {\"type\" : \"keyword\"},\n")
69 .append(" \"permissions\" : {\"type\" : \"keyword\"},\n")
70 .append(" \"createdDate\" : {\"type\" : \"date\", \"format\": \"epoch_millis\"},\n")
71 .append(" \"lastUpdatedDate\" : {\"type\" : \"date\", \"format\": \"epoch_millis\"},\n")
72 .append(" \"fileType\" : {\"type\" : \"keyword\"},\n")
73 .append(" \"fileSize\" : {\"type\" : \"long\"},\n")
74 .append(" \"name\" : {\"type\" : \"text\", \"analyzer\": \"letter_lowercase_asciifolding\"},\n")
75 .append(" \"title\" : {\"type\" : \"text\", \"analyzer\": \"letter_lowercase_asciifolding\"},\n")
76 .append(" \"dc:title\" : {\"type\" : \"text\"},\n")
77 .append(" \"dc:creator\" : {\"type\" : \"text\"},\n")
78 .append(" \"dc:subject\" : {\"type\" : \"text\"},\n")
79 .append(" \"dc:description\" : {\"type\" : \"text\"},\n")
80 .append(" \"dc:publisher\" : {\"type\" : \"text\"},\n")
81 .append(" \"dc:contributor\" : {\"type\" : \"text\"},\n")
82 .append(" \"dc:date\" : {\"type\" : \"date\", \"format\": \"epoch_millis\"},\n")
83 .append(" \"dc:resourceType\" : {\"type\" : \"text\"},\n")
84 .append(" \"dc:format\" : {\"type\" : \"text\"},\n")
85 .append(" \"dc:identifier\" : {\"type\" : \"text\"},\n")
86 .append(" \"dc:source\" : {\"type\" : \"text\"},\n")
87 .append(" \"dc:language\" : {\"type\" : \"text\"},\n")
88 .append(" \"dc:relation\" : {\"type\" : \"text\"},\n")
89 .append(" \"dc:coverage\" : {\"type\" : \"text\"},\n")
90 .append(" \"dc:rights\" : {\"type\" : \"text\"}\n")
91 .append(" }\n")
92 .append("}");
93
94 return mapping.toString();
95 }
96
97 @Override
98 public String getAttachmentProcessor() {
99 StringBuilder processors = new StringBuilder()
100 .append("{")
101 .append(" \"description\" : \"File processor\",\n")
102 .append(" \"processors\" : [{\n")
103 .append(" \"attachment\" : {\n")
104 .append(" \"field\" : \"file\",\n")
105 .append(" \"indexed_chars\" : -1,\n")
106 .append(" \"properties\" : [\"content\"]\n")
107 .append(" }\n")
108 .append(" },{\n")
109 .append(" \"remove\" : {\n")
110 .append(" \"field\" : \"file\"\n")
111 .append(" }\n")
112 .append(" }]\n")
113 .append("}");
114
115 return processors.toString();
116 }
117
118 @Override
119 public Document create(String id) {
120 if(StringUtils.isEmpty(id)) {
121 return null;
122 }
123
124 try {
125 ExtendedSession session = (ExtendedSession) WCMCoreUtils.getSystemSessionProvider().getSession("collaboration", repositoryService.getCurrentRepository());
126 Node node = session.getNodeByIdentifier(id);
127
128 if(node == null || !node.isNodeType(NodetypeConstant.NT_FILE) || trashService.isInTrash(node) || isInContentFolder(node)) {
129 return null;
130 }
131
132 Map<String, String> fields = new HashMap<>();
133 fields.put("name", node.getName());
134 fields.put("repository", ((ManageableRepository) session.getRepository()).getConfiguration().getName());
135 fields.put("workspace", session.getWorkspace().getName());
136 fields.put("path", node.getPath());
137 if(node.hasProperty(NodetypeConstant.EXO_TITLE)) {
138 fields.put("title", node.getProperty(NodetypeConstant.EXO_TITLE).getString());
139 } else {
140 fields.put("title", node.getName());
141 }
142 if(node.hasProperty(NodetypeConstant.EXO_OWNER)) {
143 fields.put("author", node.getProperty(NodetypeConstant.EXO_OWNER).getString());
144 }
145 if(node.hasProperty("jcr:created")) {
146 fields.put("createdDate", String.valueOf(node.getProperty("jcr:created").getDate().getTimeInMillis()));
147 }
148
149 Node contentNode = node.getNode(NodetypeConstant.JCR_CONTENT);
150 if(contentNode != null) {
151 if (contentNode.hasProperty(NodetypeConstant.JCR_MIMETYPE)) {
152 fields.put("fileType", contentNode.getProperty(NodetypeConstant.JCR_MIMETYPE).getString());
153 }
154 InputStream fileStream = contentNode.getProperty(NodetypeConstant.JCR_DATA).getStream();
155 byte[] fileBytes = IOUtils.toByteArray(fileStream);
156 fields.put("file", Base64.getEncoder().encodeToString(fileBytes));
157
158 fields.put("fileSize", String.valueOf(fileBytes.length));
159
160
161 Map<String, String> dublinCoreMetadata = extractDublinCoreMetadata(contentNode);
162 if(dublinCoreMetadata != null) {
163 fields.putAll(dublinCoreMetadata);
164 }
165 }
166
167 LOGGER.info("ES document generated for file with id={} path=\"{}\"", id, node.getPath());
168 return new Document(TYPE, id, null, new Date(), computePermissions(node), fields);
169 } catch (RepositoryException | IOException e) {
170 LOGGER.error("Error while indexing file " + id, e);
171 }
172
173 return null;
174 }
175
176 protected boolean isInContentFolder(Node node) {
177 try {
178 return
179 ( (node.isNodeType("exo:htmlFile") && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent())) ||
180 (node.isNodeType("exo:cssFile") && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent().getParent())) ||
181 (node.isNodeType("exo:jsFile") && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent().getParent())) ||
182 (node.isNodeType("nt:file") && (node.getPath().contains("/medias/images")||node.getPath().contains("/medias/videos")||node.getPath().contains("/medias/audio")) && org.exoplatform.services.cms.impl.Utils.isDocument(node.getParent().getParent().getParent()))
183 );
184 } catch (Exception e) {
185 return false;
186 }
187 }
188
189
190 @Override
191 public Document update(String id) {
192 return create(id);
193 }
194
195 @Override
196 public List<String> getAllIds(int offset, int limit) {
197 List<String> allIds = new ArrayList<>();
198 try {
199 Session session = WCMCoreUtils.getSystemSessionProvider().getSession("collaboration", repositoryService.getCurrentRepository());
200 QueryManager queryManager = session.getWorkspace().getQueryManager();
201 Query query = queryManager.createQuery("select * from " + NodetypeConstant.NT_FILE, Query.SQL);
202 QueryImpl queryImpl = (QueryImpl) query;
203 queryImpl.setOffset(offset);
204 queryImpl.setLimit(limit);
205 QueryResult result = queryImpl.execute();
206 NodeIterator nodeIterator = result.getNodes();
207 while(nodeIterator.hasNext()) {
208 NodeImpl node = (NodeImpl) nodeIterator.nextNode();
209
210 allIds.add(node.getInternalIdentifier());
211 }
212 } catch (RepositoryException e) {
213 throw new RuntimeException("Error while fetching all nt:file nodes", e);
214 }
215
216 if(Thread.currentThread().isInterrupted()) {
217 throw new RuntimeException("Indexing queue processing interrupted");
218 }
219
220 LOGGER.info("Fetched {} files to push in indexing queue (offset={}, limit={})", allIds.size(), offset, limit);
221 return allIds;
222 }
223
224 protected Map<String, String> extractDublinCoreMetadata(Node contentNode) throws RepositoryException {
225 Map<String, String> dcFields = null;
226 if (contentNode.isNodeType(NodetypeConstant.DC_ELEMENT_SET)) {
227 dcFields = new HashMap<>();
228 NodeTypeManager nodeTypeManager = repositoryService.getCurrentRepository().getNodeTypeManager();
229 PropertyDefinition[] dcPropertyDefinitions = nodeTypeManager.getNodeType(NodetypeConstant.DC_ELEMENT_SET).getPropertyDefinitions();
230 for (PropertyDefinition propertyDefinition : dcPropertyDefinitions) {
231 String propertyName = propertyDefinition.getName();
232 if (contentNode.hasProperty(propertyName)) {
233 Property property = contentNode.getProperty(propertyName);
234 if(property != null) {
235 String strValue = null;
236 if (propertyDefinition.isMultiple()) {
237 Value[] values = property.getValues();
238 if(values != null && values.length > 0) {
239 Value value = values[0];
240 if (property.getType() == PropertyType.DATE) {
241 strValue = String.valueOf(value.getDate().toInstant().toEpochMilli());
242 } else {
243 strValue = value.getString();
244 }
245 }
246 } else {
247 if (property.getType() == PropertyType.DATE) {
248 strValue = String.valueOf(property.getDate().toInstant().toEpochMilli());
249 } else {
250 strValue = property.getString();
251 }
252 }
253 if(strValue != null) {
254 dcFields.put(propertyName, strValue);
255 }
256 }
257 }
258 }
259 }
260 return dcFields;
261 }
262
263 private Set<String> computePermissions(Node node) throws RepositoryException {
264 Set<String> permissions = new HashSet<>();
265
266 AccessControlList acl = ((ExtendedNode) node).getACL();
267
268 permissions.add(acl.getOwner());
269
270 if (acl.getPermissionEntries() != null) {
271 permissions.addAll(acl.getPermissionEntries().stream().map(permission -> permission.getIdentity()).collect(Collectors.toSet()));
272 }
273
274 return permissions;
275 }
276 }