1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.exoplatform.social.service.rest;
18
19 import java.awt.image.BufferedImage;
20 import java.io.IOException;
21 import java.net.MalformedURLException;
22 import java.net.URL;
23 import java.util.ArrayList;
24 import java.util.Collection;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29
30 import javax.imageio.ImageIO;
31 import javax.xml.bind.annotation.XmlRootElement;
32
33 import org.apache.xerces.xni.Augmentations;
34 import org.apache.xerces.xni.QName;
35 import org.apache.xerces.xni.XMLAttributes;
36 import org.apache.xerces.xni.XMLString;
37 import org.apache.xerces.xni.parser.XMLDocumentFilter;
38 import org.apache.xerces.xni.parser.XMLInputSource;
39 import org.apache.xerces.xni.parser.XMLParserConfiguration;
40 import org.cyberneko.html.HTMLConfiguration;
41 import org.cyberneko.html.filters.DefaultFilter;
42 import org.cyberneko.html.filters.ElementRemover;
43 import org.exoplatform.commons.embedder.EmbedderFactory;
44 import org.exoplatform.commons.embedder.ExoMedia;
45 import org.exoplatform.services.log.ExoLogger;
46 import org.exoplatform.services.log.Log;
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 @XmlRootElement
95 public class LinkShare extends DefaultFilter {
96
97 private final String MEDIUM_TYPE_NEWS = "news";
98 private final String MEDIUM_TYPE_AUDIO = "audio";
99 private final String MEDIUM_TYPE_IMAGE = "image";
100 private final String MEDIUM_TYPE_VIDEO = "video";
101 private final String MEDIUM_TYPE_BLOG = "blog";
102 private final String MEDIUM_TYPE_MULT = "mult";
103
104 private static final String IMAGE_MIME_TYPE = "image/";
105 private static final String HTML_MIME_TYPE = "text/html";
106
107 private String mediumType = MEDIUM_TYPE_NEWS;
108 private String mediaSrc;
109 private String mediaType;
110 private String mediaTitle;
111 private String mediaArtist;
112 private String mediaAlbum;
113 private String mediaHeight;
114 private String mediaWidth;
115
116 private static final Log LOG = ExoLogger.getLogger(LinkShare.class);
117
118 private static final String HTTP_PROTOCOL = "http://";
119 private static final String HTTPS_PROTOCOL = "https://";
120
121
122
123
124 private static final int MIN_WIDTH = 55;
125 private static final int MIN_HEIGHT = 55;
126
127 private static final int MAX_DESCRIPTION = 500;
128 public static final String ACTIVITY_LINK_PREVIEW_ENABLED_PROPERTY = "exo.activity.link.preview.enabled";
129 private static boolean previewEnabled = isPreviewEnabled();
130
131 private static String lang = "en";
132 private String link;
133 private String title;
134 private String description;
135 private String imageSrc;
136 private List<String> images;
137 private ExoMedia mediaObject;
138
139 private HashMap<String, String> descriptions;
140
141 private String temp;
142
143 private StringBuffer pText;
144
145 private boolean firstPTagParsed = false;
146
147 private boolean onPParsing = false;
148
149
150
151 private boolean headEnded = false;
152
153
154
155
156
157
158 private LinkShare() {
159
160 }
161
162
163
164
165
166 public String getLink() {
167 return this.escapeSpecialCharacters(this.link);
168 }
169
170
171
172
173
174 public String getTitle() {
175 return this.escapeSpecialCharacters(this.title);
176 }
177
178
179
180
181
182 public void setTitle(String title) {
183 this.title = title;
184 }
185
186
187
188
189
190 public String getDescription() {
191 return this.escapeSpecialCharacters(this.description);
192 }
193
194
195
196
197
198 public void setDescription(String description) {
199 this.description = description;
200 }
201
202
203
204
205
206 public List<String> getImages() {
207 return images;
208 }
209
210
211
212
213
214 public String getMediumType() {
215 return mediumType;
216 }
217
218
219
220
221
222
223 public String getMediaSrc() {
224 return mediaSrc;
225 }
226
227
228
229
230
231
232
233
234
235
236
237
238 public String getMediaType() {
239 return mediaType;
240 }
241
242
243
244
245
246
247
248
249 public String getMediaTitle() {
250 return mediaTitle;
251 }
252
253
254
255
256
257
258
259
260
261 public String getMediaArtist() {
262 return mediaArtist;
263 }
264
265
266
267
268
269
270
271
272 public String getMediaAlbum() {
273 return mediaAlbum;
274 }
275
276
277
278
279
280
281
282
283 public String getMediaHeight() {
284 return mediaHeight;
285 }
286
287
288
289
290
291
292
293
294 public String getMediaWidth() {
295 return mediaWidth;
296 }
297
298
299
300
301 public ExoMedia getMediaObject() {
302 return mediaObject;
303 }
304
305
306
307
308
309
310 private void get(String encoding) throws Exception {
311
312 ElementRemover remover = new ElementRemover();
313 remover.acceptElement("head", null);
314 remover.acceptElement("meta", new String[] {"name", "content", "lang"});
315 remover.acceptElement("link", new String[] {"rel", "href"});
316 remover.acceptElement("title", null);
317 remover.acceptElement("img", new String[] {"src", "width", "height"});
318 remover.acceptElement("p", null);
319
320 remover.acceptElement("a", null);
321 remover.acceptElement("b", null);
322 remover.acceptElement("i", null);
323 remover.acceptElement("strong", null);
324
325 remover.removeElement("script");
326
327 XMLDocumentFilter[] filter = {
328 remover
329 };
330 XMLParserConfiguration parser = new HTMLConfiguration();
331 parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8");
332 parser.setProperty("http://cyberneko.org/html/properties/filters", filter);
333 parser.setDocumentHandler(this);
334 XMLInputSource source = new XMLInputSource(null, Util.getDecodeQueryURL(link), null);
335 source.setEncoding(encoding);
336 try {
337 parser.parse(source);
338 } catch (NullPointerException ne) {
339 ExoLogger.getLogger(LinkShare.class)
340 .warn("Problem when parsing the link in LinkShare.getInstance(String) method");
341 } catch (IOException e) {
342
343
344 this.title = this.link;
345 } catch (Exception e) {
346 this.title = this.link;
347 }
348 }
349
350
351
352
353
354
355
356 public static LinkShare getInstance(String link) throws Exception {
357 return getInstance(link, lang);
358 }
359
360
361
362
363
364
365
366
367 public static LinkShare getInstance(String link, String lang) throws Exception {
368 if (link == null) {
369 return null;
370 }
371 if (!Util.isValidURL(link)) {
372 return null;
373 }
374
375 if (!(link.toLowerCase().startsWith(HTTP_PROTOCOL) || link.toLowerCase().startsWith(HTTPS_PROTOCOL))) {
376 link = HTTP_PROTOCOL + link;
377 }
378
379 LinkShare linkShare = new LinkShare();
380 linkShare.link = link;
381 LinkShare.lang = lang;
382
383 if(previewEnabled) {
384 linkShare.mediaObject = EmbedderFactory.getInstance(link).getExoMedia();
385
386
387 if (linkShare.mediaObject == null) {
388 String mimeType = org.exoplatform.social.service.rest.Util.getMimeTypeOfURL(link);
389 if (mimeType.toLowerCase().startsWith(IMAGE_MIME_TYPE)) {
390 linkShare.images = new ArrayList<>(0);
391 linkShare.images.add(link);
392 linkShare.description = "";
393 } else if (mimeType.toLowerCase().startsWith(HTML_MIME_TYPE)) {
394 String encoding = (mimeType.contains("charset=")) ? mimeType.split("charset=")[1] : "UTF-8";
395 linkShare.get(encoding);
396 } else {
397 linkShare.images = new ArrayList<>(0);
398 linkShare.description = "";
399 }
400
401 if ((linkShare.title == null) || (linkShare.title.trim().length() == 0)) linkShare.title = link;
402
403
404 if (linkShare.imageSrc != null) {
405 List<String> images = new ArrayList<>();
406 images.add(linkShare.imageSrc);
407 linkShare.images = images;
408 }
409
410 HashMap<String, String> descriptions = linkShare.descriptions;
411 if (descriptions != null) {
412 String description = descriptions.get(LinkShare.lang);
413 if (description == null) {
414 Collection<String> values = descriptions.values();
415
416 description = values.iterator().next();
417 }
418 linkShare.description = description;
419
420 String tail = "";
421 if (description.length() > MAX_DESCRIPTION) {
422 tail = "...";
423 linkShare.description = description.substring(0, MAX_DESCRIPTION - 1) + tail;
424 }
425 }
426 if (linkShare.description == null) linkShare.description = "";
427 if (linkShare.images == null) {
428 linkShare.images = new ArrayList<>();
429 }
430 }
431 }
432 return linkShare;
433 }
434
435 private static boolean isPreviewEnabled() {
436 String previewEnabledPropertyValue = System.getProperty(ACTIVITY_LINK_PREVIEW_ENABLED_PROPERTY);
437 return previewEnabledPropertyValue == null || Boolean.valueOf(previewEnabledPropertyValue);
438 }
439
440
441
442
443 public void startElement(QName element, XMLAttributes attrs, Augmentations augs) {
444 if (headEnded == true && descriptions == null) {
445 if (firstPTagParsed == false) {
446 if ("p".equalsIgnoreCase(element.rawname)) {
447 firstPTagParsed = true;
448 onPParsing = true;
449 }
450 }
451 } else if ("title".equalsIgnoreCase(element.rawname)) {
452 onPParsing = true;
453 }
454 }
455
456
457
458
459
460 public void endElement(QName element, Augmentations augs) {
461
462
463
464 if ("title".equalsIgnoreCase(element.rawname)) {
465 if (title == null) {
466 if (onPParsing) {
467 title = pText.toString();
468 onPParsing = false;
469 pText = null;
470 } else {
471 title = temp;
472 }
473 }
474 }
475
476 if ("head".equalsIgnoreCase(element.rawname)) {
477 headEnded = true;
478 }
479
480 if (onPParsing == true) {
481 if ("p".equalsIgnoreCase(element.rawname)) {
482 onPParsing = false;
483 description = pText.toString();
484 }
485 }
486 }
487
488
489
490
491 public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) {
492 if("link".equalsIgnoreCase(element.rawname)) {
493 String relValue;
494 String hrefValue;
495 relValue = attributes.getValue("rel");
496 hrefValue = attributes.getValue("href");
497 if (hrefValue != null) hrefValue = getAbsLink(hrefValue);
498 if ("image_src".equalsIgnoreCase(relValue)) {
499 imageSrc = hrefValue;
500 } else if ("audio_src".equalsIgnoreCase(relValue)) {
501 mediaSrc = hrefValue;
502 mediumType = MEDIUM_TYPE_AUDIO;
503 } else if ("video_src".equalsIgnoreCase(relValue)) {
504 mediaSrc = hrefValue;
505 mediumType = MEDIUM_TYPE_VIDEO;
506 }
507 } else if ("meta".equalsIgnoreCase(element.rawname)) {
508 String nameValue;
509 String contentValue;
510 nameValue = attributes.getValue("name");
511 if (nameValue == null) return;
512 contentValue = attributes.getValue("content");
513 if (contentValue == null) return;
514
515 if ("medium".equalsIgnoreCase(nameValue)) {
516 if ("news".equalsIgnoreCase(contentValue)) {
517 mediumType = MEDIUM_TYPE_NEWS;
518 } else if ("audio".equalsIgnoreCase(contentValue)) {
519 mediumType = MEDIUM_TYPE_AUDIO;
520 } else if ("image".equalsIgnoreCase(contentValue)) {
521 mediumType = MEDIUM_TYPE_IMAGE;
522 } else if ("video".equalsIgnoreCase(contentValue)) {
523 mediumType = MEDIUM_TYPE_VIDEO;
524 } else if ("blog".equalsIgnoreCase(contentValue)) {
525 mediumType = MEDIUM_TYPE_BLOG;
526 } else if ("mult".equalsIgnoreCase(contentValue)) {
527 mediumType = MEDIUM_TYPE_MULT;
528 }
529 } else if ("title".equalsIgnoreCase(nameValue)) {
530 title = contentValue;
531 } else if ("description".equalsIgnoreCase(nameValue)) {
532 String langValue = attributes.getValue("lang");
533 if (langValue != null) {
534 if (descriptions == null) descriptions = new HashMap<String, String>();
535 descriptions.put(langValue, contentValue);
536 } else {
537 description = contentValue;
538 }
539 }
540
541 if (mediumType.equals(MEDIUM_TYPE_AUDIO) || mediumType.equals(MEDIUM_TYPE_MULT)) {
542 if ("audio_type".equalsIgnoreCase(nameValue)) {
543 mediaType = contentValue;
544 } else if ("audio_title".equalsIgnoreCase(nameValue)) {
545 mediaTitle = contentValue;
546 } else if ("audio_artist".equalsIgnoreCase(nameValue)) {
547 mediaArtist = contentValue;
548 } else if ("audio_album".equalsIgnoreCase(nameValue)) {
549 mediaAlbum = contentValue;
550 }
551 } else if (mediumType.equals(MEDIUM_TYPE_VIDEO) || mediumType.equals(MEDIUM_TYPE_MULT)) {
552 if ("video_type".equalsIgnoreCase(nameValue)) {
553 mediaType = contentValue;
554 } else if ("video_title".equalsIgnoreCase(nameValue)) {
555 mediaTitle = contentValue;
556 } else if ("video_height".equalsIgnoreCase(nameValue)) {
557 mediaHeight = contentValue;
558 } else if ("video_width".equalsIgnoreCase(nameValue)) {
559 mediaWidth = contentValue;
560 } else if ("video_artist".equalsIgnoreCase(nameValue)) {
561 mediaArtist = contentValue;
562 } else if ("video_album".equalsIgnoreCase(nameValue)) {
563 mediaAlbum = contentValue;
564 }
565 }
566 } else if ((imageSrc == null) && ("img".equalsIgnoreCase(element.rawname))) {
567 String src = attributes.getValue("src");
568 if (src == null) return;
569
570 if (isAcceptableImg(src)) {
571 src = getAbsLink(src);
572 if (images == null) images = new ArrayList<String>();
573 images.add(src);
574 }
575 }
576 }
577
578
579
580
581
582 public void characters(XMLString text, Augmentations augs) {
583 temp = text.toString();
584 if (onPParsing == true) {
585 if (pText == null) pText = new StringBuffer();
586 pText.append(temp);
587 }
588 }
589
590
591
592
593
594
595 private String getAbsLink(String link) {
596 if (link.startsWith("http://") || link.startsWith("https://")) return link;
597 URL url = null;
598 try {
599 url = new URL(this.link);
600 } catch (MalformedURLException e) {
601 LOG.debug("MalformedURLException : Could not initialize url from link.");
602 }
603 String protocol = url.getProtocol();
604 String host = url.getHost();
605 String base = protocol + "://" + host;
606 if (link.startsWith("/")) {
607
608 return base + link;
609 } else if (link.startsWith("./")) {
610 if (this.link.endsWith("/")) {
611 this.link = this.link.substring(0, this.link.length() - 1);
612 }
613 link = link.substring(1, link.length());
614 return this.link + link;
615 } else if (link.startsWith("../")) {
616 String regex = "\\.\\./";
617 Pattern partern = Pattern.compile(regex);
618 Matcher matcher = partern.matcher(link);
619 int level = 0;
620 while (matcher.find()) {
621 level++;
622 }
623 String secondPath = link.replace("(\\.\\./)+", "");
624 String[] str = this.link.split("/");
625 StringBuffer sb = new StringBuffer();
626 level = (str.length - 1) - level;
627 for (int i = 0; i < level; i++) {
628 sb.append(str[i]);
629 }
630 sb.append(secondPath);
631 return sb.toString();
632 } else {
633 if (this.link.endsWith("/")) {
634 return this.link + link;
635 } else {
636 return this.link + "/" + link;
637 }
638 }
639 }
640
641
642
643
644
645
646
647
648 private String escapeSpecialCharacters(String str) {
649 if (str != null) {
650 return str.replaceAll("\r\n|\n\r|\n|\r", "");
651 } else {
652 return "";
653 }
654 }
655
656 private boolean isAcceptableImg(String src) {
657 BufferedImage img = null;
658 try {
659 img = ImageIO.read(new URL(src));
660 int width = img.getWidth();
661 int height = img.getHeight();
662 return (width > MIN_WIDTH && height > MIN_HEIGHT);
663 } catch (MalformedURLException e) {
664 return false;
665 } catch (IOException e) {
666 return false;
667 }
668 }
669 }