LinkShare.java
/*
* Copyright (C) 2003-2009 eXo Platform SAS.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see<http://www.gnu.org/licenses/>.
*/
package org.exoplatform.social.service.rest;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import javax.xml.bind.annotation.XmlRootElement;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.cyberneko.html.HTMLConfiguration;
import org.cyberneko.html.filters.DefaultFilter;
import org.cyberneko.html.filters.ElementRemover;
import org.exoplatform.commons.embedder.EmbedderFactory;
import org.exoplatform.commons.embedder.ExoMedia;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;
/**
* LinkShare - gets preview information of a link including:
* - link
* - title
* - description
* - images
*
* - media (from popular sites: youtube, vimeo, flickr...) - low priority (NOT IMPLEMENTED YET)
* This should be implemented from the client side to display preview and media player.
* <br>
* In order to provide this preview, always looks for
* the title of the page, a summary of the main content, and an image.
* Looks for preview information by the following priority:
* <br>
* 1.
* <pre>
* {@code
* <meta name="title" content="page_title" />
* <meta name="description" content="page_description" />
* <link rel="image_src" href="image_source" />
* }
* </pre>
* <br>
* 2.
* If title not found, then find in {@code <title>} tag.
* If description not found, then find first <p> tag. If no description, then return ""
* If img_src not found, then find all images in page with max, min specified width + height
* <br>
* 3.
* To specify medium, use tag:
* <pre>
* {@code <meta name="medium" content="medium_type" />}
* </pre>
* In which: medium_type can be "audio", "image", "video", "news", "blog" and "mult".
* <br>
* Created by The eXo Platform SEA
* TODO: hoatle improvement:
* + scans description with MIN_CHARACTER
* + handles exception
* + parser more faster with and without scanning image tag, stop right when things got.
*
* @author <a href="mailto:hoatlevan@gmail.com">hoatle</a>
* @since Oct 8, 2009
* @see "http://activitystrea.ms/"
* @see "http://www.facebook.com/share_partners.php"
*/
@XmlRootElement
public class LinkShare extends DefaultFilter {
private final String MEDIUM_TYPE_NEWS = "news";
private final String MEDIUM_TYPE_AUDIO = "audio";
private final String MEDIUM_TYPE_IMAGE = "image";
private final String MEDIUM_TYPE_VIDEO = "video";
private final String MEDIUM_TYPE_BLOG = "blog";
private final String MEDIUM_TYPE_MULT = "mult";
private static final String IMAGE_MIME_TYPE = "image/";
private static final String HTML_MIME_TYPE = "text/html";
//default medium_type = "news"
private String mediumType = MEDIUM_TYPE_NEWS;
private String mediaSrc;
private String mediaType;
private String mediaTitle;
private String mediaArtist;
private String mediaAlbum;
private String mediaHeight;
private String mediaWidth;
private static final Log LOG = ExoLogger.getLogger(LinkShare.class);
private static final String HTTP_PROTOCOL = "http://";
private static final String HTTPS_PROTOCOL = "https://";
//min with and height of images to get from img attributes in pixel.
// With <img src="img_src" width="55px" height="55px" /> ~ <img src="img_src" width="55" height="55" />
//if width="55pt" => with="55" ~ width="55px" (not correct but can be accepted)
private static final int MIN_WIDTH = 55;
private static final int MIN_HEIGHT = 55;
//maxium description length = 250 characters
private static final int MAX_DESCRIPTION = 500;
public static final String ACTIVITY_LINK_PREVIEW_ENABLED_PROPERTY = "exo.activity.link.preview.enabled";
private static boolean previewEnabled = isPreviewEnabled();
//default lang
private static String lang = "en";
private String link;
private String title;
private String description;
private String imageSrc;
private List<String> images;
private ExoMedia mediaObject;
//Collections of description with key as lang
private HashMap<String, String> descriptions;
//holds temporary string values from characters() method
private String temp;
//store all text from the first p tag
private StringBuffer pText;
//gets all the text from first p tag if no description meta and headEnded = true
private boolean firstPTagParsed = false;
//If on p parsing, get all text from temp to pText
private boolean onPParsing = false;
// to mark the end of the head tag part ~ no more meta tag.
// If no more meta tag (headEnded = true) and no description -> get description.
private boolean headEnded = false;
/**
* Uses LinkShare.getInstance(String link) or
* LinkShare.getInstance(String link, String lang)
* for creating LinkShare object
*/
private LinkShare() {
}
/**
* gets provided link
* @return provided link
*/
public String getLink() {
return this.escapeSpecialCharacters(this.link);
}
/**
* gets title
* @return title
*/
public String getTitle() {
return this.escapeSpecialCharacters(this.title);
}
/**
* Set new value for title.
* @param title
*/
public void setTitle(String title) {
this.title = title;
}
/**
* gets description
* @return description
*/
public String getDescription() {
return this.escapeSpecialCharacters(this.description);
}
/**
* Set new value for description.
* @param description
*/
public void setDescription(String description) {
this.description = description;
}
/**
* gets images list
* @return images
*/
public List<String> getImages() {
return images;
}
/**
* gets mediumType
* @return mediumType
*/
public String getMediumType() {
return mediumType;
}
/**
* gets mediaSrc
* @return mediaSrc
*/
public String getMediaSrc() {
return mediaSrc;
}
/**
* gets mediaType if provided in:
* <pre>
* <meta name="audio_type" content="Content-Type header field" />
* </pre>
* or:
* <pre>
* <meta name="video_type" content="Content-Type header field" />
* </pre>
* @return mediaType
*/
public String getMediaType() {
return mediaType;
}
/**
* gets mediaTitle if provided in:
* <pre>
* <meta name="audio_title" content="audio_title_name" />
* </pre>
* @return mediaTitle
*/
public String getMediaTitle() {
return mediaTitle;
}
/**
* gets mediaArtist if provided in:
* <pre>
* <meta name="audio_artist" content="audio_artist_name" />
* </pre>
* @return mediaArtist
*/
public String getMediaArtist() {
return mediaArtist;
}
/**
* gets mediaAlbum if provided in:
* <pre>
* <meta name="audio_album" content="audio_album_name" />
* </pre>
* @return mediaAlbum
*/
public String getMediaAlbum() {
return mediaAlbum;
}
/**
* gets mediaHeight if provided in:
* <pre>
* <meta name="video_height" content="video_height_value" />
* </pre>
* @return mediaHeight;
*/
public String getMediaHeight() {
return mediaHeight;
}
/**
* gets mediaWidth if provided in:
* <pre>
* <meta name="video_width" content="video_width_value" />
* </pre>
* @return mediaWidth
*/
public String getMediaWidth() {
return mediaWidth;
}
/**
* get mediaObject
* @return
*/
public ExoMedia getMediaObject() {
return mediaObject;
}
/**
* Gets information of the provided link by using remover filter,
* using call back filter methods to get desired information.
* @param encoding
*/
private void get(String encoding) throws Exception {
//Creates element remover filter
ElementRemover remover = new ElementRemover();
remover.acceptElement("head", null);
remover.acceptElement("meta", new String[] {"name", "content", "lang"});
remover.acceptElement("link", new String[] {"rel", "href"});
remover.acceptElement("title", null);
remover.acceptElement("img", new String[] {"src", "width", "height"});
remover.acceptElement("p", null);
//accepts more tags to get text from a <p> tag
remover.acceptElement("a", null);
remover.acceptElement("b", null);
remover.acceptElement("i", null);
remover.acceptElement("strong", null);
remover.removeElement("script");
//Sets up filter chain
XMLDocumentFilter[] filter = {
remover
};
XMLParserConfiguration parser = new HTMLConfiguration();
parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8");
parser.setProperty("http://cyberneko.org/html/properties/filters", filter);
parser.setDocumentHandler(this);
XMLInputSource source = new XMLInputSource(null, Util.getDecodeQueryURL(link), null);
source.setEncoding(encoding);
try {
parser.parse(source);
} catch (NullPointerException ne) {
ExoLogger.getLogger(LinkShare.class)
.warn("Problem when parsing the link in LinkShare.getInstance(String) method");
} catch (IOException e) {
// Process as normal behavior in case the link is in the valid form
// but have been blocked or some other same reasons.
this.title = this.link;
} catch (Exception e) {
this.title = this.link;
}
}
/**
* Gets LinkShare instance with specified link. The default lang = "en"
* @param link
* @return LinkShare instance
* @throws Exception
*/
public static LinkShare getInstance(String link) throws Exception {
return getInstance(link, lang);
}
/**
* Gets LinkShare instance with link and lang specified.
* @param link
* @param lang
* @return LinkShare instance
* @throws Exception
*/
public static LinkShare getInstance(String link, String lang) throws Exception {
if (link == null) {
return null;
}
if (!Util.isValidURL(link)) {
return null;
}
if (!(link.toLowerCase().startsWith(HTTP_PROTOCOL) || link.toLowerCase().startsWith(HTTPS_PROTOCOL))) {
link = HTTP_PROTOCOL + link;
}
LinkShare linkShare = new LinkShare();
linkShare.link = link;
LinkShare.lang = lang;
if(previewEnabled) {
linkShare.mediaObject = EmbedderFactory.getInstance(link).getExoMedia();
// if there is no media object, processes link to get page metadata
if (linkShare.mediaObject == null) {
String mimeType = org.exoplatform.social.service.rest.Util.getMimeTypeOfURL(link);
if (mimeType.toLowerCase().startsWith(IMAGE_MIME_TYPE)) {
linkShare.images = new ArrayList<>(0);
linkShare.images.add(link);
linkShare.description = "";
} else if (mimeType.toLowerCase().startsWith(HTML_MIME_TYPE)) {
String encoding = (mimeType.contains("charset=")) ? mimeType.split("charset=")[1] : "UTF-8";
linkShare.get(encoding);
} else {
linkShare.images = new ArrayList<>(0);
linkShare.description = "";
}
if ((linkShare.title == null) || (linkShare.title.trim().length() == 0)) linkShare.title = link;
//If image_src detected from meta tag, sets this image_src to images
if (linkShare.imageSrc != null) {
List<String> images = new ArrayList<>();
images.add(linkShare.imageSrc);
linkShare.images = images;
}
//gets desired description by lang when there are many description meta name with different lang
HashMap<String, String> descriptions = linkShare.descriptions;
if (descriptions != null) {
String description = descriptions.get(LinkShare.lang);
if (description == null) {
Collection<String> values = descriptions.values();
//get the first value in the collection
description = values.iterator().next();
}
linkShare.description = description;
//gets with maximum characters only
String tail = "";
if (description.length() > MAX_DESCRIPTION) {
tail = "...";
linkShare.description = description.substring(0, MAX_DESCRIPTION - 1) + tail;
}
}
if (linkShare.description == null) linkShare.description = "";
if (linkShare.images == null) {
linkShare.images = new ArrayList<>();
}
}
}
return linkShare;
}
private static boolean isPreviewEnabled() {
String previewEnabledPropertyValue = System.getProperty(ACTIVITY_LINK_PREVIEW_ENABLED_PROPERTY);
return previewEnabledPropertyValue == null || Boolean.valueOf(previewEnabledPropertyValue);
}
/**
* filter method is called back when scanning meets start element tag
*/
public void startElement(QName element, XMLAttributes attrs, Augmentations augs) {
if (headEnded == true && descriptions == null) {
if (firstPTagParsed == false) {
if ("p".equalsIgnoreCase(element.rawname)) {
firstPTagParsed = true;
onPParsing = true;
}
}
} else if ("title".equalsIgnoreCase(element.rawname)) {
onPParsing = true;
}
}
/**
* filter method is called back when scanning meets end element tag
*/
public void endElement(QName element, Augmentations augs) {
//System.out.println("( " + element.rawname);
//if end of title -> set temporary title;
//if detect <meta name="title" content="meta_title" />, reset title
if ("title".equalsIgnoreCase(element.rawname)) {
if (title == null) {
if (onPParsing) {
title = pText.toString();
onPParsing = false;
pText = null;
} else {
title = temp;
}
}
}
//set headEnded
if ("head".equalsIgnoreCase(element.rawname)) {
headEnded = true;
}
//Set end of p tag
if (onPParsing == true) {
if ("p".equalsIgnoreCase(element.rawname)) {
onPParsing = false;
description = pText.toString();
}
}
}
/**
* this filter method is called back when scanning meets empty element tag
*/
public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) {
if("link".equalsIgnoreCase(element.rawname)) { //process link tag
String relValue;
String hrefValue;
relValue = attributes.getValue("rel");
hrefValue = attributes.getValue("href");
if (hrefValue != null) hrefValue = getAbsLink(hrefValue);
if ("image_src".equalsIgnoreCase(relValue)) {
imageSrc = hrefValue;
} else if ("audio_src".equalsIgnoreCase(relValue)) {
mediaSrc = hrefValue;
mediumType = MEDIUM_TYPE_AUDIO;
} else if ("video_src".equalsIgnoreCase(relValue)) {
mediaSrc = hrefValue;
mediumType = MEDIUM_TYPE_VIDEO;
}
} else if ("meta".equalsIgnoreCase(element.rawname)) { //process meta tag
String nameValue;
String contentValue;
nameValue = attributes.getValue("name");
if (nameValue == null) return;
contentValue = attributes.getValue("content");
if (contentValue == null) return;
//Set mediumType
if ("medium".equalsIgnoreCase(nameValue)) {
if ("news".equalsIgnoreCase(contentValue)) {
mediumType = MEDIUM_TYPE_NEWS;
} else if ("audio".equalsIgnoreCase(contentValue)) {
mediumType = MEDIUM_TYPE_AUDIO;
} else if ("image".equalsIgnoreCase(contentValue)) {
mediumType = MEDIUM_TYPE_IMAGE;
} else if ("video".equalsIgnoreCase(contentValue)) {
mediumType = MEDIUM_TYPE_VIDEO;
} else if ("blog".equalsIgnoreCase(contentValue)) {
mediumType = MEDIUM_TYPE_BLOG;
} else if ("mult".equalsIgnoreCase(contentValue)) {
mediumType = MEDIUM_TYPE_MULT;
}
} else if ("title".equalsIgnoreCase(nameValue)) {
title = contentValue;
} else if ("description".equalsIgnoreCase(nameValue)) {
String langValue = attributes.getValue("lang");
if (langValue != null) {
if (descriptions == null) descriptions = new HashMap<String, String>();
descriptions.put(langValue, contentValue);
} else {
description = contentValue;
}
}
if (mediumType.equals(MEDIUM_TYPE_AUDIO) || mediumType.equals(MEDIUM_TYPE_MULT)) {
if ("audio_type".equalsIgnoreCase(nameValue)) {
mediaType = contentValue;
} else if ("audio_title".equalsIgnoreCase(nameValue)) {
mediaTitle = contentValue;
} else if ("audio_artist".equalsIgnoreCase(nameValue)) {
mediaArtist = contentValue;
} else if ("audio_album".equalsIgnoreCase(nameValue)) {
mediaAlbum = contentValue;
}
} else if (mediumType.equals(MEDIUM_TYPE_VIDEO) || mediumType.equals(MEDIUM_TYPE_MULT)) {
if ("video_type".equalsIgnoreCase(nameValue)) {
mediaType = contentValue;
} else if ("video_title".equalsIgnoreCase(nameValue)) {
mediaTitle = contentValue;
} else if ("video_height".equalsIgnoreCase(nameValue)) {
mediaHeight = contentValue;
} else if ("video_width".equalsIgnoreCase(nameValue)) {
mediaWidth = contentValue;
} else if ("video_artist".equalsIgnoreCase(nameValue)) {
mediaArtist = contentValue;
} else if ("video_album".equalsIgnoreCase(nameValue)) {
mediaAlbum = contentValue;
}
}
} else if ((imageSrc == null) && ("img".equalsIgnoreCase(element.rawname))) { //process img tag
String src = attributes.getValue("src");
if (src == null) return;
if (isAcceptableImg(src)) {
src = getAbsLink(src);
if (images == null) images = new ArrayList<String>();
images.add(src);
}
}
}
/**
* filter method is called back when scanning meets the end of text in a tag
*/
public void characters(XMLString text, Augmentations augs) {
temp = text.toString();
if (onPParsing == true) {
if (pText == null) pText = new StringBuffer();
pText.append(temp);
}
}
/**
* Gets absolute link from the provided link
* @param link
* @return absolute link
*/
private String getAbsLink(String link) {
if (link.startsWith("http://") || link.startsWith("https://")) return link;
URL url = null;
try {
url = new URL(this.link);
} catch (MalformedURLException e) {
LOG.debug("MalformedURLException : Could not initialize url from link.");
}
String protocol = url.getProtocol();
String host = url.getHost();
String base = protocol + "://" + host;
if (link.startsWith("/")) {
//Absolute
return base + link;
} else if (link.startsWith("./")) {
if (this.link.endsWith("/")) {
this.link = this.link.substring(0, this.link.length() - 1);
}
link = link.substring(1, link.length());
return this.link + link;
} else if (link.startsWith("../")) {
String regex = "\\.\\./";
Pattern partern = Pattern.compile(regex);
Matcher matcher = partern.matcher(link);
int level = 0;
while (matcher.find()) {
level++;
}
String secondPath = link.replace("(\\.\\./)+", "");
String[] str = this.link.split("/");
StringBuffer sb = new StringBuffer();
level = (str.length - 1) - level;
for (int i = 0; i < level; i++) {
sb.append(str[i]);
}
sb.append(secondPath);
return sb.toString();
} else {
if (this.link.endsWith("/")) {
return this.link + link;
} else {
return this.link + "/" + link;
}
}
}
/**
* Escapes the special characters.
*
* @param str
* @return
* @since 1.2.7
*/
private String escapeSpecialCharacters(String str) {
if (str != null) {
return str.replaceAll("\r\n|\n\r|\n|\r", "");
} else {
return "";
}
}
private boolean isAcceptableImg(String src) {
BufferedImage img = null;
try {
img = ImageIO.read(new URL(src));
int width = img.getWidth();
int height = img.getHeight();
return (width > MIN_WIDTH && height > MIN_HEIGHT);
} catch (MalformedURLException e) {
return false;
} catch (IOException e) {
return false;
}
}
}