// DOMParser.java
/*
* Copyright (C) 2003-2011 eXo Platform SAS.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.exoplatform.social.common.xmlprocessor;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.exoplatform.social.common.xmlprocessor.model.Node;
/**
* DOMParser utility
*
* @author Ly Minh Phuong - http://phuonglm.net
*/
public class DOMParser {
  /** Finds the comment opener {@code <!--} anywhere in a token. */
  public static final Pattern COMMENTPATTERN = Pattern.compile("<!--.*"); // <!--.........>

  /**
   * Finds an opening tag that ends the token. Group 1 is the tag name, group 2 is
   * everything between the name and the final {@code >} (the raw attribute text).
   */
  public static final Pattern TAGSTARTPATTERN = Pattern
      .compile("<(?i)(\\w+\\b)\\s*(.*)/?>$"); // <tag ....props.....>

  /** Finds a closing tag that ends the token. Group 1 is the tag name. */
  public static final Pattern TAGCLOSEPATTERN = Pattern
      .compile("</(?i)(\\w+\\b)\\s*>$"); // </tag .........>

  /** Finds a tag whose {@code >} is preceded by {@code /} (optionally with whitespace between). */
  public static final Pattern SELFTCLOSETAGPATTERN = Pattern.compile("<.+/\\s*?>");

  /**
   * Finds one quoted attribute. Group 1 is the name, group 3 the value when it is
   * double-quoted, group 4 the value when it is single-quoted.
   */
  public static final Pattern ATTRIBUTESPATTERN = Pattern
      .compile("(\\S*)\\s*=\\s*(\"([^\"]*)\"|'([^']*)')"); // prop="...."

  /** Length of the {@code <!--} comment opener. */
  private static final int COMMENT_OPEN_LENGTH = 4;

  /** Length of the {@code -->} comment terminator. */
  private static final int COMMENT_CLOSE_LENGTH = 3;

  /**
   * Creates a DOM tree from a list of XML tokens, using a fresh {@link Node} as root.
   *
   * @param xmlTokens The XML/HTML token list.
   * @return the new root node holding the nodes parsed from the token list.
   */
  public static Node createDOMTree(List<String> xmlTokens) {
    return createDOMTree(new Node(), xmlTokens);
  }

  /**
   * Parses the token list and appends the resulting nodes as children of {@code currentNode}.
   * <p>
   * Each token is classified in this order:
   * <ol>
   * <li>comment: a child node whose title is the text between the comment markers;</li>
   * <li>self-closing tag: a child node with the lower-cased tag name as title and its
   * attributes;</li>
   * <li>opening tag with a matching closing tag later in the list: a child node with title
   * and attributes, whose own children are parsed recursively from the tokens in between;</li>
   * <li>anything else (plain text, an opening tag without a matching close, a closing tag
   * that was not consumed by an opener): a child node whose content is the raw token.</li>
   * </ol>
   * Tag and attribute names are lower-cased with {@link Locale#ROOT} so that the result does
   * not depend on the default locale of the JVM.
   *
   * @param currentNode The Node to add child nodes to.
   * @param xmlTokens The XML/HTML token list.
   * @return {@code currentNode}, after the parsed children have been added.
   */
  public static Node createDOMTree(Node currentNode, List<String> xmlTokens) {
    for (int i = 0; i < xmlTokens.size(); i++) {
      String token = xmlTokens.get(i);

      if (COMMENTPATTERN.matcher(token).find()) {
        Node commentNode = newNode(currentNode);
        currentNode.addChildNode(commentNode);
        commentNode.setTitle(commentText(token));
        continue;
      }

      Matcher startMatcher = TAGSTARTPATTERN.matcher(token);
      if (!startMatcher.find()) {
        // Stray closing tags and plain text are both kept verbatim as content.
        Node textNode = newNode(currentNode);
        textNode.setContent(token);
        currentNode.addChildNode(textNode);
        continue;
      }

      String tag = startMatcher.group(1).toLowerCase(Locale.ROOT);
      String attributeText = startMatcher.group(2);

      if (SELFTCLOSETAGPATTERN.matcher(token).find()) {
        Node element = newNode(currentNode);
        currentNode.addChildNode(element);
        element.setTitle(tag);
        addAttributes(element, attributeText);
        continue;
      }

      int matchedEnd = findMatchingClose(xmlTokens, i + 1, tag);
      if (matchedEnd >= 0) {
        Node element = newNode(currentNode);
        element.setTitle(tag);
        addAttributes(element, attributeText);
        currentNode.addChildNode(element);
        createDOMTree(element, xmlTokens.subList(i + 1, matchedEnd));
        // Jump to the closing tag; the loop increment then steps past it.
        i = matchedEnd;
      } else {
        // No closing tag was found: keep the opening tag as raw text.
        Node textNode = newNode(currentNode);
        currentNode.addChildNode(textNode);
        textNode.setContent(token);
      }
    }
    return currentNode;
  }

  /** Creates a node whose parent reference is {@code parent}; the caller attaches it as a child. */
  private static Node newNode(Node parent) {
    Node node = new Node();
    node.setParentNode(parent);
    return node;
  }

  /**
   * Returns the token without its first 4 and last 3 characters (the {@code <!--} and
   * {@code -->} markers of a well-formed comment). The bounds are clamped so that a token
   * shorter than both markers together yields an empty string instead of throwing.
   */
  private static String commentText(String token) {
    int start = Math.min(COMMENT_OPEN_LENGTH, token.length());
    int end = Math.max(start, token.length() - COMMENT_CLOSE_LENGTH);
    return token.substring(start, end);
  }

  /** Adds every quoted attribute found in {@code attributeText} to {@code node}, name lower-cased. */
  private static void addAttributes(Node node, String attributeText) {
    Matcher attributes = ATTRIBUTESPATTERN.matcher(attributeText);
    while (attributes.find()) {
      String name = attributes.group(1).toLowerCase(Locale.ROOT);
      // Group 4 is set for single-quoted values, group 3 for double-quoted ones.
      String value = attributes.group(4) == null ? attributes.group(3) : attributes.group(4);
      node.addAttribute(name, value);
    }
  }

  /**
   * Scans forward from index {@code from} for the closing tag that pairs with an opening
   * {@code tag}, and returns its index or {@code -1} when there is none.
   * <p>
   * Every non-self-closing opening tag raises the depth; a closing tag matches when its
   * lower-cased name equals {@code tag} at depth zero, and otherwise lowers the depth (which
   * may become negative when the list contains stray closing tags). Comment tokens are
   * skipped, because the parser never treats their content as tags.
   */
  private static int findMatchingClose(List<String> xmlTokens, int from, String tag) {
    int depth = 0;
    for (int j = from; j < xmlTokens.size(); j++) {
      String candidate = xmlTokens.get(j);
      if (COMMENTPATTERN.matcher(candidate).find()) {
        continue;
      }
      if (TAGSTARTPATTERN.matcher(candidate).find()) {
        if (!SELFTCLOSETAGPATTERN.matcher(candidate).find()) {
          depth++;
        }
        continue;
      }
      Matcher closeMatcher = TAGCLOSEPATTERN.matcher(candidate);
      if (closeMatcher.find()) {
        if (depth == 0 && closeMatcher.group(1).toLowerCase(Locale.ROOT).equals(tag)) {
          return j;
        }
        depth--;
      }
    }
    return -1;
  }
}