Suppose there's a tree structure MyTree, which contains three fields: Integer id, String value, and List<MyTree>
For example,
"(1:AA(2:BB)(3:CC(31:wef(52:huEf)(35:weDf(554:Q)))(43:f34w(42:wefw)))(4:ge(43:werw))(5:zer(99:f5)(100:f5)))"
is the tree string of a MyTree object (the return value of the "public String toString()" method in class MyTree) with id 1, value AA, and four MyTree children with id 2, 3, 4, 5 respectively.
MyTree.java
package com.treeparse;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MyTree {
private Integer id;
private String value;
private List<MyTree> children;
public MyTree(Integer id, String value){
if (id == null || id < 0)
new Exception("Error: The id should be non-negative.").printStackTrace();
Pattern pattern = Pattern.compile("[^a-zA-Z0-9]");
Matcher matcher = pattern.matcher(value);
if (matcher.find())
new Exception("Error: The value should contain only English letters or numbers.").printStackTrace();
this.id = id;
this.value = value;
children = new ArrayList<MyTree>();
}
public String getValue() {
return value;
}
public void setValue(String value){
this.value = value;
}
public int getId(){
return id;
}
public void setId(int id){
this.id = id;
}
public List<MyTree> getChildren() {
return children;
}
public MyTree getChild(int i){
return children.get(i);
}
public boolean isLeaf() {
return children.size() == 0;
}
public static MyTree valueOf(String treeString){
MyParser mp = new MyParser();
return mp.parse(treeString);
}
@Override
public String toString(){
StringBuilder builder = new StringBuilder("(" + id + ":" + value);
for (MyTree child : children)
builder.append(child);
return builder.append(")").toString();
}
}
MyTokenizer.java
package com.treeparse;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MyTokenizer {
private String treeString;
protected boolean isInvalid;
protected String value;
protected Integer id;
protected List<String> childUnits;
public MyTokenizer(String treeString) {
this.treeString = treeString.trim();
tokenize();
}
private void tokenize(){
try {
setId();
setValue();
setChildUnits();
} catch (Exception e) {
System.out.println("MyTokenizer.tokenize() Exception e = " + e);
isInvalid = true;
return;
}
}
private void setId(){
try {
Pattern pattern = Pattern.compile("(?<=\\() *\\d+ *(?=:)");
Matcher matcher = pattern.matcher(treeString);
if (matcher.find()){
id = Integer.parseInt(matcher.group().trim());
return;
}
} catch (Exception e) {
System.out.println("MyTokenizer.setId() Exception e = " + e);
}
isInvalid = true;
}
private int tempIndex;
private void setValue(){
try {
Pattern pattern = Pattern.compile("(?<=:) *[a-zA-Z0-9]+ *[\\(\\)]");
Matcher matcher = pattern.matcher(treeString);
if (matcher.find()){
tempIndex = treeString.indexOf(matcher.group()) + matcher.group().length() - 1;
value = matcher.group().substring(0, matcher.group().length() - 1).trim();
return;
}
} catch (Exception e) {
System.out.println("MyTokenizer.setValue() Exception e = " + e);
}
isInvalid = true;
}
/**
* Examine treeString's parentheses from leftParenthesis to treeString.length() - 1
* @param leftParenthesis
*/
private void setChildUnits(){
this.childUnits = new ArrayList<String>();
int cv = 0;
Integer tempStart = null;
for (int i = tempIndex; i < treeString.length() - 1; i ++){
switch (treeString.charAt(i)) {
case '(':
if (++cv == 1)
tempStart = i;
break;
case ')':
if (--cv == 0){
childUnits.add(treeString.substring(tempStart, i+1));
tempStart = null;
}
break;
}
}
if (tempStart != null)
isInvalid = true;
}
}
MyParser.java
package com.treeparse;
import java.util.List;
public class MyParser {
public MyParser() {
}
public MyTree parse(String treeString){
MyTokenizer tokenizer = new MyTokenizer(treeString);
if (tokenizer.isInvalid){
System.err.println("warning: the input is not a valid MyTree string.");
return null;
}
List<String> childUnits = tokenizer.childUnits;
MyTree myTree = new MyTree(tokenizer.id, tokenizer.value);
List<MyTree> children = myTree.getChildren();
for (String childUnit : childUnits)
children.add(parse(childUnit));
return myTree;
}
public static void main(String[] args) {
String testString = "(1 : AA (2 : BB) (3: wef (56:3t4) (6:4g3)) )";
testString = "(1:AA(2:BB)(3:CC(31:wef(52:huEf)(35:weDf(554:Q)))(43:f34w(42:wefw)))(4:ge(43:werw))(5:zer(99:f5)(100:f5)))";
MyTree tree = MyTree.valueOf(testString);
System.out.println(tree);
for (MyTree myTree : tree.getChildren()){
System.out.println(myTree.getId() + " with tree string: " + myTree);
}
}
}
Key point of the algorithm: