Suppose there's a tree structure MyTree, which contains three fields: Integer id, String value, and List<MyTree>
For example,
"(1:AA(2:BB)(3:CC(31:wef(52:huEf)(35:weDf(554:Q)))(43:f34w(42:wefw)))(4:ge(43:werw))(5:zer(99:f5)(100:f5)))"
is the tree string of a MyTree object (the return value of the "public String toString()" method in class MyTree) with id 1, value AA, and four MyTree children with id 2, 3, 4, 5 respectively.
MyTree.java
package com.treeparse; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class MyTree { private Integer id; private String value; private List<MyTree> children; public MyTree(Integer id, String value){ if (id == null || id < 0) new Exception("Error: The id should be non-negative.").printStackTrace(); Pattern pattern = Pattern.compile("[^a-zA-Z0-9]"); Matcher matcher = pattern.matcher(value); if (matcher.find()) new Exception("Error: The value should contain only English letters or numbers.").printStackTrace(); this.id = id; this.value = value; children = new ArrayList<MyTree>(); } public String getValue() { return value; } public void setValue(String value){ this.value = value; } public int getId(){ return id; } public void setId(int id){ this.id = id; } public List<MyTree> getChildren() { return children; } public MyTree getChild(int i){ return children.get(i); } public boolean isLeaf() { return children.size() == 0; } public static MyTree valueOf(String treeString){ MyParser mp = new MyParser(); return mp.parse(treeString); } @Override public String toString(){ StringBuilder builder = new StringBuilder("(" + id + ":" + value); for (MyTree child : children) builder.append(child); return builder.append(")").toString(); } }
MyTokenizer.java
package com.treeparse; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; public class MyTokenizer { private String treeString; protected boolean isInvalid; protected String value; protected Integer id; protected List<String> childUnits; public MyTokenizer(String treeString) { this.treeString = treeString.trim(); tokenize(); } private void tokenize(){ try { setId(); setValue(); setChildUnits(); } catch (Exception e) { System.out.println("MyTokenizer.tokenize() Exception e = " + e); isInvalid = true; return; } } private void setId(){ try { Pattern pattern = Pattern.compile("(?<=\\() *\\d+ *(?=:)"); Matcher matcher = pattern.matcher(treeString); if (matcher.find()){ id = Integer.parseInt(matcher.group().trim()); return; } } catch (Exception e) { System.out.println("MyTokenizer.setId() Exception e = " + e); } isInvalid = true; } private int tempIndex; private void setValue(){ try { Pattern pattern = Pattern.compile("(?<=:) *[a-zA-Z0-9]+ *[\\(\\)]"); Matcher matcher = pattern.matcher(treeString); if (matcher.find()){ tempIndex = treeString.indexOf(matcher.group()) + matcher.group().length() - 1; value = matcher.group().substring(0, matcher.group().length() - 1).trim(); return; } } catch (Exception e) { System.out.println("MyTokenizer.setValue() Exception e = " + e); } isInvalid = true; } /** * Examine treeString's parentheses from leftParenthesis to treeString.length() - 1 * @param leftParenthesis */ private void setChildUnits(){ this.childUnits = new ArrayList<String>(); int cv = 0; Integer tempStart = null; for (int i = tempIndex; i < treeString.length() - 1; i ++){ switch (treeString.charAt(i)) { case '(': if (++cv == 1) tempStart = i; break; case ')': if (--cv == 0){ childUnits.add(treeString.substring(tempStart, i+1)); tempStart = null; } break; } } if (tempStart != null) isInvalid = true; } }
MyParser.java
package com.treeparse; import java.util.List; public class MyParser { public MyParser() { } public MyTree parse(String treeString){ MyTokenizer tokenizer = new MyTokenizer(treeString); if (tokenizer.isInvalid){ System.err.println("warning: the input is not a valid MyTree string."); return null; } List<String> childUnits = tokenizer.childUnits; MyTree myTree = new MyTree(tokenizer.id, tokenizer.value); List<MyTree> children = myTree.getChildren(); for (String childUnit : childUnits) children.add(parse(childUnit)); return myTree; } public static void main(String[] args) { String testString = "(1 : AA (2 : BB) (3: wef (56:3t4) (6:4g3)) )"; testString = "(1:AA(2:BB)(3:CC(31:wef(52:huEf)(35:weDf(554:Q)))(43:f34w(42:wefw)))(4:ge(43:werw))(5:zer(99:f5)(100:f5)))"; MyTree tree = MyTree.valueOf(testString); System.out.println(tree); for (MyTree myTree : tree.getChildren()){ System.out.println(myTree.getId() + " with tree string: " + myTree); } } }Key point of the algorithm: