001 package org.crsh.cmdline.matcher.tokenizer;
002
003 import org.crsh.cmdline.Delimiter;
004
005 import java.util.ArrayList;
006 import java.util.Iterator;
007 import java.util.NoSuchElementException;
008
009 /**
010 * @author <a href="mailto:julien.viet@exoplatform.com">Julien Viet</a>
011 */
012 public class Tokenizer implements Iterator<Token> {
013
014 /** . */
015 private final CharSequence s;
016
017 /** . */
018 private int index;
019
020 /** . */
021 private ArrayList<Token> stack;
022
023 /** . */
024 private int ptr;
025
026 /** . */
027 private Delimiter delimiter;
028
029 public Tokenizer(CharSequence s) {
030 this.s = s;
031 this.stack = new ArrayList<Token>();
032 this.index = 0;
033 this.delimiter = null;
034 }
035
036 public boolean hasNext() {
037 if (ptr < stack.size()) {
038 return true;
039 } else {
040 Token next = parse();
041 if (next != null) {
042 stack.add(next);
043 }
044 return next != null;
045 }
046 }
047
048 private Token parse() {
049 Token token = null;
050 if (index < s.length()) {
051 char c = s.charAt(index);
052 int from = index;
053 while (true) {
054 if (Character.isWhitespace(c)) {
055 index++;
056 if (index < s.length()) {
057 c = s.charAt(index);
058 } else {
059 break;
060 }
061 } else {
062 break;
063 }
064 }
065 if (index > from) {
066 token = new Token.Whitespace(from, s.subSequence(from, index).toString());
067 } else {
068 State state = new State();
069 while (true) {
070 if (Character.isWhitespace(c) && state.escape == Escape.NONE) {
071 break;
072 } else {
073 index++;
074 state.push(c);
075 if (index < s.length()) {
076 c = s.charAt(index);
077 } else {
078 break;
079 }
080 }
081 }
082 if (index > from) {
083 switch (state.status) {
084 case INIT: {
085 token = new Token.Literal.Word(from, s.subSequence(from, index).toString(), state.buffer.toString());
086 break;
087 }
088 case WORD: {
089 token = new Token.Literal.Word(from, s.subSequence(from, index).toString(), state.buffer.toString());
090 break;
091 }
092 case SHORT_OPTION: {
093 token = new Token.Literal.Option.Short(from, s.subSequence(from, index).toString(), state.buffer.toString());
094 break;
095 }
096 case LONG_OPTION: {
097 token = new Token.Literal.Option.Long(from, s.subSequence(from, index).toString(), state.buffer.toString());
098 break;
099 }
100 default:
101 throw new AssertionError(state.status);
102 }
103 delimiter = state.escape.delimiter;
104 return token;
105 }
106 }
107 }
108 return token;
109 }
110
111 public Token next() {
112 if (hasNext()) {
113 return stack.get(ptr++);
114 } else {
115 throw new NoSuchElementException();
116 }
117 }
118
119 public void remove() {
120 throw new UnsupportedOperationException();
121 }
122
123 public int getIndex() {
124 Token peek = peek();
125 if (peek != null) {
126 return peek.getFrom();
127 } else {
128 return index;
129 }
130 }
131
132 public void pushBack() {
133 pushBack(1);
134 }
135
136 public void pushBack(int count) {
137 if (count < 0) {
138 throw new IllegalArgumentException();
139 }
140 if (ptr - count < 0) {
141 throw new IllegalStateException("Trying to push back too many tokens");
142 } else {
143 ptr -= count;
144 }
145 }
146
147 public Token peek() {
148 if (hasNext()) {
149 return stack.get(ptr);
150 } else {
151 return null;
152 }
153 }
154
155 public Delimiter getDelimiter() {
156 return delimiter;
157 }
158 }