QuotedValueTokenizer.java
package org.cyclopsgroup.caff.token;
/**
* String tokenizer that splits a string into segments, honoring quotation and character escaping
*
* @author <a href="mailto:jiaqi.guo@gmail.com">Jiaqi Guo</a>
*/
public class QuotedValueTokenizer implements ValueTokenizer {
  /** States of the character-by-character scanner used by {@code parse}. */
  private enum ParsingState {
    /** Ready for escaping next character or end a quoted segment */
    ESCAPED_OR_QUOTE_END,
    /** Quotation started */
    QUOTED,
    /** Ready for new word */
    READY,
    /** Word started without quotation */
    WORD_STARTED;
  }

  private final char delimiter;
  private final char quotation;

  /** Default constructor that uses white space as delimiter and " as quotation character */
  public QuotedValueTokenizer() {
    this(' ', '\"');
  }

  /**
   * @param delimiter Delimiter character
   * @param quotation Quotation character
   */
  public QuotedValueTokenizer(char delimiter, char quotation) {
    this.delimiter = delimiter;
    this.quotation = quotation;
  }

  /**
   * Scans {@code input} one character at a time and reports each token found to {@code handler}.
   *
   * <ul>
   *   <li>Delimiter characters between tokens are skipped, so consecutive delimiters do not
   *       produce empty tokens.
   *   <li>A token that does not start with the quotation character runs up to the next delimiter;
   *       quotation characters inside it are kept literally.
   *   <li>A token that starts with the quotation character may contain delimiters. Inside it, a
   *       quotation character followed by a delimiter ends the token; a quotation character
   *       followed by any other character causes that following character to be appended
   *       literally (so a doubled quotation character yields a single one).
   *   <li>A token still pending when the input ends is reported with {@code input.length()} as
   *       its end offset and {@code false} as the fourth {@code TokenEvent} argument.
   * </ul>
   *
   * <p>The start offset reported for a quoted token is the position of its opening quotation
   * character.
   *
   * @param input Character sequence to tokenize
   * @param handler Receiver of one {@code TokenEvent} per token, invoked in input order
   */
  @Override
  public void parse(CharSequence input, TokenEventHandler handler) {
    ParsingState state = ParsingState.READY;
    // Non-null exactly while a token is being accumulated.
    StringBuilder buf = null;
    int wordStart = 0;
    for (int i = 0; i < input.length(); i++) {
      char c = input.charAt(i);
      switch (state) {
        case READY:
          assert buf == null;
          if (c == delimiter) {
            // Still between tokens; nothing to record.
            continue;
          } else if (c == quotation) {
            // Opening quotation: start an empty token, the quote itself is not part of it.
            state = ParsingState.QUOTED;
            wordStart = i;
            buf = new StringBuilder();
          } else {
            state = ParsingState.WORD_STARTED;
            wordStart = i;
            buf = new StringBuilder();
            buf.append(c);
          }
          break;
        case WORD_STARTED:
          assert buf != null;
          if (c == delimiter) {
            state = ParsingState.READY;
            handler.handleEvent(new TokenEvent(buf.toString(), wordStart, i, true));
            buf = null;
          } else {
            buf.append(c);
          }
          break;
        case QUOTED:
          assert buf != null;
          if (c == quotation) {
            // Could be the closing quotation or an escape prefix; the next character decides.
            state = ParsingState.ESCAPED_OR_QUOTE_END;
          } else {
            buf.append(c);
          }
          break;
        case ESCAPED_OR_QUOTE_END:
          assert buf != null;
          if (c == delimiter) {
            // The previous quotation character closed the token.
            state = ParsingState.READY;
            handler.handleEvent(new TokenEvent(buf.toString(), wordStart, i, true, true));
            buf = null;
          } else {
            // The previous quotation character escaped this one; keep it and stay quoted.
            buf.append(c);
            state = ParsingState.QUOTED;
          }
          break;
      }
    }
    if (buf != null) {
      // NOTE(review): when the input ends directly after a quotation character (state
      // ESCAPED_OR_QUOTE_END) the last argument below is false, unlike the delimiter-terminated
      // quoted case above which passes true. Confirm against TokenEvent that this is intended.
      handler.handleEvent(
          new TokenEvent(
              buf.toString(), wordStart, input.length(), false, state == ParsingState.QUOTED));
    }
  }

  /**
   * Produces a form of {@code output} that {@code parse} reads back as a single token.
   *
   * <p>A value containing neither the delimiter nor the quotation character is returned
   * unchanged. Otherwise the value is wrapped in quotation characters and every quotation
   * character inside it is doubled.
   *
   * @param output Raw value to escape
   * @return The value itself, or its quoted and escaped form
   */
  @Override
  public String escape(String output) {
    if (output.indexOf(delimiter) == -1 && output.indexOf(quotation) == -1) {
      return output;
    }
    // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder(output.length() + 2).append(quotation);
    for (int i = 0; i < output.length(); i++) {
      char c = output.charAt(i);
      if (c == quotation) {
        // Double the quotation character so the parser treats it as a literal.
        sb.append(quotation);
      }
      sb.append(c);
    }
    return sb.append(quotation).toString();
  }
}