CSVParser.java

package org.cyclopsgroup.caff.format;

import java.io.IOException;
import java.nio.CharBuffer;
import org.cyclopsgroup.caff.CharIterator;

/**
 * A general class that knows how to parse CSV syntax
 *
 * <p>TODO Trailing white space isn't handled yet
 *
 * @author <a href="mailto:jiaqi.guo@gmail.com">Jiaqi Guo</a>
 */
public abstract class CSVParser {
  private class ParsingContext {
    private final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);

    private int position = 0;

    private ParsingState state = ParsingState.START;

    private void notifyField() throws IOException {
      buffer.flip();
      handleField(position, buffer);
      buffer.clear();
      position++;
    }

    private void move(ParsingState newState) {
      state = newState;
    }

    private void append(char ch) {
      buffer.append(ch);
    }
  }

  private static enum ParsingState {
    ESCAPING,
    QUOTING,
    START,
    WORD;
  }

  private static final int BUFFER_SIZE = 100;

  /**
   * @param in Char iterator as input
   * @throws IOException Allows {@link IOException}
   */
  public final void parse(CharIterator in) throws IOException {
    ParsingContext context = new ParsingContext();
    while (in.hasNext()) {
      char ch = in.next();
      switch (context.state) {
        case START:
          switch (ch) {
            case ' ':
              break;
            case ',':
              context.notifyField();
              break;
            case '\"':
              context.move(ParsingState.QUOTING);
              break;
            default:
              context.move(ParsingState.WORD);
              context.append(ch);
          }
          break;
        case WORD:
          switch (ch) {
            case ',':
              context.notifyField();
              context.move(ParsingState.START);
              break;
            default:
              context.append(ch);
          }
          break;
        case QUOTING:
          switch (ch) {
            case '"':
              context.move(ParsingState.ESCAPING);
              break;
            default:
              context.append(ch);
          }
          break;
        case ESCAPING:
          switch (ch) {
            case ',':
              context.notifyField();
              context.move(ParsingState.START);
              break;
            default:
              context.append(ch);
              context.move(ParsingState.QUOTING);
          }
          break;
        default:
          throw new AssertionError("Nonsense state " + context.state);
      }
    }
    if (context.buffer.position() > 0) {
      context.notifyField();
    }
  }

  /**
   * @param position Zero based CSV field position
   * @param content Content of CSV field
   * @throws IOException Allows {@link IOException}
   */
  protected abstract void handleField(int position, CharSequence content) throws IOException;
}