/**
 * Title: Comedia Utils. Pascal scanner class.
 * Description: Lexical scanner for Pascal like languages
 * Copyright: Copyright (c) Capella Development Group 2001
 * Company: Capella Development Group
 * @author Sergey Seroukhov
 * @version 1.0
 */

package org.comedia.util.scanner;

/**
 * Performs lexical scanning for Pascal-like languages.
 * The scanner supports all standard operators, keywords and data types of ANSI Pascal.
 * <p>
 * Example of scanner usage:
 * <p><pre>
 * System.out.println("*********** Pascal Scanner Test *************");
 *
 * CPasScanner scanner = new CPasScanner();
 * scanner.setBuffer("while(1.0e2*i := \t\r\n> \'string\'\'\')\n"
 *   + "// comment\n(*second\ncomment*){xxx}");
 * scanner.setShowEol(true);
 * scanner.setShowSpace(true);
 * scanner.setShowComment(true);
 * scanner.setShowKeyword(true);
 * scanner.setShowType(true);
 *
 * // Tests string conversions
 * String str = "The test \'string\'";
 * System.out.println("Start string: " + str);
 * str = scanner.wrapString(str);
 * System.out.println("Wrapped string: " + str);
 * str = scanner.unwrapString(str);
 * System.out.println("Unwrapped string: " + str);
 *
 * System.out.println();
 * System.out.println("Initial string: " + scanner.getBuffer());
 *
 * while (scanner.lex() != EOF) {
 *   switch (scanner.getTokenType()) {
 *     case UNKNOWN: System.out.print("Type: Unknown "); break;
 *     case COMMENT: System.out.print("Type: Comment "); break;
 *     case KEYWORD: System.out.print("Type: Keyword "); break;
 *     case TYPE: System.out.print("Type: Type "); break;
 *     case IDENT: System.out.print("Type: Ident "); break;
 *     case ALPHA: System.out.print("Type: Alpha "); break;
 *     case OPERATOR: System.out.print("Type: Operator "); break;
 *     case BRACE: System.out.print("Type: Brace "); break;
 *     case SEPARATOR: System.out.print("Type: Separator "); break;
 *     case EOL: System.out.print("Type: Eol "); break;
 *     case LF: System.out.print("Type: Lf "); break;
 *     case SPACE: System.out.print("Type: Space "); break;
 *     case INT: System.out.print("Type: Int "); break;
 *     case FLOAT: System.out.print("Type: Float "); break;
 *     case STRING: System.out.print("Type: String "); break;
 *     case BOOL: System.out.print("Type: Bool "); break;
 *     case EOF: System.out.print("Type: Eof "); break;
 *   }
 *   System.out.println("Value: '" + scanner.getToken()
 *     + "' Pos: " + scanner.getPosition() + " Line: " + scanner.getLineNo());
 * }
 * </pre>
 * The result:
 * <p><pre>
 * *********** Pascal Scanner Test *************
 * Start string: The test 'string'
 * Wrapped string: 'The test ''string'''
 * Unwrapped string: The test 'string'
 *
 * Initial string: while(1.0e2*i :=
 * > 'string''')
 * // comment
 * (*second
 * comment*){xxx}
 * Type: Keyword Value: 'while' Pos: 0 Line: 0
 * Type: Brace Value: '(' Pos: 5 Line: 0
 * Type: Float Value: '1.0e2' Pos: 6 Line: 0
 * Type: Operator Value: '*' Pos: 11 Line: 0
 * Type: Ident Value: 'i' Pos: 12 Line: 0
 * Type: Space Value: ' ' Pos: 13 Line: 0
 * Type: Operator Value: ':=' Pos: 14 Line: 0
 * Type: Space Value: ' 	' Pos: 16 Line: 0
 * Type: Lf Value: '
 * ' Pos: 18 Line: 0
 * Type: Eol Value: '
 * ' Pos: 19 Line: 0
 * Type: Operator Value: '>' Pos: 20 Line: 1
 * Type: Space Value: ' ' Pos: 21 Line: 1
 * Type: String Value: ''string'''' Pos: 22 Line: 1
 * Type: Brace Value: ')' Pos: 32 Line: 1
 * Type: Eol Value: '
 * ' Pos: 33 Line: 1
 * Type: Comment Value: '// comment
 * ' Pos: 34 Line: 2
 * Type: Comment Value: '(*second
 * comment*)' Pos: 45 Line: 3
 * Type: Comment Value: '{xxx}' Pos: 63 Line: 4
 * </pre>
 */
public class CPasScanner extends CScanner {
  /**
   * List of Pascal specified operators.
   */
  protected String pasOperators[] = {
    "and","or","not","shr","shl","div","mod"
  };
  /**
   * List of Pascal specified data type keywords.
   */
  protected String pasTypes[] = {
    "integer","longint","byte","char","string","boolean","real","double","word"
  };
  protected String pasKeywords[] = {
    "with","array","function","case","var","const","until","then","set",
    "record","program","procedure","packed","nil","label","in","repeat",
    "of","goto","forward","for","while","file","else","downto","do","to",
    "type","end","begin","if","true","false","unit","interface","uses",
    "implementation","initialization"
  };

  /**
   * Default class constructor.
   */
  public CPasScanner() {
    super();
    operators = pasOperators;
    types = pasTypes;
    keywords = pasKeywords;
  }

  /**
   * Gets a lowlevel token. Presents the main parsing process.
   * @param curr a "Holder" which containes extracted token.
   * @result extracted token type represented by special constant.
   */
  protected int lowRunLex(Lexem curr) {
    innerStartLex(curr);
    if (curr.tokenType != UNKNOWN) return curr.tokenType;

    // Checks for multi-line comments (type 1)
    char temp = curr.token.charAt(0);
    if (temp == '{') {
      while (bufferPos < bufferLen) {
        temp = buffer.charAt(bufferPos);
        curr.token += temp;
        bufferPos++;
        if (temp == '}')
          break;
        if (temp == '\n')
          bufferLine++;
      }
      return (curr.tokenType = COMMENT);
    }

    // Checks for multi-line comments (type 2)
    if ((temp == '(') && (bufferPos < bufferLen)
      && (buffer.charAt(bufferPos) == '*')) {
      char temp1 = (char)0;
      while (bufferPos < bufferLen) {
        temp = buffer.charAt(bufferPos);
        curr.token += temp;
        bufferPos++;
        if ((temp == ')') && (temp1 == '*') && (curr.token.length() > 3))
          break;
        if (temp == '\n')
          bufferLine++;
        temp1 = temp;
      }
      return (curr.tokenType = COMMENT);
    }

    // Checks for single-line comments
    if ((temp == '/') && (bufferPos < bufferLen)
      && (buffer.charAt(bufferPos) == '/'))
      return innerProcLineComment(curr);

    // Checks for braces
    if (new String("()[]").indexOf(temp) >= 0)
      return (curr.tokenType = BRACE);

    // Checks for separators
    if ((temp == ',') || (temp == '.') || (temp == ';') || ((temp == ':')
      && (bufferPos < bufferLen) && (buffer.charAt(bufferPos) != '=')))
      return (curr.tokenType = SEPARATOR);

    // Checks for delimiters
    if (new String(":=+-<>/*^@#").indexOf(temp) >= 0) {
      char temp1 = (bufferPos < bufferLen) ? buffer.charAt(bufferPos) : (char)0;
      if ( (((temp == ':') || (temp == '>')) && (temp1 == '=')) ||
        ((temp == '<') && ((temp1 == '=') || (temp1 == '>'))) ) {
        curr.token += temp1;
        bufferPos++;
      }
      return (curr.tokenType = OPERATOR);
    }

    // Checks for a string
    if (temp == '\'')
      return innerProcPasString(curr);

    // Checks for numbers and identifiers
    innerProcIdent(curr);
    if (curr.tokenType == IDENT) {
      // Checks for operators
      if (searchForString(curr.token.toLowerCase(), operators))
        return (curr.tokenType = OPERATOR);

      // Checks for type keywords
      if (showType && searchForString(curr.token.toLowerCase(), types))
        return (curr.tokenType = TYPE);

      // Checks for reserved keywords
      if (showKeyword && searchForString(curr.token.toLowerCase(), keywords))
        return (curr.tokenType = KEYWORD);
    }
    return curr.tokenType;
  }

  /**
   * Converts a string from ordinary into Pascal-like escape format
   * limited with quotes.
   * @param s a string in ordinary (local) presentation.
   * @result a result string in Pascal-like escape format.
   */
  public static String wrapString(String s) {
    String result = "\'";
    for (int p = 0; p < s.length(); p++) {
      result += s.charAt(p);
      if (s.charAt(p) == '\'')
        result += s.charAt(p);
    }
    return result + "\'";
  }

  /**
   * Converts a string from Pascal-like escape format limited
   * with quotes into oridinary (local) presentation.
   * @param s a string in Pascal-like escape format.
   * @result a result string in ordinary (local) presentation.
   */
  public static String unwrapString(String s) {
    int p = 0;
    int len = s.length();
    if (len == 0) return "";

    // Removes limited quotes
    if (s.charAt(p) == '\'') p++;
    if (s.charAt(len-1) == '\'') len--;
    if ((len - p) <= 0) return "";

    String result = "";
    while (p < len) {
      char temp = s.charAt(p);
      if (temp != '\'')
        result += temp;
      else if ((p < (len-1)) && (s.charAt(p+1) == '\'')) {
        result += '\'';
        p++;
      }
      p++;
    }
    return result;
  }

  /**
   * The main function for test purposes.
   */
  public static void main(String[] args) {
    System.out.println("*********** Pascal Scanner Test *************");

    CPasScanner scanner = new CPasScanner();
    scanner.setBuffer("while(1.0e2*i := \t\r\n> \'string\'\'\')\n"
      + "// comment\n(*second\ncomment*){xxx}");
    scanner.setShowEol(true);
    scanner.setShowSpace(true);
    scanner.setShowComment(true);
    scanner.setShowKeyword(true);
    scanner.setShowType(true);

    // Tests string convertions
    String str = "The test \'string\'";
    System.out.println("Start string: " + str);
    str = scanner.wrapString(str);
    System.out.println("Wrapped string: " + str);
    str = scanner.unwrapString(str);
    System.out.println("Unwrapped string: " + str);

    System.out.println();
    System.out.println("Initial string: " + scanner.getBuffer());

    while (scanner.lex() != EOF) {
      switch (scanner.getTokenType()) {
        case UNKNOWN: System.out.print("Type: Unknown "); break;
        case COMMENT: System.out.print("Type: Comment "); break;
        case KEYWORD: System.out.print("Type: Keyword "); break;
        case TYPE: System.out.print("Type: Type "); break;
        case IDENT: System.out.print("Type: Ident "); break;
        case ALPHA: System.out.print("Type: Alpha "); break;
        case OPERATOR: System.out.print("Type: Operator "); break;
        case BRACE: System.out.print("Type: Brace "); break;
        case SEPARATOR: System.out.print("Type: Separator "); break;
        case EOL: System.out.print("Type: Eol "); break;
        case LF: System.out.print("Type: Lf "); break;
        case SPACE: System.out.print("Type: Space "); break;
        case INT: System.out.print("Type: Int "); break;
        case FLOAT: System.out.print("Type: Float "); break;
        case STRING: System.out.print("Type: String "); break;
        case BOOL: System.out.print("Type: Bool "); break;
        case EOF: System.out.print("Type: Eof "); break;
      }
      System.out.println("Value: '" + scanner.getToken()
        + "' Pos: " + scanner.getPosition() + " Line: " + scanner.getLineNo());
    }
  }
}
