/**
 * Title: Comedia Utils. C scanner class.
 * Description: Lexical scanner for C like languages
 * Copyright: Copyright (c) Capella Development Group 2001
 * Company: Capella Development Group
 * @author Sergey Seroukhov
 * @version 1.0
 */

package org.comedia.util.scanner;

/**
 * Performs lexical scanning for the C/C++ languages.
 * The scanner supports the standard operators, keywords and data types of C/C++.
 * <p>
 * Example of scanner usage:
 * <p><pre>
 * System.out.println("*********** C Scanner Test *************");
 *
 * CCppScanner scanner = new CCppScanner();
 * scanner.setBuffer("while(1.0e2=i.a >>= \t\r\n-> \"string\\\"\")\n"
 *   + "// comment\n/*second\ncomment*&#47;{xxx}");
 * scanner.setShowEol(true);
 * scanner.setShowSpace(true);
 * scanner.setShowComment(true);
 * scanner.setShowKeyword(true);
 * scanner.setShowType(true);
 *
 * // Tests string conversions
 * String str = "The test \"string\"";
 * System.out.println("Start string: " + str);
 * str = scanner.wrapString(str);
 * System.out.println("Wrapped string: " + str);
 * str = scanner.unwrapString(str);
 * System.out.println("Unwrapped string: " + str);
 *
 * System.out.println();
 * System.out.println("Initial string: " + scanner.getBuffer());
 *
 * while (scanner.lex() != EOF) {
 *   switch (scanner.getTokenType()) {
 *     case UNKNOWN: System.out.print("Type: Unknown "); break;
 *     case COMMENT: System.out.print("Type: Comment "); break;
 *     case KEYWORD: System.out.print("Type: Keyword "); break;
 *     case TYPE: System.out.print("Type: Type "); break;
 *     case IDENT: System.out.print("Type: Ident "); break;
 *     case ALPHA: System.out.print("Type: Alpha "); break;
 *     case OPERATOR: System.out.print("Type: Operator "); break;
 *     case BRACE: System.out.print("Type: Brace "); break;
 *     case SEPARATOR: System.out.print("Type: Separator "); break;
 *     case EOL: System.out.print("Type: Eol "); break;
 *     case LF: System.out.print("Type: Lf "); break;
 *     case SPACE: System.out.print("Type: Space "); break;
 *     case INT: System.out.print("Type: Int "); break;
 *     case FLOAT: System.out.print("Type: Float "); break;
 *     case STRING: System.out.print("Type: String "); break;
 *     case BOOL: System.out.print("Type: Bool "); break;
 *     case EOF: System.out.print("Type: Eof "); break;
 *   }
 *   System.out.println("Value: '" + scanner.getToken()
 *     + "' Pos: " + scanner.getPosition() + " Line: " + scanner.getLineNo());
 * }
 * </pre>
 * The result:
 * <p><pre>
 * *********** C Scanner Test *************
 * Start string: The test "string"
 * Wrapped string: "The test \"string\""
 * Unwrapped string: The test "string"
 *
 * Initial string: while(1.0e2=i.a >>=
 * -> "string\"")
 * // comment
 * /*second
 * comment*&#47;{xxx}
 * Type: Keyword Value: 'while' Pos: 0 Line: 0
 * Type: Brace Value: '(' Pos: 5 Line: 0
 * Type: Float Value: '1.0e2' Pos: 6 Line: 0
 * Type: Operator Value: '=' Pos: 11 Line: 0
 * Type: Ident Value: 'i' Pos: 12 Line: 0
 * Type: Separator Value: '.' Pos: 13 Line: 0
 * Type: Ident Value: 'a' Pos: 14 Line: 0
 * Type: Space Value: ' ' Pos: 15 Line: 0
 * Type: Operator Value: '>>=' Pos: 16 Line: 0
 * Type: Space Value: ' 	' Pos: 19 Line: 0
 * Type: Lf Value: '
 * ' Pos: 21 Line: 0
 * Type: Eol Value: '
 * ' Pos: 22 Line: 0
 * Type: Operator Value: '->' Pos: 23 Line: 1
 * Type: Space Value: ' ' Pos: 25 Line: 1
 * Type: String Value: '"string\""' Pos: 26 Line: 1
 * Type: Brace Value: ')' Pos: 36 Line: 1
 * Type: Eol Value: '
 * ' Pos: 37 Line: 1
 * Type: Comment Value: '// comment
 * ' Pos: 38 Line: 2
 * Type: Comment Value: '/*second
 * comment*&#47;' Pos: 49 Line: 3
 * Type: Brace Value: '{' Pos: 67 Line: 4
 * Type: Ident Value: 'xxx' Pos: 68 Line: 4
 * Type: Brace Value: '}' Pos: 71 Line: 4
 * </pre>
 */
public class CCppScanner extends CScanner {
  /** Characters reported as <code>BRACE</code> tokens. */
  private static final String BRACE_CHARS = "{}()[]";

  /** Characters reported as <code>SEPARATOR</code> tokens. */
  private static final String SEPARATOR_CHARS = ":;.,";

  /** Characters that may start an <code>OPERATOR</code> token. */
  private static final String OPERATOR_CHARS = "=+-<>/*^@#?%!|&~";

  /**
   * Operator characters that absorb a directly following '=' into the
   * same token (for example <code>==</code>, <code>!=</code>,
   * <code>+=</code>, <code>^=</code>).
   */
  private static final String ASSIGN_PREFIX_CHARS = "=+-!|~&*/<>%^";

  /**
   * Operator characters that absorb a directly following copy of
   * themselves (for example <code>&lt;&lt;</code>, <code>&amp;&amp;</code>,
   * <code>++</code>).
   */
  private static final String DOUBLED_CHARS = "<>|&+-";

  /**
   * List of C/C++ specified data type keywords.
   */
  String[] cTypes = {
    "int", "long", "short", "char", "bool", "float", "double", "void"
  };

  /**
   * List of C/C++ specified reserved keywords.
   */
  String[] cKeywords = {
    "return", "if", "else", "while", "do", "switch", "case", "default",
    "typedef", "union", "class", "struct", "true", "false", "goto", "break",
    "continue", "for", "operator", "static", "public", "private",
    "protected", "friend", "this"
  };

  /**
   * Gets a lowlevel token. Presents the main parsing process.
   * <p>
   * The checks run in a fixed order: whatever <code>innerStartLex</code>
   * and <code>innerProcCComment</code> already classify, then single-line
   * comments, braces, separators, operators (one to three characters),
   * strings and finally numbers/identifiers. Identifiers are promoted to
   * <code>TYPE</code> or <code>KEYWORD</code> only when the corresponding
   * "show" flag is set; the type list is consulted before the keyword list.
   *
   * @param curr a "Holder" which contains the extracted token.
   * @return extracted token type represented by special constant.
   */
  protected int lowRunLex(Lexem curr) {
    innerStartLex(curr);
    if (curr.tokenType != UNKNOWN) {
      return curr.tokenType;
    }

    // Checks for multi-line comment
    innerProcCComment(curr);
    if (curr.tokenType != UNKNOWN) {
      return curr.tokenType;
    }

    // NOTE(review): relies on the helpers above having stored at least one
    // character in curr.token - confirm against CScanner.
    char first = curr.token.charAt(0);
    // Next unread character, or 0 when the buffer is exhausted
    char second = (bufferPos < bufferLen) ? buffer.charAt(bufferPos) : (char) 0;

    // Checks for single-line comment
    if ((first == '/') && (second == '/')) {
      return innerProcLineComment(curr);
    }

    // Checks for brace
    if (BRACE_CHARS.indexOf(first) >= 0) {
      return (curr.tokenType = BRACE);
    }

    // Checks for separator
    if (SEPARATOR_CHARS.indexOf(first) >= 0) {
      return (curr.tokenType = SEPARATOR);
    }

    // Checks for operators
    if (OPERATOR_CHARS.indexOf(first) >= 0) {
      boolean twoChars =
        ((second == '=') && (ASSIGN_PREFIX_CHARS.indexOf(first) >= 0))
        || ((second == first) && (DOUBLED_CHARS.indexOf(first) >= 0))
        || ((first == '-') && (second == '>'));
      if (twoChars) {
        curr.token += second;
        bufferPos++;

        // Only the shift operators can grow to three characters: <<= and >>=
        boolean shift = (second == first) && ((first == '<') || (first == '>'));
        if (shift && (bufferPos < bufferLen)
          && (buffer.charAt(bufferPos) == '=')) {
          curr.token += '=';
          bufferPos++;
        }
      }
      return (curr.tokenType = OPERATOR);
    }

    // Checks for a string
    if (first == '\"') {
      return innerProcCString(curr);
    }

    // Checks for numbers and identifiers
    innerProcIdent(curr);
    if (curr.tokenType == IDENT) {
      // Checks for type keywords
      if (showType && searchForString(curr.token, types)) {
        return (curr.tokenType = TYPE);
      }

      // Checks for reserved keywords
      if (showKeyword && searchForString(curr.token, keywords)) {
        return (curr.tokenType = KEYWORD);
      }
    }
    return curr.tokenType;
  }

  /**
   * Converts a string from ordinary into C-like escape format
   * limited with quotes.
   * <p>
   * Tab, line feed, carriage return and the NUL character are written as
   * <code>\t</code>, <code>\n</code>, <code>\r</code> and <code>\0</code>;
   * double quotes, single quotes and backslashes are prefixed with a
   * backslash; every other character is copied unchanged.
   *
   * @param s a string in ordinary (local) presentation.
   * @return a result string in C-like escape format.
   */
  public static String wrapString(String s) {
    // Two extra characters for the surrounding quotes
    StringBuffer result = new StringBuffer(s.length() + 2);
    result.append('\"');
    for (int p = 0; p < s.length(); p++) {
      char ch = s.charAt(p);
      switch (ch) {
        case '\t': result.append("\\t"); break;
        case '\n': result.append("\\n"); break;
        case '\r': result.append("\\r"); break;
        case '\0': result.append("\\0"); break;
        case '\"':
        case '\'':
        case '\\':
          result.append('\\').append(ch);
          break;
        default:
          result.append(ch);
      }
    }
    result.append('\"');
    return result.toString();
  }

  /**
   * Converts a string from C-like escape format limited
   * with quotes into ordinary (local) presentation.
   * <p>
   * The text is first passed through <code>CScanner.unwrapString</code>
   * (which, per the original comment, removes the limiting quotes). Then
   * <code>\n</code>, <code>\r</code>, <code>\t</code> and <code>\0</code>
   * are translated to their control characters, and any other escaped
   * character is taken literally. A lone backslash at the very end of the
   * text is dropped.
   *
   * @param s a string in C-like escape format.
   * @return a result string in ordinary (local) presentation.
   */
  public static String unwrapString(String s) {
    // Removes limited quotes
    s = CScanner.unwrapString(s);
    int length = s.length();
    if (length == 0) {
      return "";
    }

    StringBuffer result = new StringBuffer(length);
    int p = 0;
    while (p < length) {
      char ch = s.charAt(p);
      if (ch == '\\') {
        p++;
        if (p >= length) {
          // Dangling backslash: nothing left to escape
          break;
        }
        ch = s.charAt(p);
        switch (ch) {
          case 'n': ch = '\n'; break;
          case 'r': ch = '\r'; break;
          case 't': ch = '\t'; break;
          case '0': ch = '\0'; break;
        }
      }
      result.append(ch);
      p++;
    }
    return result.toString();
  }

  /**
   * Default class constructor. Installs the C/C++ keyword and data type
   * lists into the inherited <code>keywords</code> and <code>types</code>
   * fields.
   */
  public CCppScanner() {
    super();
    keywords = cKeywords;
    types = cTypes;
  }

  /**
   * The main function for test purposes. Round-trips a sample string
   * through the string conversions, then scans a sample buffer and prints
   * every token with its type, position and line number.
   *
   * @param args command line arguments (not used).
   */
  public static void main(String[] args) {
    System.out.println("*********** C Scanner Test *************");

    CCppScanner scanner = new CCppScanner();
    scanner.setBuffer("while(1.0e2=i.a >>= \t\r\n-> \"string\\\"\")\n"
      + "// comment\n/*second\ncomment*/{xxx}");
    scanner.setShowEol(true);
    scanner.setShowSpace(true);
    scanner.setShowComment(true);
    scanner.setShowKeyword(true);
    scanner.setShowType(true);

    // Tests string conversions
    String str = "The test \"string\"";
    System.out.println("Start string: " + str);
    str = CCppScanner.wrapString(str);
    System.out.println("Wrapped string: " + str);
    str = CCppScanner.unwrapString(str);
    System.out.println("Unwrapped string: " + str);

    System.out.println();
    System.out.println("Initial string: " + scanner.getBuffer());

    while (scanner.lex() != EOF) {
      switch (scanner.getTokenType()) {
        case UNKNOWN: System.out.print("Type: Unknown "); break;
        case COMMENT: System.out.print("Type: Comment "); break;
        case KEYWORD: System.out.print("Type: Keyword "); break;
        case TYPE: System.out.print("Type: Type "); break;
        case IDENT: System.out.print("Type: Ident "); break;
        case ALPHA: System.out.print("Type: Alpha "); break;
        case OPERATOR: System.out.print("Type: Operator "); break;
        case BRACE: System.out.print("Type: Brace "); break;
        case SEPARATOR: System.out.print("Type: Separator "); break;
        case EOL: System.out.print("Type: Eol "); break;
        case LF: System.out.print("Type: Lf "); break;
        case SPACE: System.out.print("Type: Space "); break;
        case INT: System.out.print("Type: Int "); break;
        case FLOAT: System.out.print("Type: Float "); break;
        case STRING: System.out.print("Type: String "); break;
        case BOOL: System.out.print("Type: Bool "); break;
        case EOF: System.out.print("Type: Eof "); break;
      }
      System.out.println("Value: '" + scanner.getToken()
        + "' Pos: " + scanner.getPosition() + " Line: " + scanner.getLineNo());
    }
  }
}
