// copyright 2001-2002 by The Mind Electric

package electric.xml.sax;

import java.io.*;
import java.net.URL;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import electric.util.*;
import electric.util.io.*;

/**
 * <tt>SAXParser</tt> is a small, non-validating SAX2 <tt>XMLReader</tt> built
 * on top of the <tt>Lex</tt> tokenizer. It also acts as the <tt>Locator</tt>
 * that is handed to the content handler.
 *
 * <p>Elements, attributes, namespace prefix mappings, character data, CDATA
 * sections and processing instructions are reported to the registered
 * <tt>ContentHandler</tt>. Comments and DOCTYPE declarations are parsed and
 * discarded. The registered <tt>DTDHandler</tt>, <tt>ErrorHandler</tt> and
 * <tt>EntityResolver</tt> are stored but are not consulted by this class;
 * parse errors are raised directly as exceptions.
 *
 * <p>An instance keeps its tokenizer in a field while parsing, so one instance
 * must not be used for two parses at the same time.
 *
 * @author <a href="http://www.themindelectric.com">The Mind Electric</a>
 */
public class SAXParser implements XMLReader, Locator
  {
  /**
   * Parse flag: report whitespace-only runs through
   * <tt>ContentHandler.ignorableWhitespace</tt> instead of
   * <tt>ContentHandler.characters</tt>.
   */
  public static final int IGNORE_WHITESPACE = 1;

  // XML start/stop tags
  private static final String COMMENT_START = "<!--";
  private static final String COMMENT_STOP = "-->";
  private static final String DOCTYPE_START = "<!DOCTYPE";
  private static final String DOCTYPE_STOP = ">";
  private static final String XMLDECL_START = "<?xml ";
  private static final String XMLDECL_STOP = "?>";
  private static final String INSTRUCTION_START = "<?";
  private static final String INSTRUCTION_STOP = "?>";
  private static final String CDATA_START = "<![CDATA[";
  private static final String CDATA_STOP = "]]>";
  private static final String ATTLIST_START = "<!ATTLIST";
  private static final String ATTLIST_STOP = ">";
  private static final String ELEMENTDECL_START = "<!ELEMENT";
  private static final String ELEMENTDECL_STOP = ">";
  private static final String ENTITYDECL_START = "<!ENTITY";
  private static final String ENTITYDECL_STOP = ">";
  private static final String NOTATIONDECL_START = "<!NOTATION";
  private static final String NOTATIONDECL_STOP = ">";
  private static final String SYSTEM = "SYSTEM";
  private static final String PUBLIC = "PUBLIC";

  // SAX feature set
  private static final String FEATURES = "http://xml.org/sax/features/";
  private static final String EXTERNAL_GENERAL_ENTITIES = FEATURES + "external-general-entities";
  private static final String EXTERNAL_PARAMETER_ENTITIES = FEATURES + "external-parameter-entities";
  private static final String IS_STANDALONE = FEATURES + "is-standalone";
  private static final String PARAMETER_ENTITIES = FEATURES + "lexical-handler/parameter-entities";
  private static final String NAMESPACES = FEATURES + "namespaces";
  private static final String NAMESPACE_PREFIXES = FEATURES + "namespace-prefixes";
  private static final String RESOLVE_DTD_URIS = FEATURES + "resolve-dtd-uris";
  private static final String STRING_INTERNING = FEATURES + "string-interning";
  private static final String USE_ATTRIBUTES_2 = FEATURES + "use-attributes2";
  private static final String USE_LOCATOR_2 = FEATURES + "use-locator2";
  private static final String USE_ENTITY_RESOLVER_2 = FEATURES + "use-entity-resolver2";
  private static final String VALIDATION = FEATURES + "validation";
  private static final String XMLNS_URIS = FEATURES + "xmlns-uris";

  // every feature name this parser recognizes
  private static final String[] KNOWN_FEATURES =
    {
    EXTERNAL_GENERAL_ENTITIES, EXTERNAL_PARAMETER_ENTITIES, IS_STANDALONE,
    PARAMETER_ENTITIES, NAMESPACES, NAMESPACE_PREFIXES, RESOLVE_DTD_URIS,
    STRING_INTERNING, USE_ATTRIBUTES_2, USE_LOCATOR_2, USE_ENTITY_RESOLVER_2,
    VALIDATION, XMLNS_URIS
    };

  // SAX property set
  private static final String PROPERTIES = "http://xml.org/sax/properties/";
  private static final String DECLARATION_HANDLER = PROPERTIES + "declaration-handler";
  private static final String DOM_NODE = PROPERTIES + "dom-node";
  private static final String LEXICAL_HANDLER = PROPERTIES + "lexical-handler";
  private static final String XML_STRING = PROPERTIES + "xml-string";

  // every property name this parser recognizes (none of them is supported yet)
  private static final String[] KNOWN_PROPERTIES =
    {
    DECLARATION_HANDLER, DOM_NODE, LEXICAL_HANDLER, XML_STRING
    };

  // SAX exception prefix
  private static final String SAX_EXCEPTION = "http://xml.org/sax/exception/";

  // shared do-nothing handler used whenever a handler is unset
  private static final SAXSink sink = new SAXSink();
  private static final AttributesImpl EMPTY_ATTRIBUTES = new AttributesImpl();

  private ContentHandler contentHandler = sink;
  private DTDHandler dtdHandler = sink;
  private ErrorHandler errorHandler = sink;
  private EntityResolver entityResolver;
  private Lex lex; // tokenizer of the parse in progress, null before the first parse
  private boolean ignoreWhitespace;
  private boolean standalone; // from the XML declaration
  private String version; // from the XML declaration
  private String encoding; // from the XML declaration

  // ********** FEATURES ****************************************************

  /**
   * Return the value of the named feature. <tt>namespaces</tt> and
   * <tt>resolve-dtd-uris</tt> are always true, <tt>is-standalone</tt> reflects
   * the XML declaration of the most recently parsed document, and every other
   * recognized feature is false.
   * @param name The name of the feature.
   * @return The current value of the feature.
   * @throws SAXNotRecognizedException If the feature name is not recognized.
   * @throws SAXNotSupportedException Declared by the interface, not thrown here.
   */
  public boolean getFeature( String name )
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
    if( name.equals( NAMESPACES ) || name.equals( RESOLVE_DTD_URIS ) )
      return true;

    if( name.equals( IS_STANDALONE ) )
      return standalone;

    if( contains( KNOWN_FEATURES, name ) )
      return false;

    throw new SAXNotRecognizedException( "feature " + name + " is not recognized" );
    }

  /**
   * Set the value of the named feature. No feature of this parser can actually
   * be changed, so the call succeeds only when the requested value is the value
   * the feature already has (for example <tt>namespaces</tt> = true).
   * @param name The name of the feature.
   * @param value The new value.
   * @throws SAXNotRecognizedException If the feature name is not recognized.
   * @throws SAXNotSupportedException If the feature is read-only or the
   * requested value differs from its fixed value.
   */
  public void setFeature( String name, boolean value )
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
    if( !contains( KNOWN_FEATURES, name ) )
      throw new SAXNotRecognizedException( "feature " + name + " is not recognized" );

    // is-standalone is derived from the document and can never be assigned
    if( name.equals( IS_STANDALONE ) || getFeature( name ) != value )
      throw new SAXNotSupportedException( "feature " + name + " is not currently supported" );
    }

  /**
   * Return true if the array contains a string equal to the name.
   * @param names The candidates.
   * @param name The name to look for.
   */
  private static boolean contains( String[] names, String name )
    {
    for( int i = 0; i < names.length; i++ )
      if( names[ i ].equals( name ) )
        return true;

    return false;
    }

  // ********** PROPERTIES **************************************************

  /**
   * Set the value of the named property. No property is currently supported.
   * @param name The name of the property.
   * @param value The new value.
   * @throws SAXNotRecognizedException If the property name is not recognized.
   * @throws SAXNotSupportedException If the property is recognized.
   */
  public void setProperty( String name, Object value )
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
    if( !contains( KNOWN_PROPERTIES, name ) )
      throw new SAXNotRecognizedException( "property " + name + " is not recognized" );

    throw new SAXNotSupportedException( "property " + name + " is not currently supported" );
    }

  /**
   * Return the value of the named property. No property is currently supported.
   * @param name The name of the property.
   * @throws SAXNotRecognizedException If the property name is not recognized.
   * @throws SAXNotSupportedException If the property is recognized.
   */
  public Object getProperty( String name )
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
    if( !contains( KNOWN_PROPERTIES, name ) )
      throw new SAXNotRecognizedException( "property " + name + " is not recognized" );

    throw new SAXNotSupportedException( "property " + name + " is not currently supported" );
    }

  // ********** CONTENT HANDLER *********************************************

  /**
   * Return the current content handler. When none has been set this is the
   * internal do-nothing sink, never null.
   */
  public ContentHandler getContentHandler()
    {
    return contentHandler;
    }

  /**
   * Set the current content handler.
   * @param handler The new value; null restores the do-nothing sink.
   */
  public void setContentHandler( ContentHandler handler )
    {
    this.contentHandler = (handler == null ? sink : handler);
    }

  // ********** DTD HANDLER *************************************************

  /**
   * Return the current DTD handler. When none has been set this is the
   * internal do-nothing sink, never null.
   */
  public DTDHandler getDTDHandler()
    {
    return dtdHandler;
    }

  /**
   * Set the current DTD Handler.
   * @param handler The new value; null restores the do-nothing sink.
   */
  public void setDTDHandler( DTDHandler handler )
    {
    this.dtdHandler = (handler == null ? sink : handler);
    }

  // ********** ERROR HANDLER ***********************************************

  /**
   * Return the current error handler. When none has been set this is the
   * internal do-nothing sink, never null.
   */
  public ErrorHandler getErrorHandler()
    {
    return errorHandler;
    }

  /**
   * Set the current error handler.
   * @param handler The new value; null restores the do-nothing sink.
   */
  public void setErrorHandler( ErrorHandler handler )
    {
    this.errorHandler = (handler == null ? sink : handler);
    }

  // ********** ENTITY RESOLVER *********************************************

  /**
   * Return the current entity resolver, or null if none has been set.
   */
  public EntityResolver getEntityResolver()
    {
    return entityResolver;
    }

  /**
   * Set the current entity resolver.
   * @param resolver The new value.
   */
  public void setEntityResolver( EntityResolver resolver )
    {
    this.entityResolver = resolver;
    }

  // ********** PARSING *****************************************************

  /**
   * Parse an XML document. The character stream of the input is preferred,
   * then its byte stream, and finally the URL named by its system id.
   * @param input The input.
   * @throws IOException An I/O exception from the parser, including a
   * <tt>MalformedURLException</tt> when the input offers no stream and no
   * usable system id.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  public void parse( InputSource input )
    throws IOException, SAXException
    {
    Reader reader = input.getCharacterStream();

    if( reader != null )
      {
      parse( reader );
      return;
      }

    InputStream stream = input.getByteStream();

    if( stream != null )
      {
      parse( stream );
      return;
      }

    String systemId = input.getSystemId();

    // new URL( null ) fails with the same exception type but no message
    if( systemId == null )
      throw new java.net.MalformedURLException( "input source has no character stream, byte stream or system id" );

    parse( new URL( systemId ).openStream() );
    }

  /**
   * Parse the XML document at the specified URL.
   * @param url A URL to the document to parse.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  public void parse( String url )
    throws IOException, SAXException
    {
    parse( new InputSource( url ) );
    }

  /**
   * Parse the XML document held in a byte array. The bytes are converted to
   * text by <tt>Strings.toString</tt>.
   * @param bytes The document.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  public void parse( byte[] bytes )
    throws IOException, SAXException
    {
    parse( new FastReader( Strings.toString( bytes ) ) );
    }

  /**
   * Parse the XML document read from a byte stream. The stream is closed when
   * parsing ends.
   * @param stream The document.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  public void parse( InputStream stream )
    throws IOException, SAXException
    {
    parse( new FastBufferedReader( Streams.getReader( stream ) ) );
    }

  /**
   * Parse the XML document stored in a file.
   * @param file The document.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  public void parse( File file )
    throws IOException, SAXException
    {
    parse( new FastBufferedReader( Streams.getReader( file ) ) );
    }

  /**
   * Parse the XML document read from a character stream, with no parse flags
   * set. The reader is closed when parsing ends.
   * @param reader The document.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  public void parse( Reader reader )
    throws IOException, SAXException
    {
    parse( reader, 0 ); // IGNORE_WHITESPACE );
    }

  // ********** PARSING ENGINE **********************************************

  /**
   * Install a tokenizer over the reader, parse the document and always close
   * the reader afterwards.
   * @param reader The document.
   * @param flags A bit mask of parse flags, see <tt>IGNORE_WHITESPACE</tt>.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  private void parse( Reader reader, int flags )
    throws IOException, SAXException
    {
    ignoreWhitespace = ((flags & IGNORE_WHITESPACE) != 0);
    lex = new Lex( reader, "<>=/:", Lex.SKIP_WS );

    try
      {
      parse();
      }
    finally
      {
      try
        {
        reader.close();
        }
      catch( IOException exception )
        {
        // best effort: a failed close must not hide the outcome of the parse
        }
      }
    }

  /**
   * Parse the whole document: prolog, exactly one root element, and any
   * trailing comments or processing instructions.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  private void parse()
    throws IOException, SAXException
    {
    contentHandler.setDocumentLocator( this );
    contentHandler.startDocument();
    int elements = 0;

    while( true )
      {
      // NOTE(review): whitespace outside the root element is still reported to
      // the handler, as before; strict SAX consumers may not expect that
      reportWhitespace( lex.readWhitespace(), ignoreWhitespace );

      // look two characters ahead without consuming them
      lex.mark( 2 );
      int ch1 = lex.peekRead();
      int ch2 = lex.peekRead();
      lex.reset();

      if( ch1 == -1 ) // eof
        break;
      else if( ch1 != '<' ) // every top level construct starts with "<"
        throw new SAXException( "text is not allowed outside of the root element" );
      else if( ch2 == '!' && lex.peekString( COMMENT_START ) )
        parseComment();
      else if( ch2 == '!' && lex.peekString( DOCTYPE_START ) )
        parseDocType();
      else if( ch2 == '?' && lex.peekString( XMLDECL_START ) )
        parseXMLDecl();
      else if( ch2 == '?' )
        parseInstruction();
      else // start tag "<"
        {
        parseElement( null );
        ++elements;
        }
      }

    if( elements != 1 )
      throw new SAXException( "the document does not have exactly one root" );

    // defensive: the loop above only exits at end of input
    lex.skipWhitespace();

    if( lex.read() != -1 )
      throw new SAXException( "extra stuff at the end" );

    contentHandler.endDocument();
    }

  /**
   * Report a run of whitespace to the content handler, if there is one.
   * @param whitespace The whitespace, or null when there was none.
   * @param ignorable True to report it as ignorable whitespace, false to
   * report it as ordinary characters.
   * @throws SAXException Any SAX exception raised by the handler.
   */
  private void reportWhitespace( StringBuffer whitespace, boolean ignorable )
    throws SAXException
    {
    if( whitespace == null )
      return;

    char[] chars = whitespace.toString().toCharArray();

    if( ignorable )
      contentHandler.ignorableWhitespace( chars, 0, chars.length );
    else
      contentHandler.characters( chars, 0, chars.length );
    }

  // ********** COMMENT *****************************************************

  /**
   * Consume a comment. Its content is discarded.
   * @throws IOException An I/O exception from the tokenizer.
   */
  private void parseComment()
    throws IOException
    {
    lex.skip( COMMENT_START.length() );
    lex.readToPattern( COMMENT_STOP, Lex.CONSUME );
    // report to LexicalHandler?
    }

  // ********** TEXT ********************************************************

  /**
   * Read character data up to the next "&lt;" and report it, together with any
   * whitespace that preceded it, as a single <tt>characters</tt> event.
   * @param whitespace Leading whitespace already consumed by the caller, or null.
   * @throws IOException An I/O exception from the tokenizer.
   * @throws SAXException Any SAX exception raised by the handler.
   */
  private void parseText( StringBuffer whitespace )
    throws IOException, SAXException
    {
    // presumably Lex.HTML decodes entity references; confirm against Lex
    String token = lex.readToPattern( "<", Lex.EOF_OK | Lex.HTML );
    String string = (whitespace == null ? token : whitespace.append( token ).toString());
    char[] chars = string.toCharArray(); // can we avoid this copy?
    contentHandler.characters( chars, 0, chars.length );
    }

  // ********** CDATA *******************************************************

  /**
   * Read a CDATA section and report its content as a <tt>characters</tt>
   * event. The section boundaries themselves are not reported.
   * @throws IOException An I/O exception from the tokenizer.
   * @throws SAXException Any SAX exception raised by the handler.
   */
  private void parseCData()
    throws IOException, SAXException
    {
    lex.skip( CDATA_START.length() );
    String token = lex.readToPattern( CDATA_STOP, Lex.CONSUME | Lex.INCLUDE );
    // the token ends with the stop pattern, which is not part of the content
    char[] chars = token.substring( 0, token.length() - CDATA_STOP.length() ).toCharArray();

    if( chars.length > 0 )
      contentHandler.characters( chars, 0, chars.length );

    // boundaries should be reported using SAX2 extension LexicalHandler
    }

  // ********** XML DECLARATION *********************************************

  /**
   * Read the XML declaration and remember its version, encoding and
   * standalone pseudo-attributes.
   * @throws IOException If the declaration is malformed, or an I/O exception
   * from the tokenizer.
   */
  private void parseXMLDecl()
    throws IOException
    {
    lex.skip( XMLDECL_START.length() );
    int peek = 0;

    do
      {
      String key = lex.readToken();
      lex.readChar( '=' );
      lex.skipWhitespace();
      int ch = lex.read();
      String value = null;

      if( ch == '\"' )
        value = lex.readToPattern( "\"", Lex.CONSUME | Lex.HTML );
      else if( ch == '\'' )
        value = lex.readToPattern( "'", Lex.CONSUME | Lex.HTML );
      else
        throw new IOException( "missing quote at start of attribute" );

      if( key.equals( "version" ) )
        version = value;
      else if( key.equals( "encoding" ) )
        encoding = value;
      else if( key.equals( "standalone" ) )
        standalone = value.equals( "yes" );
      else
        throw new IOException( key + " is invalid attribute for XMLDecl" );

      lex.skipWhitespace();
      peek = lex.peek();

      // without this check a truncated declaration would keep looping
      if( peek == -1 )
        throw new IOException( "unexpected end of input in XMLDecl" );
      }
    while( peek != '?' );

    lex.readChar( '?' );
    lex.readChar( '>' );
    }

  // ********** PROCESSING INSTRUCTION **************************************

  /**
   * Read a processing instruction and report it to the content handler.
   * @throws IOException An I/O exception from the tokenizer.
   * @throws SAXException Any SAX exception raised by the handler.
   */
  private void parseInstruction()
    throws IOException, SAXException
    {
    lex.skip( INSTRUCTION_START.length() );
    String target = lex.readToken();
    String content = lex.readToPattern( INSTRUCTION_STOP, Lex.SKIP_WS | Lex.CONSUME );
    contentHandler.processingInstruction( target, content );
    }

  // ********** DOCUMENT TYPE ***********************************************

  /**
   * Consume a DOCTYPE declaration, including an optional external id and an
   * optional internal subset. Nothing is reported to any handler; the ids are
   * read only so that the input is consumed.
   * @throws IOException If the declaration is malformed, or an I/O exception
   * from the tokenizer.
   * @throws SAXException Any SAX exception raised while handling a processing
   * instruction inside the internal subset.
   */
  private void parseDocType()
    throws IOException, SAXException
    {
    lex.readChar( '<' );
    lex.readToken( DOCTYPE_START.substring( 1 ) );
    String name = lex.readToDelimiter( "[>" );
    String systemId = null;
    String publicId = null;

    if( name.equals( "[" ) || name.equals( ">" ) )
      throw new IOException( "DOCTYPE is missing a name" );

    String next = lex.readToken();

    if( next.equals( SYSTEM ) )
      {
      systemId = lex.readToDelimiter( "[>", Lex.SKIP_WS | Lex.QUOTES | Lex.STRIP );
      next = lex.readToDelimiter( "[>" );
      }
    else if( next.equals( PUBLIC ) )
      {
      publicId = lex.readToDelimiter( "[>", Lex.SKIP_WS | Lex.QUOTES | Lex.STRIP );
      systemId = lex.readToDelimiter( "[>", Lex.SKIP_WS | Lex.QUOTES | Lex.STRIP );
      next = lex.readToDelimiter( "[>" );
      }

    if( next.equals( "[" ) ) // internal subset
      {
      while( true )
        {
        lex.skipWhitespace();
        int[] chars = new int[ 2 ];
        lex.peek( chars );
        int ch1 = chars[ 0 ];
        int ch2 = chars[ 1 ];

        if( ch1 == ']' )
          break;
        else if( ch1 == -1 ) // eof
          throw new IOException( "could not find matching ']' in DOCTYPE" );
        else if( ch1 == '%' ) // parameter entity reference, skipped
          lex.readToPattern( ";", Lex.CONSUME | Lex.INCLUDE );
        else if( ch2 == '!' && lex.peekString( ATTLIST_START ) )
          parseAttlistDecl();
        else if( ch2 == '!' && lex.peekString( ELEMENTDECL_START ) )
          parseElementDecl();
        else if( ch2 == '!' && lex.peekString( ENTITYDECL_START ) )
          parseEntityDecl();
        else if( ch2 == '!' && lex.peekString( NOTATIONDECL_START ) )
          parseNotationDecl();
        else if( ch2 == '!' && lex.peekString( COMMENT_START ) )
          parseComment();
        else if( ch2 == '?' )
          parseInstruction();
        else
          throw new IOException( "illegal entry in DOCTYPE" );
        }

      next = lex.readToken(); // consume "]"
      next = lex.readToken();
      }

    if( !next.equals( ">" ) )
      throw new IOException( "could not find matching '>' in DOCTYPE" );
    }

  /**
   * Consume one markup declaration of the DOCTYPE internal subset. The
   * content of the declaration is discarded.
   * @param start The opening pattern of the declaration, at the current position.
   * @param stop The pattern that closes the declaration.
   * @throws IOException An I/O exception from the tokenizer.
   */
  private void skipDeclaration( String start, String stop )
    throws IOException
    {
    lex.skip( start.length() );
    lex.readToPattern( stop, Lex.CONSUME | Lex.INCLUDE | Lex.QUOTES );
    }

  /**
   * Consume an ATTLIST declaration.
   * @throws IOException An I/O exception from the tokenizer.
   */
  private void parseAttlistDecl()
    throws IOException
    {
    skipDeclaration( ATTLIST_START, ATTLIST_STOP );
    // should report using SAX2 extension DeclHandler
    }

  /**
   * Consume an ELEMENT declaration.
   * @throws IOException An I/O exception from the tokenizer.
   */
  private void parseElementDecl()
    throws IOException
    {
    skipDeclaration( ELEMENTDECL_START, ELEMENTDECL_STOP );
    // should report using SAX2 extension DeclHandler
    }

  /**
   * Consume an ENTITY declaration.
   * @throws IOException An I/O exception from the tokenizer.
   */
  private void parseEntityDecl()
    throws IOException
    {
    skipDeclaration( ENTITYDECL_START, ENTITYDECL_STOP );
    // dtdHandler.unparsedEntityDecl( name, publicId, systemId, notationName );
    // or should report using SAX2 extension DeclHandler?
    }

  /**
   * Consume a NOTATION declaration.
   * @throws IOException An I/O exception from the tokenizer.
   */
  private void parseNotationDecl()
    throws IOException
    {
    skipDeclaration( NOTATIONDECL_START, NOTATIONDECL_STOP );
    // dtdHandler.notationDecl( name, publicId, systemId );
    }

  // ********** ELEMENT *****************************************************

  /**
   * Parse one element, from its start tag through its matching end tag, and
   * report it to the content handler. Both the <tt>&lt;a/&gt;</tt> and the
   * <tt>&lt;a&gt;...&lt;/a&gt;</tt> forms produce a start and an end event.
   * @param parent The namespace scope of the enclosing element, or null for
   * the root element.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  private void parseElement( SAXScope parent )
    throws IOException, SAXException
    {
    SAXScope scope = new SAXScope( parent );
    lex.read(); // '<'
    String localName = lex.readToken();
    String prefix = null;
    String namespace = null;

    if( lex.peek() == ':' )
      {
      prefix = localName;
      lex.read(); // ':'
      localName = lex.readToken();
      }

    lex.skipWhitespace();
    int peek = lex.peek();
    Attributes attributes = EMPTY_ATTRIBUTES;

    // attributes come first: they may declare the namespace of this element
    if( peek != '>' && peek != '/' )
      attributes = parseAttributes( scope );

    if( prefix == null )
      {
      namespace = scope.getNamespace( "" );
      }
    else
      {
      namespace = scope.getNamespace( prefix );

      if( namespace == null )
        throw new SAXException( "could not find namespace with prefix " + prefix );
      }

    String uri = (namespace == null ? "" : namespace);
    String qName = (prefix == null ? localName : prefix + ":" + localName);

    if( scope.namespaces != null )
      for( int i = 0; i < scope.namespaces.length; i++ )
        contentHandler.startPrefixMapping( scope.namespaces[ i ][ 0 ], scope.namespaces[ i ][ 1 ] );

    contentHandler.startElement( uri, localName, qName, attributes );

    int ch = lex.read();

    if( ch == '/' ) // empty
      {
      lex.readChar( '>' );
      }
    else if( ch != '>' )
      {
      throw new IOException( "expected > or /" );
      }
    else // read >, parse children
      {
      parseChildren( scope );
      lex.readChar( '<' );
      lex.readChar( '/' );

      if( prefix != null )
        {
        String endPrefix = lex.readToken();

        if( !prefix.equals( endPrefix ) )
          throw new IOException( "<" + prefix + ":...> does not match </" + endPrefix + ":...>" );

        lex.readChar( ':' );
        }

      String endName = lex.readToken();
      lex.readChar( '>' );

      if( !localName.equals( endName ) )
        throw new IOException( "<..." + localName + "> does not match </..." + endName + ">" );
      }

    // every start event needs its end event, for empty elements as well
    contentHandler.endElement( uri, localName, qName );

    if( scope.namespaces != null )
      for( int i = 0; i < scope.namespaces.length; i++ )
        contentHandler.endPrefixMapping( scope.namespaces[ i ][ 0 ] );
    }

  // ********** ATTRIBUTES **************************************************

  /**
   * Parse the attributes of a start tag, up to but not including the closing
   * "&gt;" or "/&gt;". Namespace declarations are recorded in the scope and
   * are not included in the returned attributes.
   * @param scope The namespace scope of the element being parsed.
   * @return The attributes other than namespace declarations.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  private Attributes parseAttributes( SAXScope scope )
    throws IOException, SAXException
    {
    AttributesImpl attributes = new AttributesImpl();
    int peek = 0;

    do
      {
      String localName = lex.readToken();
      String prefix = null;

      if( lex.peek() == ':' )
        {
        prefix = localName;
        lex.read();
        localName = lex.readToken();
        }

      lex.readChar( '=' );
      lex.skipWhitespace();
      int ch = lex.read();
      String value = null;

      if( ch == '\"' )
        value = lex.readToPattern( "\"", Lex.CONSUME | Lex.HTML );
      else if( ch == '\'' )
        value = lex.readToPattern( "'", Lex.CONSUME | Lex.HTML );
      else
        throw new IOException( "missing quote at start of attribute" );

      if( prefix == null && "xmlns".equals( localName ) )
        {
        // xmlns="..." declares the default namespace
        scope.setNamespace( "", value );
        }
      else if( "xmlns".equals( prefix ) )
        {
        // xmlns:p="..." binds the prefix p
        scope.setNamespace( localName, value );
        }
      else
        {
        // NOTE(review): prefixed attributes are still reported with an empty
        // namespace URI; resolving the prefix here would miss declarations
        // that appear later in the same tag
        String qName = (prefix == null ? localName : prefix + ":" + localName);
        attributes.addAttribute( "", localName, qName, "CDATA", value );
        }

      lex.skipWhitespace();
      peek = lex.peek();

      // without this check a truncated start tag would keep looping
      if( peek == -1 )
        throw new IOException( "unexpected end of input in attribute list" );
      }
    while( peek != '>' && peek != '/' );

    return attributes;
    }

  // ********** CHILDREN ****************************************************

  /**
   * Parse the content of an element: text, CDATA sections, comments,
   * processing instructions and child elements. Returns with the tokenizer
   * positioned at the "&lt;/" of the end tag, or at end of input.
   * @param parent The namespace scope of the element whose content is parsed.
   * @throws IOException An I/O exception from the parser.
   * @throws SAXException Any SAX exception, possibly wrapping another exception.
   */
  private void parseChildren( SAXScope parent )
    throws IOException, SAXException
    {
    boolean hasChildren = false;

    while( true )
      {
      StringBuffer whitespace = lex.readWhitespace();

      // look two characters ahead without consuming them
      lex.mark( 2 );
      int ch1 = lex.peekRead();
      int ch2 = lex.peekRead();
      lex.reset();

      if( ch1 == -1 ) // eof
        {
        break;
        }
      else if( ch1 != '<' )
        {
        parseText( whitespace );
        }
      else if( ch2 == '/' ) // end tag "</"
        {
        // whitespace-only content of a leaf element is real text
        reportWhitespace( whitespace, hasChildren && ignoreWhitespace );
        break;
        }
      else
        {
        reportWhitespace( whitespace, ignoreWhitespace );

        if( ch2 == '!' && lex.peekString( CDATA_START ) )
          parseCData();
        else if( ch2 == '!' && lex.peekString( COMMENT_START ) )
          parseComment();
        else if( ch2 == '?' )
          parseInstruction();
        else // start tag "<"
          {
          hasChildren = true;
          parseElement( parent );
          }
        }
      }
    }

  // ********** LOCATOR *****************************************************

  /**
   * Return the column number where the current document event ends. Column
   * tracking is not implemented, so this is always -1.
   */
  public int getColumnNumber()
    {
    return -1; // update lex to support this
    }

  /**
   * Return the line number where the current document event ends, or -1 if
   * no parse has been started yet.
   */
  public int getLineNumber()
    {
    return (lex == null ? -1 : lex.getLineNumber());
    }

  /**
   * Return the public identifier for the current document event. It is not
   * tracked, so this is always null.
   */
  public String getPublicId()
    {
    return null;
    }

  /**
   * Return the system identifier for the current document event. It is not
   * tracked, so this is always null.
   */
  public String getSystemId()
    {
    return null;
    }
  }