#
# This splits an html file into pure html and commented parts
#
%{
import java.io.*;
import java.net.*;
public class htmlsplit
{
/* Accumulates the non-comment output: the rule actions below append every
   run of non-'<' text, and each lone '<', to this buffer. main() prints it
   under the "Html" heading. */
StringBuffer htmlContents = new StringBuffer();
/* Accumulates the text matched by the comment rule below. main() prints it
   under the "Comments" heading. */
StringBuffer htmlComments = new StringBuffer();
/**
 * Command-line entry point: runs the lexer over one input and prints the
 * two collected buffers (HTML first, then comments) to standard output.
 *
 * @param argv exactly one argument: an http:// or https:// URL, or the
 *             path of a local file
 * @throws Exception if the input cannot be opened or read, or if the
 *                   lexer reports an error
 */
public static void main(String argv[]) throws Exception
{
    htmlsplit myLexer = new htmlsplit();
    if (argv.length != 1)
    {
        System.err.println("Usage: java htmlsplit (URL | file)");
        System.exit(1);
    }
    String source = argv[0];
    InputStream inp;
    // Treat https:// like http://; otherwise an https URL would be handed
    // to FileInputStream and fail as a missing file.
    if (source.startsWith("http://") || source.startsWith("https://"))
    { inp = new URL(source).openStream(); }
    else
    { inp = new FileInputStream(source); }
    try
    {
        myLexer.init(inp);
        // NOTE(review): a single call is presumably enough because the rule
        // actions never return a token, so the lexer keeps going until end
        // of input -- confirm against the generated code.
        myLexer.jax_next_token();
    }
    finally
    {
        // Release the file handle / network connection even when lexing fails.
        inp.close();
    }
    System.out.println("Html\n====\n\n" + myLexer.htmlContents);
    System.out.println("Comments\n========\n\n"+myLexer.htmlComments);
}
%}
# From the spec:
# A comment declaration consists of `<!' followed by zero or more
# comments followed by `>'. Each
# comment starts with `--' and includes all text up to
# and including the next occurrence of `--'.
# In a comment declaration, white space is allowed after each comment,
# but not before the first comment.
# Comment declaration: `<!', then zero or more `--...--' comments (each
# optionally followed by white space), then `>'. The matched text is
# appended to the comments buffer.
# NOTE(review): the pattern in this file had been reduced to a single space
# (apparently by markup stripping); it is reconstructed here from the HTML
# comment-declaration syntax. Confirm the escapes against the lexer
# generator's regex syntax.
/<!(--([^-]|-[^-])*--[ \t\r\n]*)*>/
%{ htmlComments.append(jax_text()); %} ;
# Fast path for everything else: consume a whole run of characters other
# than '<' in one match and append it to the HTML buffer.
# (jax_text() is presumably the text of the current match.)
/[^<]+/
%{ htmlContents.append(jax_text()); %} ;
# A single '<' that was not consumed as part of a comment declaration:
# copy it through to the HTML buffer unchanged.
# NOTE(review): the pattern had been truncated to a bare `/'; `<' is restored
# to agree with the action below, which appends exactly that character.
/</
%{ htmlContents.append('<'); %} ;
# Trailing section
%{
}
%}