#
# This splits an html file into pure html and commented parts
#
%{
import java.io.*;
import java.net.*;

/**
 * Lexer driver that separates an HTML document into two buffers: the
 * ordinary markup/text and the comment declarations.
 *
 * The rule actions further down in this grammar append to the two buffers.
 * The methods init(), jax_next_token() and jax_text() are not defined in
 * this section; presumably the lexer generator supplies them -- confirm
 * against the generator's documentation.
 */
public class htmlsplit {
    /* Hold the html */
    StringBuffer htmlContents = new StringBuffer();

    /* Hold the comments */
    StringBuffer htmlComments = new StringBuffer();

    /**
     * Reads the document named by the single command-line argument, runs the
     * lexer over it, and prints the collected html followed by the collected
     * comments to standard output.
     *
     * @param argv exactly one element: an http:// or https:// URL, or a
     *             local file name
     * @throws Exception if the input cannot be opened or read, or if the
     *                   lexer reports a failure
     */
    public static void main(String argv[]) throws Exception {
        htmlsplit myLexer = new htmlsplit();

        if (argv.length != 1) {
            System.err.println("Usage: java htmlsplit (URL | file)");
            System.exit(1);
        }

        // Anything that looks like a web URL is fetched; everything else is
        // treated as a local file name.
        InputStream inp;
        if (argv[0].startsWith("http://") || argv[0].startsWith("https://")) {
            inp = new URL(argv[0]).openStream();
        } else {
            inp = new FileInputStream(argv[0]);
        }

        // Close the stream whether or not lexing succeeds. The output is
        // printed afterwards because it only depends on the two buffers.
        try {
            myLexer.init(inp);
            myLexer.jax_next_token();
        } finally {
            inp.close();
        }

        System.out.println("Html\n====\n\n" + myLexer.htmlContents);
        System.out.println("Comments\n========\n\n"+myLexer.htmlComments);
    }
%}

# From the spec:
# A comment declaration consists of `<!' followed by zero or more comments
# followed by `>'. Each comment starts with `--' and includes all text up to
# and including the next occurrence of `--'.
# In a comment declaration, white space is allowed after each comment,
# but not before the first comment.
#
# NOTE(review): the pattern below reads `/ /` (a single space), which does not
# match the comment declaration described above. Its body looks to have been
# lost, possibly stripped as markup. Presumably it was along the lines of
#   /<!(--([^-]|-[^-])*--[ \t\n]*)*>/
# Confirm against the original grammar and the generator's regex syntax
# before changing it.
/ / %{ htmlComments.append(jax_text()); %} ;

# Match rest of things quickly
/[^<]+/ %{ htmlContents.append(jax_text()); %} ;

/