edu.iastate.utils.string
Class HTMLHandler

java.lang.Object
  extended by edu.iastate.utils.string.HTMLHandler

public class HTMLHandler
extends java.lang.Object


Constructor Summary
HTMLHandler()
           
 
Method Summary
static java.lang.String escapeHTMLTags(java.lang.String in)
          This method takes a string which may contain HTML tags (e.g., <b>, <table>, etc.) and converts the '<' and '>' characters to their HTML escape sequences.
static java.lang.String[] getLinksInHTML(java.lang.String uriStr)
          Getting the Links in an HTML Document. This method takes a URI which can be either a filename (e.g. file://c:/dir/file.html) or a URL (e.g. http://host.com/page.html) and returns all HREF links in the document.
static java.util.Vector getNestdBlock(java.lang.String tag, java.lang.CharSequence input, boolean greedy)
          Returns all tags that begin with 'tag'. Repeated nesting is not detected; if that case happens, only the top-level tag nesting is added to the list.
static java.util.Vector getNestdBlock(java.lang.String head, java.lang.String tail, java.lang.CharSequence input, boolean greedy)
          Gets all blocks with the given head and tail; the head and tail themselves are not included.
static java.util.Vector getTopNestdBlock(java.lang.CharSequence inputStr)
          return the content of the highest ...
static java.lang.String Html2Txt(java.io.InputStream stream)
           
static java.lang.String Html2Txt(java.lang.String HTMLstr)
           
static java.lang.String Html2Txt(java.net.URI uri)
          Getting the Text in an HTML Document to read from a URI [ a filename (e.g. file://c:/dir/file.html) or a URL (e.g. http://host.com/page.html) ] URL url = new URI( uriStr ).toURL() ; URLConnection conn = url.openConnection() ; Html2Txt( conn.getInputStream() ) ; for String strHTML InputStream stream = new StringBufferInputStream(strHTML); Html2Txt(stream);
static boolean isEmailValid(java.lang.String inputString)
           
static boolean isUrlValid(java.lang.String url)
          protected boolean isUrlValid(String url) ---------------------------------------- Checks whether the URL is valid; really basic for now;
static java.lang.String TagRemover(java.lang.String strHTML)
          Removes the tags, i.e. the text between "<" and ">".
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HTMLHandler

public HTMLHandler()
Method Detail

getTopNestdBlock

public static java.util.Vector getTopNestdBlock(java.lang.CharSequence inputStr)
return the content of the highest ... region

Parameters:
inputStr -
Returns:
null if not found, tag and content if found

getNestdBlock

public static java.util.Vector getNestdBlock(java.lang.String tag,
                                             java.lang.CharSequence input,
                                             boolean greedy)
Returns all tags that begin with 'tag'. Repeated nesting is not detected; if that case happens, only the top-level tag nesting is added to the list.

Parameters:
tag -
input -
Returns:
NOT implemented yet

getNestdBlock

public static java.util.Vector getNestdBlock(java.lang.String head,
                                             java.lang.String tail,
                                             java.lang.CharSequence input,
                                             boolean greedy)
Gets all blocks with the given head and tail; the head and tail themselves are not included.

Parameters:
head -
tail -
input -
greedy -
Returns:

Html2Txt

public static java.lang.String Html2Txt(java.net.URI uri)
Getting the Text in an HTML Document to read from a URI [ a filename (e.g. file://c:/dir/file.html) or a URL (e.g. http://host.com/page.html) ] URL url = new URI( uriStr ).toURL() ; URLConnection conn = url.openConnection() ; Html2Txt( conn.getInputStream() ) ; for String strHTML InputStream stream = new StringBufferInputStream(strHTML); Html2Txt(stream);

Returns:

Html2Txt

public static java.lang.String Html2Txt(java.lang.String HTMLstr)

Html2Txt

public static java.lang.String Html2Txt(java.io.InputStream stream)

TagRemover

public static java.lang.String TagRemover(java.lang.String strHTML)
Removes the tags, i.e. the text between "<" and ">".

Parameters:
strHTML - String
Returns:
String

isEmailValid

public static boolean isEmailValid(java.lang.String inputString)

getLinksInHTML

public static java.lang.String[] getLinksInHTML(java.lang.String uriStr)
Getting the Links in an HTML Document. This method takes a URI which can be either a filename (e.g. file://c:/dir/file.html) or a URL (e.g. http://host.com/page.html) and returns all HREF links in the document.

Parameters:
uriStr -
Returns:

isUrlValid

public static boolean isUrlValid(java.lang.String url)
protected boolean isUrlValid(String url) ---------------------------------------- Checks whether the URL is valid; really basic for now;


escapeHTMLTags

public static final java.lang.String escapeHTMLTags(java.lang.String in)
This method takes a string which may contain HTML tags (e.g., <b>, <table>, etc.) and converts the '<' and '>' characters to their HTML escape sequences.

Parameters:
in - the text to be converted.
Returns:
the input string with the characters '<' and '>' replaced with their HTML escape sequences.