edu.iastate.utils.string
Class StringUtils

java.lang.Object
  extended by edu.iastate.utils.string.StringUtils

public class StringUtils
extends java.lang.Object

Utility class to perform common String manipulation algorithms.


Field Summary
private static char[] AMP_ENCODE
           
private static java.lang.String cvt
           
private static java.security.MessageDigest digest
          Used by the hash method.
private static int DIGIT
           
private static int EOI
           
private static int fillchar
           
private static char[] GT_ENCODE
           
(package private) static java.lang.String HEX_DIGITS
           
private static int LOWER
           
private static char[] LT_ENCODE
           
private static char[] numbersAndLetters
          Array of numbers and letters of mixed case.
private static int OTHER
           
private static char[] QUOTE_ENCODE
           
private static java.util.Random randGen
          Pseudo-random number generator object for use with randomString().
private static int UPPER
           
private static char[] zeroArray
           
 
Constructor Summary
StringUtils()
           
 
Method Summary
static java.lang.String beautifyString(java.lang.String s)
          Adds spaces in suitable locations of the input string.
static java.lang.String chopAtWord(java.lang.String string, int length)
          Intelligently chops a String at a word boundary (whitespace) that occurs at the specified index in the argument or before.
static java.lang.String dateToMillis(java.util.Date date)
          Formats a Date as a fifteen character long String made up of the Date's padded millisecond value.
static java.lang.String decodeBase64(byte[] data)
          Decodes a base64 array of bytes.
static java.lang.String decodeBase64(java.lang.String data)
          Decodes a base64 String.
static byte[] decodeHex(java.lang.String hex)
          Turns a hex encoded string into a byte array.
static java.lang.String encodeBase64(byte[] data)
          Encodes a byte array into a base64 String.
static java.lang.String encodeBase64(java.lang.String data)
          Encodes a String as a base64 String.
static java.lang.String encodeHex(byte[] bytes)
          Turns an array of bytes into a String representing each byte as an unsigned hex number.
static java.lang.String escapeForXML(java.lang.String string)
          Escapes all necessary characters in the String so that it can be used in an XML doc.
static java.lang.String escapeHTML(java.lang.String htmlContents)
          Substitutes most common HTML reserved characters with their escaped version.
static java.lang.String escapeHTMLTags(java.lang.String in)
          This method takes a string which may contain HTML tags (ie, <b>, <table>, etc) and converts the '<' and '>' characters to their HTML escape sequences.
static boolean getBooleanProperty(java.util.Properties props, java.lang.String key, boolean defval)
          Gets a boolean property from a standard Properties list.
private static int getCharKind(int c)
           
static java.lang.String hash(java.lang.String data)
          Hashes a String using the Md5 algorithm and returns the result as a String of hexadecimal numbers.
private static byte hexCharToByte(char ch)
          Returns the byte value of a hexadecimal char (0-f).
static boolean isPositive(java.lang.String val)
          Returns true, if the string "val" denotes a positive string.
static boolean isStringEmpty(java.lang.String str)
           
static java.lang.String normalizePostData(java.lang.String postData)
          Makes sure that the POSTed data conforms to certain rules.
static int parseIntParameter(java.lang.String value, int defvalue)
          Parses an integer parameter, returning a default value if the value is null or a non-number.
static java.lang.String quote(java.lang.String literal)
          Puts a quote before and after given string.
static java.lang.String randomString(int length)
          Returns a random String of numbers and letters (lower and upper case) of the specified length.
static java.lang.String removeDuplicate(java.lang.String original, char lookfor)
          Removes all duplicates of specified char in given text.
static java.lang.String replace(java.lang.CharSequence htmlContent, java.lang.String tagToBeReplaced, java.lang.String replacingTag)
          Replaces a sequence with another inside a text content.
static java.lang.String replace(java.lang.String line, java.lang.String oldString, java.lang.String newString)
          Replaces all instances of oldString with newString in line.
static java.lang.String replace(java.lang.String line, java.lang.String oldString, java.lang.String newString, int[] count)
          Replaces all instances of oldString with newString in line.
static java.lang.String replaceEntities(java.lang.String src)
          Replaces the relevant entities inside the String.
static java.lang.String replaceIgnoreCase(java.lang.String line, java.lang.String oldString, java.lang.String newString)
          Replaces all instances of oldString with newString in line, with the added feature that matches of oldString in line ignore case.
static java.lang.String replaceIgnoreCase(java.lang.String line, java.lang.String oldString, java.lang.String newString, int[] count)
          Replaces all instances of oldString with newString in line, with the added feature that matches of oldString in line ignore case.
static java.lang.String replaceString(java.lang.String orig, int start, int end, java.lang.String text)
          Replaces a part of a string with a new String.
static java.lang.String replaceString(java.lang.String orig, java.lang.String src, java.lang.String dest)
          Replaces a string with another string.
static java.lang.String[] toLowerCaseWordArray(java.lang.String text)
          Converts a line of text into an array of lower case words using a BreakIterator.wordInstance().
static java.lang.String unescapeFromXML(java.lang.String string)
          Unescapes the String by converting XML escape sequences back into normal characters.
static java.lang.String unescapeHTML(java.lang.String htmlContents)
          Substitutes most common HTML escape sequences with their unescaped version.
static java.lang.String unquote(java.lang.String literal)
          Removes all quotes at the beginning and ending of given string.
protected static java.lang.String urlDecode(byte[] bytes)
          URL encoder does not handle all characters correctly.
static java.lang.String urlDecode(java.lang.String data, java.lang.String encoding)
          Provides decoded version of string depending on encoding.
static java.lang.String urlDecodeUTF8(java.lang.String utf8)
          As java.net.URLDecoder class, but for UTF-8 strings.
protected static java.lang.String urlEncode(byte[] rs)
          java.net.URLEncoder.encode() method in JDK < 1.4 is buggy.
static java.lang.String urlEncode(java.lang.String data, java.lang.String encoding)
          Provides encoded version of string depending on encoding.
static java.lang.String urlEncodeUTF8(java.lang.String text)
          As java.net.URLEncoder class, but this does it in UTF8 character set.
static java.lang.String wordWrap(java.lang.String input, int width, java.util.Locale locale)
          Reformats a string where lines that are longer than width are split apart at the earliest wordbreak or at width, whichever is sooner.
static java.lang.String zeroPadString(java.lang.String string, int length)
          Pads the supplied String with 0's to the specified length and returns the result as a new String.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

QUOTE_ENCODE

private static final char[] QUOTE_ENCODE

AMP_ENCODE

private static final char[] AMP_ENCODE

LT_ENCODE

private static final char[] LT_ENCODE

GT_ENCODE

private static final char[] GT_ENCODE

digest

private static java.security.MessageDigest digest
Used by the hash method.


fillchar

private static final int fillchar
See Also:
Constant Field Values

cvt

private static final java.lang.String cvt
See Also:
Constant Field Values

randGen

private static java.util.Random randGen
Pseudo-random number generator object for use with randomString(). The Random class is not considered to be cryptographically secure, so only use these random Strings for low to medium security applications.


numbersAndLetters

private static char[] numbersAndLetters
Array of numbers and letters of mixed case. Numbers appear in the list twice so that there is a more equal chance that a number will be picked. We can use the array to get a random number or letter by picking a random array index.


zeroArray

private static final char[] zeroArray

HEX_DIGITS

static final java.lang.String HEX_DIGITS
See Also:
Constant Field Values

EOI

private static final int EOI
See Also:
Constant Field Values

LOWER

private static final int LOWER
See Also:
Constant Field Values

UPPER

private static final int UPPER
See Also:
Constant Field Values

DIGIT

private static final int DIGIT
See Also:
Constant Field Values

OTHER

private static final int OTHER
See Also:
Constant Field Values
Constructor Detail

StringUtils

public StringUtils()
Method Detail

replace

public static final java.lang.String replace(java.lang.String line,
                                             java.lang.String oldString,
                                             java.lang.String newString)
Replaces all instances of oldString with newString in line.

Parameters:
line - the String to search to perform replacements on
oldString - the String that should be replaced by newString
newString - the String that will replace all instances of oldString
Returns:
a String with all instances of oldString replaced by newString

replaceIgnoreCase

public static final java.lang.String replaceIgnoreCase(java.lang.String line,
                                                       java.lang.String oldString,
                                                       java.lang.String newString)
Replaces all instances of oldString with newString in line, with the added feature that matches of oldString in line ignore case.

Parameters:
line - the String to search to perform replacements on
oldString - the String that should be replaced by newString
newString - the String that will replace all instances of oldString
Returns:
a String with all instances of oldString replaced by newString

replaceIgnoreCase

public static final java.lang.String replaceIgnoreCase(java.lang.String line,
                                                       java.lang.String oldString,
                                                       java.lang.String newString,
                                                       int[] count)
Replaces all instances of oldString with newString in line, with the added feature that matches of oldString in line ignore case. The count parameter is set to the number of replaces performed.

Parameters:
line - the String to search to perform replacements on
oldString - the String that should be replaced by newString
newString - the String that will replace all instances of oldString
count - a value that will be updated with the number of replaces performed.
Returns:
a String with all instances of oldString replaced by newString

replace

public static final java.lang.String replace(java.lang.String line,
                                             java.lang.String oldString,
                                             java.lang.String newString,
                                             int[] count)
Replaces all instances of oldString with newString in line. The count parameter is updated with the number of replaces performed.

Parameters:
line - the String to search to perform replacements on
oldString - the String that should be replaced by newString
newString - the String that will replace all instances of oldString
Returns:
a String with all instances of oldString replaced by newString

escapeHTML

public static java.lang.String escapeHTML(java.lang.String htmlContents)
Substitutes most common HTML reserved characters with their escaped version.
i.e. it takes all "<" and replaces them with "&lt;".

Returns:
the escaped version of such contents.

escapeHTMLTags

public static final java.lang.String escapeHTMLTags(java.lang.String in)
This method takes a string which may contain HTML tags (ie, <b>, <table>, etc) and converts the '<' and '>' characters to their HTML escape sequences.

Parameters:
in - the text to be converted.
Returns:
the input string with the characters '<' and '>' replaced with their HTML escape sequences.

hash

public static final java.lang.String hash(java.lang.String data)
Hashes a String using the Md5 algorithm and returns the result as a String of hexadecimal numbers. This method is synchronized to avoid excessive MessageDigest object creation. If calling this method becomes a bottleneck in your code, you may wish to maintain a pool of MessageDigest objects instead of using this method.

A hash is a one-way function -- that is, given an input, an output is easily computed. However, given the output, the input is almost impossible to compute. This is useful for passwords since we can store the hash and a hacker will then have a very hard time determining the original password.

In Jive, every time a user logs in, we simply take their plain text password, compute the hash, and compare the generated hash to the stored hash. Since it is almost impossible that two passwords will generate the same hash, we know if the user gave us the correct password or not. The only negative to this system is that password recovery is basically impossible. Therefore, a reset password method is used instead.

Parameters:
data - the String to compute the hash of.
Returns:
a hashed version of the passed-in String

encodeHex

public static final java.lang.String encodeHex(byte[] bytes)
Turns an array of bytes into a String representing each byte as an unsigned hex number.

Method by Santeri Paavolainen, Helsinki Finland 1996
(c) Santeri Paavolainen, Helsinki Finland 1996
Distributed under LGPL.

Parameters:
bytes - an array of bytes to convert to a hex-string
Returns:
generated hex string

decodeHex

public static final byte[] decodeHex(java.lang.String hex)
Turns a hex encoded string into a byte array. It is specifically meant to "reverse" the encodeHex(byte[]) method.

Parameters:
hex - a hex encoded String to transform into a byte array.
Returns:
a byte array representing the hex String.

hexCharToByte

private static final byte hexCharToByte(char ch)
Returns the byte value of a hexadecimal char (0-f). It's assumed that the hexadecimal chars are lower case as appropriate.

Parameters:
ch - a hexadecimal character (0-f)
Returns:
the byte value of the character (0x00-0x0F)

encodeBase64

public static java.lang.String encodeBase64(java.lang.String data)
Encodes a String as a base64 String.

Parameters:
data - a String to encode.
Returns:
a base64 encoded String.

encodeBase64

public static java.lang.String encodeBase64(byte[] data)
Encodes a byte array into a base64 String.

Parameters:
data - a byte array to encode.
Returns:
a base64 encoded String.

decodeBase64

public static java.lang.String decodeBase64(java.lang.String data)
Decodes a base64 String.

Parameters:
data - a base64 encoded String to decode.
Returns:
the decoded String.

decodeBase64

public static java.lang.String decodeBase64(byte[] data)
Decodes a base64 array of bytes.

Parameters:
data - a base64 encoded byte array to decode.
Returns:
the decoded String.

toLowerCaseWordArray

public static final java.lang.String[] toLowerCaseWordArray(java.lang.String text)
Converts a line of text into an array of lower case words using a BreakIterator.wordInstance().

This method is under the Jive Open Source Software License and was written by Mark Imbriaco.

Parameters:
text - a String of text to convert into an array of words
Returns:
text broken up into an array of words.

randomString

public static final java.lang.String randomString(int length)
Returns a random String of numbers and letters (lower and upper case) of the specified length. The method uses the Random class that is built-in to Java which is suitable for low to medium grade security uses. This means that the output is only pseudo random, i.e., each number is mathematically generated so is not truly random.

The specified length must be at least one. If not, the method will return null.

Parameters:
length - the desired length of the random String to return.
Returns:
a random String of numbers and letters of the specified length.

chopAtWord

public static final java.lang.String chopAtWord(java.lang.String string,
                                                int length)
Intelligently chops a String at a word boundary (whitespace) that occurs at the specified index in the argument or before. However, if there is a newline character before length, the String will be chopped there. If no newline or whitespace is found in string up to the index length, the String will be chopped at length.

For example, chopAtWord("This is a nice String", 10) will return "This is a" which is the first word boundary less than or equal to 10 characters into the original String.

Parameters:
string - the String to chop.
length - the index in string to start looking for a whitespace boundary at.
Returns:
a substring of string whose length is less than or equal to length, and that is chopped at whitespace.

wordWrap

public static java.lang.String wordWrap(java.lang.String input,
                                        int width,
                                        java.util.Locale locale)
Reformats a string where lines that are longer than width are split apart at the earliest wordbreak or at width, whichever is sooner. If the width specified is less than 5 or greater than the input String's length, the string will be returned as is.

Please note that this method can be lossy - trailing spaces on wrapped lines may be trimmed.

Parameters:
input - the String to reformat.
width - the maximum length of any one line.
Returns:
a new String, reformatted as needed.

escapeForXML

public static final java.lang.String escapeForXML(java.lang.String string)
Escapes all necessary characters in the String so that it can be used in an XML doc.

Parameters:
string - the string to escape.
Returns:
the string with appropriate characters escaped.

unescapeFromXML

public static final java.lang.String unescapeFromXML(java.lang.String string)
Unescapes the String by converting XML escape sequences back into normal characters.

Parameters:
string - the string to unescape.
Returns:
the string with appropriate characters unescaped.

zeroPadString

public static final java.lang.String zeroPadString(java.lang.String string,
                                                   int length)
Pads the supplied String with 0's to the specified length and returns the result as a new String. For example, if the initial String is "9999" and the desired length is 8, the result would be "00009999". This type of padding is useful for creating numerical values that need to be stored and sorted as character data. Note: the current implementation of this method allows for a maximum length of 16.

Parameters:
string - the original String to pad.
length - the desired length of the new padded String.
Returns:
a new String padded with the required number of 0's.

dateToMillis

public static final java.lang.String dateToMillis(java.util.Date date)
Formats a Date as a fifteen character long String made up of the Date's padded millisecond value.

Returns:
a Date encoded as a String.

urlEncode

protected static java.lang.String urlEncode(byte[] rs)
java.net.URLEncoder.encode() method in JDK < 1.4 is buggy. This duplicates its functionality.


urlDecode

protected static java.lang.String urlDecode(byte[] bytes)
                                     throws java.io.UnsupportedEncodingException,
                                            java.lang.IllegalArgumentException
URL encoder does not handle all characters correctly. See Bug parade, bug #4257115 for more information.

Thanks to CJB for this fix.

Throws:
java.io.UnsupportedEncodingException
java.lang.IllegalArgumentException

urlEncodeUTF8

public static java.lang.String urlEncodeUTF8(java.lang.String text)
As java.net.URLEncoder class, but this does it in UTF8 character set.


urlDecodeUTF8

public static java.lang.String urlDecodeUTF8(java.lang.String utf8)
As java.net.URLDecoder class, but for UTF-8 strings.


urlEncode

public static java.lang.String urlEncode(java.lang.String data,
                                         java.lang.String encoding)
Provides encoded version of string depending on encoding. Encoding may be UTF-8 or ISO-8859-1 (default).

This implementation is the same as in FileSystemProvider.mangleName().


urlDecode

public static java.lang.String urlDecode(java.lang.String data,
                                         java.lang.String encoding)
                                  throws java.io.UnsupportedEncodingException,
                                         java.lang.IllegalArgumentException
Provides decoded version of string depending on encoding. Encoding may be UTF-8 or ISO-8859-1 (default).

This implementation is the same as in FileSystemProvider.unmangleName().

Throws:
java.io.UnsupportedEncodingException
java.lang.IllegalArgumentException

replaceEntities

public static java.lang.String replaceEntities(java.lang.String src)
Replaces the relevant entities inside the String. All >, < and " are replaced by their respective names.

Since:
1.6.1

replaceString

public static java.lang.String replaceString(java.lang.String orig,
                                             java.lang.String src,
                                             java.lang.String dest)
Replaces a string with another string.

Parameters:
orig - Original string. Null is safe.
src - The string to find.
dest - The string to replace src with.

replaceString

public static java.lang.String replaceString(java.lang.String orig,
                                             int start,
                                             int end,
                                             java.lang.String text)
Replaces a part of a string with a new String.

Parameters:
start - Where in the original string the replacing should start.
end - Where the replacing should end.
orig - Original string. Null is safe.
text - The new text to insert into the string.

parseIntParameter

public static int parseIntParameter(java.lang.String value,
                                    int defvalue)
Parses an integer parameter, returning a default value if the value is null or a non-number.


getBooleanProperty

public static boolean getBooleanProperty(java.util.Properties props,
                                         java.lang.String key,
                                         boolean defval)
Gets a boolean property from a standard Properties list. Returns the default value, in case the key has not been set.

The possible values for the property are "true"/"false", "yes"/"no", or "on"/"off". Any value not recognized is always defined as "false".

Parameters:
props - A list of properties to search.
key - The property key.
defval - The default value to return.
Returns:
True, if the property "key" was set to "true", "on", or "yes".
Since:
2.0.11

isPositive

public static boolean isPositive(java.lang.String val)
Returns true, if the string "val" denotes a positive string. Allowed values are "yes", "on", and "true". Comparison is case-insensitive. Null values are safe.

Parameters:
val - Value to check.
Returns:
True, if val is "true", "on", or "yes"; otherwise false.
Since:
2.0.26

normalizePostData

public static java.lang.String normalizePostData(java.lang.String postData)
Makes sure that the POSTed data conforms to certain rules. These rules are: The reason why we're using CRLF is that most browsers already return CRLF, since that is the closest thing to an HTTP standard.


getCharKind

private static int getCharKind(int c)

beautifyString

public static java.lang.String beautifyString(java.lang.String s)
Adds spaces in suitable locations of the input string. This is used to transform a WikiName into a more readable format.

Parameters:
s - String to be beautified.
Returns:
A beautified string.

isStringEmpty

public static boolean isStringEmpty(java.lang.String str)
Parameters:
str -
Returns:

removeDuplicate

public static java.lang.String removeDuplicate(java.lang.String original,
                                               char lookfor)
Removes all duplicates of specified char in given text.

Parameters:
lookfor - the char whose duplicates are to be removed.
Returns:
the updated text.

unquote

public static java.lang.String unquote(java.lang.String literal)
Removes all quotes at the beginning and ending of given string.

Returns:
the unquoted version of such content.

unescapeHTML

public static java.lang.String unescapeHTML(java.lang.String htmlContents)
Substitutes most common HTML escape sequences with their unescaped version.
i.e. it takes all "&lt;" and replaces them with "<".

Returns:
the unescaped version of such contents.

quote

public static java.lang.String quote(java.lang.String literal)
Puts a quote before and after given string.

Returns:
the quoted version of such content.

replace

public static java.lang.String replace(java.lang.CharSequence htmlContent,
                                       java.lang.String tagToBeReplaced,
                                       java.lang.String replacingTag)
Replaces a sequence with another inside a text content.

Parameters:
htmlContent - the text content to update.
tagToBeReplaced - the undesired occurrence.
replacingTag - the desired one.
Returns:
the updated version of the original text.