|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.basex.util.Token
public final class Token
This class provides convenience operations for handling so-called 'tokens'. Tokens in this project are nothing other than UTF8-encoded strings, stored in a byte array. Note that, to guarantee a consistent string representation, all string conversions should be done via the methods of this class.
Field Summary | |
---|---|
static byte[] |
AMP
Ampersand Entity. |
static byte[] |
APOS
Apostrophe Entity. |
static byte[] |
EMPTY
Empty token. |
static byte[] |
FALSE
False token. |
static byte[] |
GT
GreaterThan Entity. |
static byte[] |
INF
Positive infinity. |
static DecimalFormatSymbols |
LOC
US charset. |
static byte[] |
LT
LessThan Entity. |
static int |
MAXCATS
Maximum number of categories in statistics. |
static int |
MAXLEN
Maximum length for hash calculation and index terms. |
static byte[] |
MZERO
Minus zero token. |
static byte[] |
NAN
Not a number (NaN). |
static byte[] |
NINF
Negative infinity. |
static byte[] |
ONE
One token. |
static byte[] |
QU
Quote Entity. |
static byte[] |
SPACE
Space token. |
static byte[] |
TRUE
True token. |
static String |
UTF16
UTF16 encoding string. |
static String |
UTF16BE
UTF16 encoding string (big-endian). |
static String |
UTF16LE
UTF16 encoding string (little-endian). |
static String |
UTF8
UTF8 encoding string. |
static String |
UTF82
UTF8 encoding string (variant). |
static byte[] |
XML
XML Token. |
static byte[] |
XMLNS
XMLNS Token. |
static byte[] |
XMLNSC
XMLNS Token with colon. |
static byte[] |
ZERO
Zero token. |
Method Summary | |
---|---|
static boolean |
ascii(byte[] text)
Checks if the specified token only consists of ASCII characters. |
static byte[] |
chop(byte[] t,
int l)
Chops a token to the specified length and adds dots. |
static byte[] |
chopNumber(byte[] t)
Finishes the numeric token, removing trailing zeroes. |
static int |
cl(byte v)
Returns the expected codepoint length of the specified byte. |
static byte[] |
concat(byte[]... t)
Concatenates the specified tokens. |
static boolean |
contains(byte[] tok,
byte[] sub)
Checks if the first token contains the second token. |
static boolean |
contains(byte[] tok,
int c)
Checks if the first token contains the specified character. |
static int |
cp(byte[] t,
int p)
Returns the codepoint (unicode value) of the specified token, starting at the specified position. |
static byte[] |
delete(byte[] t,
int c)
Deletes the specified character out of the token. |
static int |
diff(byte[] tok,
byte[] tok2)
Calculates the difference of two character arrays. |
static int |
diff(byte c1,
byte c2)
Calculates the difference of two characters. |
static boolean |
digit(int c)
Checks if the specified character is a digit (0 - 9). |
static String |
enc(String enc)
Returns a unified representation of the specified encoding. |
static boolean |
endsWith(byte[] tok,
byte[] sub)
Checks if the first token ends with the second token. |
static boolean |
endsWith(byte[] tok,
int c)
Checks if the first token ends with the specified character. |
static boolean |
eq(byte[] tok,
byte[] tok2)
Compares two character arrays for equality. |
static boolean |
ftChar(int ch)
Returns true if the specified character is a full-text letter or digit. |
static int |
hash(byte[] tok)
Calculates a hash code for the specified token. |
static int |
indexOf(byte[] tok,
byte[] sub)
Returns the position of the specified token or -1. |
static int |
indexOf(byte[] tok,
byte[] sub,
int p)
Returns the position of the specified token or -1. |
static int |
indexOf(byte[] tok,
int c)
Returns the position of the specified character or -1. |
static boolean |
isValidUTF8(byte[] text)
Checks if the specified UTF-8 characters are valid. |
static byte[] |
lc(byte[] t)
Converts the specified token to lower case. |
static int |
lc(int ch)
Converts a character to lower case. |
static int |
len(byte[] text)
Returns the token length. |
static boolean |
letter(int c)
Checks if the specified character is a computer letter (A - Z, a - z, _). |
static boolean |
letterOrDigit(int c)
Checks if the specified character is a computer letter or digit. |
static byte[] |
ln(byte[] name)
Returns the local name of the specified name. |
static String |
md5(String pw)
Returns an MD5 hash. |
static byte[] |
norm(byte[] tok)
Normalizes all whitespace occurrences in the specified token. |
static int |
norm(int ch)
Returns a normalized character without diacritics. |
static int |
numDigits(int x)
Checks number of digits of the specified integer. |
static byte[] |
pref(byte[] name)
Returns the prefix of the specified token. |
static byte[] |
removeNonUTF8(byte[] text,
boolean chop)
Removes invalid characters from the UTF-8 sequence. |
static byte[] |
replace(byte[] t,
int s,
int r)
Replaces the specified character and returns the result token. |
static byte[][] |
split(byte[] tok,
int sep)
Splits the token at all occurrences of the specified separation character and returns an array with all tokens. |
static boolean |
startsWith(byte[] tok,
byte[] sub)
Checks if the first token starts with the second token. |
static boolean |
startsWith(byte[] tok,
int c)
Checks if the first token starts with the specified character. |
static String |
string(byte[] text)
Returns the specified token as string. |
static String |
string(byte[] text,
int s,
int l)
Returns the specified token as string. |
static byte[] |
substring(byte[] tok,
int s)
Returns a substring of the specified token. |
static byte[] |
substring(byte[] tok,
int s,
int e)
Returns a substring of the specified token. |
static double |
toDouble(byte[] to)
Converts the specified token into a double value. |
static int |
toInt(byte[] to)
Converts the specified token into an integer value. |
static int |
toInt(byte[] to,
int ts,
int te)
Converts the specified token into an integer value. |
static int |
toInt(String to)
Converts the specified string into an integer value. |
static byte[] |
token(boolean b)
Creates a byte array representation of the specified boolean value. |
static byte[] |
token(double d)
Creates a byte array representation from the specified double value; inspired by Xavier Franc's Qizx. |
static byte[] |
token(float f)
Creates a byte array representation from the specified float value. |
static byte[] |
token(int i)
Creates a byte array representation of the specified integer value. |
static byte[] |
token(long i)
Creates a byte array representation from the specified long value, using Java's standard method. |
static byte[] |
token(String s)
Converts a string to a byte array. |
static long |
toLong(byte[] to)
Converts the specified token into a long value. |
static long |
toLong(byte[] to,
int ts,
int te)
Converts the specified token into a long value. |
static long |
toLong(String to)
Converts the specified string into a long value. |
static int |
toSimpleInt(byte[] to)
Converts the specified token into a positive integer value. |
static byte[] |
trim(byte[] t)
Removes leading and trailing whitespaces from the specified token. |
static byte[] |
uc(byte[] t)
Converts the specified token to upper case. |
static int |
uc(int ch)
Converts a character to upper case. |
static String |
utf8(byte[] text,
int s,
int l)
Returns a string of the specified UTF8 token. |
static byte[] |
utf8(byte[] s,
String enc)
Converts a token from the input encoding to UTF8. |
static boolean |
ws(byte[] tok)
Checks if the specified token has only whitespaces. |
static boolean |
ws(int ch)
Checks if the specified character is a whitespace. |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final int MAXLEN
public static final int MAXCATS
public static final byte[] EMPTY
public static final byte[] XML
public static final byte[] XMLNS
public static final byte[] XMLNSC
public static final byte[] TRUE
public static final byte[] FALSE
public static final byte[] NAN
public static final byte[] INF
public static final byte[] NINF
public static final byte[] SPACE
public static final byte[] ZERO
public static final byte[] MZERO
public static final byte[] ONE
public static final byte[] QU
public static final byte[] AMP
public static final byte[] APOS
public static final byte[] GT
public static final byte[] LT
public static final String UTF8
public static final String UTF82
public static final String UTF16
public static final String UTF16LE
public static final String UTF16BE
public static final DecimalFormatSymbols LOC
Method Detail |
---|
public static String string(byte[] text)
text
- token
public static String string(byte[] text, int s, int l)
text - token
s - start position
l - length
public static String utf8(byte[] text, int s, int l)
text - token
s - start position
l - length
public static boolean ascii(byte[] text)
text
- token
public static boolean isValidUTF8(byte[] text)
text
- UTF-8 characters
public static byte[] removeNonUTF8(byte[] text, boolean chop)
text - the UTF-8 sequence to remove the invalid chars from
chop - if true, all leading and trailing whitespaces are removed
public static byte[] token(String s)
s
- string to be converted
public static byte[] utf8(byte[] s, String enc)
s - token to be converted
enc - input encoding
public static String enc(String enc)
enc
- input encoding
public static int cp(byte[] t, int p)
t - token
p - character position
public static int cl(byte v)
v
- first character byte
public static int len(byte[] text)
text
- token
public static byte[] token(boolean b)
b
- boolean value to be converted
public static byte[] token(int i)
i
- int value to be converted
public static int numDigits(int x)
x
- number to be checked
public static byte[] token(long i)
i
- long value to be converted
public static byte[] token(double d)
d
- double value to be converted
public static byte[] token(float f)
f
- float value to be converted
public static byte[] chopNumber(byte[] t)
t
- token to be modified
public static double toDouble(byte[] to)
Double.NaN
is returned if the input is invalid.
to
- character array to be converted
public static long toLong(String to)
Long.MIN_VALUE
is returned when the input is invalid.
to
- string to be converted
public static long toLong(byte[] to)
Long.MIN_VALUE
is returned when the input is invalid.
to
- character array to be converted
public static long toLong(byte[] to, int ts, int te)
Long.MIN_VALUE
is returned when the input is invalid.
to - character array to be converted
ts - first byte to be parsed
te - last byte to be parsed (exclusive)
public static int toInt(String to)
Integer.MIN_VALUE
is returned when the input is invalid.
to
- string to be converted
public static int toInt(byte[] to)
Integer.MIN_VALUE
is returned when the input is invalid.
to
- character array to be converted
public static int toInt(byte[] to, int ts, int te)
Integer.MIN_VALUE
is returned when the input is invalid.
to - character array to be converted
ts - first byte to be parsed
te - last byte to be parsed (exclusive)
public static int toSimpleInt(byte[] to)
Integer.MIN_VALUE
is returned if non-digits are found
or if the input is longer than nine characters.
to
- character array to be converted
public static int hash(byte[] tok)
tok
- specified token
public static boolean eq(byte[] tok, byte[] tok2)
tok - token to be compared
tok2 - second token to be compared
public static int diff(byte[] tok, byte[] tok2)
tok - token to be compared
tok2 - second token to be compared
public static int diff(byte c1, byte c2)
c1 - first character to be compared
c2 - second character to be compared
public static boolean contains(byte[] tok, byte[] sub)
tok - first token
sub - second token
public static boolean contains(byte[] tok, int c)
tok - first token
c - character
public static int indexOf(byte[] tok, int c)
tok - first token
c - character
public static int indexOf(byte[] tok, byte[] sub)
tok - first token
sub - second token
public static int indexOf(byte[] tok, byte[] sub, int p)
tok - first token
sub - second token
p - start position
public static boolean startsWith(byte[] tok, int c)
tok - first token
c - character
public static boolean startsWith(byte[] tok, byte[] sub)
tok - first token
sub - second token
public static boolean endsWith(byte[] tok, int c)
tok - first token
c - character
public static boolean endsWith(byte[] tok, byte[] sub)
tok - first token
sub - second token
public static byte[] substring(byte[] tok, int s)
tok - token
s - start position
public static byte[] substring(byte[] tok, int s, int e)
tok - token
s - start position
e - end position
public static byte[][] split(byte[] tok, int sep)
tok - token to be split
sep - separation character
public static boolean ws(byte[] tok)
tok
- token
public static byte[] replace(byte[] t, int s, int r)
t - token to be checked
s - the character to be replaced
r - the new character
public static byte[] trim(byte[] t)
t
- token to be trimmed
public static byte[] chop(byte[] t, int l)
t - token to be chopped
l - maximum length
public static byte[] concat(byte[]... t)
t
- tokens
public static byte[] delete(byte[] t, int c)
t - token to be checked
c - character to be removed
public static byte[] norm(byte[] tok)
tok
- token
public static boolean ws(int ch)
ch
- the letter to be checked
public static boolean letter(int c)
c
- the letter to be checked
public static boolean digit(int c)
c
- the letter to be checked
public static boolean letterOrDigit(int c)
c
- the letter to be checked
public static boolean ftChar(int ch)
ch
- character to be tested
public static byte[] uc(byte[] t)
t
- token to be converted
public static int uc(int ch)
ch
- character to be converted
public static byte[] lc(byte[] t)
t
- token to be converted
public static int lc(int ch)
ch
- character to be converted
public static byte[] pref(byte[] name)
name
- name
public static String md5(String pw)
pw
- String
public static byte[] ln(byte[] name)
name
- name
public static int norm(int ch)
ch
- character to be converted
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |