|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.basex.util.Token
public final class Token
This class provides convenience operations for handling so-called 'Tokens'. Tokens in BaseX are nothing other than UTF8-encoded strings, stored in a byte array. Note that, to guarantee a consistent string representation, all string conversions should be done via the methods of this class.
Field Summary | |
---|---|
static byte[] |
AMP
Ampersand Entity. |
static byte[] |
APOS
Apostrophe Entity. |
static byte[] |
EMPTY
Empty token. |
static byte[] |
FALSE
False token. |
static byte[] |
GT
GreaterThan Entity. |
static byte[] |
INF
Infinity. |
static java.text.DecimalFormatSymbols |
LOC
US charset. |
static byte[] |
LT
LessThan Entity. |
static int |
MAXLEN
Maximum length for hash calculation and index terms. |
static byte[] |
MZERO
Minus zero token. |
static byte[] |
NAN
Not a number (NaN). |
static byte[] |
NINF
Negative infinity. |
static byte[] |
NORM
Normalize special characters. |
static byte[] |
NULL
Dots. |
static byte[] |
ONE
One token. |
static byte[] |
QU
Quote Entity. |
static byte[] |
SPACE
Space token. |
static byte[] |
TRUE
True token. |
static java.lang.String |
UTF16BE
UTF16 encoding string (big endian). |
static java.lang.String |
UTF16LE
UTF16 encoding string (little endian). |
static java.lang.String |
UTF8
UTF8 encoding string. |
static java.lang.String |
UTF82
UTF8 encoding string (variant). |
static byte[] |
XML
XML Token. |
static byte[] |
XMLNS
XMLNS Token. |
static byte[] |
XMLNSC
XMLNS Token with colon. |
static byte[] |
ZERO
Zero token. |
Method Summary | |
---|---|
static boolean |
ascii(byte[] text)
Checks if the specified token only consists of ASCII characters. |
static byte[] |
chopNumber(byte[] t)
Finishes the numeric token, removing trailing zeroes. |
static int |
cl(byte v)
Returns the expected codepoint length of the specified byte. |
static byte[] |
concat(byte[]... t)
Concatenates the specified tokens. |
static boolean |
contains(byte[] tok,
byte[] sub)
Checks if the first token contains the second token. |
static boolean |
contains(byte[] tok,
int c)
Checks if the first token contains the specified character. |
static boolean |
containslc(byte[] tok,
byte[] sub)
Checks if the first token contains the second token in lowercase. |
static int |
cp(byte[] t,
int p)
Returns the codepoint (unicode value) of the specified token, starting at the specified position. |
static byte[] |
dc(byte[] t)
Removes diacritics from the specified token. |
static byte[] |
delete(byte[] t,
byte[] c)
Deletes the specified characters out of the token. |
static byte[] |
delete(byte[] t,
int c)
Deletes the specified character out of the token. |
static int |
diff(byte[] tok,
byte[] tok2)
Calculates the difference of two character arrays. |
static int |
diff(int tok,
int tok2)
Calculates the difference of two characters. |
static boolean |
digit(int c)
Checks if the specified character is a digit. |
static boolean |
endsWith(byte[] tok,
byte[] sub)
Checks if the first token ends with the second token. |
static boolean |
endsWith(byte[] tok,
int c)
Checks if the first token ends with the specified character. |
static boolean |
eq(byte[] tok,
byte[] tok2)
Compares two character arrays for equality. |
static boolean |
eq(byte tok,
byte tok2)
Compares two bytes for equality. |
static int |
hash(byte[] tok)
Calculates a hash code for the specified token. |
static int |
indexOf(byte[] tok,
byte[] sub)
Returns the position of the specified token or -1. |
static int |
indexOf(byte[] tok,
byte[] sub,
int p)
Returns the position of the specified token or -1. |
static int |
indexOf(byte[] tok,
int c)
Returns the position of the specified character or -1. |
static byte[] |
lc(byte[] t)
Converts the specified token to lower case. |
static int |
lc(int ch)
Converts a character to lower case. |
static int |
len(byte[] text)
Returns the token length. |
static boolean |
letter(int c)
Checks if the specified character is a letter. |
static boolean |
letterOrDigit(int c)
Checks if the specified character is a letter or digit. |
static byte[] |
ln(byte[] name)
Returns the local name of the specified name. |
static byte[] |
norm(byte[] tok)
Normalizes all whitespace occurrences in the specified token. |
static int |
numDigits(int x)
Checks number of digits of the specified integer. |
static byte[] |
pre(byte[] name)
Returns the prefix of the specified token. |
static byte[] |
replace(byte[] t,
int s,
int r)
Replaces the specified character and returns the result token. |
static byte[][] |
split(byte[] tok,
int sep)
Splits the token at all occurrences of the separation character and returns an array with all tokens. |
static boolean |
startsWith(byte[] tok,
byte[] sub)
Checks if the first token starts with the second token. |
static boolean |
startsWith(byte[] tok,
int c)
Checks if the first token starts with the specified character. |
static java.lang.String |
string(byte[] text)
Returns the specified token as string. |
static java.lang.String |
string(byte[] text,
int s,
int l)
Returns the specified token as string. |
static byte[] |
substring(byte[] tok,
int s)
Returns a subtoken of the specified token. |
static byte[] |
substring(byte[] tok,
int s,
int e)
Returns a substring of the specified token. |
static double |
toDouble(byte[] to)
Converts the specified token into a double value. |
static int |
toInt(byte[] to)
Converts the specified token into an integer value. |
static int |
toInt(byte[] to,
int ts,
int te)
Converts the specified token into an integer value. |
static int |
toInt(java.lang.String to)
Converts the specified string into an integer value. |
static byte[] |
token(boolean b)
Creates a byte array representation of the specified boolean value. |
static byte[] |
token(double d)
Creates a byte array representation from the specified double value; inspired by Xavier Franc's Qizx. |
static byte[] |
token(float f)
Creates a byte array representation from the specified float value. |
static byte[] |
token(int i)
Creates a byte array representation of the specified integer value. |
static byte[] |
token(long i)
Creates a byte array representation from the specified long value, using Java's standard method. |
static byte[] |
token(java.lang.String s)
Converts a string to a byte array. |
static long |
toLong(byte[] to)
Converts the specified token into a long value. |
static long |
toLong(byte[] to,
int ts,
int te)
Converts the specified token into a long value. |
static long |
toLong(java.lang.String to)
Converts the specified string into a long value. |
static int |
toSimpleInt(byte[] to)
Converts the specified token into a positive integer value. |
static byte[] |
translate(byte[] tok,
byte[] srch,
byte[] rep)
Performs a translation on the specified token. |
static byte[] |
trim(byte[] t)
Removes leading and trailing whitespaces from the specified token. |
static byte[] |
uc(byte[] t)
Converts the specified token to upper case. |
static int |
uc(int ch)
Converts a character to upper case. |
static byte[] |
utf8(byte[] s,
java.lang.String enc)
Converts a token from the input encoding to UTF8. |
static boolean |
ws(byte[] tok)
Checks if the specified token has only whitespaces. |
static boolean |
ws(int ch)
Checks if the specified character is a whitespace. |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final int MAXLEN
public static final byte[] XML
public static final byte[] XMLNS
public static final byte[] XMLNSC
public static final byte[] TRUE
public static final byte[] FALSE
public static final byte[] NAN
public static final byte[] INF
public static final byte[] NINF
public static final byte[] NULL
public static final byte[] EMPTY
public static final byte[] SPACE
public static final byte[] ZERO
public static final byte[] MZERO
public static final byte[] ONE
public static final byte[] QU
public static final byte[] AMP
public static final byte[] APOS
public static final byte[] GT
public static final byte[] LT
public static final java.lang.String UTF8
public static final java.lang.String UTF82
public static final java.lang.String UTF16LE
public static final java.lang.String UTF16BE
public static final java.text.DecimalFormatSymbols LOC
public static final byte[] NORM
Method Detail |
---|
public static java.lang.String string(byte[] text)
text
- token
public static java.lang.String string(byte[] text, int s, int l)
text
- token
s
- start position
l
- length
public static boolean ascii(byte[] text)
text
- token
public static byte[] token(java.lang.String s)
s
- string to be converted
public static byte[] utf8(byte[] s, java.lang.String enc)
s
- token to be converted
enc
- input encoding
public static int cp(byte[] t, int p)
t
- token
p
- character position
public static int cl(byte v)
v
- first character byte
public static int len(byte[] text)
text
- token
public static byte[] token(boolean b)
b
- boolean value to be converted
public static byte[] token(int i)
i
- int value to be converted
public static int numDigits(int x)
x
- number to be checked
public static byte[] token(long i)
i
- long value to be converted
public static byte[] token(double d)
d
- double value to be converted
public static byte[] token(float f)
f
- float value to be converted
public static byte[] chopNumber(byte[] t)
t
- token to be modified
public static double toDouble(byte[] to)
Double.NaN is returned if the input is invalid.
to
- character array to be converted
public static long toLong(java.lang.String to)
Long.MIN_VALUE is returned when the input is invalid.
to
- string to be converted
public static long toLong(byte[] to)
Long.MIN_VALUE is returned when the input is invalid.
to
- character array to be converted
public static long toLong(byte[] to, int ts, int te)
Long.MIN_VALUE is returned when the input is invalid.
to
- character array to be converted
ts
- first byte to be parsed
te
- last byte to be parsed (exclusive)
public static int toInt(java.lang.String to)
Integer.MIN_VALUE is returned when the input is invalid.
to
- string to be converted
public static int toInt(byte[] to)
Integer.MIN_VALUE is returned when the input is invalid.
to
- character array to be converted
public static int toInt(byte[] to, int ts, int te)
Integer.MIN_VALUE is returned when the input is invalid.
to
- character array to be converted
ts
- first byte to be parsed
te
- last byte to be parsed (exclusive)
public static int toSimpleInt(byte[] to)
Integer.MIN_VALUE is returned if non-digits are found or if the input is longer than nine characters.
to
- character array to be converted
public static int hash(byte[] tok)
tok
- specified token
public static boolean eq(byte[] tok, byte[] tok2)
tok
- token to be compared
tok2
- second token to be compared
public static boolean eq(byte tok, byte tok2)
tok
- token to be compared
tok2
- second token to be compared
public static int diff(byte[] tok, byte[] tok2)
tok
- token to be compared
tok2
- second token to be compared
public static int diff(int tok, int tok2)
tok
- token to be compared
tok2
- second token to be compared
public static boolean containslc(byte[] tok, byte[] sub)
tok
- first token
sub
- second token
public static boolean contains(byte[] tok, byte[] sub)
tok
- first token
sub
- second token
public static boolean contains(byte[] tok, int c)
tok
- first token
c
- character
public static int indexOf(byte[] tok, int c)
tok
- first token
c
- character
public static int indexOf(byte[] tok, byte[] sub)
tok
- first token
sub
- second token
public static int indexOf(byte[] tok, byte[] sub, int p)
tok
- first token
sub
- second token
p
- start position
public static boolean startsWith(byte[] tok, int c)
tok
- first token
c
- character
public static boolean startsWith(byte[] tok, byte[] sub)
tok
- first token
sub
- second token
public static boolean endsWith(byte[] tok, int c)
tok
- first token
c
- character
public static boolean endsWith(byte[] tok, byte[] sub)
tok
- first token
sub
- second token
public static byte[] substring(byte[] tok, int s)
tok
- token
s
- start position
public static byte[] substring(byte[] tok, int s, int e)
tok
- token
s
- start position
e
- end position
public static byte[][] split(byte[] tok, int sep)
tok
- token to be split
sep
- separation character
public static boolean ws(byte[] tok)
tok
- token
public static byte[] replace(byte[] t, int s, int r)
t
- token to be checked
s
- the character to be replaced
r
- the new character
public static byte[] trim(byte[] t)
t
- token to be checked
public static byte[] concat(byte[]... t)
t
- tokens
public static byte[] delete(byte[] t, int c)
t
- token to be checked
c
- character to be removed
public static byte[] delete(byte[] t, byte[] c)
t
- token to be checked
c
- characters to be removed
public static byte[] norm(byte[] tok)
tok
- token
public static byte[] translate(byte[] tok, byte[] srch, byte[] rep)
tok
- token
srch
- characters to be found
rep
- characters to be replaced
public static boolean ws(int ch)
ch
- the letter to be checked
public static boolean letter(int c)
c
- the letter to be checked
public static boolean digit(int c)
c
- the letter to be checked
public static boolean letterOrDigit(int c)
c
- the letter to be checked
public static byte[] uc(byte[] t)
t
- token to be converted
public static int uc(int ch)
ch
- character to be converted
public static byte[] lc(byte[] t)
t
- token to be converted
public static int lc(int ch)
ch
- character to be converted
public static byte[] dc(byte[] t)
t
- token to be converted
public static byte[] pre(byte[] name)
name
- name
public static byte[] ln(byte[] name)
name
- name
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |