public final class Token extends Object
This class provides convenience operations for handling 'Tokens'. A token is a UTF-8 encoded string. It is represented as a byte array.
In order to ensure a consistent representation of tokens in the project, all string conversions should be done via the methods of this class.
Modifier and Type | Field and Description |
---|---|
static byte[] |
COLON
Colon.
|
static Comparator<byte[]> |
COMP
Comparator for byte arrays.
|
static byte[] |
EMPTY
Empty token.
|
static byte[] |
FALSE
Token 'false'.
|
static byte[] |
HEX
Hex codes.
|
static byte[] |
INF
Token 'INF'.
|
static Comparator<byte[]> |
LC_COMP
Case-insensitive comparator for byte arrays.
|
static byte[] |
MINLONG
Minimum long value.
|
static byte[] |
NAN
Token 'NaN'.
|
static byte[] |
NINF
Token '-INF'.
|
static byte[] |
NULL
Token 'null'.
|
static byte[] |
ONE
Number '1'.
|
static byte[] |
SLASH
Slash.
|
static byte[] |
SPACE
Space.
|
static byte[] |
TRUE
Token 'true'.
|
static byte[] |
XML
XML token.
|
static byte[] |
XMLC
XML token with colon.
|
static byte[] |
XMLNS
XMLNS token.
|
static byte[] |
XMLNSC
XMLNS token with colon.
|
static byte[] |
ZERO
Number '0'.
|
Modifier and Type | Method and Description |
---|---|
static boolean |
ascii(byte[] token)
Checks if the specified token only consists of ASCII characters.
|
static byte[] |
chop(byte[] token,
int max)
Chops a token to the specified length and adds dots.
|
static byte[] |
chopNumber(byte[] token)
Finishes the numeric token, removing trailing zeroes.
|
static int |
cl(byte cp)
Returns the length of the specified UTF8 byte.
|
static int |
cl(byte[] token,
int pos)
Returns the length of a UTF8 character at the specified position.
|
static byte[] |
concat(byte[] token1,
byte[] token2)
Concatenates two tokens.
|
static byte[] |
concat(byte[] token1,
byte[] token2,
byte[] token3)
Concatenates three tokens.
|
static boolean |
contains(byte[] token,
byte[] sub)
Checks if the first token contains the second token.
|
static boolean |
contains(byte[] token,
byte[] sub,
int pos)
Checks if the first token contains the second token.
|
static boolean |
contains(byte[] token,
int ch)
Checks if the first token contains the specified character.
|
static int |
cp(byte[] token,
int pos)
Returns the codepoint (unicode value) of the specified token, starting at
the specified position.
|
static int[] |
cps(byte[] token)
Converts a token to a sequence of codepoints.
|
static byte[] |
delete(byte[] token,
int ch)
Deletes a character from the token.
|
static int |
diff(byte[] token,
byte[] compare)
Compares two tokens lexicographically.
|
static boolean |
digit(int ch)
Checks if the specified character is a digit (0 - 9).
|
static boolean |
endsWith(byte[] token,
byte[] sub)
Checks if the first token ends with the second token.
|
static boolean |
endsWith(byte[] token,
int ch)
Checks if the first token ends with the specified character.
|
static boolean |
eq(byte[] token,
byte[]... tokens)
Compares several tokens for equality.
|
static boolean |
eq(byte[] token1,
byte[] token2)
Compares two tokens for equality.
|
static byte[] |
escape(byte[] token)
Escapes the specified token.
|
static int |
hash(byte[] token)
Calculates a hash code for the specified token.
|
static byte[] |
hex(byte[] value,
boolean uc)
Returns a hex representation of the specified byte array.
|
static int |
indexOf(byte[] token,
byte[] sub)
Returns the position of the specified token or -1.
|
static int |
indexOf(byte[] token,
byte[] sub,
int pos)
Returns the position of the specified token or -1.
|
static int |
indexOf(byte[] token,
int ch)
Returns the position of the specified character or -1.
|
static int |
lastIndexOf(byte[] token,
int ch)
Returns the last position of the specified character or -1.
|
static byte[] |
lc(byte[] token)
Converts the specified token to lower case.
|
static int |
lc(int ch)
Converts a character to lower case.
|
static int |
length(byte[] token)
Returns the number of codepoints in the token.
|
static boolean |
letter(int ch)
Checks if the specified character is a computer letter (A - Z, a - z, _).
|
static boolean |
letterOrDigit(int ch)
Checks if the specified character is a computer letter or digit.
|
static byte[] |
local(byte[] name)
Returns the local name of the specified name.
|
static byte[] |
max(byte[] token,
byte[] compare)
Returns the bigger token.
|
static byte[] |
min(byte[] token,
byte[] compare)
Returns the smaller token.
|
static byte[] |
normalize(byte[] token)
Normalizes all whitespace occurrences in the specified token.
|
static int |
numDigits(int integer)
Returns the number of digits of the specified integer.
|
static byte[] |
prefix(byte[] name)
Returns the prefix of the specified token.
|
static byte[] |
replace(byte[] token,
int search,
int replace)
Replaces the specified character and returns the result token.
|
static byte[][] |
split(byte[] token,
int sep)
Splits a token around matches of the given separator.
|
static boolean |
startsWith(byte[] token,
byte[] sub)
Checks if the first token starts with the second token.
|
static boolean |
startsWith(byte[] token,
byte[] sub,
int pos)
Checks if the first token starts with the second token.
|
static boolean |
startsWith(byte[] token,
int ch)
Checks if the first token starts with the specified character.
|
static String |
string(byte[] token)
Returns the specified token as string.
|
static String |
string(byte[] token,
int start,
int length)
Returns the specified token as string.
|
static byte[] |
substring(byte[] token,
int start)
Returns a substring of the specified token.
|
static byte[] |
substring(byte[] token,
int start,
int end)
Returns a substring of the specified token.
|
static byte[] |
subtoken(byte[] token,
int start)
Returns a partial token.
|
static byte[] |
subtoken(byte[] token,
int start,
int end)
Returns a partial token.
|
static double |
toDouble(byte[] token)
Converts the specified token into a double value.
|
static int |
toInt(byte[] token)
Converts the specified token into an integer value.
|
static byte[] |
token(boolean bool)
Creates a byte array representation of the specified boolean value.
|
static byte[] |
token(double dbl)
Creates a byte array representation from the specified double value;
inspired by Xavier Franc's Qizx/open processor.
|
static byte[] |
token(float flt)
Creates a byte array representation from the specified float value.
|
static byte[] |
token(int integer)
Creates a byte array representation of the specified integer value.
|
static byte[] |
token(long integer)
Creates a byte array representation from the specified long value,
using Java's standard method.
|
static byte[] |
token(String string)
Converts a string to a byte array.
|
static byte[][] |
tokens(String... strings)
Converts the specified strings to tokens.
|
static long |
toLong(byte[] token)
Converts the specified token into a long value.
|
static long |
toLong(byte[] token,
int start,
int end)
Converts the specified token into a long value.
|
static int |
toSimpleInt(byte[] token)
Converts the specified token into a positive integer value.
|
static byte[] |
trim(byte[] token)
Removes leading and trailing whitespaces from the specified token.
|
static byte[] |
uc(byte[] token)
Converts the specified token to upper case.
|
static int |
uc(int ch)
Converts a character to upper case.
|
static byte[] |
uri(byte[] token,
boolean iri)
Returns a URI encoded token.
|
static byte[] |
utf8(byte[] token,
String encoding)
Converts a token from the input encoding to UTF8.
|
static boolean |
ws(byte[] token)
Checks if the specified token has only whitespaces.
|
static boolean |
ws(int ch)
Checks if the specified character is a whitespace.
|
public static final byte[] EMPTY
public static final byte[] XML
public static final byte[] XMLC
public static final byte[] XMLNS
public static final byte[] XMLNSC
public static final byte[] TRUE
public static final byte[] FALSE
public static final byte[] NULL
public static final byte[] NAN
public static final byte[] INF
public static final byte[] NINF
public static final byte[] MINLONG
public static final byte[] SPACE
public static final byte[] ZERO
public static final byte[] ONE
public static final byte[] SLASH
public static final byte[] COLON
public static final byte[] HEX
public static final Comparator<byte[]> COMP
public static final Comparator<byte[]> LC_COMP
public static String string(byte[] token)
token
- tokenpublic static String string(byte[] token, int start, int length)
token
- tokenstart
- start positionlength
- lengthpublic static boolean ascii(byte[] token)
token
- tokenpublic static byte[] token(String string)
string
- string to be convertedpublic static byte[][] tokens(String... strings)
strings
- stringspublic static byte[] utf8(byte[] token, String encoding)
token
- token to be convertedencoding
- input encodingpublic static int cp(byte[] token, int pos)
token
- tokenpos
- character positionpublic static int cl(byte cp)
cp
- codepointpublic static int cl(byte[] token, int pos)
token
- tokenpos
- positionpublic static int[] cps(byte[] token)
token
- tokenpublic static int length(byte[] token)
token
- tokenpublic static byte[] token(boolean bool)
bool
- boolean value to be convertedpublic static byte[] token(int integer)
integer
- int value to be convertedpublic static int numDigits(int integer)
integer
- number to be checkedpublic static byte[] token(long integer)
integer
- value to be convertedpublic static byte[] token(double dbl)
dbl
- double value to be convertedpublic static byte[] token(float flt)
flt
- float value to be convertedpublic static byte[] chopNumber(byte[] token)
token
- token to be modifiedpublic static double toDouble(byte[] token)
token
- token to be converted
Double.NaN is returned if the input is invalid.
public static long toLong(byte[] token)
Long.MIN_VALUE
is returned if the input is invalid.
Note that this may also be the actual value (MINLONG).
token
- token to be convertedpublic static long toLong(byte[] token, int start, int end)
Long.MIN_VALUE
is returned if the input is invalid.
Note that this may also be the actual value (MINLONG).
token
- token to be convertedstart
- first byte to be parsedend
- last byte to be parsed - exclusivepublic static int toInt(byte[] token)
Integer.MIN_VALUE
is returned if the input is invalid.
token
- token to be convertedpublic static int toSimpleInt(byte[] token)
Integer.MIN_VALUE
is returned if non-digits are found
or if the input is longer than nine characters.
token
- token to be convertedpublic static int hash(byte[] token)
token
- specified tokenpublic static boolean eq(byte[] token1, byte[] token2)
token1
- first tokentoken2
- token to be comparedpublic static boolean eq(byte[] token, byte[]... tokens)
token
- tokentokens
- tokens to be comparedpublic static int diff(byte[] token, byte[] compare)
token
- first tokencompare
- token to be comparedpublic static byte[] min(byte[] token, byte[] compare)
token
- first tokencompare
- token to be comparedpublic static byte[] max(byte[] token, byte[] compare)
token
- first tokencompare
- token to be comparedpublic static boolean contains(byte[] token, byte[] sub)
token
- tokensub
- token to be foundpublic static boolean contains(byte[] token, byte[] sub, int pos)
token
- tokensub
- token to be foundpos
- start positionpublic static boolean contains(byte[] token, int ch)
token
- tokench
- character to be foundpublic static int indexOf(byte[] token, int ch)
token
- tokench
- character to be found-1
public static int lastIndexOf(byte[] token, int ch)
token
- tokench
- character to be found-1
public static int indexOf(byte[] token, byte[] sub)
token
- tokensub
- token to be found-1
public static int indexOf(byte[] token, byte[] sub, int pos)
token
- tokensub
- token to be foundpos
- start positionpublic static boolean startsWith(byte[] token, int ch)
token
- tokench
- character to be foundpublic static boolean startsWith(byte[] token, byte[] sub)
token
- tokensub
- token to be foundpublic static boolean startsWith(byte[] token, byte[] sub, int pos)
token
- tokensub
- token to be foundpos
- start positionpublic static boolean endsWith(byte[] token, int ch)
token
- tokench
- character to be found
public static boolean endsWith(byte[] token, byte[] sub)
token
- tokensub
- token to be foundpublic static byte[] substring(byte[] token, int start)
subtoken(byte[], int)
instead.token
- input tokenstart
- start positionpublic static byte[] substring(byte[] token, int start, int end)
subtoken(byte[], int, int)
instead.token
- input tokenstart
- start positionend
- end positionpublic static byte[] subtoken(byte[] token, int start)
token
- input tokenstart
- start positionpublic static byte[] subtoken(byte[] token, int start, int end)
token
- input textstart
- start positionend
- end positionpublic static byte[][] split(byte[] token, int sep)
token
- token to be splitsep
- separation characterpublic static boolean ws(byte[] token)
token
- tokenpublic static byte[] replace(byte[] token, int search, int replace)
token
- token to be checkedsearch
- the character to be replacedreplace
- the new characterpublic static byte[] trim(byte[] token)
token
- token to be trimmedpublic static byte[] chop(byte[] token, int max)
token
- token to be choppedmax
- maximum lengthpublic static byte[] concat(byte[] token1, byte[] token2)
token1
- first tokentoken2
- second tokenpublic static byte[] concat(byte[] token1, byte[] token2, byte[] token3)
TokenBuilder
instance can be used to
concatenate more than three tokens.token1
- first tokentoken2
- second tokentoken3
- third tokenpublic static byte[] delete(byte[] token, int ch)
token
- tokench
- character to be removedpublic static byte[] normalize(byte[] token)
token
- tokenpublic static boolean ws(int ch)
ch
- the letter to be checkedpublic static boolean letter(int ch)
ch
- the letter to be checkedpublic static boolean digit(int ch)
ch
- the letter to be checkedpublic static boolean letterOrDigit(int ch)
ch
- the letter to be checkedpublic static byte[] uc(byte[] token)
token
- token to be convertedpublic static int uc(int ch)
ch
- character to be convertedpublic static byte[] lc(byte[] token)
token
- token to be convertedpublic static int lc(int ch)
ch
- character to be convertedpublic static byte[] prefix(byte[] name)
name
- namepublic static byte[] local(byte[] name)
name
- namepublic static byte[] uri(byte[] token, boolean iri)
token
- tokeniri
- inputpublic static byte[] escape(byte[] token)
token
- tokenpublic static byte[] hex(byte[] value, boolean uc)
value
- values to be mappeduc
- upper case
Copyright © 2005–2015 BaseX Team. All rights reserved.