This class stores C/C++ style null terminated string. More...
#include <cel_string.h>
Classes | |
| class | Transform |
Public Types | |
| enum | UnicodeNormalizeForm { U_NFD = 0, U_NFC = 1, U_NFKD = 2, U_NFKC = 3 } |
Public Member Functions | |
| String () | |
| String (NULL_STRING null) | |
| String (const utf8s &inString, size_t length=STR_AUTOSIZE) | |
| String (const char *inString, size_t length=STR_AUTOSIZE) | |
| String (const std::string &inString, size_t length=STR_AUTOSIZE) | |
| String (const String &inString) | |
| String (const String &inString, size_t start, size_t length) | |
| String (const ReferableString *inString) | |
| String (const UChar2 *inString, size_t length=STR_AUTOSIZE) | |
| String (const UChar4 *inString, size_t length=STR_AUTOSIZE) | |
| String & | operator= (NULL_STRING null) |
| String & | operator= (const utf8s &inString) |
| String & | operator= (const char *inString) |
| String & | operator= (const String &inString) |
| String & | operator= (const UChar2 *inString) |
| String & | operator= (const UChar4 *inString) |
| void | clear () |
| void | swap (String &inString) |
| void | format (const char *format,...) |
| void | format (const wchar_t *format,...) |
| void | format_utf8 (const char *format,...) |
| String | substring (size_t start, size_t length=STR_AUTOSIZE) const |
| String | substringByChar (size_t startChar, size_t charCount=STR_AUTOSIZE) const |
| String | removeBoms () const |
| String | operator+ (const String &str) const |
| String & | operator+= (const String &str) |
| bool | isEmpty () const |
| int | compare (const String &str) const |
| int | compareI (const String &str) const |
| bool | operator== (const String &str) const |
| bool | operator< (const String &str) const |
| bool | operator> (const String &str) const |
| bool | operator<= (const String &str) const |
| bool | operator>= (const String &str) const |
| bool | operator!= (const String &str) const |
| UChar1 | operator[] (size_t n) const |
| UChar1 & | operator[] (size_t n) |
| UChar4 | operator() (size_t n) const |
| size_t | getNumOfChars () const |
| size_t | getLength () const |
| const UChar1 * | c_str () const |
| const utf8s | c_utf8str () const |
| size_t | getNthCharPos (size_t nCharPos) const |
| const UChar1 * | getNthCharPtr (size_t nCharPos) const |
| const utf8s | getNthCharUtf8Ptr (size_t nCharPos) const |
| UChar4 | getNthUnicodeChar (size_t nCharPos) const |
| int | toInt () const |
| size_t | toSizeT () const |
| uint64_t | toUInt64 () const |
| double | toDouble () const |
| const UChar1 * | getFirstCharPtr (UChar4 u) const |
| size_t | getCharPos (UChar4 u, size_t from=0) const |
| size_t | getFirstCharPos (UChar4 u) const |
| const UChar1 * | getLastCharPtr (UChar4 u) const |
| size_t | getLastCharPos (UChar4 u) const |
| const UChar1 * | pbrk (const String &str, size_t from=0) const |
| size_t | nbrk (const String &str, size_t from=0) const |
| size_t | getPatternPos (const String &str, size_t from=0) const |
| UChar1 * | allocate (size_t length) |
| String | trimHeadingSpaces () const |
| String | trimTrailingSpaces () const |
| String | trimSpaces () const |
| String | chomp () const |
| size_t | findPos (const String &inPattern) const |
| bool | startWith (const String &inPattern) const |
| bool | endWith (const String &inPattern) const |
| bool | split (const String &inPattern, SimpleArray< String > &outSubstrs, bool inRemoveEmptyStrings=false) const |
| bool | match (const String &inPattern, SimpleArray< String > &outMatches) const |
| AutoPtr< Region > | match (const String &inPattern) const |
| size_t | findOneOf (const UChar4 *inChars, size_t inCharCount) const |
| size_t | findOneOf (const String &inChars) const |
| const UChar1 * | findPtr (const String &inPattern) const |
| String | replace (const String &inReplacee, const String &inReplacer, u32 inFlags=0) const |
| String | erase (size_t inPos, size_t inLength) const |
| String | erase (const String &inPattern) const |
| void | serialize (Stream *inStream, size_t inLevel, Endian inEndian) const |
| void | deserialize (Stream *inStream, size_t inLevel, Endian inEndian) |
| const UChar1 * | begin () const |
| const UChar1 * | end () const |
| const UChar1 * | raw_ptr () const |
| const UChar1 * | raw_end () const |
| String | toUpper () const |
| String | toLower () const |
| String | applyTransform (Transform &inTransform) |
| String | normalize (UnicodeNormalizeForm form) const |
| const char * | toMbs () const |
| const wchar_t * | toWcs () const |
| const UChar2 * | toUcs2 () const |
| const UChar4 * | toUcs4 () const |
| String | resolveEntityReferences () const |
Static Public Member Functions | |
| static int | compare (const String &str1, const String &str2) |
| static int | compareI (const String &str1, const String &str2) |
| static String | concat (const utf8s &str1, const Celartem::String &str2) |
| static String | concat (const SimpleArray< String > &inStrings, const String &inSeparator=NullString) |
| static const UChar1 * | next (const UChar1 *inStrPtr) |
| static UChar1 * | next (UChar1 *inStrPtr) |
| static size_t | getCharSize (const UChar1 *inStrPtr) |
| static UChar4 | getCharcode (const UChar1 *inStrPtr) |
| static bool | isWhiteSpace (const UChar1 *inStrPtr) |
| static bool | isLineTerminator (const UChar1 *inStrPtr) |
| static bool | isValidUTF8Sequence (const u8 *inDataToValidate, size_t inSize) |
This class stores C/C++ style null terminated string.
For security related reason, this class zero-clears the memory block used by the string when resizing and destruction.
Normalization Form Selector; used with normalize function. For more information, see Unicode Standard Annex #15 Unicode Normalization Forms.
| Celartem::String::String | ( | ) |
This constructor initializes the String instance with "", not NULL; The string contains a '\0'.
| Celartem::String::String | ( | NULL_STRING | null | ) |
This constructor accepts NullString and initializes the String instance with "", not NULL; The string contains a '\0'.
| null | It should be NullString. |
| Celartem::String::String | ( | const utf8s & | inString, | |
| size_t | length = STR_AUTOSIZE | |||
| ) |
This constructor initializes the String with UTF-8 string.
| inString | UTF-8 string to be preserved. | |
| length | the length of the string to be preserved; you can remove the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0'). |
| Celartem::String::String | ( | const char * | inString, | |
| size_t | length = STR_AUTOSIZE | |||
| ) |
This constructor initializes the String with a platform-dependent multibyte string. Since String stores strings as UTF-8, this constructor converts the input string into UTF-8. If the input string contains only 7-bit ASCII characters and you want to avoid the conversion cost, use the utf8s version of the constructor. Mac OS X's native multibyte encoding is UTF-8, so this constructor does not cost much on Mac OS X.
| inString | Platform/Locale specific multibyte character string. | |
| length | The length of the string to be preserved; you can remove the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0'). |
| Celartem::String::String | ( | const std::string & | inString, | |
| size_t | length = STR_AUTOSIZE | |||
| ) |
This constructor initializes the String with std::string. Since String stores strings as UTF-8 string, this constructor converts the input string into UTF-8 string.
| inString | Platform/Locale specific multibyte character string. | |
| length | The length of the string to be preserved; you can remove the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0'). |
| Celartem::String::String | ( | const String & | inString | ) |
This constructor duplicates the input String.
Since the String class manages the strings by reference count mechanism, this constructor does not cost so much.
| inString | string to copy. |
| Celartem::String::String | ( | const String & | inString, | |
| size_t | start, | |||
| size_t | length | |||
| ) |
This constructor duplicates the input String.
Since the String class manages the strings by reference count mechanism, this constructor does not cost so much.
| inString | string to copy. | |
| start | The position of the substring to copy. | |
| length | The length of the string to be preserved; you can remove the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0'). |
| Celartem::String::String | ( | const ReferableString * | inString | ) |
This constructor duplicates the input String.
Since the String class manages the strings by reference count mechanism, this constructor does not cost so much.
| inString | string to copy. |
| Celartem::String::String | ( | const UChar2 * | inString, | |
| size_t | length = STR_AUTOSIZE | |||
| ) |
This constructor initializes the String with UTF-16 string. Since the String class stores strings as UTF-8, this constructor is more efficient than the multibyte version.
| inString | String in UTF-16. | |
| length | The length of the string to be preserved in characters; you can remove the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0'). |
| Celartem::String::String | ( | const UChar4 * | inString, | |
| size_t | length = STR_AUTOSIZE | |||
| ) |
This constructor initializes the String with UTF-32 string. Since the String class stores strings as UTF-8, this constructor is more efficient than the multibyte version.
| inString | A string in UCS-4(UTF-32). | |
| length | the length of the string to be preserved; you can remove the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0'). |
| UChar1* Celartem::String::allocate | ( | size_t | length | ) |
This method allocates bytes specified by length. You don't have to make the room for trailing null-terminator. It will be automatically appended and initialized with '\0'.
UChar1 *p = str1.allocate(3);
std::memcpy(p, "123", 3); // You don't have to add '\0'.
UChar1 *q = str2.allocate(5);
q[0] = '0'; q[1] = '1'; q[2] = '2'; q[3] = '3'; q[4] = '4';
| length | the buffer length to allocate. It doesn't have to include the terminating '\0'. |
| const UChar1* Celartem::String::begin | ( | ) | const [inline] |
This function is provided for compatibility with STL. It is identical to the c_str() function.
Since String class internally uses Pascal style non null-terminated string, the function may take some time to convert the raw string into C/C++ compatible null-terminated string. If you don't want such overheads, use raw_ptr() and raw_end() function.
| const UChar1* Celartem::String::c_str | ( | ) | const |
This method returns the pointer to the raw UTF-8 string. Be careful to use this function with String related methods; they may regard the string as the environment native multibyte rather than UTF-8.
Since String class internally uses Pascal style non null-terminated string, the function may take some time to convert the raw string into C/C++ compatible null-terminated string. If you don't want such overheads, use raw_ptr() and raw_end() function.
Referenced by begin(), c_utf8str(), end(), Celartem::RegularExpression::findFirst(), and Celartem::RegularExpression::RegularExpression().
| const utf8s Celartem::String::c_utf8str | ( | ) | const [inline] |
This method returns the pointer to the raw UTF-8 string. This method is identical to c_str() method except it returns utf8s string proxy object.
Since String class internally uses Pascal style non null-terminated string, the function may take some time to convert the raw string into C/C++ compatible null-terminated string. If you don't want such overheads, use raw_ptr() and raw_end() function.
| String Celartem::String::chomp | ( | ) | const |
This method removes the trailing CR/LF/CRLF codes if they exist.
| void Celartem::String::clear | ( | ) |
This method resets the string to "". This method also ensures the memory block is zero-cleared.
This function compares two strings.
| str1 | ||
| str2 | Strings to compare. |
| int Celartem::String::compare | ( | const String & | str | ) | const |
This method compares this string with another string.
| str | String to compare with. |
This function compares two strings in 7-bit ASCII case insensitive manner.
| str1 | ||
| str2 | Strings to compare. |
| int Celartem::String::compareI | ( | const String & | str | ) | const |
This method compares this string with another string in 7-bit ASCII case insensitive manner.
| str | String to compare with. |
| static String Celartem::String::concat | ( | const utf8s & | str1, | |
| const Celartem::String & | str2 | |||
| ) | [static] |
This function enables the concatenation of the strings in the faster way than String(str1) + str2.
| str1 | ||
| str2 | Strings to be combined. |
| static String Celartem::String::concat | ( | const SimpleArray< String > & | inStrings, | |
| const String & | inSeparator = NullString | |||
| ) | [static] |
This function enables the concatenation of the strings.
| inStrings | Strings to be combined. | |
| inSeparator | An optional string which is placed between a string and the next. |
This method is just a helper function to deal with SerializableData template. For more information, see SerializableData.
| const UChar1* Celartem::String::end | ( | ) | const [inline] |
This function is provided for the compatibility with STL. Since String class internally uses Pascal style non null-terminated string, the function may take some time to convert the raw string into C/C++ compatible null-terminated string. If you don't want such overheads, use raw_ptr() and raw_end() function.
| bool Celartem::String::endWith | ( | const String & | inPattern | ) | const |
This function checks whether the string ends with the specified pattern or not.
| inPattern | The pattern in String. |
true if the string ends with the specified pattern, otherwise false. | String Celartem::String::erase | ( | size_t | inPos, | |
| size_t | inLength | |||
| ) | const |
This function erases the substring.
| inPos | The index of the first character in the string to be removed. | |
| inLength | The number of elements that will be removed. |
This function removes all the occurrence of the specified pattern from the string.
| inPattern | The pattern to be removed. |
| size_t Celartem::String::findOneOf | ( | const UChar4 * | inChars, | |
| size_t | inCharCount | |||
| ) | const |
This function tries to find the first occurrence of one of the specified characters and returns the position by index.
| inChars | The characters to find. | |
| inCharCount | The number of characters in the array. |
| size_t Celartem::String::findOneOf | ( | const String & | inChars | ) | const |
This function tries to find the first occurrence of one of the specified characters and returns the position by index.
| inChars | The characters to find. |
| size_t Celartem::String::findPos | ( | const String & | inPattern | ) | const |
This function tries to find the specified pattern from the string and return the index to the character.
| inPattern | The string to find. |
This function tries to find the specified pattern from the string and returns the pointer to the character.
| inPattern | The string to find. |
| void Celartem::String::format | ( | const char * | format, | |
| ... | ||||
| ) |
This method formats the string; the result is identical to sprintf. This method may be affected by the current locale and can be used with platform specific printf extensions.
| format | String that specifies the format of parameters. |
| void Celartem::String::format | ( | const wchar_t * | format, | |
| ... | ||||
| ) |
This method formats the string; the result is identical to swprintf. This method may be affected by the current locale and can be used with platform specific printf extensions.
| format | String that specifies the format of parameters. |
| void Celartem::String::format_utf8 | ( | const char * | format, | |
| ... | ||||
| ) |
This method is UTF-8 version of format method. It formats the string; the result is identical to sprintf. This function is not affected by the current locale and it does not accept platform specific printf extensions.
| format | String that specifies the format of parameters. |
This function helps you to determine the UCS-4 character code of the specified location.
| inStrPtr | The pointer to a valid UTF-8 boundary on a string. |
Referenced by getNthUnicodeChar().
| size_t Celartem::String::getCharPos | ( | UChar4 | u, | |
| size_t | from = 0 | |||
| ) | const |
This method searches the string for the occurrence of a character that matches the specified character.
| u | UCS-4 character code to search. | |
| from | Where the search starts from. For the further information, see the sample code below: |
To search the same character repeatedly, do like the following code:
size_t pos = 0;
for(;;)
{
    pos = str.getCharPos('/', pos);
    if(pos == notFound)
        break; // no more occurrences
    // do the task for this occurrence
    // ....
    pos++; // prepare for the next search
};
| static size_t Celartem::String::getCharSize | ( | const UChar1 * | inStrPtr | ) | [static] |
| size_t Celartem::String::getFirstCharPos | ( | UChar4 | u | ) | const |
This method searches the string for the first occurrence of a character that matches the specified character. The behavior is almost identical to strchr function except it accepts UCS-4 character code and returns 0-based index.
| u | UCS-4 character code to search. |
This method searches the string for the first occurrence of a character that matches the specified character. The behavior is almost identical to strchr function except it accepts UCS-4 character code.
| u | UCS-4 character code to search. |
NULL. | size_t Celartem::String::getLastCharPos | ( | UChar4 | u | ) | const |
This method searches the string for the last occurrence of a specified character. The behavior is almost identical to strrchr function except it accepts UCS-4 character code and returns 0-based index.
| u | UCS-4 character code to search. |
This method searches the string for the last occurrence of a specified character. The behavior is almost identical to strrchr function except it accepts UCS-4 character code.
| u | UCS-4 character code to search. |
NULL. | size_t Celartem::String::getLength | ( | ) | const |
This method returns the number of characters in the string.
Referenced by end(), and Celartem::DjVu::Link::getType().
| size_t Celartem::String::getNthCharPos | ( | size_t | nCharPos | ) | const |
This method returns the position of n-th UCS-4 character in the string.
| nCharPos | The position of the UCS-4 character. |
| const UChar1* Celartem::String::getNthCharPtr | ( | size_t | nCharPos | ) | const |
This method returns the pointer to n-th UCS-4 character in the string.
| nCharPos | The position of the UCS-4 character. |
NULL. Referenced by getNthCharUtf8Ptr(), and getNthUnicodeChar().
| const utf8s Celartem::String::getNthCharUtf8Ptr | ( | size_t | nCharPos | ) | const [inline] |
This method returns the pointer to n-th UCS-4 character in the string by utf8s proxy object.
| nCharPos | The position of the UCS-4 character. |
| UChar4 Celartem::String::getNthUnicodeChar | ( | size_t | nCharPos | ) | const [inline] |
This method returns n-th UCS-4 character in the string.
| nCharPos | The position of the UCS-4 character. |
Referenced by operator()().
| size_t Celartem::String::getNumOfChars | ( | ) | const |
This method returns the number of UCS-4 characters in the string.
| size_t Celartem::String::getPatternPos | ( | const String & | str, | |
| size_t | from = 0 | |||
| ) | const [inline] |
This method searches the string for the first occurrence of a character contained in a specified string. This search does not include the null terminator. The behavior is almost identical to strpbrk function except it returns 0-based index. This method is just an alias of nbrk.
| str | The string that contains the characters for which to search. | |
| from | Where the search starts from. |
| bool Celartem::String::isEmpty | ( | ) | const |
This method checks whether the string is empty or not.
true if the string is empty, otherwise false. Referenced by Celartem::DjVu::Link::getType(), Celartem::DjVu::TextWithFontConfig::isEmpty(), Celartem::DjVu::Link::isEmptyLink(), and Celartem::DjVu::FontConfig::isValid().
| static bool Celartem::String::isLineTerminator | ( | const UChar1 * | inStrPtr | ) | [static] |
This function helps you to determine whether the pointed character sequence is defined as a line terminator or not.
| inStrPtr | The pointer to a valid UTF-8 boundary on a string. |
true if the character sequence is a line terminator; otherwise false. | static bool Celartem::String::isValidUTF8Sequence | ( | const u8 * | inDataToValidate, | |
| size_t | inSize | |||
| ) | [static] |
This function helps you to determine whether the specified data is a valid UTF-8 character sequence or not.
| inDataToValidate | The pointer to a data block to validate. | |
| inSize | The size of the data. |
true if the data is a valid UTF-8 character sequence; otherwise false. | static bool Celartem::String::isWhiteSpace | ( | const UChar1 * | inStrPtr | ) | [static] |
This function helps you to determine whether the pointed character sequence is defined as a whitespace or not.
| inStrPtr | The pointer to a valid UTF-8 boundary on a string. |
true if the character sequence is a white space; otherwise false. This function tries to match the string to the specified regular expression pattern and returns the match strings (a portion of the original string).
For more information, see RegularExpression.
| inPattern | The regular expression pattern to match. |
NULL. | bool Celartem::String::match | ( | const String & | inPattern, | |
| SimpleArray< String > & | outMatches | |||
| ) | const |
This function tries to match the string to the specified regular expression pattern and returns the match strings (a portion of the original string).
For more information, see RegularExpression.
| inPattern | The regular expression pattern to match. | |
| outMatches | The match result. outMatches[0] is all the matching string and outMatches[n] is the n-th matched substring. |
true if the match is successful, otherwise false. | size_t Celartem::String::nbrk | ( | const String & | str, | |
| size_t | from = 0 | |||
| ) | const |
This method searches the string for the first occurrence of a character contained in a specified string. This search does not include the null terminator. The behavior is almost identical to strpbrk function except it returns 0-based index.
getPatternPos is just an alias of this method.
| str | The string that contains the characters for which to search. | |
| from | Where the search starts from. |
Referenced by getPatternPos().
This function helps you to move to the next valid UCS character position.
| inStrPtr | The pointer on a UTF-8 string. |
This function helps you to move to the next valid UCS character position.
| inStrPtr | The pointer on a UTF-8 string. |
| String Celartem::String::normalize | ( | UnicodeNormalizeForm | form | ) | const |
This function normalizes the input text in the specified manner. For more information, see Unicode Standard Annex #15 Unicode Normalization Forms. The following code illustrates how to use the function:
String str = "...."; String normalized = str.normalize(String::U_NFKD);
| form | One of UnicodeNormalizeForm enumeration. |
| bool Celartem::String::operator!= | ( | const String & | str | ) | const |
For direct comparison with the other String. This method internally uses compare method.
| str | A string to compare with. |
true if the strings are not equal value in the dictionary order, otherwise false. | UChar4 Celartem::String::operator() | ( | size_t | n | ) | const [inline] |
This method gets the character at the specified position. This method works as if the string were stored as a UCS-4 character array.
| n | The position of the UCS-4 character. |
This method concatenates the specified string to the tail. This method DOES NOT modify the original.
| str | A string to concatenate. |
This method concatenates the specified string to the tail. This method DOES modify the original. The behavior of this method is almost identical to strcat function.
| str | A string to concatenate. |
*this ). | bool Celartem::String::operator< | ( | const String & | str | ) | const |
For direct comparison with the other String. This method internally uses compare method.
| str | A string to compare with. |
true if the string is less than str in the dictionary order, otherwise false. | bool Celartem::String::operator<= | ( | const String & | str | ) | const |
For direct comparison with the other String. This method internally uses compare method.
| str | A string to compare with. |
true if the string is no more than str in the dictionary order, otherwise false. This method duplicates the specified UTF-8 string.
| inString | string to be copied. |
String s = utf8s("Hello, world!");
This method duplicates the specified string.
| inString | String in UTF-16. |
| String& Celartem::String::operator= | ( | const char * | inString | ) |
This method duplicates the specified multibyte string. Since String stores strings as UTF-8, this method converts the input string into UTF-8. If the input string contains only 7-bit ASCII characters and you want to avoid the conversion cost, use the utf8s version of the assignment operator. Mac OS X's native multibyte encoding is UTF-8, so this method does not cost much on Mac OS X.
| inString | Platform/Locale dependent multibyte string. |
This method duplicates the specified string. Since the String class manages the strings by reference count mechanism, this method does not cost so much.
| inString | String to be copied. |
This method duplicates the specified string.
| inString | String in UCS-4(UTF-32). |
| String& Celartem::String::operator= | ( | NULL_STRING | null | ) |
This method accepts NullString and clears the string.
| null | It should be NullString. |
| bool Celartem::String::operator== | ( | const String & | str | ) | const |
For direct comparison with the other String. This method internally uses compare method.
| str | A string to compare with. |
true if the strings are same value, otherwise false. | bool Celartem::String::operator> | ( | const String & | str | ) | const |
For direct comparison with the other String. This method internally uses compare method.
| str | A string to compare with. |
true if the string is larger than str in the dictionary order, otherwise false. | bool Celartem::String::operator>= | ( | const String & | str | ) | const |
For direct comparison with the other String. This method internally uses compare method.
| str | A string to compare with. |
true if the string is no less than str in the dictionary order, otherwise false. | UChar1 Celartem::String::operator[] | ( | size_t | n | ) | const |
This method is to get the character of specified position.
| n | The position of the character. |
| UChar1& Celartem::String::operator[] | ( | size_t | n | ) |
This method is to get the character of specified position. This method may internally duplicate the string to realize modification of the string and it potentially has heavy overhead.
| n | The position of the character. |
This method searches the string for the first occurrence of a character contained in a specified string. This search does not include the null terminator. The behavior is almost identical to strpbrk function.
| str | The string that contains the characters for which to search. | |
| from | Where the search starts from. |
NULL. | const UChar1* Celartem::String::raw_end | ( | ) | const |
This function returns the pointer to the end position of the internal string.
The following sample illustrates how to use the function:
const UChar1 * const end = str.raw_end();
for(const UChar1 *p = str.raw_ptr(); p < end; p++)
{
    // do something on each character.
}
| const UChar1* Celartem::String::raw_ptr | ( | ) | const |
| String Celartem::String::removeBoms | ( | ) | const |
This method removes any heading BOMs from the string. BOM is usually only on the head of Unicode strings but it may be in the middle of the string due to incorrect string operations. This function checks all the string and removes all occurrences of BOM. If the string does not contain BOMs, this function simply returns the string.
| String Celartem::String::replace | ( | const String & | inReplacee, | |
| const String & | inReplacer, | |||
| u32 | inFlags = 0 | |||
| ) | const |
This function replaces strings in a string by another string.
| inReplacee | The string to be replaced. | |
| inReplacer | The string that replaces the strings. | |
| inFlags | Reserved, must be 0. |
| String Celartem::String::resolveEntityReferences | ( | ) | const |
This method resolves all the entity references in the string.
This method resolves all the HTML 4.01 defined entity references to the actual Unicode entities.
This method is just a helper function to deal with SerializableData template. For more information, see SerializableData.
| bool Celartem::String::split | ( | const String & | inPattern, | |
| SimpleArray< String > & | outSubstrs, | |||
| bool | inRemoveEmptyStrings = false | |||
| ) | const |
This function tries to split the string by the specified regular expression pattern and returns the split substrings. For more information, see RegularExpression.
| inPattern | The regular expression pattern that separates the string. | |
| outSubstrs | The split substrings. | |
| inRemoveEmptyStrings | Whether to remove empty substrings or not. |
true if the split process is successful, otherwise false. | bool Celartem::String::startWith | ( | const String & | inPattern | ) | const |
This function checks whether the string starts from the specified pattern or not.
| inPattern | The pattern in String. |
true if the string starts from the specified pattern, otherwise false. | String Celartem::String::substring | ( | size_t | start, | |
| size_t | length = STR_AUTOSIZE | |||
| ) | const |
This method extracts a portion of the string. There is no guarantee that the resulting string is a valid UTF-8 string if you specify an invalid position and/or length.
| start | 0 based index from which we extract the sub-string. If this value is larger than the length of the string, this function returns NullString. | |
| length | the number of characters to be extracted. You don't have to think about the room for terminating '\0'. If length exceeds the length of the string, this method fits the length to the original string. |
Referenced by Celartem::DjVu::Link::getType().
| String Celartem::String::substringByChar | ( | size_t | startChar, | |
| size_t | charCount = STR_AUTOSIZE | |||
| ) | const |
This method extracts a portion of the string. This method works as if the string were stored as a UCS-4 character array.
| startChar | 0 based index in UCS-4 character array. If this value points beyond the end of the string, this function returns NullString. | |
| charCount | the number of UCS-4 characters to be extracted. You don't have to think about the room for terminating '\0'. If charCount exceeds the number of characters remaining in the string, this method fits it to the original string. |
| void Celartem::String::swap | ( | String & | inString | ) |
This method swaps two strings.
| inString | A String instance to swap with. |
| double Celartem::String::toDouble | ( | ) | const |
This method converts the string into double value.
| int Celartem::String::toInt | ( | ) | const |
This method converts the string into int value.
Referenced by Celartem::DjVu::Link::getType().
| String Celartem::String::toLower | ( | ) | const |
This function converts all upper-case characters into lower-case ones. What this function does exactly is convert [A-Z] to [a-z]; it does not take any locale-specific issues into account.
| const char* Celartem::String::toMbs | ( | ) | const |
This method converts the string into locale specific multi-byte string.
| size_t Celartem::String::toSizeT | ( | ) | const |
This method converts the string into size_t value.
| const UChar2* Celartem::String::toUcs2 | ( | ) | const |
This method converts the string into UCS-2 (UTF-16) string.
| const UChar4* Celartem::String::toUcs4 | ( | ) | const |
This method converts the string into UCS-4 (UTF-32) string.
| uint64_t Celartem::String::toUInt64 | ( | ) | const |
This method converts the string into uint64_t value.
| String Celartem::String::toUpper | ( | ) | const |
This function converts all lower-case characters into upper-case ones. What this function does exactly is convert [a-z] to [A-Z]; it does not take any locale-specific issues into account.
| const wchar_t* Celartem::String::toWcs | ( | ) | const |
This method converts the string into wchar_t based string.
| String Celartem::String::trimHeadingSpaces | ( | ) | const |
This method removes any space characters (' ', \t) at the head of the string if they exist.
| String Celartem::String::trimSpaces | ( | ) | const |
This method removes any space characters (' ', \t) at both the head and the tail of the string if they exist.
| String Celartem::String::trimTrailingSpaces | ( | ) | const |
This method removes any space characters (' ', \t) at the tail of the string if they exist.