ModErn Text Analysis
META Enumerates Textual Applications
utf.h
Go to the documentation of this file.
1 
10 #ifndef META_UTF8_H_
11 #define META_UTF8_H_
12 
13 #include <functional>
14 #include <string>
15 
16 namespace meta
17 {
18 namespace utf
19 {
20 
27 std::string to_utf8(const std::string& str, const std::string& charset);
28 
35 std::u16string to_utf16(const std::string& str, const std::string& charset);
36 
43 std::string to_utf8(const std::u16string& str);
44 
51 std::u16string to_utf16(const std::string& str);
52 
59 std::string tolower(const std::string& str);
60 
67 std::string toupper(const std::string& str);
68 
76 std::string foldcase(const std::string& str);
77 
86 std::string transform(const std::string& str, const std::string& id);
87 
97 std::string remove_if(const std::string& str,
98  std::function<bool(uint32_t)> pred);
99 
104 uint64_t length(const std::string& str);
105 
110 bool isalpha(uint32_t codepoint);
111 
116 bool isblank(uint32_t codepoint);
117 }
118 }
119 
120 #endif
std::string tolower(const std::string &str)
Lowercases a utf8 string.
Definition: utf.cpp:60
bool isblank(uint32_t codepoint)
Definition: utf.cpp:131
std::string remove_if(const std::string &str, std::function< bool(uint32_t)> pred)
Removes UTF-32 codepoints that match the given function.
Definition: utf.cpp:109
std::string to_utf8(const std::string &str, const std::string &charset)
Converts a string from the given charset to utf8.
Definition: utf.cpp:24
std::string toupper(const std::string &str)
Uppercases a utf8 string.
Definition: utf.cpp:76
uint64_t length(const std::string &str)
Definition: utf.cpp:136
bool isalpha(uint32_t codepoint)
Definition: utf.cpp:126
std::u16string to_utf16(const std::string &str, const std::string &charset)
Converts a string from the given charset to utf16.
Definition: utf.cpp:31
std::string transform(const std::string &str, const std::string &id)
Transliterates a utf8 string, using the rules defined in ICU.
Definition: transformer.cpp:86
std::string foldcase(const std::string &str)
Folds the case of a utf8 string.
Definition: utf.cpp:92
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24