|
3 | 3 | local ffi = require("ffi") |
4 | 4 |
|
5 | 5 | ffi.cdef[[ |
6 | | -typedef int8_t utf8proc_int8_t; |
7 | 6 | typedef uint8_t utf8proc_uint8_t; |
8 | | -typedef int16_t utf8proc_int16_t; |
9 | | -typedef uint16_t utf8proc_uint16_t; |
10 | 7 | typedef int32_t utf8proc_int32_t; |
11 | | -typedef uint32_t utf8proc_uint32_t; |
12 | 8 | typedef ssize_t utf8proc_ssize_t; |
13 | | -typedef size_t utf8proc_size_t; |
14 | | -typedef bool utf8proc_bool; |
15 | | -typedef enum { |
16 | | - UTF8PROC_NULLTERM = 1, |
17 | | - UTF8PROC_STABLE = 2, |
18 | | - UTF8PROC_COMPAT = 4, |
19 | | - UTF8PROC_COMPOSE = 8, |
20 | | - UTF8PROC_DECOMPOSE = 16, |
21 | | - UTF8PROC_IGNORE = 32, |
22 | | - UTF8PROC_REJECTNA = 64, |
23 | | - UTF8PROC_NLF2LS = 128, |
24 | | - UTF8PROC_NLF2PS = 256, |
25 | | - UTF8PROC_NLF2LF = 384, |
26 | | - UTF8PROC_STRIPCC = 512, |
27 | | - UTF8PROC_CASEFOLD = 1024, |
28 | | - UTF8PROC_CHARBOUND = 2048, |
29 | | - UTF8PROC_LUMP = 4096, |
30 | | - UTF8PROC_STRIPMARK = 8192, |
31 | | - UTF8PROC_STRIPNA = 16384, |
32 | | -} utf8proc_option_t; |
33 | | -static const int UTF8PROC_ERROR_NOMEM = -1; |
34 | | -static const int UTF8PROC_ERROR_OVERFLOW = -2; |
35 | | -static const int UTF8PROC_ERROR_INVALIDUTF8 = -3; |
36 | | -static const int UTF8PROC_ERROR_NOTASSIGNED = -4; |
37 | | -static const int UTF8PROC_ERROR_INVALIDOPTS = -5; |
38 | | -typedef short int utf8proc_propval_t; |
39 | | -struct utf8proc_property_struct { |
40 | | - utf8proc_propval_t category; |
41 | | - utf8proc_propval_t combining_class; |
42 | | - utf8proc_propval_t bidi_class; |
43 | | - utf8proc_propval_t decomp_type; |
44 | | - utf8proc_uint16_t decomp_seqindex; |
45 | | - utf8proc_uint16_t casefold_seqindex; |
46 | | - utf8proc_uint16_t uppercase_seqindex; |
47 | | - utf8proc_uint16_t lowercase_seqindex; |
48 | | - utf8proc_uint16_t titlecase_seqindex; |
49 | | - utf8proc_uint16_t comb_index : 10; |
50 | | - utf8proc_uint16_t comb_length : 5; |
51 | | - utf8proc_uint16_t comb_issecond : 1; |
52 | | - unsigned int bidi_mirrored : 1; |
53 | | - unsigned int comp_exclusion : 1; |
54 | | - unsigned int ignorable : 1; |
55 | | - unsigned int control_boundary : 1; |
56 | | - unsigned int charwidth : 2; |
57 | | - unsigned int ambiguous_width : 1; |
58 | | - unsigned int pad : 1; |
59 | | - unsigned int boundclass : 6; |
60 | | - unsigned int indic_conjunct_break : 2; |
61 | | -}; |
62 | | -typedef struct utf8proc_property_struct utf8proc_property_t; |
63 | | -typedef enum { |
64 | | - UTF8PROC_CATEGORY_CN = 0, |
65 | | - UTF8PROC_CATEGORY_LU = 1, |
66 | | - UTF8PROC_CATEGORY_LL = 2, |
67 | | - UTF8PROC_CATEGORY_LT = 3, |
68 | | - UTF8PROC_CATEGORY_LM = 4, |
69 | | - UTF8PROC_CATEGORY_LO = 5, |
70 | | - UTF8PROC_CATEGORY_MN = 6, |
71 | | - UTF8PROC_CATEGORY_MC = 7, |
72 | | - UTF8PROC_CATEGORY_ME = 8, |
73 | | - UTF8PROC_CATEGORY_ND = 9, |
74 | | - UTF8PROC_CATEGORY_NL = 10, |
75 | | - UTF8PROC_CATEGORY_NO = 11, |
76 | | - UTF8PROC_CATEGORY_PC = 12, |
77 | | - UTF8PROC_CATEGORY_PD = 13, |
78 | | - UTF8PROC_CATEGORY_PS = 14, |
79 | | - UTF8PROC_CATEGORY_PE = 15, |
80 | | - UTF8PROC_CATEGORY_PI = 16, |
81 | | - UTF8PROC_CATEGORY_PF = 17, |
82 | | - UTF8PROC_CATEGORY_PO = 18, |
83 | | - UTF8PROC_CATEGORY_SM = 19, |
84 | | - UTF8PROC_CATEGORY_SC = 20, |
85 | | - UTF8PROC_CATEGORY_SK = 21, |
86 | | - UTF8PROC_CATEGORY_SO = 22, |
87 | | - UTF8PROC_CATEGORY_ZS = 23, |
88 | | - UTF8PROC_CATEGORY_ZL = 24, |
89 | | - UTF8PROC_CATEGORY_ZP = 25, |
90 | | - UTF8PROC_CATEGORY_CC = 26, |
91 | | - UTF8PROC_CATEGORY_CF = 27, |
92 | | - UTF8PROC_CATEGORY_CS = 28, |
93 | | - UTF8PROC_CATEGORY_CO = 29, |
94 | | -} utf8proc_category_t; |
95 | | -typedef enum { |
96 | | - UTF8PROC_BIDI_CLASS_L = 1, |
97 | | - UTF8PROC_BIDI_CLASS_LRE = 2, |
98 | | - UTF8PROC_BIDI_CLASS_LRO = 3, |
99 | | - UTF8PROC_BIDI_CLASS_R = 4, |
100 | | - UTF8PROC_BIDI_CLASS_AL = 5, |
101 | | - UTF8PROC_BIDI_CLASS_RLE = 6, |
102 | | - UTF8PROC_BIDI_CLASS_RLO = 7, |
103 | | - UTF8PROC_BIDI_CLASS_PDF = 8, |
104 | | - UTF8PROC_BIDI_CLASS_EN = 9, |
105 | | - UTF8PROC_BIDI_CLASS_ES = 10, |
106 | | - UTF8PROC_BIDI_CLASS_ET = 11, |
107 | | - UTF8PROC_BIDI_CLASS_AN = 12, |
108 | | - UTF8PROC_BIDI_CLASS_CS = 13, |
109 | | - UTF8PROC_BIDI_CLASS_NSM = 14, |
110 | | - UTF8PROC_BIDI_CLASS_BN = 15, |
111 | | - UTF8PROC_BIDI_CLASS_B = 16, |
112 | | - UTF8PROC_BIDI_CLASS_S = 17, |
113 | | - UTF8PROC_BIDI_CLASS_WS = 18, |
114 | | - UTF8PROC_BIDI_CLASS_ON = 19, |
115 | | - UTF8PROC_BIDI_CLASS_LRI = 20, |
116 | | - UTF8PROC_BIDI_CLASS_RLI = 21, |
117 | | - UTF8PROC_BIDI_CLASS_FSI = 22, |
118 | | - UTF8PROC_BIDI_CLASS_PDI = 23, |
119 | | -} utf8proc_bidi_class_t; |
120 | | -typedef enum { |
121 | | - UTF8PROC_DECOMP_TYPE_FONT = 1, |
122 | | - UTF8PROC_DECOMP_TYPE_NOBREAK = 2, |
123 | | - UTF8PROC_DECOMP_TYPE_INITIAL = 3, |
124 | | - UTF8PROC_DECOMP_TYPE_MEDIAL = 4, |
125 | | - UTF8PROC_DECOMP_TYPE_FINAL = 5, |
126 | | - UTF8PROC_DECOMP_TYPE_ISOLATED = 6, |
127 | | - UTF8PROC_DECOMP_TYPE_CIRCLE = 7, |
128 | | - UTF8PROC_DECOMP_TYPE_SUPER = 8, |
129 | | - UTF8PROC_DECOMP_TYPE_SUB = 9, |
130 | | - UTF8PROC_DECOMP_TYPE_VERTICAL = 10, |
131 | | - UTF8PROC_DECOMP_TYPE_WIDE = 11, |
132 | | - UTF8PROC_DECOMP_TYPE_NARROW = 12, |
133 | | - UTF8PROC_DECOMP_TYPE_SMALL = 13, |
134 | | - UTF8PROC_DECOMP_TYPE_SQUARE = 14, |
135 | | - UTF8PROC_DECOMP_TYPE_FRACTION = 15, |
136 | | - UTF8PROC_DECOMP_TYPE_COMPAT = 16, |
137 | | -} utf8proc_decomp_type_t; |
138 | | -typedef enum { |
139 | | - UTF8PROC_BOUNDCLASS_START = 0, |
140 | | - UTF8PROC_BOUNDCLASS_OTHER = 1, |
141 | | - UTF8PROC_BOUNDCLASS_CR = 2, |
142 | | - UTF8PROC_BOUNDCLASS_LF = 3, |
143 | | - UTF8PROC_BOUNDCLASS_CONTROL = 4, |
144 | | - UTF8PROC_BOUNDCLASS_EXTEND = 5, |
145 | | - UTF8PROC_BOUNDCLASS_L = 6, |
146 | | - UTF8PROC_BOUNDCLASS_V = 7, |
147 | | - UTF8PROC_BOUNDCLASS_T = 8, |
148 | | - UTF8PROC_BOUNDCLASS_LV = 9, |
149 | | - UTF8PROC_BOUNDCLASS_LVT = 10, |
150 | | - UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR = 11, |
151 | | - UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, |
152 | | - UTF8PROC_BOUNDCLASS_PREPEND = 13, |
153 | | - UTF8PROC_BOUNDCLASS_ZWJ = 14, |
154 | | - UTF8PROC_BOUNDCLASS_E_BASE = 15, |
155 | | - UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, |
156 | | - UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, |
157 | | - UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, |
158 | | - UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19, |
159 | | - UTF8PROC_BOUNDCLASS_E_ZWG = 20, |
160 | | -} utf8proc_boundclass_t; |
161 | | -typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t, void *); |
162 | | -extern const utf8proc_int8_t utf8proc_utf8class[256]; |
163 | | -const char *utf8proc_version(void); |
164 | | -const char *utf8proc_unicode_version(void); |
165 | | -const char *utf8proc_errmsg(utf8proc_ssize_t); |
166 | | -utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_int32_t *); |
167 | | -utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t); |
168 | 9 | utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t, utf8proc_uint8_t *); |
169 | | -const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t); |
170 | | -utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t, utf8proc_int32_t *, utf8proc_ssize_t, utf8proc_option_t, int *); |
171 | | -utf8proc_ssize_t utf8proc_decompose(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_int32_t *, utf8proc_ssize_t, utf8proc_option_t); |
172 | | -utf8proc_ssize_t utf8proc_decompose_custom(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_int32_t *, utf8proc_ssize_t, utf8proc_option_t, utf8proc_custom_func, void *); |
173 | | -utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *, utf8proc_ssize_t, utf8proc_option_t); |
174 | | -utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *, utf8proc_ssize_t, utf8proc_option_t); |
175 | | -utf8proc_bool utf8proc_grapheme_break_stateful(utf8proc_int32_t, utf8proc_int32_t, utf8proc_int32_t *); |
176 | | -utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t, utf8proc_int32_t); |
177 | | -utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t); |
178 | | -utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t); |
179 | | -utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t); |
180 | | -int utf8proc_islower(utf8proc_int32_t); |
181 | | -int utf8proc_isupper(utf8proc_int32_t); |
182 | | -int utf8proc_charwidth(utf8proc_int32_t); |
183 | | -utf8proc_category_t utf8proc_category(utf8proc_int32_t); |
184 | | -const char *utf8proc_category_string(utf8proc_int32_t); |
185 | | -utf8proc_ssize_t utf8proc_map(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_uint8_t **, utf8proc_option_t); |
186 | | -utf8proc_ssize_t utf8proc_map_custom(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_uint8_t **, utf8proc_option_t, utf8proc_custom_func, void *); |
187 | | -utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *); |
| 10 | +utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_int32_t *); |
188 | 11 | utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *); |
189 | | -utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *); |
190 | | -utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *); |
191 | 12 | utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *); |
| 13 | +utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t); |
| 14 | +utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t); |
192 | 15 | ]] |
0 commit comments