@@ -33,19 +33,21 @@ struct OrtxTokenizerBlob {
33
33
const size_t reserved_blob_1_len;
34
34
35
35
#ifdef __cplusplus
36
- OrtxTokenizerBlob (const std::string_view& config_json_blob,
37
- const std::string_view& vocab_json_blob,
38
- const std::string_view& token_module_blob = {},
39
- const std::string_view& raw_model_blob = {})
40
- : config_json_blob(config_json_blob.data()), vocab_json_blob(vocab_json_blob.data()),
41
- token_module_blob(token_module_blob.data()), raw_model_blob(raw_model_blob.data()),
42
- reserved_blob_1(nullptr ), config_blob_len(config_json_blob.size()),
43
- vocab_blob_len(vocab_json_blob.size()), token_module_blob_len(token_module_blob.size()),
44
- raw_model_blob_len(raw_model_blob.size()), reserved_blob_1_len(0 ) {}
36
+ OrtxTokenizerBlob (const std::string_view& config_json_blob, const std::string_view& vocab_json_blob,
37
+ const std::string_view& token_module_blob = {}, const std::string_view& raw_model_blob = {})
38
+ : config_json_blob(config_json_blob.data()),
39
+ vocab_json_blob(vocab_json_blob.data()),
40
+ token_module_blob(token_module_blob.data()),
41
+ raw_model_blob(raw_model_blob.data()),
42
+ reserved_blob_1(nullptr ),
43
+ config_blob_len(config_json_blob.size()),
44
+ vocab_blob_len(vocab_json_blob.size()),
45
+ token_module_blob_len(token_module_blob.size()),
46
+ raw_model_blob_len(raw_model_blob.size()),
47
+ reserved_blob_1_len(0 ) {}
45
48
#endif
46
49
};
47
50
48
-
49
51
#ifdef __cplusplus
50
52
extern " C" {
51
53
#endif
@@ -64,8 +66,8 @@ extError_t ORTX_API_CALL OrtxCreateTokenizer(OrtxTokenizer** tokenizer, const ch
64
66
* \param tokenizer_blob Pointer to the tokenizer blob
65
67
* \return Error code indicating the success or failure of the operation
66
68
*/
67
- extError_t ORTX_API_CALL OrtxCreateTokenizerFromBlob (OrtxTokenizer** tokenizer, const struct OrtxTokenizerBlob * tokenizer_blob);
68
-
69
+ extError_t ORTX_API_CALL OrtxCreateTokenizerFromBlob (OrtxTokenizer** tokenizer,
70
+ const struct OrtxTokenizerBlob * tokenizer_blob);
69
71
70
72
/* * \brief Tokenize the input using the specified tokenizer
71
73
*
@@ -75,8 +77,8 @@ extError_t ORTX_API_CALL OrtxCreateTokenizerFromBlob(OrtxTokenizer** tokenizer,
75
77
* \param output Pointer to store the tokenized result
76
78
* \return Error code indicating the success or failure of the operation
77
79
*/
78
- extError_t ORTX_API_CALL OrtxTokenize (
79
- const OrtxTokenizer* tokenizer, const char * input[], size_t batch_size, OrtxTokenId2DArray** output);
80
+ extError_t ORTX_API_CALL OrtxTokenize (const OrtxTokenizer* tokenizer, const char * input[], size_t batch_size,
81
+ OrtxTokenId2DArray** output);
80
82
81
83
/* *
82
84
* Converts a token to its corresponding ID.
@@ -101,8 +103,8 @@ extError_t ORTX_API_CALL OrtxConvertTokenToId(const OrtxTokenizer* tokenizer, co
101
103
* @param output A pointer to the OrtxTokenId2DArray object to store the output.
102
104
* @return An extError_t value indicating the success or failure of the operation.
103
105
*/
104
- extError_t ORTX_API_CALL OrtxGetDecoderPromptIds (
105
- const OrtxTokenizer* tokenizer, size_t batch_size, const char * lang, const char * task, int no_timestamps, OrtxTokenId2DArray** output);
106
+ extError_t ORTX_API_CALL OrtxGetDecoderPromptIds (const OrtxTokenizer* tokenizer, size_t batch_size, const char * lang,
107
+ const char * task, int no_timestamps, OrtxTokenId2DArray** output);
106
108
107
109
/* * \brief Detokenize the input using the specified tokenizer
108
110
*
@@ -111,8 +113,8 @@ extError_t ORTX_API_CALL OrtxGetDecoderPromptIds(
111
113
* \param output Pointer to store the detokenized result
112
114
* \return Error code indicating the success or failure of the operation
113
115
*/
114
- extError_t ORTX_API_CALL OrtxDetokenize (
115
- const OrtxTokenizer* tokenizer, const OrtxTokenId2DArray* input, OrtxStringArray** output);
116
+ extError_t ORTX_API_CALL OrtxDetokenize (const OrtxTokenizer* tokenizer, const OrtxTokenId2DArray* input,
117
+ OrtxStringArray** output);
116
118
117
119
/* * \brief Detokenize the input using the specified tokenizer (1D version)
118
120
*
@@ -122,8 +124,8 @@ extError_t ORTX_API_CALL OrtxDetokenize(
122
124
* \param output Pointer to store the detokenized result
123
125
* \return Error code indicating the success or failure of the operation
124
126
*/
125
- extError_t ORTX_API_CALL OrtxDetokenize1D (
126
- const OrtxTokenizer* tokenizer, const extTokenId_t* input, size_t len, OrtxStringArray** output);
127
+ extError_t ORTX_API_CALL OrtxDetokenize1D (const OrtxTokenizer* tokenizer, const extTokenId_t* input, size_t len,
128
+ OrtxStringArray** output);
127
129
128
130
/* * \brief Detokenize the input using the specified tokenizer with caching
129
131
*
@@ -133,8 +135,8 @@ extError_t ORTX_API_CALL OrtxDetokenize1D(
133
135
* \param text_out Pointer to store the detokenized text
134
136
* \return Error code indicating the success or failure of the operation
135
137
*/
136
- extError_t ORTX_API_CALL OrtxDetokenizeCached (
137
- const OrtxTokenizer* tokenizer, OrtxDetokenizerCache* cache, extTokenId_t next_id, const char ** text_out);
138
+ extError_t ORTX_API_CALL OrtxDetokenizeCached (const OrtxTokenizer* tokenizer, OrtxDetokenizerCache* cache,
139
+ extTokenId_t next_id, const char ** text_out);
138
140
139
141
/* *
140
142
* @brief Retrieves the C-style string representation from an OrtxString object.
@@ -182,28 +184,30 @@ extError_t ORTX_API_CALL OrtxTokenId2DArrayGetBatch(const OrtxTokenId2DArray* to
182
184
* \param length Pointer to store the length of the item
183
185
* \return Error code indicating the success or failure of the operation
184
186
*/
185
- extError_t ORTX_API_CALL OrtxTokenId2DArrayGetItem (
186
- const OrtxTokenId2DArray* token_id_2d_array, size_t index, const extTokenId_t** item, size_t * length);
187
+ extError_t ORTX_API_CALL OrtxTokenId2DArrayGetItem (const OrtxTokenId2DArray* token_id_2d_array, size_t index,
188
+ const extTokenId_t** item, size_t * length);
187
189
188
190
/* *
189
191
* @brief Applies a chat template to the given input.
190
192
*
191
193
* This function processes the specified template with the provided input using the
192
- * tokenizer, and outputs the resulting string array . Optionally, it can include a
194
+ * tokenizer, and stores the output in a tensor result. Optionally, it can include a
193
195
* generation prompt in the output. The chat template can be provided as a string or
194
- * be retrieved from a loaded tokenizer json file which contains the chat template its json file.
195
- * if both tokenizer and template_str are provided, the template_str will supersede the tokenizer.
196
+ * be retrieved from a loaded tokenizer JSON file that contains the chat template.
197
+ * If both tokenizer and template_str are provided, the template_str will supersede the tokenizer.
196
198
*
197
- * @param tokenizer Pointer to an OrtxTokenizer used for template processing
198
- * @param template_str Null-terminated string representing the chat template, can be null if tokenizer.json has one.
199
+ * @param tokenizer Pointer to an OrtxTokenizer used for template processing.
200
+ * @param template_str Null-terminated string representing the chat template; can be null if tokenizer.json has one.
199
201
* @param input Null-terminated string containing the input to be processed.
200
- * @param output an OrtxString that will be populated with the output strings.
202
+ * @param output Pointer to an OrtxTensorResult that will be populated with the output strings.
203
+ * If tokenize is true, the token IDs will be stored in the output at index 1.
201
204
* @param add_generation_prompt Indicates whether to add a generation prompt to the output.
205
+ * @param tokenize Indicates whether to tokenize the templated text into token IDs.
202
206
* @return extError_t Returns an error code indicating success or the type of failure.
203
207
*/
204
208
extError_t ORTX_API_CALL OrtxApplyChatTemplate (const OrtxTokenizer* tokenizer, const char * template_str,
205
- const char * input, OrtxString ** output,
206
- bool add_generation_prompt);
209
+ const char * input, OrtxTensorResult ** output,
210
+ bool add_generation_prompt, bool tokenize );
207
211
208
212
#ifdef __cplusplus
209
213
}
0 commit comments