// tokenize: turns `text` into a std::vector<llama_token> using `vocab`.
// add_special and parse_special are forwarded unchanged to llama_tokenize.
// NOTE(review): this listing is non-contiguous (source line numbers are fused
// into the text and several statements are absent), so the log calls, the
// return statement and the closing brace are not visible here - confirm
// against the full file before relying on these notes.
38inline std::vector<llama_token>
tokenize(
const llama_vocab *vocab,
39 const std::string &text,
40 bool add_special,
bool parse_special) {
// Sizing call: 0 is passed immediately before add_special and the result is
// negated (leading minus). Presumably this yields the required token count
// that is later referred to as n_tokens - TODO confirm, the assignment target
// is not visible.
49 -llama_tokenize(vocab, text.c_str(),
static_cast<int32_t
>(text.length()),
51 0, add_special, parse_special);
55 "for text: '%.50s...'",
// Output vector sized to n_tokens elements.
61 std::vector<llama_token> tokens(n_tokens);
// Fill call: writes into tokens.data() with n_tokens passed alongside it.
62 const int n_tokenized =
63 llama_tokenize(vocab, text.c_str(),
static_cast<int32_t
>(text.length()),
64 tokens.data(), n_tokens, add_special, parse_special);
// The two calls must agree on the token count; a mismatch is reported with
// the expected and actual values (the enclosing statement is not visible).
66 if (n_tokenized != n_tokens) {
68 "(expected %d, got %d)",
69 n_tokens, n_tokenized);
74 text.length(), n_tokens);
// detokenize (single token): builds a std::string for `token` via
// llama_token_to_piece.
// NOTE(review): this listing is non-contiguous; the rest of the parameter
// list, the declarations of n_chars and check, the conditions guarding each
// resize, and the return statement are not visible - confirm against the
// full file.
91inline std::string
detokenize(
const llama_vocab *vocab, llama_token token,
// Start with a 64-character buffer filled with NUL characters.
99 std::string piece(64,
'\0');
// First attempt writes into the buffer using its current size.
102 llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
// The buffer is resized to the negation of n_chars; presumably this branch
// runs when the first call returned a negative value whose magnitude is the
// required size - TODO confirm, the guarding condition is not visible.
106 piece.resize(-n_chars);
// Second attempt with the resized buffer.
108 llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
// The retry result is compared against -n_chars; a mismatch is reported as
// inconsistent sizing for this token.
109 if (check != -n_chars) {
111 "[tokenizer::detokenize] ERROR: Inconsistent sizing for token %d",
// Resize the buffer to n_chars (presumably the path where the first call
// succeeded and n_chars is the number of characters written - TODO confirm).
116 piece.resize(n_chars);
136 const llama_token *tokens, int32_t n_tokens,
137 bool remove_special,
bool unparse_special) {
// NOTE(review): the head of this signature, the declaration of `written`, the
// failure condition and the return statement are not visible in this
// listing; the function name appears only in the log string below. Confirm
// against the full file.
// Guard: a null vocab, a null token pointer or a non-positive count is
// treated as invalid input and reported with all three values.
138 if (!vocab || !tokens || n_tokens <= 0) {
140 "(vocab=%p, tokens=%p, n_tokens=%d)",
141 vocab, tokens, n_tokens);
// Sizing call: 0 is passed immediately before remove_special (the argument
// preceding it is not visible).
147 int32_t required_size = llama_detokenize(vocab, tokens, n_tokens,
149 0, remove_special, unparse_special);
// A negative result is flipped to a positive byte count; presumably the
// negative value encodes the required buffer size - TODO confirm against the
// llama_detokenize contract.
151 if (required_size < 0) {
153 required_size = -required_size;
// A zero size gets its own message (the enclosing statement and what is
// returned in this case are not visible).
156 if (required_size == 0) {
158 "detokenization result for %d tokens",
// Buffer holds required_size + 1 chars, while only required_size is passed as
// the capacity to the fill call below.
164 std::vector<char> buffer(required_size + 1);
166 llama_detokenize(vocab, tokens, n_tokens, buffer.data(), required_size,
167 remove_special, unparse_special);
// Failure report comparing the bytes needed with the value of `written`.
171 "failed (needed %d bytes, got %d)",
172 required_size, written);
// Build the result from exactly `written` chars of the buffer.
176 std::string result(buffer.data(), written);
178 "[tokenizer::detokenize_batch] Detokenized %d tokens → %zu bytes",
179 n_tokens, result.size());