#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

#include "llama.h"

// NOTE: fprintf(stderr, ...) below is an assumed stand-in for this file's
// logging macros, which this excerpt does not show.

inline std::vector<llama_token> tokenize(const llama_vocab *vocab,
                                         const std::string &text,
                                         bool add_special,
                                         bool parse_special) {
    // First pass with a null buffer: llama_tokenize returns the negated
    // number of tokens required, so negate it back.
    const int n_tokens = -llama_tokenize(
        vocab, text.c_str(), static_cast<int32_t>(text.length()),
        nullptr, 0, add_special, parse_special);
    if (n_tokens <= 0) {
        fprintf(stderr,
                "[tokenizer::tokenize] ERROR: failed to count tokens "
                "for text: '%.50s...'\n", text.c_str());
        return {};
    }

    // Second pass fills a buffer sized from the first pass.
    std::vector<llama_token> tokens(n_tokens);
    const int n_tokenized = llama_tokenize(
        vocab, text.c_str(), static_cast<int32_t>(text.length()),
        tokens.data(), n_tokens, add_special, parse_special);
    if (n_tokenized != n_tokens) {
        fprintf(stderr,
                "[tokenizer::tokenize] ERROR: token count mismatch "
                "(expected %d, got %d)\n", n_tokens, n_tokenized);
        return {};
    }

    fprintf(stderr, "[tokenizer::tokenize] Tokenized %zu bytes into %d tokens\n",
            text.length(), n_tokens);
    return tokens;
}
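
// Usage sketch (illustrative, not part of the original file). Assumes a model
// loaded through the llama.h C API; "model.gguf" is a placeholder path, and
// the loader names follow current llama.h (older builds use
// llama_load_model_from_file):
//
//     llama_backend_init();
//     llama_model *model =
//         llama_model_load_from_file("model.gguf", llama_model_default_params());
//     const llama_vocab *vocab = llama_model_get_vocab(model);
//     std::vector<llama_token> toks =
//         tokenize(vocab, "Hello, world!", /*add_special=*/true, /*parse_special=*/false);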
inline std::string detokenize(const llama_vocab *vocab, llama_token token,
                              bool special) {
    std::string piece;
    piece.resize(piece.capacity());  // reuse the small-string buffer for the first try
    const int n_chars = llama_token_to_piece(
        vocab, token, &piece[0], static_cast<int32_t>(piece.size()), 0, special);
    if (n_chars < 0) {
        // A negative return is the negated size required; retry at that size.
        piece.resize(-n_chars);
        const int check = llama_token_to_piece(
            vocab, token, &piece[0], static_cast<int32_t>(piece.size()), 0, special);
        if (check != -n_chars) {
            fprintf(stderr,
                    "[tokenizer::detokenize] ERROR: Inconsistent sizing for token %d\n",
                    token);
            return "";
        }
    } else {
        piece.resize(n_chars);
    }
    return piece;
}
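
// Usage sketch (illustrative): because detokenize() maps one token to one
// piece, it suits streaming output, printing each sampled token as it arrives:
//
//     for (llama_token tok : toks) {
//         fputs(detokenize(vocab, tok, /*special=*/false).c_str(), stdout);
//     }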
inline std::string detokenize_batch(const llama_vocab *vocab,
                                    const llama_token *tokens, int32_t n_tokens,
                                    bool remove_special,
                                    bool unparse_special) {
    if (!vocab || !tokens || n_tokens <= 0) {
        fprintf(stderr,
                "[tokenizer::detokenize_batch] ERROR: invalid arguments "
                "(vocab=%p, tokens=%p, n_tokens=%d)\n",
                (const void *) vocab, (const void *) tokens, n_tokens);
        return "";
    }

    // First pass with a null buffer: a negative return is the negated byte
    // count required.
    int32_t required_size = llama_detokenize(vocab, tokens, n_tokens, nullptr,
                                             0, remove_special, unparse_special);
    if (required_size < 0) {
        required_size = -required_size;
    }
    if (required_size == 0) {
        fprintf(stderr, "[tokenizer::detokenize_batch] WARNING: empty "
                        "detokenization result for %d tokens\n", n_tokens);
        return "";
    }

    std::vector<char> buffer(required_size + 1);
    const int32_t written =
        llama_detokenize(vocab, tokens, n_tokens, buffer.data(), required_size,
                         remove_special, unparse_special);
    // Trailing-whitespace trimming may yield fewer bytes than the first pass
    // reported, so only a negative or overlong result is an error.
    if (written < 0 || written > required_size) {
        fprintf(stderr, "[tokenizer::detokenize_batch] ERROR: detokenization "
                        "failed (needed %d bytes, got %d)\n",
                required_size, written);
        return "";
    }

    std::string result(buffer.data(), written);
    fprintf(stderr,
            "[tokenizer::detokenize_batch] Detokenized %d tokens → %zu bytes\n",
            n_tokens, result.size());
    return result;
}
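
// Usage sketch (illustrative): round-tripping a prompt through both helpers.
// The round trip is not guaranteed to be byte-identical for every tokenizer
// (e.g. leading-space normalization), so compare with care:
//
//     std::vector<llama_token> toks =
//         tokenize(vocab, prompt, /*add_special=*/false, /*parse_special=*/false);
//     std::string text = detokenize_batch(vocab, toks.data(),
//                                         static_cast<int32_t>(toks.size()),
//                                         /*remove_special=*/false,
//                                         /*unparse_special=*/false);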