41#include <llama/llama.h>
43#include <nlohmann/json.hpp>
89 common_chat_format
format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
139 using json = nlohmann::ordered_json;
143 common_chat_templates_ptr tmpls = common_chat_templates_init(model, inputs.
template_override);
151 std::vector<common_chat_msg> messages = common_chat_msgs_parse_oaicompat(messages_array);
154 common_chat_templates_inputs tmpl_inputs;
155 tmpl_inputs.messages = messages;
157 tmpl_inputs.use_jinja =
true;
161 json tools_array = json::parse(inputs.
tools_json);
162 tmpl_inputs.tools = common_chat_tools_parse_oaicompat(tools_array);
163 tmpl_inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(inputs.
tool_choice);
168 tmpl_inputs.reasoning_format = common_reasoning_format_from_name(inputs.
reasoning_format);
173 tmpl_inputs.grammar = inputs.
grammar;
176 common_chat_params params = common_chat_templates_apply(tmpls.get(), tmpl_inputs);
183 if (!messages.empty() && messages[0].role ==
"system" && messages[0].content.empty()) {
184 common_chat_msg sys_msg;
185 sys_msg.role =
"system";
186 sys_msg.content =
"";
188 common_chat_templates_inputs sys_inputs;
189 sys_inputs.messages = {sys_msg};
190 sys_inputs.add_generation_prompt =
false;
191 sys_inputs.use_jinja =
true;
192 auto sys_params = common_chat_templates_apply(tmpls.get(), sys_inputs);
194 const auto& sys_prefix = sys_params.prompt;
195 if (!sys_prefix.empty() &&
196 params.prompt.size() >= sys_prefix.size() &&
197 params.prompt.substr(0, sys_prefix.size()) == sys_prefix) {
198 params.prompt = params.prompt.substr(sys_prefix.size());
199 LLOYAL_LOG_DEBUG(
"[chat_in::format] Stripped empty system prefix (%zu bytes)", sys_prefix.size());
204 result.
prompt = params.prompt;
206 result.
format = params.format;
207 result.
grammar = params.grammar;
212 result.
parser = params.parser;
218 "[chat_in::format] Successfully formatted with format=%d, %zu stop tokens, grammar=%zu bytes",
219 static_cast<int>(result.
format),
225 }
catch (
const std::exception &e) {
226 LLOYAL_LOG_DEBUG(
"[chat_in::format] Template processing failed: %s", e.what());
232 using json = nlohmann::ordered_json;
234 std::string fallback_prompt;
235 for (
const auto &msg : messages) {
236 if (msg.contains(
"role") && msg.contains(
"content")) {
237 std::string role = msg[
"role"].get<std::string>();
239 const auto& c = msg[
"content"];
242 }
else if (c.is_string()) {
243 content = c.get<std::string>();
247 fallback_prompt += role +
": " + content +
"\n";
251 result.
prompt = fallback_prompt;
255 "[chat_in::format] Using fallback format (%zu bytes)",
256 fallback_prompt.size());
259 }
catch (
const std::exception &e) {
261 "[chat_in::format] ERROR: Failed to parse messages JSON: %s",
280inline bool validate(
const std::string &template_str) {
282 bool isValid = common_chat_verify_template(template_str,
true);
284 isValid ?
"valid" :
"invalid");
286 }
catch (
const std::exception &e) {
302 if (model ==
nullptr) {
305 const llama_vocab* vocab = llama_model_get_vocab(model);
306 llama_token eot = llama_vocab_eot(vocab);
307 if (eot == LLAMA_TOKEN_NULL) {
308 eot = llama_vocab_eos(vocab);
310 if (eot != LLAMA_TOKEN_NULL) {
324 if (!model || token == LLAMA_TOKEN_NULL) {
371 using json = nlohmann::ordered_json;
373 if (!model)
return {};
376 const std::string SENTINEL =
"\x1F__LLOYAL_SEP__\x1F";
377 const std::string SENTINEL2 =
"\x1F__LLOYAL_SEP2__\x1F";
381 common_chat_templates_ptr tmpls = common_chat_templates_init(model,
"");
387 std::vector<common_chat_msg> messages = {
388 {.role =
"user", .content =
"X"},
389 {.role =
"assistant", .content = SENTINEL},
390 {.role =
"user", .content = SENTINEL2}
393 common_chat_templates_inputs inputs;
394 inputs.messages = messages;
395 inputs.add_generation_prompt =
false;
396 inputs.use_jinja =
true;
398 auto params = common_chat_templates_apply(tmpls.get(), inputs);
399 const std::string& formatted = params.prompt;
402 size_t sep_start = formatted.rfind(SENTINEL);
403 if (sep_start == std::string::npos) {
406 sep_start += SENTINEL.length();
408 size_t sep_end = formatted.find(SENTINEL2, sep_start);
409 if (sep_end == std::string::npos) {
413 std::string between = formatted.substr(sep_start, sep_end - sep_start);
414 if (between.empty()) {
419 const auto* vocab = llama_model_get_vocab(model);
421 if (tokens.empty()) {
426 std::vector<llama_token> separator;
427 bool found_eog =
false;
429 for (
auto tok : tokens) {
431 separator.push_back(tok);
438 bool is_whitespace = !text.empty() && std::all_of(text.begin(), text.end(),
439 [](
unsigned char c) { return c ==
' ' || c ==
'\n' || c ==
'\r' || c ==
'\t'; });
441 separator.push_back(tok);
448 if (separator.empty() || !found_eog) {
454 }
catch (
const std::exception& e) {
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Chat input formatting with full format awareness.
std::string get_token_safe(const llama_model *model, llama_token token)
Get token text safely.
std::vector< llama_token > get_turn_separator(const llama_model *model)
Get turn separator tokens for the model's chat template.
FormatResult format(const llama_model *model, const FormatInputs &inputs)
Format chat messages using model's chat template with full format awareness.
std::vector< llama_token > fallback_to_eog(const llama_model *model)
Get EOG token as fallback when template parsing fails.
bool validate(const std::string &template_str)
Validate chat template syntax.
std::vector< llama_token > tokenize(const llama_vocab *vocab, const std::string &text, bool add_special, bool parse_special)
Tokenize text to token array.
std::string detokenize(const llama_vocab *vocab, llama_token token, bool special)
Detokenize a SINGLE token to text (streaming use case).
bool is_eog(const llama_vocab *vocab, llama_token token)
Check if token is end-of-generation marker.
Text Tokenization Operations.