using json = nlohmann::ordered_json;

// model, inputs, messages_array, and result are defined earlier in this function.
try {     // outer: failures to interpret the request JSON land in the final catch
    try { // inner: template processing, with a retry path in its catch
        common_chat_templates_ptr tmpls = common_chat_templates_init(model, inputs.template_override);

        std::vector<common_chat_msg> messages = common_chat_msgs_parse_oaicompat(messages_array);

        common_chat_templates_inputs tmpl_inputs;
        tmpl_inputs.messages = messages;
        tmpl_inputs.use_jinja = true;

        json tools_array = json::parse(inputs.tools_json);
        tmpl_inputs.tools = common_chat_tools_parse_oaicompat(tools_array);
        tmpl_inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(inputs.tool_choice);
        tmpl_inputs.reasoning_format = common_reasoning_format_from_name(inputs.reasoning_format);
        tmpl_inputs.grammar = inputs.grammar;

        common_chat_params params = common_chat_templates_apply(tmpls.get(), tmpl_inputs);
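
        // Some chat templates render a non-empty header even for an empty
        // system message. If the first message is an empty system turn,
        // render a lone empty system message and strip that rendering off
        // the front of the prompt so it does not waste context.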
        if (!messages.empty() && messages[0].role == "system" && messages[0].content.empty()) {
            bool stripped = false;
            try {
                common_chat_msg sys_msg;
                sys_msg.role    = "system";
                sys_msg.content = "";

                common_chat_templates_inputs sys_inputs;
                sys_inputs.messages = {sys_msg};
                sys_inputs.add_generation_prompt = false;
                sys_inputs.use_jinja = true;
                auto sys_params = common_chat_templates_apply(tmpls.get(), sys_inputs);

                const auto& sys_prefix = sys_params.prompt;
                if (!sys_prefix.empty() &&
                    params.prompt.size() >= sys_prefix.size() &&
                    params.prompt.substr(0, sys_prefix.size()) == sys_prefix) {
                    params.prompt = params.prompt.substr(sys_prefix.size());
                    stripped = true;
                    LLOYAL_LOG_DEBUG("[chat_in::format] Stripped empty system prefix (%zu bytes)", sys_prefix.size());
                }
            } catch (const std::exception &e) {
                LLOYAL_LOG_DEBUG("[chat_in::format] Primary stripping failed: %s", e.what());
            }
            if (!stripped) {
                try {
                    static const std::string SENTINEL = "\x1F__LLOYAL_SYS_STRIP__\x1F";

                    common_chat_msg sys_msg;
                    sys_msg.role    = "system";
                    sys_msg.content = "";
                    common_chat_msg user_msg;
                    user_msg.role    = "user";
                    user_msg.content = SENTINEL;

                    common_chat_templates_inputs with_sys;
                    with_sys.messages = {sys_msg, user_msg};
                    with_sys.add_generation_prompt = false;
                    with_sys.use_jinja = true;
                    auto with_sys_params = common_chat_templates_apply(tmpls.get(), with_sys);

                    common_chat_templates_inputs without_sys;
                    without_sys.messages = {user_msg};
                    without_sys.add_generation_prompt = false;
                    without_sys.use_jinja = true;
                    auto without_sys_params = common_chat_templates_apply(tmpls.get(), without_sys);

                    const auto& with_prompt    = with_sys_params.prompt;
                    const auto& without_prompt = without_sys_params.prompt;

                    if (with_prompt.size() > without_prompt.size() &&
                        with_prompt.substr(with_prompt.size() - without_prompt.size()) == without_prompt) {
                        std::string sys_prefix = with_prompt.substr(0, with_prompt.size() - without_prompt.size());
                        if (!sys_prefix.empty() &&
                            params.prompt.size() >= sys_prefix.size() &&
                            params.prompt.substr(0, sys_prefix.size()) == sys_prefix) {
                            params.prompt = params.prompt.substr(sys_prefix.size());
                            LLOYAL_LOG_DEBUG("[chat_in::format] Stripped empty system prefix via sentinel (%zu bytes)", sys_prefix.size());
                        }
                    } else {
                        LLOYAL_LOG_DEBUG("[chat_in::format] Sentinel subtraction failed, skipping strip");
                    }
                } catch (const std::exception &e) {
                    LLOYAL_LOG_DEBUG("[chat_in::format] Sentinel stripping also failed: %s", e.what());
                }
            }
        }
        result.prompt  = params.prompt;
        result.format  = params.format;
        result.grammar = params.grammar;
        result.parser  = params.parser;

        LLOYAL_LOG_DEBUG(
            "[chat_in::format] Successfully formatted with format=%d, %zu stop tokens, grammar=%zu bytes",
            static_cast<int>(result.format),
            params.additional_stops.size(),
            result.grammar.size());
    } catch (const std::exception &e) {
        LLOYAL_LOG_DEBUG("[chat_in::format] Template processing failed: %s", e.what());
        try {
            common_chat_templates_ptr tmpls = common_chat_templates_init(model, inputs.template_override);

            static const std::string SENTINEL = "\x1F__LLOYAL_RETRY__\x1F";

            std::vector<common_chat_msg> messages = common_chat_msgs_parse_oaicompat(messages_array);

            // Note whether the conversation already contains a user turn.
            bool has_user = false;
            for (const auto& m : messages) {
                if (m.role == "user") { has_user = true; break; }
            }

            std::vector<common_chat_msg> augmented = messages;
            common_chat_msg sentinel_user;
            sentinel_user.role    = "user";
            sentinel_user.content = SENTINEL;
            augmented.push_back(sentinel_user);

            common_chat_templates_inputs tmpl_inputs;
            tmpl_inputs.messages = augmented;
            tmpl_inputs.add_generation_prompt = false;
            tmpl_inputs.use_jinja = true;

            json tools_array = json::parse(inputs.tools_json);
            tmpl_inputs.tools = common_chat_tools_parse_oaicompat(tools_array);
            tmpl_inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(inputs.tool_choice);
            tmpl_inputs.reasoning_format = common_reasoning_format_from_name(inputs.reasoning_format);
            tmpl_inputs.grammar = inputs.grammar;

            common_chat_params params = common_chat_templates_apply(tmpls.get(), tmpl_inputs);
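
            // The full prompt now ends with the rendered sentinel user turn.
            // Render that sentinel turn on its own and subtract it as a
            // suffix, leaving only the real conversation.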
            common_chat_msg user_only_msg;
            user_only_msg.role    = "user";
            user_only_msg.content = SENTINEL;

            common_chat_templates_inputs user_only_inputs;
            user_only_inputs.messages = {user_only_msg};
            user_only_inputs.add_generation_prompt = false;
            user_only_inputs.use_jinja = true;
            auto user_only_params = common_chat_templates_apply(tmpls.get(), user_only_inputs);

            const auto& full        = params.prompt;
            const auto& user_suffix = user_only_params.prompt;

            if (full.size() > user_suffix.size() &&
                full.substr(full.size() - user_suffix.size()) == user_suffix) {
                params.prompt = full.substr(0, full.size() - user_suffix.size());
            }
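
            // Mirror the primary path: if the conversation starts with an
            // empty system message, compute its rendered prefix via the same
            // with/without subtraction trick and strip it.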
            if (!messages.empty() && messages[0].role == "system" && messages[0].content.empty()) {
                common_chat_msg sys_msg; sys_msg.role = "system"; sys_msg.content = "";
                common_chat_msg usr_msg; usr_msg.role = "user";   usr_msg.content = SENTINEL;

                common_chat_templates_inputs with_sys_inputs;
                with_sys_inputs.messages = {sys_msg, usr_msg};
                with_sys_inputs.add_generation_prompt = false;
                with_sys_inputs.use_jinja = true;

                common_chat_templates_inputs without_sys_inputs;
                without_sys_inputs.messages = {usr_msg};
                without_sys_inputs.add_generation_prompt = false;
                without_sys_inputs.use_jinja = true;

                auto with_sys    = common_chat_templates_apply(tmpls.get(), with_sys_inputs);
                auto without_sys = common_chat_templates_apply(tmpls.get(), without_sys_inputs);
                if (with_sys.prompt.size() > without_sys.prompt.size() &&
                    with_sys.prompt.substr(with_sys.prompt.size() - without_sys.prompt.size()) == without_sys.prompt) {
                    std::string sys_prefix = with_sys.prompt.substr(0, with_sys.prompt.size() - without_sys.prompt.size());
                    if (!sys_prefix.empty() &&
                        params.prompt.size() >= sys_prefix.size() &&
                        params.prompt.substr(0, sys_prefix.size()) == sys_prefix) {
                        params.prompt = params.prompt.substr(sys_prefix.size());
                        LLOYAL_LOG_DEBUG("[chat_in::format] Retry: stripped empty system prefix (%zu bytes)", sys_prefix.size());
                    }
                }
            }
            result.prompt  = params.prompt;
            result.format  = params.format;
            result.grammar = params.grammar;
            result.parser  = params.parser;

            LLOYAL_LOG_DEBUG(
                "[chat_in::format] Retry with synthetic user succeeded, format=%d (%zu bytes)",
                static_cast<int>(result.format),
                result.prompt.size());
        } catch (const std::exception &e2) {
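
            // Last resort: no template at all. Flatten the raw OpenAI-style
            // messages into "role: content" lines so generation can proceed.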
            LLOYAL_LOG_DEBUG("[chat_in::format] Retry failed: %s", e2.what());

            std::string fallback_prompt;
            for (const auto &msg : messages_array) {
                if (msg.contains("role") && msg.contains("content")) {
                    std::string role = msg["role"].get<std::string>();

                    std::string content;
                    const auto& c = msg["content"];
                    if (c.is_null()) {
                        // leave content empty
                    } else if (c.is_string()) {
                        content = c.get<std::string>();
                    }

                    fallback_prompt += role + ": " + content + "\n";
                }
            }

            result.prompt = fallback_prompt;

            LLOYAL_LOG_DEBUG(
                "[chat_in::format] Using fallback format (%zu bytes)",
                fallback_prompt.size());
        }
    }
} catch (const std::exception &e) {
    LLOYAL_LOG_DEBUG(
        "[chat_in::format] ERROR: Failed to parse messages JSON: %s",
        e.what());
}
using json = nlohmann::ordered_json;

if (!model) return {};

const std::string SENTINEL  = "\x1F__LLOYAL_SEP__\x1F";
const std::string SENTINEL2 = "\x1F__LLOYAL_SEP2__\x1F";

try {
    common_chat_templates_ptr tmpls = common_chat_templates_init(model, "");

    std::vector<common_chat_msg> messages = {
        {.role = "user",      .content = "X"},
        {.role = "assistant", .content = SENTINEL},
        {.role = "user",      .content = SENTINEL2}
    };

    common_chat_templates_inputs inputs;
    inputs.messages = messages;
    inputs.add_generation_prompt = false;
    inputs.use_jinja = true;

    auto params = common_chat_templates_apply(tmpls.get(), inputs);
    const std::string& formatted = params.prompt;
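
    // Whatever sits between the two sentinels in the rendered prompt is
    // exactly what the template inserted between the assistant and user turns.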
    size_t sep_start = formatted.rfind(SENTINEL);
    if (sep_start == std::string::npos) {
        return {};
    }
    sep_start += SENTINEL.length();

    size_t sep_end = formatted.find(SENTINEL2, sep_start);
    if (sep_end == std::string::npos) {
        return {};
    }

    std::string between = formatted.substr(sep_start, sep_end - sep_start);
    if (between.empty()) {
        return {};
    }
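
    // Tokenize the between-text and accept it as a separator only if it is
    // made of end-of-generation/control tokens plus plain whitespace. The
    // filtering below is a sketch reconstructed from the surrounding checks,
    // assuming llama.cpp's common_tokenize / common_token_to_piece helpers.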
    const auto* vocab = llama_model_get_vocab(model);
    std::vector<llama_token> tokens = common_tokenize(vocab, between, /*add_special=*/false, /*parse_special=*/true);
    if (tokens.empty()) {
        return {};
    }

    std::vector<llama_token> separator;
    bool found_eog = false;

    for (auto tok : tokens) {
        if (llama_vocab_is_eog(vocab, tok)) {
            found_eog = true;
            separator.push_back(tok);
            continue;
        }
        std::string text = common_token_to_piece(vocab, tok);
        bool is_whitespace = !text.empty() && std::all_of(text.begin(), text.end(),
            [](unsigned char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; });
        if (is_whitespace) {
            separator.push_back(tok);
        }
    }

    // A usable separator must contain at least one end-of-generation token.
    if (separator.empty() || !found_eog) {
        return {};
    }
    return separator;
} catch (const std::exception& e) {
    LLOYAL_LOG_DEBUG("[chat_in] Separator detection failed: %s", e.what());
    return {};
}