liblloyal 1.0.0
Branched Inference for llama.cpp
Loading...
Searching...
No Matches
chat_out.hpp
Go to the documentation of this file.
1#pragma once
2
3// SPDX-License-Identifier: Apache-2.0
4// Copyright 2026 Lloyal Labs
5
27#include "common.hpp"
28#include <llama/llama.h>
29#include <chat.h>
30#include <peg-parser.h>
31#include <exception>
32#include <string>
33#include <vector>
34
43
/**
 * A single tool call extracted from model output.
 */
struct ToolCall {
    std::string name;       ///< Tool/function name.
    std::string arguments;  ///< JSON string of arguments.
    std::string id;         ///< Tool call ID (may be empty if the model doesn't generate IDs).
};
52
78 std::string content;
79 std::string reasoning_content;
80 std::vector<ToolCall> tool_calls;
81};
82
143 const std::string& output,
144 common_chat_format format,
145 common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
146 bool is_partial = false,
147 bool thinking_forced_open = false,
148 const std::string& parser_data = ""
149) {
150 ParseResult result;
151
152 try {
153 // Build parser params
154 common_chat_parser_params syntax;
155 syntax.format = format;
156 syntax.reasoning_format = reasoning_format;
157 syntax.thinking_forced_open = thinking_forced_open;
158
159 // Load serialized PEG parser if provided (required for PEG format models)
160 if (!parser_data.empty()) {
161 syntax.parser.load(parser_data);
162 }
163
164 // Call llama.cpp's output parser
165 common_chat_msg msg = common_chat_parse(output, is_partial, syntax);
166
167 // Convert to ParseResult
168 result.content = msg.content;
169 result.reasoning_content = msg.reasoning_content;
170
171 for (const auto& tc : msg.tool_calls) {
172 result.tool_calls.push_back({tc.name, tc.arguments, tc.id});
173 }
174
175 } catch (const std::exception& e) {
176 LLOYAL_LOG_DEBUG("[chat_out::parse] Parse failed: %s, returning raw output", e.what());
177 result.content = output;
178 }
179
180 return result;
181}
182
201 const llama_model* model,
202 const std::string& output,
203 bool is_partial = false
204) {
205 ParseResult result;
206
207 try {
208 // Init templates to detect format
209 common_chat_templates_ptr tmpls = common_chat_templates_init(model, "");
210 if (!tmpls) {
211 result.content = output;
212 return result;
213 }
214
215 // Apply with empty messages to get format detection
216 common_chat_templates_inputs inputs;
217 inputs.messages = {{.role = "user", .content = ""}};
218 inputs.add_generation_prompt = true;
219 inputs.use_jinja = true;
220
221 common_chat_params params = common_chat_templates_apply(tmpls.get(), inputs);
222
223 // Delegate to explicit-format overload
224 return parse(output, params.format, COMMON_REASONING_FORMAT_NONE, is_partial,
225 params.thinking_forced_open);
226
227 } catch (const std::exception& e) {
228 LLOYAL_LOG_DEBUG("[chat_out::parse] Auto-detect failed: %s", e.what());
229 result.content = output;
230 return result;
231 }
232}
233
234} // namespace lloyal::chat_out
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Definition common.hpp:47
Chat output parsing (tool calls, reasoning, content)
Definition chat_out.hpp:42
ParseResult parse(const std::string &output, common_chat_format format, common_reasoning_format reasoning_format=COMMON_REASONING_FORMAT_NONE, bool is_partial=false, bool thinking_forced_open=false, const std::string &parser_data="")
Parse model output with explicit format.
Definition chat_out.hpp:142
common_reasoning_format reasoning_format
Reasoning format for output parsing.
Definition chat_in.hpp:98
Result from parsing model output.
Definition chat_out.hpp:77
std::vector< ToolCall > tool_calls
Extracted tool calls (empty array if none)
Definition chat_out.hpp:80
std::string content
Main response text (visible to user)
Definition chat_out.hpp:78
std::string reasoning_content
Extracted thinking/reasoning blocks (empty if none)
Definition chat_out.hpp:79
A single tool call extracted from model output.
Definition chat_out.hpp:47
std::string name
Tool/function name.
Definition chat_out.hpp:48
std::string id
Tool call ID (may be empty if model doesn't generate IDs)
Definition chat_out.hpp:50
std::string arguments
JSON string of arguments.
Definition chat_out.hpp:49