liblloyal 1.0.0
Branched Inference for llama.cpp
Loading...
Searching...
No Matches
chat_out.hpp
Go to the documentation of this file.
1#pragma once
2
3// SPDX-License-Identifier: Apache-2.0
4// Copyright 2026 Lloyal Labs
5
6
28#include "common.hpp"
29#include <llama/llama.h>
30#include <chat.h>
31#include <peg-parser.h>
32#include <exception>
33#include <string>
34#include <vector>
35
44
/// @brief A single tool call extracted from model output.
struct ToolCall {
    std::string name;       ///< Tool/function name.
    std::string arguments;  ///< JSON string of arguments.
    std::string id;         ///< Tool call ID (may be empty if the model doesn't generate IDs).
};
53
79 std::string content;
80 std::string reasoning_content;
81 std::vector<ToolCall> tool_calls;
82};
83
144 const std::string& output,
145 common_chat_format format,
146 common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
147 bool is_partial = false,
148 const std::string& generation_prompt = "",
149 const std::string& parser_data = ""
150) {
151 ParseResult result;
152
153 try {
154 // Build parser params
155 common_chat_parser_params syntax;
156 syntax.format = format;
157 syntax.reasoning_format = reasoning_format;
158 syntax.generation_prompt = generation_prompt;
159
160 // Load serialized PEG parser if provided (required for PEG format models)
161 if (!parser_data.empty()) {
162 syntax.parser.load(parser_data);
163 }
164
165 // Call llama.cpp's output parser
166 common_chat_msg msg = common_chat_parse(output, is_partial, syntax);
167
168 // Convert to ParseResult
169 result.content = msg.content;
170 result.reasoning_content = msg.reasoning_content;
171
172 for (const auto& tc : msg.tool_calls) {
173 result.tool_calls.push_back({tc.name, tc.arguments, tc.id});
174 }
175
176 } catch (const std::exception& e) {
177 LLOYAL_LOG_DEBUG("[chat_out::parse] Parse failed: %s, returning raw output", e.what());
178 result.content = output;
179 }
180
181 return result;
182}
183
202 const llama_model* model,
203 const std::string& output,
204 bool is_partial = false
205) {
206 ParseResult result;
207
208 try {
209 // Init templates to detect format
210 common_chat_templates_ptr tmpls = common_chat_templates_init(model, "");
211 if (!tmpls) {
212 result.content = output;
213 return result;
214 }
215
216 // Apply with empty messages to get format detection
217 common_chat_templates_inputs inputs;
218 inputs.messages = {{.role = "user", .content = ""}};
219 inputs.add_generation_prompt = true;
220 inputs.use_jinja = true;
221
222 common_chat_params params = common_chat_templates_apply(tmpls.get(), inputs);
223
224 // Delegate to explicit-format overload
225 return parse(output, params.format, COMMON_REASONING_FORMAT_NONE, is_partial,
226 params.generation_prompt);
227
228 } catch (const std::exception& e) {
229 LLOYAL_LOG_DEBUG("[chat_out::parse] Auto-detect failed: %s", e.what());
230 result.content = output;
231 return result;
232 }
233}
234
235} // namespace lloyal::chat_out
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Definition common.hpp:48
Chat output parsing (tool calls, reasoning, content)
Definition chat_out.hpp:43
ParseResult parse(const std::string &output, common_chat_format format, common_reasoning_format reasoning_format=COMMON_REASONING_FORMAT_NONE, bool is_partial=false, const std::string &generation_prompt="", const std::string &parser_data="")
Parse model output with explicit format.
Definition chat_out.hpp:143
common_reasoning_format reasoning_format
Reasoning format for output parsing.
Definition chat_in.hpp:99
Result from parsing model output.
Definition chat_out.hpp:78
std::vector< ToolCall > tool_calls
Extracted tool calls (empty array if none)
Definition chat_out.hpp:81
std::string content
Main response text (visible to user)
Definition chat_out.hpp:79
std::string reasoning_content
Extracted thinking/reasoning blocks (empty if none)
Definition chat_out.hpp:80
A single tool call extracted from model output.
Definition chat_out.hpp:48
std::string name
Tool/function name.
Definition chat_out.hpp:49
std::string id
Tool call ID (may be empty if model doesn't generate IDs)
Definition chat_out.hpp:51
std::string arguments
JSON string of arguments.
Definition chat_out.hpp:50