liblloyal 1.0.0
Branched Inference for llama.cpp
grammar.hpp
#pragma once

// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Lloyal Labs

#include "common.hpp"
#include "tokenizer.hpp"
#include <llama/llama.h>
#include <json-schema-to-grammar.h> // llama.cpp common library
#include <nlohmann/json.hpp>
#include <stdexcept>
#include <string>
#include <vector>

/**
 * Grammar-constrained sampling helpers: JSON schema to GBNF conversion and
 * grammar sampler lifecycle (init, clone, apply, accept, free).
 */
namespace lloyal::grammar {

/**
 * Convert a JSON schema to GBNF grammar format.
 *
 * @param schema_json  JSON schema as a string
 * @return GBNF grammar string
 * @throws std::runtime_error on JSON parse failure or empty conversion result
 */
inline std::string from_json_schema(const std::string &schema_json) {
  LLOYAL_LOG_DEBUG(
      "[grammar::from_json_schema] Converting JSON schema (%zu bytes)",
      schema_json.size());

  try {
    // Parse JSON schema
    nlohmann::ordered_json schema = nlohmann::ordered_json::parse(schema_json);

    LLOYAL_LOG_DEBUG("[grammar::from_json_schema] Schema parsed, calling "
                     "json_schema_to_grammar");

    // Call json_schema_to_grammar from llama.cpp common library
    // Parameters: (schema, force_gbnf)
    // force_gbnf=false allows EBNF optimization when possible
    std::string grammar = json_schema_to_grammar(schema, false);

    if (grammar.empty()) {
      LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: Conversion produced "
                       "empty grammar");
      throw std::runtime_error("Grammar conversion produced empty result");
    }

    LLOYAL_LOG_DEBUG(
        "[grammar::from_json_schema] Generated GBNF grammar (%zu bytes)",
        grammar.size());
    return grammar;

  } catch (const nlohmann::json::parse_error &e) {
    std::string errorMsg = std::string("JSON parse error: ") + e.what();
    LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
    throw std::runtime_error(errorMsg);
  } catch (const std::exception &e) {
    std::string errorMsg =
        std::string("Grammar conversion failed: ") + e.what();
    LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
    throw std::runtime_error(errorMsg);
  }
}
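// Usage sketch (illustrative only; the schema literal is an assumed example,
// not part of this header):
//
//   std::string gbnf = lloyal::grammar::from_json_schema(
//       R"({"type":"object","properties":{"answer":{"type":"string"}},"required":["answer"]})");
//   // gbnf now holds a GBNF grammar string suitable for init_sampler().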

// ===== SAMPLER INITIALIZATION =====

/**
 * Initialize a grammar sampler from a GBNF grammar string.
 *
 * @param model        Model whose vocabulary the grammar is compiled against
 * @param grammar_str  GBNF grammar text
 * @param root_rule    Name of the grammar's root rule (default "root")
 * @return Grammar sampler, or nullptr on failure
 */
inline llama_sampler *init_sampler(const llama_model *model,
                                   const std::string &grammar_str,
                                   const std::string &root_rule = "root") {
  if (!model) {
    LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: model is null");
    return nullptr;
  }

  if (grammar_str.empty()) {
    LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: grammar_str is empty");
    return nullptr;
  }

  const llama_vocab *vocab = tokenizer::get_vocab(model);
  if (!vocab) {
    LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: get_vocab returned null");
    return nullptr;
  }

  LLOYAL_LOG_DEBUG("[grammar::init_sampler] Initializing grammar sampler "
                   "(grammar: %zu bytes, root: %s)",
                   grammar_str.size(), root_rule.c_str());

  llama_sampler *sampler =
      llama_sampler_init_grammar(vocab, grammar_str.c_str(), root_rule.c_str());

  if (!sampler) {
    LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: "
                     "llama_sampler_init_grammar returned null");
  }

  return sampler;
}
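// Usage sketch (illustrative only; `model` is assumed to be a loaded
// llama_model* and `schema_json` an existing schema string):
//
//   std::string gbnf = lloyal::grammar::from_json_schema(schema_json);
//   llama_sampler *smpl = lloyal::grammar::init_sampler(model, gbnf);
//   if (!smpl) { /* handle failure: null model, empty grammar, or init error */ }
//   // ... drive smpl via apply()/accept(), then release it with free_sampler().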

/**
 * Initialize a lazy grammar sampler from a GBNF grammar string.
 *
 * The grammar does not constrain sampling until one of the trigger patterns
 * or trigger tokens appears in the generated output.
 *
 * @param model             Model whose vocabulary the grammar is compiled against
 * @param grammar_str       GBNF grammar text
 * @param trigger_patterns  Patterns that activate the grammar
 * @param trigger_tokens    Tokens that activate the grammar
 * @param root_rule         Name of the grammar's root rule (default "root")
 * @return Lazy grammar sampler, or nullptr on failure
 */
inline llama_sampler *init_lazy_sampler(
    const llama_model *model,
    const std::string &grammar_str,
    const std::vector<std::string> &trigger_patterns,
    const std::vector<llama_token> &trigger_tokens,
    const std::string &root_rule = "root") {
  if (!model) {
    LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: model is null");
    return nullptr;
  }

  if (grammar_str.empty()) {
    LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: grammar_str is empty");
    return nullptr;
  }

  const llama_vocab *vocab = tokenizer::get_vocab(model);
  if (!vocab) {
    LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: get_vocab returned null");
    return nullptr;
  }

  std::vector<const char *> patterns_c;
  patterns_c.reserve(trigger_patterns.size());
  for (const auto &p : trigger_patterns) patterns_c.push_back(p.c_str());

  LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] Initializing lazy grammar "
                   "(grammar: %zu bytes, %zu patterns, %zu tokens, root: %s)",
                   grammar_str.size(), trigger_patterns.size(),
                   trigger_tokens.size(), root_rule.c_str());

  llama_sampler *sampler = llama_sampler_init_grammar_lazy_patterns(
      vocab, grammar_str.c_str(), root_rule.c_str(),
      patterns_c.data(), patterns_c.size(),
      trigger_tokens.data(), trigger_tokens.size());

  if (!sampler) {
    LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: "
                     "llama_sampler_init_grammar_lazy_patterns returned null");
  }

  return sampler;
}
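// Usage sketch (illustrative only; the trigger pattern below is an assumed
// example for constraining output only after a tool-call marker appears):
//
//   std::vector<std::string> patterns = { R"(<tool_call>[\s\S]*)" };
//   std::vector<llama_token> tokens;   // optional trigger tokens, may be empty
//   llama_sampler *lazy = lloyal::grammar::init_lazy_sampler(
//       model, gbnf, patterns, tokens);
//   // Until a trigger matches, sampling proceeds unconstrained.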

/**
 * Clone a grammar sampler (for fork/branching).
 *
 * @param smpl  Sampler to clone (may be null)
 * @return Cloned sampler with its own state, or nullptr
 */
inline llama_sampler *clone_sampler(llama_sampler *smpl) {
  if (!smpl) {
    LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Input is null, returning null");
    return nullptr;
  }

  llama_sampler *cloned = llama_sampler_clone(smpl);

  if (!cloned) {
    LLOYAL_LOG_DEBUG("[grammar::clone_sampler] ERROR: llama_sampler_clone failed");
  } else {
    LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Cloned sampler successfully");
  }

  return cloned;
}
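// Usage sketch (illustrative only): when forking a branch, clone the grammar
// sampler so each branch advances its own grammar state independently.
//
//   llama_sampler *branch_smpl = lloyal::grammar::clone_sampler(smpl);
//   // ... the branch samples with branch_smpl while the parent keeps smpl ...
//   lloyal::grammar::free_sampler(branch_smpl);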

/**
 * Free a grammar sampler. Safe to call with nullptr.
 */
inline void free_sampler(llama_sampler *smpl) {
  if (smpl) {
    llama_sampler_free(smpl);
  }
}

/**
 * Apply grammar constraints to candidate tokens. No-op if either argument is null.
 */
inline void apply(llama_sampler *smpl, llama_token_data_array *cur_p) {
  if (smpl && cur_p) {
    llama_sampler_apply(smpl, cur_p);
  }
}

/**
 * Accept a token into the grammar state. No-op if the sampler is null.
 */
inline void accept(llama_sampler *smpl, llama_token token) {
  if (smpl) {
    llama_sampler_accept(smpl, token);
  }
}
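// Usage sketch of the per-token loop (illustrative only; building the
// llama_token_data_array from logits is assumed to happen elsewhere):
//
//   llama_token_data_array cur_p = /* candidates for the current position */;
//   lloyal::grammar::apply(smpl, &cur_p);   // mask grammar-invalid candidates
//   llama_token tok = /* pick a token from cur_p (greedy, top-k, ...) */;
//   lloyal::grammar::accept(smpl, tok);     // advance the grammar state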

} // namespace lloyal::grammar