liblloyal 1.0.0
Branched Inference for llama.cpp
grammar.hpp
1#pragma once
2
3// SPDX-License-Identifier: Apache-2.0
4// Copyright 2026 Lloyal Labs
5
6
7#include "common.hpp"
8#include "tokenizer.hpp"
9#include <llama/llama.h>
10#include <json-schema-to-grammar.h> // llama.cpp common library
11#include <nlohmann/json.hpp>
12#include <stdexcept>
13#include <string>
14#include <vector>
15
33namespace lloyal::grammar {
34
47inline std::string from_json_schema(const std::string &schema_json) {
48 LLOYAL_LOG_DEBUG(
49 "[grammar::from_json_schema] Converting JSON schema (%zu bytes)",
50 schema_json.size());
51
52 try {
53 // Parse JSON schema
54 nlohmann::ordered_json schema = nlohmann::ordered_json::parse(schema_json);
55
56 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] Schema parsed, calling "
57 "json_schema_to_grammar");
58
59 // Call json_schema_to_grammar from llama.cpp common library
60 // Parameters: (schema, force_gbnf)
61 // force_gbnf=false lets llama.cpp return an LLGuidance grammar instead of GBNF when built with LLGUIDANCE support
62 std::string grammar = json_schema_to_grammar(schema, false);
63
64 if (grammar.empty()) {
65 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: Conversion produced "
66 "empty grammar");
67 throw std::runtime_error("Grammar conversion produced empty result");
68 }
69
71 "[grammar::from_json_schema] Generated GBNF grammar (%zu bytes)",
72 grammar.size());
73 return grammar;
74
75 } catch (const nlohmann::json::parse_error &e) {
76 std::string errorMsg = std::string("JSON parse error: ") + e.what();
77 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
78 throw std::runtime_error(errorMsg);
79 } catch (const std::exception &e) {
80 std::string errorMsg =
81 std::string("Grammar conversion failed: ") + e.what();
82 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
83 throw std::runtime_error(errorMsg);
84 }
85}
86
87// ===== SAMPLER INITIALIZATION =====
88
107inline llama_sampler *init_sampler(const llama_model *model,
108 const std::string &grammar_str,
109 const std::string &root_rule = "root") {
110 if (!model) {
111 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: model is null");
112 return nullptr;
113 }
114
115 if (grammar_str.empty()) {
116 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: grammar_str is empty");
117 return nullptr;
118 }
119
120 const llama_vocab *vocab = tokenizer::get_vocab(model);
121 if (!vocab) {
122 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: get_vocab returned null");
123 return nullptr;
124 }
125
126 LLOYAL_LOG_DEBUG("[grammar::init_sampler] Initializing grammar sampler "
127 "(grammar: %zu bytes, root: %s)",
128 grammar_str.size(), root_rule.c_str());
129
130 llama_sampler *sampler =
131 llama_sampler_init_grammar(vocab, grammar_str.c_str(), root_rule.c_str());
132
133 if (!sampler) {
134 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: "
135 "llama_sampler_init_grammar returned null");
136 }
137
138 return sampler;
139}
140
158inline llama_sampler *init_lazy_sampler(
159 const llama_model *model,
160 const std::string &grammar_str,
161 const std::vector<std::string> &trigger_patterns,
162 const std::vector<llama_token> &trigger_tokens,
163 const std::string &root_rule = "root") {
164 if (!model) {
165 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: model is null");
166 return nullptr;
167 }
168
169 if (grammar_str.empty()) {
170 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: grammar_str is empty");
171 return nullptr;
172 }
173
174 const llama_vocab *vocab = tokenizer::get_vocab(model);
175 if (!vocab) {
176 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: get_vocab returned null");
177 return nullptr;
178 }
179
180 std::vector<const char *> patterns_c;
181 patterns_c.reserve(trigger_patterns.size());
182 for (const auto &p : trigger_patterns) patterns_c.push_back(p.c_str());
183
184 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] Initializing lazy grammar "
185 "(grammar: %zu bytes, %zu patterns, %zu tokens, root: %s)",
186 grammar_str.size(), trigger_patterns.size(),
187 trigger_tokens.size(), root_rule.c_str());
188
189 llama_sampler *sampler = llama_sampler_init_grammar_lazy_patterns(
190 vocab, grammar_str.c_str(), root_rule.c_str(),
191 patterns_c.data(), patterns_c.size(),
192 trigger_tokens.data(), trigger_tokens.size());
193
194 if (!sampler) {
195 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: "
196 "llama_sampler_init_grammar_lazy_patterns returned null");
197 }
198
199 return sampler;
200}
201
213inline llama_sampler *clone_sampler(llama_sampler *smpl) {
214 if (!smpl) {
215 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Input is null, returning null");
216 return nullptr;
217 }
218
219 llama_sampler *cloned = llama_sampler_clone(smpl);
220
221 if (!cloned) {
222 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] ERROR: llama_sampler_clone failed");
223 } else {
224 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Cloned sampler successfully");
225 }
226
227 return cloned;
228}
229
235inline void free_sampler(llama_sampler* smpl) {
236 if (smpl) {
237 llama_sampler_free(smpl);
238 }
239}
240
249inline void apply(llama_sampler* smpl, llama_token_data_array* cur_p) {
250 if (smpl && cur_p) {
251 llama_sampler_apply(smpl, cur_p);
252 }
253}
254
263inline void accept(llama_sampler* smpl, llama_token token) {
264 if (smpl) {
265 llama_sampler_accept(smpl, token);
266 }
267}
268
269} // namespace lloyal::grammar
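
Example usage (not part of grammar.hpp): a minimal sketch of grammar-constrained decoding with the helpers above. It assumes a llama_model and llama_context have already been loaded and that llama_decode() has been called for the prompt; the JSON schema, the greedy token pick, and the function name constrained_decode_example are illustrative only.

#include "grammar.hpp"
#include <vector>

void constrained_decode_example(const llama_model *model, llama_context *ctx) {
  // Convert a JSON schema to GBNF and build the grammar sampler
  std::string gbnf = lloyal::grammar::from_json_schema(
      R"({"type":"object","properties":{"answer":{"type":"string"}},"required":["answer"]})");
  llama_sampler *smpl = lloyal::grammar::init_sampler(model, gbnf);
  if (!smpl) return;

  // Build a candidate array from the logits of the last decoded token
  const llama_vocab *vocab = lloyal::tokenizer::get_vocab(model);
  const int32_t n_vocab = llama_vocab_n_tokens(vocab);
  const float *logits = llama_get_logits_ith(ctx, -1);

  std::vector<llama_token_data> candidates(n_vocab);
  for (llama_token id = 0; id < n_vocab; ++id) {
    candidates[id] = llama_token_data{id, logits[id], 0.0f};
  }
  llama_token_data_array cur_p = {candidates.data(), candidates.size(), -1, false};

  // Mask tokens the grammar forbids, then pick one (greedy placeholder)
  lloyal::grammar::apply(smpl, &cur_p);
  size_t best = 0;
  for (size_t i = 1; i < cur_p.size; ++i) {
    if (cur_p.data[i].logit > cur_p.data[best].logit) best = i;
  }
  llama_token picked = cur_p.data[best].id;

  // Advance the grammar state and clean up
  lloyal::grammar::accept(smpl, picked);
  lloyal::grammar::free_sampler(smpl);
}
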
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Definition common.hpp:48
void free_sampler(llama_sampler *smpl)
Free a grammar sampler.
Definition grammar.hpp:235
llama_sampler * clone_sampler(llama_sampler *smpl)
Clone a grammar sampler (for fork/branching).
Definition grammar.hpp:213
llama_sampler * init_sampler(const llama_model *model, const std::string &grammar_str, const std::string &root_rule="root")
Initialize a grammar sampler from GBNF grammar string.
Definition grammar.hpp:107
llama_sampler * init_lazy_sampler(const llama_model *model, const std::string &grammar_str, const std::vector< std::string > &trigger_patterns, const std::vector< llama_token > &trigger_tokens, const std::string &root_rule="root")
Initialize a lazy grammar sampler from GBNF grammar string.
Definition grammar.hpp:158
std::string from_json_schema(const std::string &schema_json)
Convert a JSON schema to GBNF (GGML BNF) format.
Definition grammar.hpp:47
void accept(llama_sampler *smpl, llama_token token)
Accept a token into grammar state.
Definition grammar.hpp:263
void apply(llama_sampler *smpl, llama_token_data_array *cur_p)
Apply grammar constraint to candidates.
Definition grammar.hpp:249
const llama_vocab * get_vocab(const llama_model *model)
Get vocabulary from model.
Text Tokenization Operations.
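
Example usage (not part of grammar.hpp): a minimal sketch of the lazy-grammar and branching paths. The grammar stays dormant until one of the trigger patterns appears in the generated text, which is the typical tool-calling setup; clone_sampler() forks the grammar state so two branches can continue independently. The trigger string and the function name lazy_and_branch_example are illustrative only.

#include "grammar.hpp"
#include <vector>

void lazy_and_branch_example(const llama_model *model) {
  std::string gbnf = lloyal::grammar::from_json_schema(
      R"({"type":"object","properties":{"name":{"type":"string"},"arguments":{"type":"object"}},"required":["name"]})");

  // Patterns that activate the grammar once they show up in the output
  std::vector<std::string> trigger_patterns = {"<tool_call>"};  // illustrative marker
  std::vector<llama_token> trigger_tokens;                      // none in this sketch

  llama_sampler *lazy = lloyal::grammar::init_lazy_sampler(
      model, gbnf, trigger_patterns, trigger_tokens);
  if (!lazy) return;

  // Fork the grammar state for a second branch; each branch then calls
  // apply()/accept() against its own candidate arrays during decoding
  llama_sampler *branch = lloyal::grammar::clone_sampler(lazy);

  // ... decode on both branches ...

  lloyal::grammar::free_sampler(branch);
  lloyal::grammar::free_sampler(lazy);
}
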