liblloyal 1.0.0
Composable primitives for llama.cpp inference
Loading...
Searching...
No Matches
grammar.hpp
Go to the documentation of this file.
1#pragma once
2
3// SPDX-License-Identifier: Apache-2.0
4// Copyright 2026 Lloyal Labs
5
6#include "common.hpp"
8#include "tokenizer.hpp"
9#include <llama/llama.h>
10#include <lloyal/nlohmann/json.hpp>
11#include <stdexcept>
12#include <string>
13
31namespace lloyal::grammar {
32
45inline std::string from_json_schema(const std::string &schema_json) {
47 "[grammar::from_json_schema] Converting JSON schema (%zu bytes)",
48 schema_json.size());
49
50 try {
51 // Parse JSON schema
52 nlohmann::ordered_json schema = nlohmann::ordered_json::parse(schema_json);
53
54 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] Schema parsed, calling "
55 "json_schema_to_grammar");
56
57 // Call lloyal::json_schema_to_grammar from json-schema-to-grammar.hpp
58 // Parameters: (schema, force_gbnf)
59 // force_gbnf=false allows EBNF optimization when possible
60 std::string grammar = lloyal::json_schema_to_grammar(schema, false);
61
62 if (grammar.empty()) {
63 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: Conversion produced "
64 "empty grammar");
65 throw std::runtime_error("Grammar conversion produced empty result");
66 }
67
69 "[grammar::from_json_schema] Generated GBNF grammar (%zu bytes)",
70 grammar.size());
71 return grammar;
72
73 } catch (const nlohmann::json::parse_error &e) {
74 std::string errorMsg = std::string("JSON parse error: ") + e.what();
75 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
76 throw std::runtime_error(errorMsg);
77 } catch (const std::exception &e) {
78 std::string errorMsg =
79 std::string("Grammar conversion failed: ") + e.what();
80 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
81 throw std::runtime_error(errorMsg);
82 }
83}
84
85// ===== SAMPLER INITIALIZATION =====
86
105inline llama_sampler *init_sampler(const llama_model *model,
106 const std::string &grammar_str,
107 const std::string &root_rule = "root") {
108 if (!model) {
109 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: model is null");
110 return nullptr;
111 }
112
113 if (grammar_str.empty()) {
114 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: grammar_str is empty");
115 return nullptr;
116 }
117
118 const llama_vocab *vocab = tokenizer::get_vocab(model);
119 if (!vocab) {
120 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: get_vocab returned null");
121 return nullptr;
122 }
123
124 LLOYAL_LOG_DEBUG("[grammar::init_sampler] Initializing grammar sampler "
125 "(grammar: %zu bytes, root: %s)",
126 grammar_str.size(), root_rule.c_str());
127
128 llama_sampler *sampler =
129 llama_sampler_init_grammar(vocab, grammar_str.c_str(), root_rule.c_str());
130
131 if (!sampler) {
132 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: "
133 "llama_sampler_init_grammar returned null");
134 }
135
136 return sampler;
137}
138
150inline llama_sampler *clone_sampler(llama_sampler *smpl) {
151 if (!smpl) {
152 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Input is null, returning null");
153 return nullptr;
154 }
155
156 llama_sampler *cloned = llama_sampler_clone(smpl);
157
158 if (!cloned) {
159 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] ERROR: llama_sampler_clone failed");
160 } else {
161 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Cloned sampler successfully");
162 }
163
164 return cloned;
165}
166
172inline void free_sampler(llama_sampler* smpl) {
173 if (smpl) {
174 llama_sampler_free(smpl);
175 }
176}
177
186inline void apply(llama_sampler* smpl, llama_token_data_array* cur_p) {
187 if (smpl && cur_p) {
188 llama_sampler_apply(smpl, cur_p);
189 }
190}
191
200inline void accept(llama_sampler* smpl, llama_token token) {
201 if (smpl) {
202 llama_sampler_accept(smpl, token);
203 }
204}
205
206} // namespace lloyal::grammar
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Definition common.hpp:47
void free_sampler(llama_sampler *smpl)
Free a grammar sampler.
Definition grammar.hpp:172
llama_sampler * clone_sampler(llama_sampler *smpl)
Clone a grammar sampler (for fork/branching).
Definition grammar.hpp:150
llama_sampler * init_sampler(const llama_model *model, const std::string &grammar_str, const std::string &root_rule="root")
Initialize a grammar sampler from GBNF grammar string.
Definition grammar.hpp:105
std::string from_json_schema(const std::string &schema_json)
Convert JSON schema to GBNF (Grammar BNF) format.
Definition grammar.hpp:45
void accept(llama_sampler *smpl, llama_token token)
Accept a token into grammar state.
Definition grammar.hpp:200
void apply(llama_sampler *smpl, llama_token_data_array *cur_p)
Apply grammar constraint to candidates.
Definition grammar.hpp:186
const llama_vocab * get_vocab(const llama_model *model)
Get vocabulary from model.
std::string json_schema_to_grammar(const json &schema, bool force_gbnf=false)
Convert JSON schema to GBNF grammar.
Text Tokenization Operations.