liblloyal 1.0.0
Branched Inference for llama.cpp
grammar.hpp
1#pragma once
2
3// SPDX-License-Identifier: Apache-2.0
4// Copyright 2026 Lloyal Labs
5
6
7#include "common.hpp"
8#include "tokenizer.hpp"
9#include <llama/llama.h>
10#include <json-schema-to-grammar.h> // llama.cpp common library
11#include <nlohmann/json.hpp>
12#include <stdexcept>
13#include <string>
14#include <vector>
15
33namespace lloyal::grammar {
34
47inline std::string from_json_schema(const std::string &schema_json) {
48 LLOYAL_LOG_DEBUG(
49 "[grammar::from_json_schema] Converting JSON schema (%zu bytes)",
50 schema_json.size());
51
52 try {
53 // Parse JSON schema
54 nlohmann::ordered_json schema = nlohmann::ordered_json::parse(schema_json);
55
56 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] Schema parsed, calling "
57 "json_schema_to_grammar");
58
59 // Call json_schema_to_grammar from llama.cpp common library
60 // Parameters: (schema, force_gbnf)
61 // force_gbnf=false lets llama.cpp return an LLGuidance grammar instead of GBNF when built with LLGUIDANCE support
62 std::string grammar = json_schema_to_grammar(schema, false);
63
64 if (grammar.empty()) {
65 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: Conversion produced "
66 "empty grammar");
67 throw std::runtime_error("Grammar conversion produced empty result");
68 }
69
71 "[grammar::from_json_schema] Generated GBNF grammar (%zu bytes)",
72 grammar.size());
73 return grammar;
74
75 } catch (const nlohmann::json::parse_error &e) {
76 std::string errorMsg = std::string("JSON parse error: ") + e.what();
77 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
78 throw std::runtime_error(errorMsg);
79 } catch (const std::exception &e) {
80 std::string errorMsg =
81 std::string("Grammar conversion failed: ") + e.what();
82 LLOYAL_LOG_DEBUG("[grammar::from_json_schema] ERROR: %s", errorMsg.c_str());
83 throw std::runtime_error(errorMsg);
84 }
85}
86
87// ===== SAMPLER INITIALIZATION =====
88
107inline llama_sampler *init_sampler(const llama_model *model,
108 const std::string &grammar_str,
109 const std::string &root_rule = "root") {
110 if (!model) {
111 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: model is null");
112 return nullptr;
113 }
114
115 if (grammar_str.empty()) {
116 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: grammar_str is empty");
117 return nullptr;
118 }
119
120 const llama_vocab *vocab = tokenizer::get_vocab(model);
121 if (!vocab) {
122 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: get_vocab returned null");
123 return nullptr;
124 }
125
126 LLOYAL_LOG_DEBUG("[grammar::init_sampler] Initializing grammar sampler "
127 "(grammar: %zu bytes, root: %s)",
128 grammar_str.size(), root_rule.c_str());
129
130 llama_sampler *sampler =
131 llama_sampler_init_grammar(vocab, grammar_str.c_str(), root_rule.c_str());
132
133 if (!sampler) {
134 LLOYAL_LOG_DEBUG("[grammar::init_sampler] ERROR: "
135 "llama_sampler_init_grammar returned null");
136 }
137
138 return sampler;
139}
140
158inline llama_sampler *init_lazy_sampler(
159 const llama_model *model,
160 const std::string &grammar_str,
161 const std::vector<std::string> &trigger_patterns,
162 const std::vector<llama_token> &trigger_tokens,
163 const std::string &root_rule = "root") {
164 if (!model) {
165 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: model is null");
166 return nullptr;
167 }
168
169 if (grammar_str.empty()) {
170 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: grammar_str is empty");
171 return nullptr;
172 }
173
174 const llama_vocab *vocab = tokenizer::get_vocab(model);
175 if (!vocab) {
176 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: get_vocab returned null");
177 return nullptr;
178 }
179
180 std::vector<const char *> patterns_c;
181 patterns_c.reserve(trigger_patterns.size());
182 for (const auto &p : trigger_patterns) patterns_c.push_back(p.c_str());
183
184 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] Initializing lazy grammar "
185 "(grammar: %zu bytes, %zu patterns, %zu tokens, root: %s)",
186 grammar_str.size(), trigger_patterns.size(),
187 trigger_tokens.size(), root_rule.c_str());
188
189 llama_sampler *sampler = llama_sampler_init_grammar_lazy_patterns(
190 vocab, grammar_str.c_str(), root_rule.c_str(),
191 patterns_c.data(), patterns_c.size(),
192 trigger_tokens.data(), trigger_tokens.size());
193
194 if (!sampler) {
195 LLOYAL_LOG_DEBUG("[grammar::init_lazy_sampler] ERROR: "
196 "llama_sampler_init_grammar_lazy_patterns returned null");
197 }
198
199 return sampler;
200}
201
213inline llama_sampler *clone_sampler(llama_sampler *smpl) {
214 if (!smpl) {
215 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Input is null, returning null");
216 return nullptr;
217 }
218
219 llama_sampler *cloned = llama_sampler_clone(smpl);
220
221 if (!cloned) {
222 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] ERROR: llama_sampler_clone failed");
223 } else {
224 LLOYAL_LOG_DEBUG("[grammar::clone_sampler] Cloned sampler successfully");
225 }
226
227 return cloned;
228}
229
235inline void free_sampler(llama_sampler* smpl) {
236 if (smpl) {
237 llama_sampler_free(smpl);
238 }
239}
240
249inline void apply(llama_sampler* smpl, llama_token_data_array* cur_p) {
250 if (smpl && cur_p) {
251 llama_sampler_apply(smpl, cur_p);
252 }
253}
254
263inline void accept(llama_sampler* smpl, llama_token token) {
264 if (smpl) {
265 llama_sampler_accept(smpl, token);
266 }
267}
268
269} // namespace lloyal::grammar
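
Example usage (not part of grammar.hpp): a minimal sketch of grammar-constrained decoding with the helpers above. It assumes a llama_model and llama_context have already been loaded and that llama_decode() has been called for the prompt; the JSON schema, the greedy token pick, and the function name constrained_decode_example are illustrative only.

#include "grammar.hpp"
#include <vector>

void constrained_decode_example(const llama_model *model, llama_context *ctx) {
  // Convert a JSON schema to GBNF and build the grammar sampler
  std::string gbnf = lloyal::grammar::from_json_schema(
      R"({"type":"object","properties":{"answer":{"type":"string"}},"required":["answer"]})");
  llama_sampler *smpl = lloyal::grammar::init_sampler(model, gbnf);
  if (!smpl) return;

  // Build a candidate array from the logits of the last decoded token
  const llama_vocab *vocab = lloyal::tokenizer::get_vocab(model);
  const int32_t n_vocab = llama_vocab_n_tokens(vocab);
  const float *logits = llama_get_logits_ith(ctx, -1);

  std::vector<llama_token_data> candidates(n_vocab);
  for (llama_token id = 0; id < n_vocab; ++id) {
    candidates[id] = llama_token_data{id, logits[id], 0.0f};
  }
  llama_token_data_array cur_p = {candidates.data(), candidates.size(), -1, false};

  // Mask tokens the grammar forbids, then pick one (greedy placeholder)
  lloyal::grammar::apply(smpl, &cur_p);
  size_t best = 0;
  for (size_t i = 1; i < cur_p.size; ++i) {
    if (cur_p.data[i].logit > cur_p.data[best].logit) best = i;
  }
  llama_token picked = cur_p.data[best].id;

  // Advance the grammar state and clean up
  lloyal::grammar::accept(smpl, picked);
  lloyal::grammar::free_sampler(smpl);
}
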
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Definition common.hpp:48
void free_sampler(llama_sampler *smpl)
Free a grammar sampler.
Definition grammar.hpp:235
llama_sampler * clone_sampler(llama_sampler *smpl)
Clone a grammar sampler (for fork/branching).
Definition grammar.hpp:213
llama_sampler * init_sampler(const llama_model *model, const std::string &grammar_str, const std::string &root_rule="root")
Initialize a grammar sampler from GBNF grammar string.
Definition grammar.hpp:107
llama_sampler * init_lazy_sampler(const llama_model *model, const std::string &grammar_str, const std::vector< std::string > &trigger_patterns, const std::vector< llama_token > &trigger_tokens, const std::string &root_rule="root")
Initialize a lazy grammar sampler from GBNF grammar string.
Definition grammar.hpp:158
std::string from_json_schema(const std::string &schema_json)
Convert a JSON schema to GBNF (GGML BNF) format.
Definition grammar.hpp:47
void accept(llama_sampler *smpl, llama_token token)
Accept a token into grammar state.
Definition grammar.hpp:263
void apply(llama_sampler *smpl, llama_token_data_array *cur_p)
Apply grammar constraint to candidates.
Definition grammar.hpp:249
const llama_vocab * get_vocab(const llama_model *model)
Get vocabulary from model.
Text Tokenization Operations.
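
Example usage (not part of grammar.hpp): a minimal sketch of the lazy-grammar and branching paths. The grammar stays dormant until one of the trigger patterns appears in the generated text, which is the typical tool-calling setup; clone_sampler() forks the grammar state so two branches can continue independently. The trigger string and the function name lazy_and_branch_example are illustrative only.

#include "grammar.hpp"
#include <vector>

void lazy_and_branch_example(const llama_model *model) {
  std::string gbnf = lloyal::grammar::from_json_schema(
      R"({"type":"object","properties":{"name":{"type":"string"},"arguments":{"type":"object"}},"required":["name"]})");

  // Patterns that activate the grammar once they show up in the output
  std::vector<std::string> trigger_patterns = {"<tool_call>"};  // illustrative marker
  std::vector<llama_token> trigger_tokens;                      // none in this sketch

  llama_sampler *lazy = lloyal::grammar::init_lazy_sampler(
      model, gbnf, trigger_patterns, trigger_tokens);
  if (!lazy) return;

  // Fork the grammar state for a second branch; each branch then calls
  // apply()/accept() against its own candidate arrays during decoding
  llama_sampler *branch = lloyal::grammar::clone_sampler(lazy);

  // ... decode on both branches ...

  lloyal::grammar::free_sampler(branch);
  lloyal::grammar::free_sampler(lazy);
}
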