liblloyal/model__registry_8hpp_source.html

#pragma once


// SPDX-License-Identifier: Apache-2.0

// Copyright 2026 Lloyal Labs


#include "common.hpp"

#include <functional>

#include <llama/llama.h>

#include <memory>

#include <mutex>

#include <string>

#include <unordered_map>


namespace lloyal {


struct ModelKey {

  std::string canonPath;

  int n_gpu_layers;

  bool use_mmap;


  bool operator==(const ModelKey &o) const {

    return n_gpu_layers == o.n_gpu_layers && use_mmap == o.use_mmap &&

           canonPath == o.canonPath;

  }


};


// Compiler-specific sanitizer suppression (GCC/Clang only; no-op on MSVC)

#if defined(__GNUC__) || defined(__clang__)

  #define LLOYAL_NO_SANITIZE_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))

#else

  #define LLOYAL_NO_SANITIZE_OVERFLOW

#endif


struct ModelKeyHash {

  LLOYAL_NO_SANITIZE_OVERFLOW


  size_t operator()(const ModelKey &k) const {

    std::hash<std::string> Hs;

    std::hash<int> Hi;

    std::hash<bool> Hb;

    return Hs(k.canonPath) ^

           (Hi(k.n_gpu_layers) + 0x9e3779b9 + (Hb(k.use_mmap) << 6));

  }


};


#undef LLOYAL_NO_SANITIZE_OVERFLOW


class ModelRegistry {

public:

  static std::shared_ptr<llama_model> acquire(const std::string &fsPath,

                                              const llama_model_params &params);


private:

  inline static std::mutex mu_;


  inline static std::unordered_map<ModelKey, std::weak_ptr<llama_model>,

                                   ModelKeyHash>

      cache_;


  static ModelKey makeKey(const std::string &fsPath,

                          const llama_model_params &params);

};


} // namespace lloyal


namespace lloyal::detail {


inline void freeModel(llama_model *model) {

  LLOYAL_LOG_DEBUG(

      "[ModelRegistry] Freeing model: ptr=%p (last reference released)",

      (void *)model);

  llama_model_free(model);

  LLOYAL_LOG_DEBUG("[ModelRegistry] Model freed: ptr=%p", (void *)model);

}


} // namespace lloyal::detail


namespace lloyal {


// ===== IMPLEMENTATION =====


// Normalize path to ensure "file:///path" and "/path" map to same key

inline ModelKey ModelRegistry::makeKey(const std::string &fsPath,

                                       const llama_model_params &params) {

  // Inline path normalization (removes file:// prefix if present)

  std::string canonPath = fsPath;

  const std::string filePrefix = "file://";

  if (canonPath.substr(0, filePrefix.length()) == filePrefix) {

    canonPath = canonPath.substr(filePrefix.length());

  }


  return {canonPath, params.n_gpu_layers, params.use_mmap};

}


// Acquire model from cache or load new

// 1. Check cache (thread-safe)

// 2. Return existing if found (cache hit)

// 3. Load new if expired (cache miss)

// 4. Store as weak_ptr, return shared_ptr

inline std::shared_ptr<llama_model>


ModelRegistry::acquire(const std::string &fsPath,

                       const llama_model_params &params) {

  ModelKey key = makeKey(fsPath, params);


  LLOYAL_LOG_DEBUG("[ModelRegistry] Acquiring model: path='%s', "

                   "n_gpu_layers=%d, use_mmap=%s",

                   key.canonPath.c_str(), key.n_gpu_layers,

                   key.use_mmap ? "true" : "false");


  std::lock_guard<std::mutex> lock(mu_);


  auto cacheEntry = cache_.find(key);

  if (cacheEntry != cache_.end()) {

    // Try to upgrade weak_ptr to shared_ptr

    if (auto existingModel = cacheEntry->second.lock()) {

      long refCount = existingModel.use_count();

      LLOYAL_LOG_DEBUG(

          "[ModelRegistry] Cache HIT - Reusing model: ptr=%p, refcount=%ld",

          (void *)existingModel.get(), refCount);

      return existingModel;

    } else {

      LLOYAL_LOG_DEBUG("[ModelRegistry] Cache entry expired (model was freed), "

                       "removing stale entry");

      cache_.erase(cacheEntry);

    }

  }


  LLOYAL_LOG_DEBUG("[ModelRegistry] Cache MISS - Loading NEW model from disk");

  LLOYAL_LOG_DEBUG("[ModelRegistry]   Path: %s", key.canonPath.c_str());

  LLOYAL_LOG_DEBUG("[ModelRegistry]   GPU layers: %d", key.n_gpu_layers);

  LLOYAL_LOG_DEBUG("[ModelRegistry]   Memory mapping: %s",

                   key.use_mmap ? "enabled" : "disabled");


  llama_model *rawModel =

      llama_model_load_from_file(key.canonPath.c_str(), params);


  if (!rawModel) {

    // Let caller handle error (will throw structured error)

    LLOYAL_LOG_DEBUG(

        "[ModelRegistry] ERROR: llama_model_load_from_file returned NULL");

    return nullptr;

  }


  size_t modelSize = llama_model_size(rawModel);

  LLOYAL_LOG_DEBUG("[ModelRegistry] Model loaded:");

  LLOYAL_LOG_DEBUG("[ModelRegistry]   Pointer: %p", (void *)rawModel);

  LLOYAL_LOG_DEBUG("[ModelRegistry]   Size: %zu bytes (%.2f MB)", modelSize,

                   modelSize / (1024.0 * 1024.0));


  auto sharedModel = std::shared_ptr<llama_model>(rawModel, detail::freeModel);


  // Store as weak_ptr (allows automatic cleanup when all contexts release the

  // model)

  cache_[key] = sharedModel;

  LLOYAL_LOG_DEBUG("[ModelRegistry] Model cached as weak_ptr, returning "

                   "shared_ptr (refcount=1)");


  return sharedModel;

}


} // namespace lloyal

lloyal::ModelRegistry
Definition model_registry.hpp:115

lloyal::ModelRegistry::acquire
static std::shared_ptr< llama_model > acquire(const std::string &fsPath, const llama_model_params &params)
Acquire a model from cache, or load from disk on cache miss.
Definition model_registry.hpp:199

common.hpp

LLOYAL_LOG_DEBUG
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
Definition common.hpp:47

LLOYAL_NO_SANITIZE_OVERFLOW
#define LLOYAL_NO_SANITIZE_OVERFLOW
Hash functor for ModelKey.
Definition model_registry.hpp:56

lloyal::detail
Definition model_registry.hpp:156

lloyal
Boundary Tracker Stub for OSS liblloyal.
Definition boundaries.hpp:16

lloyal::ModelKeyHash
Definition model_registry.hpp:59

lloyal::ModelKeyHash::operator()
size_t operator()(const ModelKey &k) const
Compute hash for ModelKey.
Definition model_registry.hpp:69

lloyal::ModelKey
Model cache key combining file path and GPU configuration.
Definition model_registry.hpp:35

lloyal::ModelKey::canonPath
std::string canonPath
Normalized file path (file:// prefix removed)
Definition model_registry.hpp:36

lloyal::ModelKey::use_mmap
bool use_mmap
Whether to use memory mapping.
Definition model_registry.hpp:38

lloyal::ModelKey::n_gpu_layers
int n_gpu_layers
Number of layers offloaded to GPU (-1 = all)
Definition model_registry.hpp:37

lloyal::ModelKey::operator==
bool operator==(const ModelKey &o) const
Definition model_registry.hpp:40