KV Cache Physics. More...

#include "common.hpp"
#include "decode.hpp"
#include <cassert>
#include <cstdint>
#include <llama/llama.h>
#include <type_traits>
#include <vector>

Classes
struct	lloyal::kv::tenancy::State
	Tenancy state — tracks seq_id vacancy and leases. More...

struct	lloyal::kv::FileData
	Data structure returned by read_file. More...

Namespaces
namespace	lloyal
	Boundary Tracker Stub for OSS liblloyal.

namespace	lloyal::kv

namespace	lloyal::kv::cache_type

namespace	lloyal::kv::tenancy

Functions
ggml_type	lloyal::kv::cache_type::from_str (const std::string &s)
	Map string name to ggml_type enum (matches llama.cpp CLI `-ctk`/`-ctv` flags).

bool	lloyal::kv::remove_range (llama_context *ctx, llama_seq_id seq, llama_pos p0, llama_pos p1)
	Remove token range from KV cache sequence.

llama_pos	lloyal::kv::pos_max (llama_context *ctx, llama_seq_id seq)
	Get maximum position in KV cache sequence.

void	lloyal::kv::seq_cp (llama_context *ctx, llama_seq_id src, llama_seq_id dst, llama_pos p0=0, llama_pos p1=-1)
	Copy KV cache from one sequence to another.

void	lloyal::kv::seq_keep (llama_context *ctx, llama_seq_id seq)
	Keep only one sequence, removing all others.

State	lloyal::kv::tenancy::init (llama_context *ctx, llama_seq_id n_seq_max)
	Initialize tenancy with all seq_ids vacant.

llama_seq_id	lloyal::kv::tenancy::acquire (State &s)
	Acquire a seq_id from the vacant pool.

void	lloyal::kv::tenancy::release (State &s, llama_seq_id seq)
	Release a seq_id back to vacant — bookkeeping only, no KV calls.

void	lloyal::kv::tenancy::evict (State &s, llama_seq_id seq)
	Evict a seq_id — strip all KV tags then release.

void	lloyal::kv::tenancy::retain (State &s, llama_seq_id keep)
	Nuclear retain — keep one seq, rebuild vacancy from scratch.

void	lloyal::kv::tenancy::evict_all (State &s)
	Evict every leased seq_id.

size_t	lloyal::kv::tenancy::available (const State &s)
	Number of vacant seq_ids available for acquisition.

size_t	lloyal::kv::state_size (llama_context *ctx, llama_seq_id seq)
	Get size needed to serialize sequence state.

size_t	lloyal::kv::state_save (llama_context *ctx, llama_seq_id seq, uint8_t *dst, size_t size)
	Save sequence state to buffer.

size_t	lloyal::kv::state_load (llama_context *ctx, llama_seq_id seq, const uint8_t *src, size_t size)
	Restore sequence state from buffer.

size_t	lloyal::kv::global_state_size (llama_context *ctx)
	Get size needed to serialize global state.

size_t	lloyal::kv::global_state_save (llama_context *ctx, uint8_t *dst, size_t size)
	Save global state to buffer.

size_t	lloyal::kv::global_state_load (llama_context *ctx, const uint8_t *src, size_t size)
	Restore global state from buffer.

void	lloyal::kv::log_build_info (llama_context *ctx)
	Log KV cache build info and current state.

void	lloyal::kv::clear_all (llama_context *ctx)
	Clear the entire KV cache (complete reset).

void	lloyal::kv::clear_metadata (llama_context *ctx)
	Clear KV cache metadata only (fast reset).

void	lloyal::kv::clear_and_reseed (llama_context *ctx, const std::vector< llama_token > &original_sinks, const std::vector< llama_token > &tail, int32_t n_batch)

size_t	lloyal::kv::write_file (llama_context *ctx, llama_seq_id seq, const std::string &filepath, const std::vector< llama_token > &tokens)
	Write KV state to file with self-describing format.

FileData	lloyal::kv::read_file (llama_context *ctx, llama_seq_id seq, const std::string &filepath)

Variables
constexpr llama_seq_id	lloyal::kv::NO_LEASE = static_cast<llama_seq_id>(-1)
	Sentinel value indicating a branch has no KV residency.

Detailed Description

KV Cache Physics.

Two layers of KV cache management:

Tenancy — the seq_id vacancy manager (the real logic in this file). Tracks which sequences are leased (owned by a branch) and which are vacant, enforcing the invariant that vacant seq_ids are always clean (no KV tags). See KV Tenancy for the full API.

Primitives — thin wrappers over llama.cpp's llama_memory_* and llama_state_* APIs. These add null-safety, error handling, debug logging, and fragmentation fallbacks, but contain no domain logic:

Sequence ops: remove_range, pos_max, seq_cp, seq_keep
State snapshots: state_size, state_save, state_load
Global state: global_state_size, global_state_save, global_state_load
Cache clearing: clear_all, clear_metadata
Context compression: clear_and_reseed
File I/O: write_file, read_file

Definition in file kv.hpp.

Classes

Namespaces

Functions

Variables

Detailed Description