Embedding Extraction and Normalization. More...

#include "common.hpp"
#include <common.h>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <llama/llama.h>
#include <stdexcept>
#include <vector>

Classes
struct	lloyal::embedding::detail::BatchGuard
	RAII guard for automatic batch cleanup. Ensures llama_batch_free is called even if exceptions occur. More...

Namespaces
namespace	lloyal
	Boundary Tracker Stub for OSS liblloyal.

namespace	lloyal::embedding

namespace	lloyal::embedding::detail

Enumerations
enum class	lloyal::embedding::Normalize : int32_t { lloyal::embedding::None = 0 , lloyal::embedding::L2 = 1 }
	Normalization modes for embedding vectors. More...

Functions
bool	lloyal::embedding::has_embeddings (const llama_model *model)
	Check if model supports embeddings.

int32_t	lloyal::embedding::dimension (const llama_model *model)
	Get embedding dimension for model.

bool	lloyal::embedding::has_pooling (llama_context *ctx)
	Check if context has pooling enabled.

int32_t	lloyal::embedding::pooling_type (llama_context *ctx)
	Get pooling type for context.

void	lloyal::embedding::detail::apply_l2_normalize (std::vector< float > &vec)
	Apply L2 normalization to embedding vector (in-place)

void	lloyal::embedding::encode (llama_context *ctx, const llama_token *tokens, int32_t n_tokens, int32_t n_batch)
	Encode tokens for embedding extraction.

void	lloyal::embedding::encode (llama_context *ctx, const std::vector< llama_token > &tokens, int32_t n_batch)
	Convenience overload for std::vector<llama_token>

std::vector< float >	lloyal::embedding::get (llama_context *ctx, Normalize normalize=Normalize::L2)
	Get embeddings for last decoded batch.

std::vector< float >	lloyal::embedding::get_seq (llama_context *ctx, llama_seq_id seq, Normalize normalize=Normalize::L2)
	Get embeddings for specific sequence.

std::vector< float >	lloyal::embedding::get_ith (llama_context *ctx, int32_t idx, Normalize normalize=Normalize::L2)
	Get embeddings for specific token index in last batch.

float	lloyal::embedding::cosine_similarity (const std::vector< float > &a, const std::vector< float > &b)
	Compute cosine similarity between two embedding vectors.

Detailed Description

Embedding Extraction and Normalization.

Wraps llama.cpp embedding APIs with pooling mode management and L2 normalization. Provides both context-bound extraction and model capability checks.

Architecture:

- Context-bound primitives for embedding extraction
- Model-accepting overloads for capability checks
- Built-in L2 normalization for cosine similarity

Definition in file embedding.hpp.

Classes

Namespaces

Enumerations

Functions

Detailed Description