11#include <llama/llama.h>
144 for (
float v :
vec) {
150 for (
float &
v :
vec) {
155 "[embedding::detail::apply_l2_normalize] WARNING: near-zero norm");
209 throw std::runtime_error(
"embedding::encode - NULL context");
214 throw std::runtime_error(
"embedding::encode - Invalid token array");
220 throw std::runtime_error(
221 "embedding::encode - token count exceeds batch size (truncation not "
222 "supported, increase n_batch or reduce input length)");
240 throw std::runtime_error(
"embedding::encode - llama_decode failed");
275 throw std::invalid_argument(
"embedding::get: ctx is null");
281 LLOYAL_LOG_DEBUG(
"[embedding::get] ERROR: failed to get model from context");
282 throw std::runtime_error(
"embedding::get: failed to get model");
288 "[embedding::get] WARNING: pooling not enabled, embeddings may be "
289 "invalid. Create context with pooling_type != NONE");
297 LLOYAL_LOG_DEBUG(
"[embedding::get] Using llama_get_embeddings_seq for pooled "
301 LLOYAL_LOG_DEBUG(
"[embedding::get] Using llama_get_embeddings (no pooling)");
306 "Ensure context was created with embeddings=true and "
307 "tokens were encoded with logits=true for all tokens.");
308 throw std::runtime_error(
309 "embedding::get: embeddings unavailable (ensure embeddings=true in "
310 "context params and use encode_for_embeddings())");
322 LLOYAL_LOG_DEBUG(
"[embedding::get] Extracted embeddings (dim=%d, normalize=%d)",
345 throw std::invalid_argument(
"embedding::get_seq: ctx is null");
351 throw std::runtime_error(
"embedding::get_seq: failed to get model");
370 throw std::runtime_error(
"embedding::get_seq: embeddings unavailable");
404 throw std::invalid_argument(
"embedding::get_ith: ctx is null");
410 throw std::runtime_error(
"embedding::get_ith: failed to get model");
418 throw std::runtime_error(
"embedding::get_ith: embeddings unavailable");
452 const std::vector<float> &
b) {
453 if (
a.size() !=
b.size()) {
454 LLOYAL_LOG_DEBUG(
"[embedding::cosine_similarity] ERROR: dimension mismatch "
457 throw std::invalid_argument(
458 "embedding::cosine_similarity: dimension mismatch");
467 for (
size_t i = 0;
i <
a.size(); ++
i) {
#define LLOYAL_LOG_DEBUG(...)
liblloyal - Common definitions and logging
void apply_l2_normalize(std::vector< float > &vec)
Apply L2 normalization to embedding vector (in-place)
std::vector< float > get_seq(llama_context *ctx, llama_seq_id seq, Normalize normalize=Normalize::L2)
Get embeddings for specific sequence.
float cosine_similarity(const std::vector< float > &a, const std::vector< float > &b)
Compute cosine similarity between two embedding vectors.
void encode(llama_context *ctx, const llama_token *tokens, int32_t n_tokens, int32_t n_batch)
Encode tokens for embedding extraction.
std::vector< float > get(llama_context *ctx, Normalize normalize=Normalize::L2)
Get embeddings for last decoded batch.
Normalize
Normalization modes for embedding vectors.
int32_t pooling_type(llama_context *ctx)
Get pooling type for context.
std::vector< float > get_ith(llama_context *ctx, int32_t idx, Normalize normalize=Normalize::L2)
Get embeddings for specific token index in last batch.
bool has_embeddings(const llama_model *model)
Check if model supports embeddings.
bool has_pooling(llama_context *ctx)
Check if context has pooling enabled.
int32_t dimension(const llama_model *model)
Get embedding dimension for model.
void batch_clear(llama_batch &batch)
Clear batch to empty state.
void batch_add(llama_batch &batch, llama_token id, int32_t pos, const std::vector< llama_seq_id > &seq_ids, bool logits, int32_t capacity=-1)
Add single token to batch with position and sequence info.
RAII guard for automatic batch cleanup. Ensures llama_batch_free is called even if exceptions occur.
BatchGuard(llama_batch &b)