Compute model-level surprisal for picked token.
Compute model-level surprisal for picked token. Surprisal = -log p(tokenₜ | context) = uncertainty of the model's choice. Higher surprisal = more surprising token (lower probability).
Use model logits (before temperature/top-k/p) to measure model's inherent uncertainty.
float* logits = lloyal::logits::get(ctx);
int n_vocab = llama_vocab_n_tokens(vocab);
llama_token token = sample(logits);
float s = metrics::model_surprisal(logits, n_vocab, token);
if (s > 5.0f) {
  // High uncertainty - consider retrieval
}
#pragma once
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
namespace detail {
constexpr float LN2 = 0.693147180559945309417232121458176568f;
float m = -std::numeric_limits<float>::infinity();
for (int i = 0; i < n; ++i) {
const float v = a[i];
if (std::isfinite(v) && v > m) m = v;
}
return m;
}
/**
 * Shifted log-sum-exp over the finite entries of an array.
 *
 * Evaluates shift + log(sum_i exp(a[i] - shift)), ignoring every entry for
 * which std::isfinite is false (infinities and NaN contribute nothing).
 *
 * @param a     Input values; only indices [0, n) are read.
 * @param n     Number of entries to visit; nothing is read when n <= 0.
 * @param shift Value subtracted from each entry before exponentiation.
 * @return      shift + log(sum), or negative infinity when the accumulated
 *              sum is exactly zero (no finite entries, or every term
 *              underflowed).
 */
inline float log_sum_exp(const float* a, int n, float shift) {
  float total = 0.0f;
  int idx = 0;
  while (idx < n) {
    const float term = a[idx];
    ++idx;
    if (!std::isfinite(term)) {
      continue;  // masked-out / invalid entries add no probability mass
    }
    total += std::exp(term - shift);
  }
  // A zero sum would make log() return -inf anyway; report it explicitly.
  return (total == 0.0f) ? -std::numeric_limits<float>::infinity()
                         : shift + std::log(total);
}
}
// NOTE(review): no members are visible in this view of the struct. The name
// suggests it carries running state for a perplexity computation — confirm
// against the full header before relying on that.
struct PerplexityState {
};
// NOTE(review): the body is empty in this view. Presumably this groups
// per-branch metric trackers (e.g. a model-level and a sampling-level
// PerplexityState) — TODO confirm against the full header.
struct BranchMetricsState {
};
const float* logits,
int n_vocab,
int picked_id,
) {
if (!logits || n_vocab == 0) {
return std::numeric_limits<float>::infinity();
}
if (picked_id < 0 || picked_id >= n_vocab) {
return std::numeric_limits<float>::infinity();
}
const float picked = logits[picked_id];
if (!std::isfinite(picked)) return std::numeric_limits<float>::infinity();
if (!std::isfinite(m)) return std::numeric_limits<float>::infinity();
if (!std::isfinite(log_z)) return std::numeric_limits<float>::infinity();
const float surprisal_nats = std::max(0.0f, -(picked - log_z));
}
const float* logits,
int n_vocab,
) {
if (!logits || n_vocab == 0) {
return std::numeric_limits<float>::infinity();
}
if (!std::isfinite(m)) return std::numeric_limits<float>::infinity();
if (!std::isfinite(log_z)) return std::numeric_limits<float>::infinity();
float ez = 0.0f;
for (int i = 0; i < n_vocab; ++i) {
const float z = logits[i];
if (!std::isfinite(z)) continue;
const float p = std::exp(z - log_z);
ez += p * z;
}
const float h_nats = std::max(0.0f, log_z - ez);
}
const float* candidate_logits,
const int32_t* candidate_ids,
int n_candidates,
int picked_id,
) {
if (!candidate_logits || !candidate_ids || n_candidates == 0) {
return std::numeric_limits<float>::infinity();
}
int local = -1;
for (int i = 0; i < n_candidates; ++i) {
if (candidate_ids[i] == picked_id) {
local = i;
break;
}
}
if (local == -1) return std::numeric_limits<float>::infinity();
if (n_candidates == 1) return 0.0f;
const float picked = candidate_logits[local];
if (!std::isfinite(picked)) return std::numeric_limits<float>::infinity();
if (!std::isfinite(m)) return std::numeric_limits<float>::infinity();
if (!std::isfinite(log_z)) return std::numeric_limits<float>::infinity();
const float surprisal_nats = std::max(0.0f, -(picked - log_z));
}
const float* candidate_logits,
int n_candidates,
) {
if (!candidate_logits || n_candidates == 0) {
return std::numeric_limits<float>::infinity();
}
if (n_candidates == 1) return 0.0f;
if (!std::isfinite(m)) return std::numeric_limits<float>::infinity();
if (!std::isfinite(log_z)) return std::numeric_limits<float>::infinity();
float ez = 0.0f;
for (int i = 0; i < n_candidates; ++i) {
const float z = candidate_logits[i];
if (!std::isfinite(z)) continue;
const float p = std::exp(z - log_z);
ez += p * z;
}
const float h_nats = std::max(0.0f, log_z - ez);
}
}
float max_finite(const float *a, int n)
Find the maximum finite value in an array. Used as the log-sum-exp shift to prevent overflow.
float log_sum_exp(const float *a, int n, float shift)
Numerically stable log-sum-exp. Computes log(Σ exp(aᵢ)) using the shift trick to avoid overflow.
float sampling_surprisal(const float *candidate_logits, const int32_t *candidate_ids, int n_candidates, int picked_id, Base base=Base::Nats)
Compute sampling-level surprisal for picked token.
float model_entropy(const float *logits, int n_vocab, Base base=Base::Nats)
float sampling_entropy(const float *candidate_logits, int n_candidates, Base base=Base::Nats)
Compute sampling-level entropy of candidate distribution.
float model_surprisal(const float *logits, int n_vocab, int picked_id, Base base=Base::Nats)
PerplexityState model
Model-level (raw logits before filters)
PerplexityState sampling
Sampling-level (post top-k/p/temp)