Coverage for node/src/stigmem_node/embedding/base.py: 91%
28 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-25 01:49 +0000
1"""EmbeddingModel abstract base — Phase 9 (spec §20 / design memo §2)."""
3from __future__ import annotations
5from abc import ABC, abstractmethod
# Alias for a single embedding vector: a flat list of floats.
Vector = list[float]
class EmbeddingModel(ABC):
    """Interface that every pluggable embedding backend implements.

    Contract: vectors returned by an implementation MUST be scaled to unit
    L2 length, so that sqlite-vec can compute cosine similarity as a plain
    dot product (design memo §2).
    """

    @property
    @abstractmethod
    def model_id(self) -> str:
        """Return the model's stable identifier, e.g. ``'nomic-embed-text-v1.5'``."""
        raise NotImplementedError

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Return the number of components in each output vector."""
        raise NotImplementedError

    @abstractmethod
    def embed(self, texts: list[str]) -> list[Vector]:
        """Embed each string in *texts* as an L2-normalised vector.

        The result holds exactly one vector per input text, in input order.
        Implementations raise ``EmbeddingError`` when embedding fails
        unrecoverably.
        """
        raise NotImplementedError
class EmbeddingError(RuntimeError):
    """Signal an unrecoverable embedding failure (network error, API quota, etc.)."""
def l2_normalize(vec: list[float]) -> list[float]:
    """Return a new list holding *vec* scaled to unit L2 length.

    A zero (or empty) vector has no direction, so it is returned as an
    unscaled copy. The input list is never mutated and never returned
    itself, so callers may freely modify the result.
    """
    import math

    # hypot rescales internally, so very large or very small components do
    # not overflow/underflow the way a hand-rolled sqrt(sum(x * x)) does.
    norm = math.hypot(*vec)
    if norm == 0.0:
        # Copy rather than alias: the contract is "returns a copy".
        return list(vec)
    return [x / norm for x in vec]
def compose_triple_text(entity: str, relation: str, value_type: str, value_v: str) -> str:
    """Build the canonical sentence embedded for a fact triple (design memo §2 Option B).

    The entity is shortened to the final path segment of its URI. A
    non-empty ``ref`` value is shortened the same way, which improves
    semantic alignment; any other value is stringified as-is, with ``None``
    becoming the empty string.
    """
    if value_type != "ref" or not value_v:
        object_text = "" if value_v is None else str(value_v)
    else:
        # Keep only what follows the last "/" (ignoring trailing slashes).
        object_text = value_v.rstrip("/").rpartition("/")[2]

    subject_text = entity.rstrip("/").rpartition("/")[2]
    return f"{subject_text} {relation} {object_text}"