Coverage for node/src/stigmem_node/embedding/base.py: 91%

28 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-25 01:49 +0000

1"""EmbeddingModel abstract base — Phase 9 (spec §20 / design memo §2).""" 

2 

3from __future__ import annotations 

4 

5from abc import ABC, abstractmethod 

6 

# Alias for a single embedding vector: a plain list of floats.
Vector = list[float]

8 

9 

class EmbeddingModel(ABC):
    """Abstract interface implemented by every pluggable embedding backend.

    Contract: vectors returned by an implementation MUST be scaled to unit
    L2 length, so that cosine similarity reduces to a dot product in
    sqlite-vec (design memo §2).
    """

    @property
    @abstractmethod
    def model_id(self) -> str:
        """Stable identifier of the model, e.g. ``'nomic-embed-text-v1.5'``."""
        raise NotImplementedError

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Number of components in each output vector."""
        raise NotImplementedError

    @abstractmethod
    def embed(self, texts: list[str]) -> list[Vector]:
        """Embed *texts* into L2-normalised vectors.

        The result holds exactly one vector per input text, in input order.
        Raises ``EmbeddingError`` on unrecoverable failures.
        """
        raise NotImplementedError

37 

38 

class EmbeddingError(RuntimeError):
    """Unrecoverable embedding failure, such as a network error or API quota."""

41 

42 

def l2_normalize(vec: list[float]) -> list[float]:
    """Return an L2-normalised copy of *vec*.

    Args:
        vec: Input vector; it is never modified.

    Returns:
        A new list holding each component of *vec* divided by the Euclidean
        norm of *vec*. When that norm is zero (all-zero or empty input) there
        is no direction to normalise, so an unscaled copy is returned.
    """
    import math  # function-scope import, as in the original block

    norm = math.sqrt(sum(x * x for x in vec))
    if norm == 0.0:
        # Copy instead of aliasing the caller's list, so the "returns a copy"
        # contract also holds on this path and later mutation of the result
        # cannot corrupt the input.
        return list(vec)
    return [x / norm for x in vec]

51 

52 

53def compose_triple_text(entity: str, relation: str, value_type: str, value_v: str) -> str: 

54 """Compose the canonical text to embed for a fact triple (design memo §2 Option B). 

55 

56 For ref-typed values, uses the last path segment of the URI as the display 

57 name to improve semantic alignment. 

58 """ 

59 if value_type == "ref" and value_v: 

60 display_v = value_v.rstrip("/").rsplit("/", 1)[-1] 

61 else: 

62 display_v = str(value_v) if value_v is not None else "" 

63 

64 entity_display = entity.rstrip("/").rsplit("/", 1)[-1] 

65 return f"{entity_display} {relation} {display_v}"