Coverage for node / src / stigmem_node / embedding / stub_adapter.py: 95%

32 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-25 01:49 +0000

1"""Deterministic stub EmbeddingModel — for tests and offline development. 

2 

3Produces reproducible L2-normalised vectors derived from text hashes. 

4Never requires external dependencies. 

5""" 

6 

7from __future__ import annotations 

8 

9import hashlib 

10 

11from .base import EmbeddingModel, Vector, l2_normalize 

12 

# Default embedding dimension used by StubEmbeddingModel when no ``dim`` is
# passed to its constructor. Kept deliberately minimal; tests override it via
# the constructor argument.
_DEFAULT_STUB_DIM = 4

14 

15 

class StubEmbeddingModel(EmbeddingModel):
    """Test / offline stub that returns deterministic vectors derived from hashes.

    Each text is expanded into ``dim`` floats in ``[-1.0, 1.0]`` by hashing
    ``"{text}:{counter}"`` with SHA-256 for ``counter = 0, 1, 2, ...`` and
    reading the digests as big-endian 32-bit words. The resulting list is then
    passed through ``l2_normalize`` (imported from ``.base``). The same text and
    ``dim`` always produce the same vector; no external service or model file
    is involved.
    """

    def __init__(self, dim: int = _DEFAULT_STUB_DIM, model_id: str = "stub") -> None:
        """Store the vector dimension and the identifier reported by the stub.

        Args:
            dim: Number of components in every produced vector. Not validated
                here; with ``dim <= 0`` no components are generated and an
                empty list is handed to ``l2_normalize``.
            model_id: Value returned by the ``model_id`` property.
        """
        self._dim = dim
        self._model_id = model_id

    @property
    def model_id(self) -> str:
        """Identifier given at construction time (``"stub"`` by default)."""
        return self._model_id

    @property
    def dimension(self) -> int:
        """Number of components in each vector returned by ``embed``."""
        return self._dim

    def embed(self, texts: list[str]) -> list[Vector]:
        """Return one deterministic vector per input text, in input order.

        Args:
            texts: Strings to embed. An empty list yields an empty list.

        Returns:
            A list with one entry per text; each entry is the result of
            ``l2_normalize`` applied to that text's hash-derived components.
        """
        return [l2_normalize(self._raw_components(text)) for text in texts]

    def _raw_components(self, text: str) -> list[float]:
        """Derive the un-normalised components for ``text`` from SHA-256 digests."""
        raw: list[float] = []
        counter = 0
        while len(raw) < self._dim:
            # A fresh digest per counter value extends the stream for
            # dimensions larger than the eight words one digest provides.
            digest = hashlib.sha256(f"{text}:{counter}".encode()).digest()
            counter += 1
            # SHA-256 digests are always 32 bytes, so every 4-byte slice is
            # complete; no short-chunk guard is needed.
            for offset in range(0, len(digest), 4):
                word = int.from_bytes(digest[offset : offset + 4], "big")
                # Map the 32-bit word from [0, 0xFFFFFFFF] to [-1.0, 1.0].
                unit = word / 0xFFFFFFFF
                raw.append((unit * 2.0) - 1.0)
                if len(raw) == self._dim:
                    break
        return raw