Coverage for node / src / stigmem_node / observability / tracing.py: 61%

73 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-25 01:49 +0000

1"""OpenTelemetry tracing setup — spec §23 (Phase 13). 

2 

3Activated when ``opentelemetry-sdk`` is installed and 

4``STIGMEM_OTEL_ENABLED=true``. All calls are no-ops when the SDK is absent 

5or the feature is disabled, so the node runs without the dependency. 

6 

7Usage from routes:: 

8 

9 from .observability.tracing import start_span 

10 

11 with start_span("stigmem.assert_fact") as span: 

12 span.set_attribute("stigmem.tenant", tenant_id) 

13 # ... do work ... 

14""" 

15 

16from __future__ import annotations 

17 

18import contextlib 

19import importlib.util 

20import logging 

21from collections.abc import Generator 

22from typing import Any 

23 

# Module-level tracing state. Both values are rebound by ``init_tracing``
# after the SDK has been wired up; until then every span is a no-op.
_tracer: Any = None
_OTEL_ENABLED = False

# Dedicated logger for diagnostics emitted by this module.
logger = logging.getLogger("stigmem.tracing")

28 

29 

def _is_module_available(module_name: str) -> bool:
    """Return True when ``module_name`` can be located without importing it.

    For dotted names ``importlib.util.find_spec`` imports the parent package
    first, so the lookup itself can fail. Any such failure is reported as
    "not available" instead of propagating, because this helper runs at
    import time and must never prevent the module from loading.

    Args:
        module_name: Absolute dotted module name to probe.

    Returns:
        True if a module spec was found, False otherwise.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except ImportError:
        # Covers ModuleNotFoundError (missing parent package) as well as a
        # parent package that exists but fails while being imported.
        return False
    except ValueError:
        # find_spec raises ValueError when the name is already in sys.modules
        # but has no usable ``__spec__``; availability cannot be confirmed,
        # so fall back to the safe "unavailable" answer.
        return False

35 

36 

# True only when every OpenTelemetry module in the tuple below can be
# located; evaluated once, at import time, and short-circuits on the first
# module that is missing.
_OTEL_SDK_AVAILABLE = all(
    map(
        _is_module_available,
        (
            "opentelemetry.trace",
            "opentelemetry.sdk.resources",
            "opentelemetry.sdk.trace",
        ),
    )
)

45 

46 

def init_tracing(service_name: str, otlp_endpoint: str) -> None:
    """Initialize the OTel SDK. Called once at app startup when otel_enabled=True.

    Builds a ``TracerProvider`` tagged with ``service.name``, attaches an OTLP
    exporter when ``otlp_endpoint`` is non-empty (HTTP exporter first, gRPC
    exporter only if the HTTP one cannot be imported), installs the provider
    globally and stores the module-level tracer used by ``start_span``.

    Args:
        service_name: Value stored as the ``service.name`` resource attribute.
        otlp_endpoint: Collector URL. For the HTTP exporter ``/v1/traces`` is
            appended unless the URL already ends with it; the gRPC exporter
            receives the value unchanged. An empty string attaches no exporter.

    Returns:
        None. When the SDK modules are not installed a warning is logged and
        tracing stays disabled. Repeated calls after a successful
        initialization are ignored.
    """
    global _OTEL_ENABLED, _tracer
    if _OTEL_ENABLED and _tracer is not None:
        # Already initialized. The OpenTelemetry API only honours the first
        # set_tracer_provider() call, so running again would merely build an
        # unused provider and one more BatchSpanProcessor.
        return

    if not _OTEL_SDK_AVAILABLE:
        logging.getLogger("stigmem").warning(
            "STIGMEM_OTEL_ENABLED=true but opentelemetry-sdk is not installed; "
            "install stigmem-node[observability] to enable tracing."
        )
        return

    # Imported lazily so the module itself loads without the optional SDK.
    from opentelemetry import trace as _otel_trace
    from opentelemetry.sdk.resources import Resource
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor

    resource = Resource(attributes={"service.name": service_name})
    provider = TracerProvider(resource=resource)

    if otlp_endpoint:
        _exporter_added = False
        try:
            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

            # Avoid producing ".../v1/traces/v1/traces" when the caller already
            # supplied the full traces URL.
            base_url = otlp_endpoint.rstrip("/")
            traces_url = base_url if base_url.endswith("/v1/traces") else f"{base_url}/v1/traces"
            exporter = OTLPSpanExporter(endpoint=traces_url)
            provider.add_span_processor(BatchSpanProcessor(exporter))
            _exporter_added = True
        except ImportError as exc:
            logger.debug("HTTP OTLP exporter unavailable: %s", exc)

        if not _exporter_added:
            try:
                from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
                    OTLPSpanExporter as GrpcOTLPSpanExporter,
                )

                grpc_exporter = GrpcOTLPSpanExporter(endpoint=otlp_endpoint)
                provider.add_span_processor(BatchSpanProcessor(grpc_exporter))
            except ImportError as exc:
                # An endpoint was configured but nothing can ship spans to it;
                # surface that at warning level instead of hiding it in debug.
                logger.warning(
                    "gRPC OTLP exporter unavailable; traces collected locally only: %s",
                    exc,
                )

    _otel_trace.set_tracer_provider(provider)
    _tracer = _otel_trace.get_tracer(
        "stigmem.node", schema_url="https://opentelemetry.io/schemas/1.21.0"
    )
    _OTEL_ENABLED = True

95 

96 

97class _NoopSpan: 

98 """Minimal no-op span used when OTel is disabled.""" 

99 

100 def set_attribute(self, key: str, value: Any) -> None: # noqa: ARG002 

101 pass 

102 

103 def record_exception(self, exc: BaseException, *, escaped: bool = False) -> None: # noqa: ARG002 

104 pass 

105 

106 def set_status(self, status: Any, description: str | None = None) -> None: # noqa: ARG002 

107 pass 

108 

109 def add_event(self, name: str, attributes: dict[str, Any] | None = None) -> None: # noqa: ARG002 

110 pass 

111 

112 

@contextlib.contextmanager
def start_span(name: str, **initial_attributes: Any) -> Generator[_NoopSpan | Any, None, None]:
    """Start a span as a context manager.

    Yields a live OTel ``Span`` (when enabled) or a no-op ``_NoopSpan``
    (when disabled). Always safe to call.

    Args:
        name: Span name passed to the tracer.
        **initial_attributes: Attributes set on the live span before it is
            yielded; ignored when tracing is disabled.

    Raises:
        Exception: Whatever the ``with`` body raises is re-raised unchanged
            after a best-effort attempt to record it on the span.
    """
    if not _OTEL_ENABLED or _tracer is None:
        yield _NoopSpan()
        return

    # The except-branch below records the exception and sets the ERROR status
    # itself. OTel's start_as_current_span does the same by default, which
    # would store every exception twice, so its built-in handling is disabled.
    with _tracer.start_as_current_span(
        name, record_exception=False, set_status_on_exception=False
    ) as span:
        for key, value in initial_attributes.items():
            span.set_attribute(key, value)
        try:
            yield span
        except Exception as exc:
            try:
                from opentelemetry.trace import StatusCode

                span.record_exception(exc)
                span.set_status(StatusCode.ERROR, str(exc))
            except Exception as inner_exc:  # noqa: BLE001  # nosec B110 — best-effort
                # Recording must never mask the caller's original exception.
                logger.debug("OTel span error recording failed: %s", inner_exc)
            raise

138 

139 

def is_enabled() -> bool:
    """Report the current value of the module-level tracing flag.

    The flag starts out ``False`` and is switched on by ``init_tracing`` once
    a tracer has been created.
    """
    return _OTEL_ENABLED