tribalmemory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tribalmemory/__init__.py +3 -0
- tribalmemory/a21/__init__.py +38 -0
- tribalmemory/a21/config/__init__.py +20 -0
- tribalmemory/a21/config/providers.py +104 -0
- tribalmemory/a21/config/system.py +184 -0
- tribalmemory/a21/container/__init__.py +8 -0
- tribalmemory/a21/container/container.py +212 -0
- tribalmemory/a21/providers/__init__.py +32 -0
- tribalmemory/a21/providers/base.py +241 -0
- tribalmemory/a21/providers/deduplication.py +99 -0
- tribalmemory/a21/providers/lancedb.py +232 -0
- tribalmemory/a21/providers/memory.py +128 -0
- tribalmemory/a21/providers/mock.py +54 -0
- tribalmemory/a21/providers/openai.py +151 -0
- tribalmemory/a21/providers/timestamp.py +88 -0
- tribalmemory/a21/system.py +293 -0
- tribalmemory/cli.py +298 -0
- tribalmemory/interfaces.py +306 -0
- tribalmemory/mcp/__init__.py +9 -0
- tribalmemory/mcp/__main__.py +6 -0
- tribalmemory/mcp/server.py +484 -0
- tribalmemory/performance/__init__.py +1 -0
- tribalmemory/performance/benchmarks.py +285 -0
- tribalmemory/performance/corpus_generator.py +171 -0
- tribalmemory/portability/__init__.py +1 -0
- tribalmemory/portability/embedding_metadata.py +320 -0
- tribalmemory/server/__init__.py +9 -0
- tribalmemory/server/__main__.py +6 -0
- tribalmemory/server/app.py +187 -0
- tribalmemory/server/config.py +115 -0
- tribalmemory/server/models.py +206 -0
- tribalmemory/server/routes.py +378 -0
- tribalmemory/services/__init__.py +15 -0
- tribalmemory/services/deduplication.py +115 -0
- tribalmemory/services/embeddings.py +273 -0
- tribalmemory/services/import_export.py +506 -0
- tribalmemory/services/memory.py +275 -0
- tribalmemory/services/vector_store.py +360 -0
- tribalmemory/testing/__init__.py +22 -0
- tribalmemory/testing/embedding_utils.py +110 -0
- tribalmemory/testing/fixtures.py +123 -0
- tribalmemory/testing/metrics.py +256 -0
- tribalmemory/testing/mocks.py +560 -0
- tribalmemory/testing/semantic_expansions.py +91 -0
- tribalmemory/utils.py +23 -0
- tribalmemory-0.1.0.dist-info/METADATA +275 -0
- tribalmemory-0.1.0.dist-info/RECORD +51 -0
- tribalmemory-0.1.0.dist-info/WHEEL +5 -0
- tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
- tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
- tribalmemory-0.1.0.dist-info/top_level.txt +1 -0
tribalmemory/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""A2.1 Interface-First Implementation.
|
|
2
|
+
|
|
3
|
+
This module provides a highly abstracted, plugin-based architecture for
|
|
4
|
+
tribal memory. Key design principles:
|
|
5
|
+
|
|
6
|
+
1. **Provider Pattern**: All backends (embeddings, storage, timestamps) are
|
|
7
|
+
swappable providers that implement standard interfaces.
|
|
8
|
+
|
|
9
|
+
2. **Dependency Injection**: Components receive their dependencies through
|
|
10
|
+
a central container, making testing and configuration easier.
|
|
11
|
+
|
|
12
|
+
3. **Configuration-Driven**: Setup is driven by configuration objects,
|
|
13
|
+
not hardcoded values.
|
|
14
|
+
|
|
15
|
+
4. **Forward Compatible**: Interfaces are designed to accommodate future
|
|
16
|
+
features (multi-tenancy, sharding, replication) without breaking changes.
|
|
17
|
+
|
|
18
|
+
Usage:
|
|
19
|
+
from tribalmemory.a21 import MemorySystem, SystemConfig
|
|
20
|
+
|
|
21
|
+
config = SystemConfig.from_env()
|
|
22
|
+
system = MemorySystem(config)
|
|
23
|
+
|
|
24
|
+
await system.remember("Joe prefers TypeScript")
|
|
25
|
+
results = await system.recall("What language?")
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from .system import MemorySystem
|
|
29
|
+
from .config import SystemConfig, EmbeddingConfig, StorageConfig
|
|
30
|
+
from .container import Container
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"MemorySystem",
|
|
34
|
+
"SystemConfig",
|
|
35
|
+
"EmbeddingConfig",
|
|
36
|
+
"StorageConfig",
|
|
37
|
+
"Container",
|
|
38
|
+
]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Configuration system for A2.1.
|
|
2
|
+
|
|
3
|
+
Provides strongly-typed configuration objects that can be loaded from:
|
|
4
|
+
- Environment variables
|
|
5
|
+
- YAML/JSON files
|
|
6
|
+
- Programmatic construction
|
|
7
|
+
|
|
8
|
+
All configuration is validated at load time.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from .system import SystemConfig
|
|
12
|
+
from .providers import EmbeddingConfig, StorageConfig, TimestampConfig, DeduplicationConfig
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"SystemConfig",
|
|
16
|
+
"EmbeddingConfig",
|
|
17
|
+
"StorageConfig",
|
|
18
|
+
"TimestampConfig",
|
|
19
|
+
"DeduplicationConfig",
|
|
20
|
+
]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Provider-specific configuration classes."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional, Literal
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EmbeddingProviderType(Enum):
    """Identifiers for the embedding backends a configuration can select."""

    OPENAI = "openai"
    # Reserved for future local-model support.
    LOCAL = "local"
    MOCK = "mock"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StorageProviderType(Enum):
    """Identifiers for the storage backends a configuration can select."""

    LANCEDB = "lancedb"
    MEMORY = "memory"
    # Possible later additions: PINECONE = "pinecone", POSTGRES = "postgres"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TimestampProviderType(Enum):
    """Identifiers for the timestamp backends a configuration can select."""

    RFC3161 = "rfc3161"
    MOCK = "mock"
    NONE = "none"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class EmbeddingConfig:
    """Configuration for embedding provider.

    Attributes:
        provider: Which embedding provider to use
        model: Model name (e.g., "text-embedding-3-small")
        dimensions: Embedding dimension size
        api_key: API key (for cloud providers). Left out of ``repr()`` so the
            secret does not end up in logs or tracebacks when the config
            object is printed.
        api_base: Custom API base URL
        max_retries: Max retry attempts
        timeout_seconds: Request timeout
        backoff_base: Exponential backoff base
        backoff_max: Maximum backoff delay
        batch_size: Max texts per batch request
    """
    provider: EmbeddingProviderType = EmbeddingProviderType.OPENAI
    model: str = "text-embedding-3-small"
    dimensions: int = 1536
    # repr=False keeps the credential out of the auto-generated __repr__;
    # __init__ and __eq__ are unaffected.
    api_key: Optional[str] = field(default=None, repr=False)
    api_base: Optional[str] = None
    max_retries: int = 3
    timeout_seconds: float = 30.0
    backoff_base: float = 2.0
    backoff_max: float = 60.0
    batch_size: int = 100
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class StorageConfig:
    """Configuration for storage provider.

    Attributes:
        provider: Which storage provider to use
        path: Local database path (for LanceDB local)
        uri: Cloud database URI (for LanceDB Cloud)
        api_key: API key (for cloud storage). Left out of ``repr()`` so the
            secret does not end up in logs or tracebacks when the config
            object is printed.
        table_name: Name of the memories table
        embedding_dimensions: Expected embedding size (for validation)
    """
    provider: StorageProviderType = StorageProviderType.MEMORY
    path: Optional[str] = None
    uri: Optional[str] = None
    # repr=False keeps the credential out of the auto-generated __repr__;
    # __init__ and __eq__ are unaffected.
    api_key: Optional[str] = field(default=None, repr=False)
    table_name: str = "memories"
    embedding_dimensions: int = 1536
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class TimestampConfig:
    """Settings that select and parameterize the timestamp provider.

    Attributes:
        provider: Timestamp backend to use; defaults to ``NONE``.
        tsa_url: URL of the RFC 3161 Time Stamp Authority.
        tsa_cert_path: Location of the TSA certificate used for verification.
    """

    provider: TimestampProviderType = TimestampProviderType.NONE
    tsa_url: Optional[str] = None
    tsa_cert_path: Optional[str] = None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class DeduplicationConfig:
    """Settings that control duplicate detection.

    Attributes:
        enabled: Turns duplicate checking on or off.
        exact_threshold: Similarity at or above which an entry counts as an
            exact duplicate (reject).
        near_threshold: Similarity at or above which an entry counts as a
            near-duplicate (warn).
        strategy: Which deduplication strategy to apply.
    """

    enabled: bool = True
    exact_threshold: float = 0.98
    near_threshold: float = 0.90
    strategy: Literal["embedding", "hash", "hybrid"] = "embedding"
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""System-wide configuration."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .providers import (
|
|
9
|
+
EmbeddingConfig,
|
|
10
|
+
StorageConfig,
|
|
11
|
+
TimestampConfig,
|
|
12
|
+
DeduplicationConfig,
|
|
13
|
+
EmbeddingProviderType,
|
|
14
|
+
StorageProviderType,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class SystemConfig:
    """Complete system configuration.

    Combines all provider configurations into a single object.
    Can be loaded from environment variables or constructed programmatically.

    Attributes:
        instance_id: Unique identifier for this agent instance
        embedding: Embedding provider configuration
        storage: Storage provider configuration
        timestamp: Timestamp provider configuration
        deduplication: Deduplication configuration
        debug: Enable debug logging
    """
    instance_id: str = "default"
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)
    timestamp: TimestampConfig = field(default_factory=TimestampConfig)
    deduplication: DeduplicationConfig = field(default_factory=DeduplicationConfig)
    debug: bool = False

    @classmethod
    def from_env(cls, prefix: str = "TRIBAL_MEMORY") -> "SystemConfig":
        """Load configuration from environment variables.

        Provider names and booleans are matched case-insensitively and with
        surrounding whitespace ignored, so ``OpenAI`` and ``openai`` are
        equivalent. The timestamp configuration is not read from the
        environment and keeps its defaults.

        Environment variables:
            {prefix}_INSTANCE_ID: Instance identifier
            {prefix}_DEBUG: Enable debug mode

            {prefix}_EMBEDDING_PROVIDER: openai|local|mock
            {prefix}_EMBEDDING_MODEL: Model name
            {prefix}_EMBEDDING_API_KEY: API key (or OPENAI_API_KEY)

            {prefix}_STORAGE_PROVIDER: lancedb|memory
            {prefix}_STORAGE_PATH: Local database path
            {prefix}_STORAGE_URI: Cloud database URI
            {prefix}_STORAGE_API_KEY: API key (or LANCEDB_API_KEY)

            {prefix}_DEDUP_ENABLED: true|false
            {prefix}_DEDUP_EXACT_THRESHOLD: Float (0-1)
            {prefix}_DEDUP_NEAR_THRESHOLD: Float (0-1)

        Args:
            prefix: Prefix prepended (with an underscore) to every variable
                name listed above.

        Returns:
            A SystemConfig populated from the environment.

        Raises:
            ValueError: If a provider name is not recognised or a threshold
                is not a valid float. The message names the variable.
        """
        def get(key: str, default: Optional[str] = None) -> Optional[str]:
            return os.environ.get(f"{prefix}_{key}", default)

        def get_bool(key: str, default: bool = False) -> bool:
            val = get(key)
            if val is None:
                return default
            # Any value other than the accepted truthy spellings is False.
            return val.strip().lower() in ("true", "1", "yes")

        def get_float(key: str, default: float) -> float:
            val = get(key)
            if not val:
                # Unset or empty falls back to the default.
                return default
            try:
                return float(val)
            except ValueError as err:
                raise ValueError(
                    f"{prefix}_{key} must be a float, got {val!r}"
                ) from err

        def get_enum(key: str, enum_cls: type, default: str):
            raw = get(key, default)
            try:
                # Enum values are all lowercase, so normalise before lookup.
                return enum_cls(raw.strip().lower())
            except ValueError as err:
                allowed = "|".join(member.value for member in enum_cls)
                raise ValueError(
                    f"{prefix}_{key} must be one of {allowed}, got {raw!r}"
                ) from err

        # Embedding config
        embedding = EmbeddingConfig(
            provider=get_enum("EMBEDDING_PROVIDER", EmbeddingProviderType, "openai"),
            model=get("EMBEDDING_MODEL", "text-embedding-3-small"),
            api_key=get("EMBEDDING_API_KEY") or os.environ.get("OPENAI_API_KEY"),
        )

        # Storage config
        storage = StorageConfig(
            provider=get_enum("STORAGE_PROVIDER", StorageProviderType, "memory"),
            path=get("STORAGE_PATH"),
            uri=get("STORAGE_URI"),
            api_key=get("STORAGE_API_KEY") or os.environ.get("LANCEDB_API_KEY"),
        )

        # Deduplication config
        deduplication = DeduplicationConfig(
            enabled=get_bool("DEDUP_ENABLED", True),
            exact_threshold=get_float("DEDUP_EXACT_THRESHOLD", 0.98),
            near_threshold=get_float("DEDUP_NEAR_THRESHOLD", 0.90),
        )

        return cls(
            instance_id=get("INSTANCE_ID", "default"),
            embedding=embedding,
            storage=storage,
            deduplication=deduplication,
            debug=get_bool("DEBUG", False),
        )

    @classmethod
    def for_testing(cls, instance_id: str = "test") -> "SystemConfig":
        """Create a configuration suitable for testing.

        Uses mock/in-memory providers to avoid external dependencies.

        Args:
            instance_id: Instance identifier to put in the config.

        Returns:
            A SystemConfig with the mock embedding provider, in-memory
            storage, deduplication enabled and debug turned on.
        """
        return cls(
            instance_id=instance_id,
            embedding=EmbeddingConfig(
                provider=EmbeddingProviderType.MOCK,
            ),
            storage=StorageConfig(
                provider=StorageProviderType.MEMORY,
            ),
            deduplication=DeduplicationConfig(
                enabled=True,
                exact_threshold=0.90,  # Lower for deterministic mock embeddings
            ),
            debug=True,
        )

    def validate(self) -> list[str]:
        """Validate configuration and return list of errors.

        Checks:
        - instance_id is not empty or whitespace-only
        - OpenAI embedding provider has an API key
        - embedding timeout, batch size and dimensions are positive
        - LanceDB storage has a path or a uri
        - embedding and storage dimensions agree
        - when deduplication is enabled: both thresholds lie in [0, 1] and
          near_threshold does not exceed exact_threshold

        Returns:
            Human-readable error messages; an empty list means no problem
            was found. Nothing is raised for an invalid configuration.
        """
        errors = []

        # Instance ID validation
        if not self.instance_id or not self.instance_id.strip():
            errors.append("instance_id cannot be empty")

        # Embedding validation
        if self.embedding.provider == EmbeddingProviderType.OPENAI:
            if not self.embedding.api_key:
                errors.append("OpenAI embedding requires API key")

        # Embedding config bounds
        if self.embedding.timeout_seconds <= 0:
            errors.append(f"embedding.timeout_seconds must be positive, got {self.embedding.timeout_seconds}")
        if self.embedding.batch_size <= 0:
            errors.append(f"embedding.batch_size must be positive, got {self.embedding.batch_size}")
        if self.embedding.dimensions <= 0:
            errors.append(f"embedding.dimensions must be positive, got {self.embedding.dimensions}")

        # Storage validation
        if self.storage.provider == StorageProviderType.LANCEDB:
            if not self.storage.path and not self.storage.uri:
                errors.append("LanceDB storage requires path or uri")

        # Dimension consistency
        if self.embedding.dimensions != self.storage.embedding_dimensions:
            errors.append(
                f"Embedding dimensions mismatch: "
                f"embedding={self.embedding.dimensions}, storage={self.storage.embedding_dimensions}"
            )

        # Deduplication threshold validation (skipped when dedup is disabled)
        if self.deduplication.enabled:
            if not (0.0 <= self.deduplication.exact_threshold <= 1.0):
                errors.append(
                    f"deduplication.exact_threshold must be between 0 and 1, "
                    f"got {self.deduplication.exact_threshold}"
                )
            if not (0.0 <= self.deduplication.near_threshold <= 1.0):
                errors.append(
                    f"deduplication.near_threshold must be between 0 and 1, "
                    f"got {self.deduplication.near_threshold}"
                )
            if self.deduplication.near_threshold > self.deduplication.exact_threshold:
                errors.append(
                    f"deduplication.near_threshold ({self.deduplication.near_threshold}) "
                    f"should not exceed exact_threshold ({self.deduplication.exact_threshold})"
                )

        return errors
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Dependency injection container for A2.1."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, TypeVar, Type
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from ..config import SystemConfig
|
|
7
|
+
from ..providers.base import (
|
|
8
|
+
EmbeddingProvider,
|
|
9
|
+
StorageProvider,
|
|
10
|
+
TimestampProvider,
|
|
11
|
+
DeduplicationProvider,
|
|
12
|
+
ProviderHealth,
|
|
13
|
+
ProviderStatus,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
T = TypeVar('T')
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Container:
    """Dependency injection container.

    Manages the lifecycle of all providers and provides them to consumers.
    Providers are created by ``initialize()`` and released by ``shutdown()``.
    If initialization fails part-way, providers that already started are
    shut down again before the error propagates.

    Usage:
        container = Container(config)
        await container.initialize()

        embedding = container.embedding
        storage = container.storage

        await container.shutdown()

    The container is also an async context manager:
        async with Container(config) as container:
            ...
    """

    def __init__(self, config: SystemConfig):
        """Store the configuration; no provider is created yet.

        Args:
            config: System configuration used to select and build providers.
        """
        self.config = config
        self._embedding: Optional[EmbeddingProvider] = None
        self._storage: Optional[StorageProvider] = None
        self._timestamp: Optional[TimestampProvider] = None
        self._deduplication: Optional[DeduplicationProvider] = None
        self._initialized = False

    async def initialize(self) -> None:
        """Initialize all providers.

        Providers are created and initialized in dependency order:
        1. Embedding (no dependencies)
        2. Storage (receives the embedding provider)
        3. Deduplication (receives both storage and embedding; skipped when
           disabled in the config)
        4. Timestamp (skipped when the configured provider is "none")

        Calling this on an already initialized container is a no-op.

        Raises:
            Exception: Whatever a provider factory or ``initialize()`` call
                raises. Before it propagates, every provider that had
                finished initializing is shut down and the container is
                reset to its uninitialized state.
        """
        if self._initialized:
            return

        logger.info("Initializing container for instance: %s", self.config.instance_id)

        # Providers whose initialize() completed, in start order, so that a
        # failure releases exactly what was acquired.
        started: list = []
        try:
            self._embedding = self._create_embedding_provider()
            await self._embedding.initialize()
            started.append(self._embedding)

            self._storage = self._create_storage_provider()
            await self._storage.initialize()
            started.append(self._storage)

            # Deduplication depends on both storage and embedding - create after they're initialized
            self._deduplication = self._create_deduplication_provider(
                storage=self._storage,
                embedding=self._embedding,
            )
            if self._deduplication is not None:
                await self._deduplication.initialize()
                started.append(self._deduplication)

            if self.config.timestamp.provider.value != "none":
                self._timestamp = self._create_timestamp_provider()
                await self._timestamp.initialize()
                started.append(self._timestamp)
        except BaseException:
            # BaseException so that cancellation also triggers cleanup.
            # shutdown() would be a no-op here (_initialized is still False)
            # and __aexit__ is never called when __aenter__ raises, so this
            # is the only place the started providers can be released.
            await self._abort_initialization(started)
            raise

        self._initialized = True
        logger.info("Container initialized successfully")

    async def _abort_initialization(self, started: list) -> None:
        """Shut down ``started`` providers in reverse order and clear all slots.

        Errors raised while shutting down are logged and swallowed so the
        original initialization error is the one the caller sees.
        """
        for provider in reversed(started):
            try:
                await provider.shutdown()
            except Exception:
                logger.exception(
                    "Error shutting down %s after failed initialization",
                    type(provider).__name__,
                )

        self._embedding = None
        self._storage = None
        self._deduplication = None
        self._timestamp = None

    async def shutdown(self) -> None:
        """Shutdown all providers gracefully.

        Providers are shut down in reverse initialization order. Every
        provider gets a shutdown attempt even if an earlier one fails.
        Calling this on a container that is not initialized is a no-op.

        Raises:
            Exception: The first error raised by a provider's ``shutdown()``,
                re-raised after all providers have been attempted.
        """
        if not self._initialized:
            return

        logger.info("Shutting down container")

        first_error: Optional[Exception] = None
        # Shutdown in reverse order
        for provider in (
            self._timestamp,
            self._deduplication,
            self._storage,
            self._embedding,
        ):
            if provider is None:
                continue
            try:
                await provider.shutdown()
            except Exception as exc:
                logger.exception("Error shutting down %s", type(provider).__name__)
                if first_error is None:
                    first_error = exc

        self._initialized = False

        if first_error is not None:
            raise first_error

        logger.info("Container shutdown complete")

    async def health_check(self) -> dict[str, ProviderHealth]:
        """Check health of all providers.

        Returns:
            Mapping from provider role ("embedding", "storage", "timestamp",
            "deduplication") to that provider's health result. Roles with no
            provider are omitted.
        """
        results = {}

        if self._embedding is not None:
            results["embedding"] = await self._embedding.health_check()
        if self._storage is not None:
            results["storage"] = await self._storage.health_check()
        if self._timestamp is not None:
            results["timestamp"] = await self._timestamp.health_check()
        if self._deduplication is not None:
            results["deduplication"] = await self._deduplication.health_check()

        return results

    @property
    def embedding(self) -> EmbeddingProvider:
        """Get the embedding provider.

        Raises:
            RuntimeError: If no embedding provider has been created yet.
        """
        # Identity check: a provider object must never be mistaken for
        # "missing" just because it happens to be falsy.
        if self._embedding is None:
            raise RuntimeError("Container not initialized. Call initialize() first.")
        return self._embedding

    @property
    def storage(self) -> StorageProvider:
        """Get the storage provider.

        Raises:
            RuntimeError: If no storage provider has been created yet.
        """
        # Identity check: a provider object must never be mistaken for
        # "missing" just because it happens to be falsy.
        if self._storage is None:
            raise RuntimeError("Container not initialized. Call initialize() first.")
        return self._storage

    @property
    def timestamp(self) -> Optional[TimestampProvider]:
        """Get the timestamp provider (may be None if disabled)."""
        return self._timestamp

    @property
    def deduplication(self) -> Optional[DeduplicationProvider]:
        """Get the deduplication provider (may be None if disabled)."""
        return self._deduplication

    def _create_embedding_provider(self) -> EmbeddingProvider:
        """Create embedding provider based on config.

        Raises:
            ValueError: If the configured provider type has no implementation
                here (only OPENAI and MOCK are handled).
        """
        from ..config.providers import EmbeddingProviderType
        from ..providers.openai import OpenAIEmbeddingProvider
        from ..providers.mock import MockEmbeddingProvider

        cfg = self.config.embedding

        if cfg.provider == EmbeddingProviderType.OPENAI:
            return OpenAIEmbeddingProvider(cfg)
        elif cfg.provider == EmbeddingProviderType.MOCK:
            return MockEmbeddingProvider(cfg)
        else:
            raise ValueError(f"Unknown embedding provider: {cfg.provider}")

    def _create_storage_provider(self) -> StorageProvider:
        """Create storage provider based on config.

        The current embedding provider is passed to the storage provider, so
        this must run after the embedding provider has been created.

        Raises:
            ValueError: If the configured provider type is not handled.
        """
        from ..config.providers import StorageProviderType
        from ..providers.lancedb import LanceDBStorageProvider
        from ..providers.memory import InMemoryStorageProvider

        cfg = self.config.storage

        if cfg.provider == StorageProviderType.LANCEDB:
            return LanceDBStorageProvider(cfg, self._embedding)
        elif cfg.provider == StorageProviderType.MEMORY:
            return InMemoryStorageProvider(cfg, self._embedding)
        else:
            raise ValueError(f"Unknown storage provider: {cfg.provider}")

    def _create_timestamp_provider(self) -> TimestampProvider:
        """Create timestamp provider based on config.

        Raises:
            ValueError: If the configured provider type is not handled
                (including NONE, which callers are expected to filter out).
        """
        from ..config.providers import TimestampProviderType
        from ..providers.timestamp import RFC3161TimestampProvider, MockTimestampProvider

        cfg = self.config.timestamp

        if cfg.provider == TimestampProviderType.RFC3161:
            return RFC3161TimestampProvider(cfg)
        elif cfg.provider == TimestampProviderType.MOCK:
            return MockTimestampProvider(cfg)
        else:
            raise ValueError(f"Unknown timestamp provider: {cfg.provider}")

    def _create_deduplication_provider(
        self,
        storage: StorageProvider,
        embedding: EmbeddingProvider,
    ) -> Optional[DeduplicationProvider]:
        """Create deduplication provider based on config.

        Args:
            storage: Initialized storage provider
            embedding: Initialized embedding provider

        Returns:
            DeduplicationProvider or None if disabled
        """
        if not self.config.deduplication.enabled:
            return None

        from ..providers.deduplication import EmbeddingDeduplicationProvider

        return EmbeddingDeduplicationProvider(
            self.config.deduplication,
            storage_provider=storage,
            embedding_provider=embedding,
        )

    async def __aenter__(self):
        """Initialize the container and return it for use in ``async with``."""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Shut the container down; exceptions from the body are not suppressed."""
        await self.shutdown()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Provider interfaces and implementations.
|
|
2
|
+
|
|
3
|
+
Providers are swappable backends that implement standard interfaces.
|
|
4
|
+
Each provider type has an abstract base and concrete implementations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .base import (
|
|
8
|
+
EmbeddingProvider,
|
|
9
|
+
StorageProvider,
|
|
10
|
+
TimestampProvider,
|
|
11
|
+
DeduplicationProvider,
|
|
12
|
+
)
|
|
13
|
+
from .openai import OpenAIEmbeddingProvider
|
|
14
|
+
from .lancedb import LanceDBStorageProvider
|
|
15
|
+
from .memory import InMemoryStorageProvider
|
|
16
|
+
from .timestamp import RFC3161TimestampProvider, MockTimestampProvider
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
# Base interfaces
|
|
20
|
+
"EmbeddingProvider",
|
|
21
|
+
"StorageProvider",
|
|
22
|
+
"TimestampProvider",
|
|
23
|
+
"DeduplicationProvider",
|
|
24
|
+
# Embedding providers
|
|
25
|
+
"OpenAIEmbeddingProvider",
|
|
26
|
+
# Storage providers
|
|
27
|
+
"LanceDBStorageProvider",
|
|
28
|
+
"InMemoryStorageProvider",
|
|
29
|
+
# Timestamp providers
|
|
30
|
+
"RFC3161TimestampProvider",
|
|
31
|
+
"MockTimestampProvider",
|
|
32
|
+
]
|