solohq-memory 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solohq_memory-0.1.0/.gitignore +13 -0
- solohq_memory-0.1.0/LICENSE +21 -0
- solohq_memory-0.1.0/PKG-INFO +108 -0
- solohq_memory-0.1.0/README.md +71 -0
- solohq_memory-0.1.0/pyproject.toml +41 -0
- solohq_memory-0.1.0/src/solohq_memory/__init__.py +90 -0
- solohq_memory-0.1.0/src/solohq_memory/indexing/__init__.py +12 -0
- solohq_memory-0.1.0/src/solohq_memory/indexing/file_indexer.py +246 -0
- solohq_memory-0.1.0/src/solohq_memory/indexing/processors.py +105 -0
- solohq_memory-0.1.0/src/solohq_memory/indexing/registry.py +28 -0
- solohq_memory-0.1.0/src/solohq_memory/interfaces/__init__.py +11 -0
- solohq_memory-0.1.0/src/solohq_memory/interfaces/block_storage.py +15 -0
- solohq_memory-0.1.0/src/solohq_memory/interfaces/embedding.py +10 -0
- solohq_memory-0.1.0/src/solohq_memory/interfaces/llm.py +21 -0
- solohq_memory-0.1.0/src/solohq_memory/interfaces/storage.py +59 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/__init__.py +33 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/artifact_manager.py +372 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/block_contradiction.py +68 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/block_summarizer.py +41 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/boundary_detector.py +149 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/classifier.py +247 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/context_graph.py +290 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/contradiction.py +143 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/episode_recorder.py +46 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/manager.py +182 -0
- solohq_memory-0.1.0/src/solohq_memory/memory/staleness.py +151 -0
- solohq_memory-0.1.0/src/solohq_memory/models/__init__.py +52 -0
- solohq_memory-0.1.0/src/solohq_memory/models/artifact.py +49 -0
- solohq_memory-0.1.0/src/solohq_memory/models/common.py +42 -0
- solohq_memory-0.1.0/src/solohq_memory/models/context.py +31 -0
- solohq_memory-0.1.0/src/solohq_memory/models/episode.py +18 -0
- solohq_memory-0.1.0/src/solohq_memory/models/message.py +21 -0
- solohq_memory-0.1.0/src/solohq_memory/models/relationship.py +29 -0
- solohq_memory-0.1.0/src/solohq_memory/models/session_block.py +23 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/__init__.py +27 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/block_boundary.py +30 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/block_contradiction.py +34 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/block_summarizer.py +28 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/classifier.py +46 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/contradiction.py +50 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/relationship.py +33 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/summarizer.py +28 -0
- solohq_memory-0.1.0/src/solohq_memory/prompts/updater.py +43 -0
- solohq_memory-0.1.0/src/solohq_memory/providers/__init__.py +0 -0
- solohq_memory-0.1.0/src/solohq_memory/providers/anthropic.py +77 -0
- solohq_memory-0.1.0/src/solohq_memory/providers/google.py +125 -0
- solohq_memory-0.1.0/src/solohq_memory/providers/openai.py +89 -0
- solohq_memory-0.1.0/src/solohq_memory/py.typed +0 -0
- solohq_memory-0.1.0/src/solohq_memory/storage/__init__.py +8 -0
- solohq_memory-0.1.0/src/solohq_memory/storage/schema.py +139 -0
- solohq_memory-0.1.0/src/solohq_memory/storage/serialization.py +12 -0
- solohq_memory-0.1.0/src/solohq_memory/storage/sqlite.py +690 -0
- solohq_memory-0.1.0/src/solohq_memory/utils/__init__.py +7 -0
- solohq_memory-0.1.0/src/solohq_memory/utils/cache.py +45 -0
- solohq_memory-0.1.0/src/solohq_memory/utils/similarity.py +14 -0
- solohq_memory-0.1.0/src/solohq_memory/utils/tokens.py +3 -0
- solohq_memory-0.1.0/tests/__init__.py +0 -0
- solohq_memory-0.1.0/tests/e2e/__init__.py +0 -0
- solohq_memory-0.1.0/tests/e2e/conftest.py +149 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_artifact_manager.py +516 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_context_graph.py +648 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_contradiction.py +603 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_edge_cases.py +916 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_messages.py +155 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_phase1.py +844 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_phase2_integration.py +435 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_phase3_integration.py +588 -0
- solohq_memory-0.1.0/tests/e2e/test_e2e_staleness.py +632 -0
- solohq_memory-0.1.0/tests/unit/__init__.py +0 -0
- solohq_memory-0.1.0/tests/unit/test_anthropic_provider.py +177 -0
- solohq_memory-0.1.0/tests/unit/test_artifact_manager.py +539 -0
- solohq_memory-0.1.0/tests/unit/test_artifact_versioning.py +302 -0
- solohq_memory-0.1.0/tests/unit/test_block_contradiction.py +133 -0
- solohq_memory-0.1.0/tests/unit/test_block_summarizer.py +132 -0
- solohq_memory-0.1.0/tests/unit/test_boundary_detector.py +247 -0
- solohq_memory-0.1.0/tests/unit/test_classifier.py +444 -0
- solohq_memory-0.1.0/tests/unit/test_context_graph.py +809 -0
- solohq_memory-0.1.0/tests/unit/test_context_updater.py +456 -0
- solohq_memory-0.1.0/tests/unit/test_contradiction.py +390 -0
- solohq_memory-0.1.0/tests/unit/test_file_indexer.py +349 -0
- solohq_memory-0.1.0/tests/unit/test_google_provider.py +240 -0
- solohq_memory-0.1.0/tests/unit/test_packaging.py +117 -0
- solohq_memory-0.1.0/tests/unit/test_performance.py +188 -0
- solohq_memory-0.1.0/tests/unit/test_processors.py +209 -0
- solohq_memory-0.1.0/tests/unit/test_staleness.py +413 -0
- solohq_memory-0.1.0/uv.lock +1513 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 SoloHQ
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: solohq-memory
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Context memory system for AI agents — persistent context graph with episodes, artifacts, relationships, and staleness tracking
|
|
5
|
+
Project-URL: Repository, https://github.com/whaleventure13/solohq-agent
|
|
6
|
+
Author: SoloHQ
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: apsw>=3.46
|
|
20
|
+
Requires-Dist: pydantic>=2.0
|
|
21
|
+
Requires-Dist: python-ulid>=3.0
|
|
22
|
+
Requires-Dist: sqlite-vec>=0.1.6
|
|
23
|
+
Provides-Extra: all
|
|
24
|
+
Requires-Dist: anthropic>=0.40; extra == 'all'
|
|
25
|
+
Requires-Dist: google-genai>=1.0; extra == 'all'
|
|
26
|
+
Requires-Dist: openai>=1.0; extra == 'all'
|
|
27
|
+
Provides-Extra: anthropic
|
|
28
|
+
Requires-Dist: anthropic>=0.40; extra == 'anthropic'
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
32
|
+
Provides-Extra: google
|
|
33
|
+
Requires-Dist: google-genai>=1.0; extra == 'google'
|
|
34
|
+
Provides-Extra: openai
|
|
35
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# solohq-memory
|
|
39
|
+
|
|
40
|
+
Core context memory library for AI agents. Persistent context graph with episodes, artifacts, relationships, and staleness tracking.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install solohq-memory
|
|
46
|
+
|
|
47
|
+
# With LLM provider extras
|
|
48
|
+
pip install solohq-memory[anthropic]
|
|
49
|
+
pip install solohq-memory[openai]
|
|
50
|
+
pip install solohq-memory[google]
|
|
51
|
+
pip install solohq-memory[all]
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from solohq_memory import (
|
|
58
|
+
ContextMemoryManager,
|
|
59
|
+
SqliteVecStorage,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# You need to provide LLM and embedding implementations
|
|
63
|
+
# that satisfy LLMProvider and EmbeddingProvider protocols
|
|
64
|
+
|
|
65
|
+
storage = SqliteVecStorage("memory.db", dimensions=1536)
|
|
66
|
+
manager = ContextMemoryManager(
|
|
67
|
+
storage=storage,
|
|
68
|
+
llm=your_llm_provider,
|
|
69
|
+
embedder=your_embedding_provider,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Classify a message to find or create a context
|
|
73
|
+
result = await manager.classify("Tell me about Python decorators")
|
|
74
|
+
context = result.context
|
|
75
|
+
|
|
76
|
+
# Record an episode
|
|
77
|
+
await manager.record_episode(
|
|
78
|
+
context_id=context.id,
|
|
79
|
+
user_message="Tell me about Python decorators",
|
|
80
|
+
assistant_message="Decorators are a way to modify functions...",
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Search across all contexts
|
|
84
|
+
results = await manager.search("decorators", limit=5)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Key Components
|
|
88
|
+
|
|
89
|
+
- **ContextMemoryManager** — main orchestrator for context operations
|
|
90
|
+
- **SqliteVecStorage** — SQLite + sqlite-vec storage with vector search
|
|
91
|
+
- **LLMClassifier** — 3-tier context routing: stable shortcut, embedding similarity, LLM fallback
|
|
92
|
+
- **ArtifactManager** — versioned content objects with rollback support
|
|
93
|
+
- **EpisodeRecorder** — conversation recording
|
|
94
|
+
- **BoundaryDetector** — topic shift detection (heuristics + LLM)
|
|
95
|
+
- **FileIndexer** — file indexing (on-disk references) and upload (full content)
|
|
96
|
+
|
|
97
|
+
## Interfaces
|
|
98
|
+
|
|
99
|
+
All providers are protocol-based and pluggable:
|
|
100
|
+
|
|
101
|
+
- `StorageProvider` — CRUD + vector search
|
|
102
|
+
- `LLMProvider` — `complete()` and `complete_json()`
|
|
103
|
+
- `EmbeddingProvider` — `embed()`, `embed_batch()`, `dimensions`
|
|
104
|
+
- `BlockStorageProvider` — session block persistence
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
MIT
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# solohq-memory
|
|
2
|
+
|
|
3
|
+
Core context memory library for AI agents. Persistent context graph with episodes, artifacts, relationships, and staleness tracking.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install solohq-memory
|
|
9
|
+
|
|
10
|
+
# With LLM provider extras
|
|
11
|
+
pip install solohq-memory[anthropic]
|
|
12
|
+
pip install solohq-memory[openai]
|
|
13
|
+
pip install solohq-memory[google]
|
|
14
|
+
pip install solohq-memory[all]
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from solohq_memory import (
|
|
21
|
+
ContextMemoryManager,
|
|
22
|
+
SqliteVecStorage,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# You need to provide LLM and embedding implementations
|
|
26
|
+
# that satisfy LLMProvider and EmbeddingProvider protocols
|
|
27
|
+
|
|
28
|
+
storage = SqliteVecStorage("memory.db", dimensions=1536)
|
|
29
|
+
manager = ContextMemoryManager(
|
|
30
|
+
storage=storage,
|
|
31
|
+
llm=your_llm_provider,
|
|
32
|
+
embedder=your_embedding_provider,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Classify a message to find or create a context
|
|
36
|
+
result = await manager.classify("Tell me about Python decorators")
|
|
37
|
+
context = result.context
|
|
38
|
+
|
|
39
|
+
# Record an episode
|
|
40
|
+
await manager.record_episode(
|
|
41
|
+
context_id=context.id,
|
|
42
|
+
user_message="Tell me about Python decorators",
|
|
43
|
+
assistant_message="Decorators are a way to modify functions...",
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Search across all contexts
|
|
47
|
+
results = await manager.search("decorators", limit=5)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Key Components
|
|
51
|
+
|
|
52
|
+
- **ContextMemoryManager** — main orchestrator for context operations
|
|
53
|
+
- **SqliteVecStorage** — SQLite + sqlite-vec storage with vector search
|
|
54
|
+
- **LLMClassifier** — 3-tier context routing: stable shortcut, embedding similarity, LLM fallback
|
|
55
|
+
- **ArtifactManager** — versioned content objects with rollback support
|
|
56
|
+
- **EpisodeRecorder** — conversation recording
|
|
57
|
+
- **BoundaryDetector** — topic shift detection (heuristics + LLM)
|
|
58
|
+
- **FileIndexer** — file indexing (on-disk references) and upload (full content)
|
|
59
|
+
|
|
60
|
+
## Interfaces
|
|
61
|
+
|
|
62
|
+
All providers are protocol-based and pluggable:
|
|
63
|
+
|
|
64
|
+
- `StorageProvider` — CRUD + vector search
|
|
65
|
+
- `LLMProvider` — `complete()` and `complete_json()`
|
|
66
|
+
- `EmbeddingProvider` — `embed()`, `embed_batch()`, `dimensions`
|
|
67
|
+
- `BlockStorageProvider` — session block persistence
|
|
68
|
+
|
|
69
|
+
## License
|
|
70
|
+
|
|
71
|
+
MIT
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "solohq-memory"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Context memory system for AI agents — persistent context graph with episodes, artifacts, relationships, and staleness tracking"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "SoloHQ"},
|
|
13
|
+
]
|
|
14
|
+
readme = "README.md"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"pydantic>=2.0",
|
|
28
|
+
"sqlite-vec>=0.1.6",
|
|
29
|
+
"apsw>=3.46",
|
|
30
|
+
"python-ulid>=3.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
openai = ["openai>=1.0"]
|
|
35
|
+
anthropic = ["anthropic>=0.40"]
|
|
36
|
+
google = ["google-genai>=1.0"]
|
|
37
|
+
all = ["solohq-memory[openai,anthropic,google]"]
|
|
38
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.24"]
|
|
39
|
+
|
|
40
|
+
[project.urls]
|
|
41
|
+
Repository = "https://github.com/whaleventure13/solohq-agent"
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from .indexing import FileIndexer
|
|
2
|
+
from .interfaces import BlockStorageProvider, EmbeddingProvider, LLMProvider, StorageProvider
|
|
3
|
+
from .memory.artifact_manager import ArtifactManager, CascadeResult, VersionDiff
|
|
4
|
+
from .memory.block_contradiction import BlockConflict, BlockConflictResult, BlockContradictionDetector
|
|
5
|
+
from .memory.block_summarizer import BlockSummarizer
|
|
6
|
+
from .memory.boundary_detector import BoundaryDetector
|
|
7
|
+
from .memory.classifier import ClassificationResult, Confidence, LLMClassifier
|
|
8
|
+
from .memory.context_graph import ContextGraphManager
|
|
9
|
+
from .memory.contradiction import (
|
|
10
|
+
Contradiction,
|
|
11
|
+
ContradictionDetector,
|
|
12
|
+
ContradictionResult,
|
|
13
|
+
Severity,
|
|
14
|
+
)
|
|
15
|
+
from .memory.episode_recorder import EpisodeRecorder
|
|
16
|
+
from .memory.manager import ContextLoad, ContextMemoryManager, SearchResult
|
|
17
|
+
from .memory.staleness import StalenessTracker, StalenessType
|
|
18
|
+
from .models import (
|
|
19
|
+
Artifact,
|
|
20
|
+
ArtifactContext,
|
|
21
|
+
ArtifactFreshness,
|
|
22
|
+
ArtifactVersion,
|
|
23
|
+
SourceMeta,
|
|
24
|
+
Context,
|
|
25
|
+
ContextRelationship,
|
|
26
|
+
ContextState,
|
|
27
|
+
ContextStatus,
|
|
28
|
+
Episode,
|
|
29
|
+
Message,
|
|
30
|
+
RelationType,
|
|
31
|
+
Role,
|
|
32
|
+
SessionBlock,
|
|
33
|
+
SignalType,
|
|
34
|
+
)
|
|
35
|
+
from .storage.sqlite import SqliteVecStorage
|
|
36
|
+
from .utils.cache import EmbeddingCache
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
# Core manager
|
|
40
|
+
"ContextMemoryManager",
|
|
41
|
+
"ContextLoad",
|
|
42
|
+
"SearchResult",
|
|
43
|
+
# Memory components
|
|
44
|
+
"ArtifactManager",
|
|
45
|
+
"BlockConflict",
|
|
46
|
+
"BlockConflictResult",
|
|
47
|
+
"BlockContradictionDetector",
|
|
48
|
+
"BlockSummarizer",
|
|
49
|
+
"BoundaryDetector",
|
|
50
|
+
"CascadeResult",
|
|
51
|
+
"ClassificationResult",
|
|
52
|
+
"Confidence",
|
|
53
|
+
"ContextGraphManager",
|
|
54
|
+
"ContradictionDetector",
|
|
55
|
+
"Contradiction",
|
|
56
|
+
"ContradictionResult",
|
|
57
|
+
"EpisodeRecorder",
|
|
58
|
+
"LLMClassifier",
|
|
59
|
+
"Severity",
|
|
60
|
+
"StalenessTracker",
|
|
61
|
+
"StalenessType",
|
|
62
|
+
"VersionDiff",
|
|
63
|
+
# Models
|
|
64
|
+
"Artifact",
|
|
65
|
+
"ArtifactContext",
|
|
66
|
+
"ArtifactFreshness",
|
|
67
|
+
"ArtifactVersion",
|
|
68
|
+
"SourceMeta",
|
|
69
|
+
"Context",
|
|
70
|
+
"ContextRelationship",
|
|
71
|
+
"ContextState",
|
|
72
|
+
"ContextStatus",
|
|
73
|
+
"Episode",
|
|
74
|
+
"Message",
|
|
75
|
+
"RelationType",
|
|
76
|
+
"Role",
|
|
77
|
+
"SessionBlock",
|
|
78
|
+
"SignalType",
|
|
79
|
+
# Interfaces
|
|
80
|
+
"BlockStorageProvider",
|
|
81
|
+
"EmbeddingProvider",
|
|
82
|
+
"LLMProvider",
|
|
83
|
+
"StorageProvider",
|
|
84
|
+
# Indexing
|
|
85
|
+
"FileIndexer",
|
|
86
|
+
# Storage
|
|
87
|
+
"SqliteVecStorage",
|
|
88
|
+
# Utils
|
|
89
|
+
"EmbeddingCache",
|
|
90
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from .file_indexer import FileIndexer
|
|
2
|
+
from .processors import BinaryProcessor, CodeProcessor, FileProcessor, TextProcessor
|
|
3
|
+
from .registry import get_processor
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"FileIndexer",
|
|
7
|
+
"FileProcessor",
|
|
8
|
+
"CodeProcessor",
|
|
9
|
+
"TextProcessor",
|
|
10
|
+
"BinaryProcessor",
|
|
11
|
+
"get_processor",
|
|
12
|
+
]
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import mimetypes
|
|
4
|
+
import os
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from solohq_memory.interfaces import EmbeddingProvider, LLMProvider, StorageProvider
|
|
9
|
+
from solohq_memory.models import Artifact, ArtifactContext, ArtifactVersion
|
|
10
|
+
from solohq_memory.models.artifact import SourceMeta
|
|
11
|
+
|
|
12
|
+
from .registry import get_processor
|
|
13
|
+
|
|
14
|
+
DEFAULT_IGNORE = {".git", "node_modules", "__pycache__", ".venv", ".env", ".tox", ".mypy_cache"}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FileIndexer:
    """Core engine for indexing and uploading files as artifacts.

    Two persistence modes:
      - ``index_file`` stores an LLM summary + embedding; the full content
        stays on disk and is re-read on demand via ``read_file_content``.
      - ``upload_file`` stores the full extracted text inside the artifact.
    """

    def __init__(
        self,
        storage: StorageProvider,
        embedder: EmbeddingProvider,
        llm: LLMProvider,
    ) -> None:
        self._storage = storage
        self._embedder = embedder
        self._llm = llm

    async def index_file(
        self, path: str, context_id: str | None = None,
    ) -> Artifact:
        """Index a single file: summary + embedding + save.

        Content stays on disk, read on-demand.

        Args:
            path: File path; ``~`` is expanded and relative paths resolved.
            context_id: Optional context to associate with the artifact.

        Returns:
            The saved artifact (new, updated, or unchanged-and-reused).

        Raises:
            FileNotFoundError: If *path* does not point to a regular file.
        """
        abs_path = os.path.abspath(os.path.expanduser(path))
        if not os.path.isfile(abs_path):
            raise FileNotFoundError(f"File not found: {abs_path}")

        # Skip work when already indexed and unchanged — mtime equality is
        # the cheap change detector (same check as refresh_file).
        existing = await self._storage.find_by_source_path(abs_path)
        if existing and existing.source_meta:
            if existing.source_meta.mtime == os.path.getmtime(abs_path):
                return existing

        mime_type = mimetypes.guess_type(abs_path)[0] or "application/octet-stream"
        processor = get_processor(mime_type, abs_path)

        # Raw text drives the embedding; the LLM summary becomes the content.
        text = processor.extract_text(abs_path)
        embedding = await self._embedder.embed(text) if text else []
        summary = await processor.extract_summary(abs_path, self._llm)

        source_meta = self._build_source_meta(abs_path, mime_type, processor)
        filename = os.path.basename(abs_path)

        if existing:
            # Update the stale artifact in place.
            existing.content = summary
            existing.embedding = embedding
            existing.source_meta = source_meta
            # BUGFIX: the original built a contexts list before this branch and
            # discarded it for existing artifacts, so re-indexing with a
            # context_id never linked the context. Attach it if missing.
            if context_id and all(
                c.context_id != context_id for c in existing.contexts
            ):
                existing.contexts.append(
                    ArtifactContext(context_id=context_id, role="primary", relevance=1.0)
                )
            existing.updated_at = datetime.now(timezone.utc)
            await self._storage.save_artifact(existing)
            return existing

        contexts = []
        if context_id:
            contexts.append(ArtifactContext(context_id=context_id, role="primary", relevance=1.0))

        artifact = Artifact(
            type="indexed_file",
            title=filename,
            content=summary,
            embedding=embedding,
            source_path=abs_path,
            source_meta=source_meta,
            contexts=contexts,
            current_version=1,
            versions=[ArtifactVersion(version=1, content=summary, change_reason="indexed")],
        )
        await self._storage.save_artifact(artifact)
        return artifact

    async def index_directory(
        self,
        path: str,
        context_id: str | None = None,
        patterns: list[str] | None = None,
        ignore_patterns: list[str] | None = None,
        max_files: int = 100,
    ) -> list[Artifact]:
        """Recursively index files in a directory.

        Args:
            path: Directory to walk.
            context_id: Optional context to associate with each artifact.
            patterns: Glob patterns to include (every file when omitted).
            ignore_patterns: Extra names to skip, merged with ``DEFAULT_IGNORE``.
            max_files: Hard cap on the number of files indexed.

        Raises:
            NotADirectoryError: If *path* is not a directory.
        """
        abs_path = os.path.abspath(os.path.expanduser(path))
        if not os.path.isdir(abs_path):
            raise NotADirectoryError(f"Directory not found: {abs_path}")

        ignore = DEFAULT_IGNORE | set(ignore_patterns or [])
        root = Path(abs_path)

        # Collect candidates; "*" matches everything when no patterns given.
        files: list[Path] = []
        for pattern in patterns or ["*"]:
            files.extend(
                match
                for match in root.rglob(pattern)
                if match.is_file() and not _should_ignore(match, root, ignore)
            )

        # Deduplicate (overlapping patterns can match the same file) while
        # preserving discovery order, then apply the cap.
        seen: set[str] = set()
        unique_files: list[Path] = []
        for candidate in files:
            resolved = str(candidate.resolve())
            if resolved not in seen:
                seen.add(resolved)
                unique_files.append(candidate)

        results: list[Artifact] = []
        for file_path in unique_files[:max_files]:
            results.append(await self.index_file(str(file_path), context_id))
        return results

    async def refresh_file(self, artifact: Artifact) -> Artifact | None:
        """Re-index if the file has changed (mtime check).

        Returns updated artifact if changed; None if unchanged, if the
        artifact has no on-disk source metadata, or if the file is gone.
        """
        if not artifact.source_meta or not artifact.source_path:
            return None

        if not os.path.isfile(artifact.source_path):
            return None

        if artifact.source_meta.mtime == os.path.getmtime(artifact.source_path):
            return None

        # File changed on disk — rebuild summary and embedding.
        processor = get_processor(artifact.source_meta.mime_type, artifact.source_path)

        text = processor.extract_text(artifact.source_path)
        embedding = await self._embedder.embed(text) if text else []
        summary = await processor.extract_summary(artifact.source_path, self._llm)

        stat = os.stat(artifact.source_path)
        artifact.source_meta.mtime = stat.st_mtime
        artifact.source_meta.size_bytes = stat.st_size
        artifact.source_meta.indexed_at = datetime.now(timezone.utc)
        artifact.content = summary
        artifact.embedding = embedding
        artifact.updated_at = datetime.now(timezone.utc)
        await self._storage.save_artifact(artifact)
        return artifact

    async def read_file_content(self, artifact: Artifact, max_chars: int = 50000) -> str:
        """Read current file content from disk.

        Uploaded files return their stored content; indexed files are re-read
        from ``source_path`` through the appropriate processor. Error states
        are returned as strings (not raised) so callers can surface them.

        NOTE(review): ``max_chars`` is not applied to uploaded-file content —
        confirm whether stored content should also be truncated.
        """
        if artifact.type == "uploaded_file":
            return artifact.content

        if not artifact.source_path:
            return "Error: No source path for this artifact."

        if not os.path.isfile(artifact.source_path):
            return f"Error: File not found: {artifact.source_path}"

        mime_type = (
            artifact.source_meta.mime_type
            if artifact.source_meta
            else "application/octet-stream"
        )
        processor = get_processor(mime_type, artifact.source_path)
        return processor.extract_text(artifact.source_path, max_chars=max_chars)

    async def upload_file(
        self, path: str, context_id: str | None = None,
    ) -> Artifact:
        """Upload a file — full content saved in the artifact.

        Args:
            path: File path; ``~`` is expanded and relative paths resolved.
            context_id: Optional context to associate with the artifact.

        Raises:
            FileNotFoundError: If *path* does not point to a regular file.
        """
        abs_path = os.path.abspath(os.path.expanduser(path))
        if not os.path.isfile(abs_path):
            raise FileNotFoundError(f"File not found: {abs_path}")

        mime_type = mimetypes.guess_type(abs_path)[0] or "application/octet-stream"
        processor = get_processor(mime_type, abs_path)

        # Full text is stored in the artifact (capped at 100k chars).
        full_text = processor.extract_text(abs_path, max_chars=100000)
        embedding = await self._embedder.embed(full_text) if full_text else []

        source_meta = self._build_source_meta(abs_path, mime_type, processor)
        filename = os.path.basename(abs_path)

        contexts = []
        if context_id:
            contexts.append(ArtifactContext(context_id=context_id, role="primary", relevance=1.0))

        artifact = Artifact(
            type="uploaded_file",
            title=filename,
            content=full_text,
            embedding=embedding,
            source_path=abs_path,
            source_meta=source_meta,
            contexts=contexts,
            current_version=1,
            versions=[ArtifactVersion(version=1, content=full_text, change_reason="uploaded")],
        )
        await self._storage.save_artifact(artifact)
        return artifact

    @staticmethod
    def _build_source_meta(abs_path: str, mime_type: str, processor: object) -> SourceMeta:
        """Build SourceMeta from the file's current stat (shared by
        index_file and upload_file, which previously duplicated this)."""
        stat = os.stat(abs_path)
        return SourceMeta(
            source_path=abs_path,
            mime_type=mime_type,
            size_bytes=stat.st_size,
            mtime=stat.st_mtime,
            processor=type(processor).__name__,
        )
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _should_ignore(path: Path, root: Path, ignore: set[str]) -> bool:
|
|
241
|
+
"""Check if a file should be ignored based on directory names."""
|
|
242
|
+
rel = path.relative_to(root)
|
|
243
|
+
for part in rel.parts:
|
|
244
|
+
if part in ignore:
|
|
245
|
+
return True
|
|
246
|
+
return False
|