soothe-plugins 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- soothe_plugins/.plugin_template/PLUGIN_TEMPLATE.md +40 -0
- soothe_plugins/.plugin_template/README.md.template +152 -0
- soothe_plugins/.plugin_template/__init__.py.template +174 -0
- soothe_plugins/.plugin_template/events.py.template +34 -0
- soothe_plugins/.plugin_template/implementation.py.template +112 -0
- soothe_plugins/.plugin_template/models.py.template +39 -0
- soothe_plugins/.plugin_template/state.py.template +48 -0
- soothe_plugins/README.md +150 -0
- soothe_plugins/__init__.py +20 -0
- soothe_plugins/_paths.py +17 -0
- soothe_plugins/sample_echo/__init__.py +44 -0
- soothe_plugins/sample_echo/implementation.py +47 -0
- soothe_plugins/skillify/__init__.py +288 -0
- soothe_plugins/skillify/events.py +148 -0
- soothe_plugins/skillify/indexer.py +312 -0
- soothe_plugins/skillify/models.py +36 -0
- soothe_plugins/skillify/retriever.py +165 -0
- soothe_plugins/skillify/warehouse.py +96 -0
- soothe_plugins/weaver/__init__.py +507 -0
- soothe_plugins/weaver/analyzer.py +81 -0
- soothe_plugins/weaver/composer.py +322 -0
- soothe_plugins/weaver/events.py +223 -0
- soothe_plugins/weaver/generator.py +177 -0
- soothe_plugins/weaver/models.py +136 -0
- soothe_plugins/weaver/registry.py +214 -0
- soothe_plugins/weaver/reuse.py +151 -0
- soothe_plugins-0.2.6.dist-info/METADATA +156 -0
- soothe_plugins-0.2.6.dist-info/RECORD +30 -0
- soothe_plugins-0.2.6.dist-info/WHEEL +4 -0
- soothe_plugins-0.2.6.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Skillify subagent events.
|
|
2
|
+
|
|
3
|
+
This module defines events for the skillify subagent.
|
|
4
|
+
Event classes are defined here without explicit registration; the daemon handles registration based on their type strings.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import ConfigDict
|
|
12
|
+
|
|
13
|
+
from soothe_sdk.core.events import SubagentEvent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SkillifyDispatchedEvent(SubagentEvent):
    """Event announcing that the skillify subagent was dispatched.

    Carries the discriminator ``type`` string and the ``task`` text.
    """

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.dispatched"] = "soothe.subagent.skillify.dispatched"
    # Task text associated with the dispatch; empty by default.
    task: str = ""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SkillifyCompletedEvent(SubagentEvent):
    """Event announcing that a skillify subagent run completed.

    Carries the run duration in milliseconds and the number of results.
    """

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.completed"] = "soothe.subagent.skillify.completed"
    # Elapsed time in milliseconds; 0 when not supplied.
    duration_ms: int = 0
    # Number of results produced; 0 when not supplied.
    result_count: int = 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SkillifyIndexingPendingEvent(SubagentEvent):
    """Event signalling that indexing is still pending for a query."""

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.indexing_pending"] = "soothe.subagent.skillify.indexing_pending"
    # Query text the event refers to; empty by default.
    query: str = ""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SkillifyRetrieveStartedEvent(SubagentEvent):
    """Event signalling that a skill retrieval has started for a query."""

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.retrieve_started"] = "soothe.subagent.skillify.retrieve_started"
    # Query text being retrieved; empty by default.
    query: str = ""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SkillifyRetrieveCompletedEvent(SubagentEvent):
    """Event signalling that a skill retrieval finished.

    Carries the query, how many results were returned, and the top score.
    """

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.retrieve_completed"] = "soothe.subagent.skillify.retrieve_completed"
    # Query text that was retrieved; empty by default.
    query: str = ""
    # Number of results returned; 0 when not supplied.
    result_count: int = 0
    # Score of the best result; 0.0 when not supplied.
    top_score: float = 0.0
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class SkillifyRetrieveNotReadyEvent(SubagentEvent):
    """Event signalling that retrieval could not run because the index is not ready."""

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.retrieve_not_ready"] = "soothe.subagent.skillify.retrieve_not_ready"
    # Human-readable explanation; empty by default.
    message: str = ""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class SkillifyIndexStartedEvent(SubagentEvent):
    """Event signalling that skill indexing started for a collection."""

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.index_started"] = "soothe.subagent.skillify.index_started"
    # Name of the collection being indexed; empty by default.
    collection: str = ""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class SkillifyIndexUpdatedEvent(SubagentEvent):
    """Event reporting the changes applied by an index pass.

    Carries per-category counters plus the resulting total.
    """

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.index_updated"] = "soothe.subagent.skillify.index_updated"
    # Count of newly indexed skills.
    new: int = 0
    # Count of re-indexed (changed) skills.
    changed: int = 0
    # Count of skills removed from the index.
    deleted: int = 0
    # Total number of indexed skills after the pass.
    total: int = 0
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class SkillifyIndexUnchangedEvent(SubagentEvent):
    """Event reporting that an index pass found nothing to change."""

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.index_unchanged"] = "soothe.subagent.skillify.index_unchanged"
    # Total number of indexed skills; 0 when not supplied.
    total: int = 0
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class SkillifyIndexFailedEvent(SubagentEvent):
    """Event reporting that an index pass failed.

    Declares no fields of its own beyond the discriminator ``type``.
    """

    # Unknown keys are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["soothe.subagent.skillify.index_failed"] = "soothe.subagent.skillify.index_failed"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Events are self-contained for community plugins.
|
|
112
|
+
# Daemon will handle event registration based on type strings.
|
|
113
|
+
# No explicit registration needed here.
|
|
114
|
+
|
|
115
|
+
# Event type strings exposed as constants so callers can import them
# instead of repeating the literals.

# Subagent lifecycle
SUBAGENT_SKILLIFY_DISPATCHED = "soothe.subagent.skillify.dispatched"
SUBAGENT_SKILLIFY_COMPLETED = "soothe.subagent.skillify.completed"

# Retrieval
SUBAGENT_SKILLIFY_INDEXING_PENDING = "soothe.subagent.skillify.indexing_pending"
SUBAGENT_SKILLIFY_RETRIEVE_STARTED = "soothe.subagent.skillify.retrieve_started"
SUBAGENT_SKILLIFY_RETRIEVE_COMPLETED = "soothe.subagent.skillify.retrieve_completed"
SUBAGENT_SKILLIFY_RETRIEVE_NOT_READY = "soothe.subagent.skillify.retrieve_not_ready"

# Indexing
SUBAGENT_SKILLIFY_INDEX_STARTED = "soothe.subagent.skillify.index_started"
SUBAGENT_SKILLIFY_INDEX_UPDATED = "soothe.subagent.skillify.index_updated"
SUBAGENT_SKILLIFY_INDEX_UNCHANGED = "soothe.subagent.skillify.index_unchanged"
SUBAGENT_SKILLIFY_INDEX_FAILED = "soothe.subagent.skillify.index_failed"
|
|
126
|
+
|
|
127
|
+
# Public API of this module: the event-type constants followed by the
# event classes, each group in sorted order.
__all__ = [
    # Event-type string constants
    "SUBAGENT_SKILLIFY_COMPLETED",
    "SUBAGENT_SKILLIFY_DISPATCHED",
    "SUBAGENT_SKILLIFY_INDEXING_PENDING",
    "SUBAGENT_SKILLIFY_INDEX_FAILED",
    "SUBAGENT_SKILLIFY_INDEX_STARTED",
    "SUBAGENT_SKILLIFY_INDEX_UNCHANGED",
    "SUBAGENT_SKILLIFY_INDEX_UPDATED",
    "SUBAGENT_SKILLIFY_RETRIEVE_COMPLETED",
    "SUBAGENT_SKILLIFY_RETRIEVE_NOT_READY",
    "SUBAGENT_SKILLIFY_RETRIEVE_STARTED",
    # Event classes
    "SkillifyCompletedEvent",
    "SkillifyDispatchedEvent",
    "SkillifyIndexFailedEvent",
    "SkillifyIndexStartedEvent",
    "SkillifyIndexUnchangedEvent",
    "SkillifyIndexUpdatedEvent",
    "SkillifyIndexingPendingEvent",
    "SkillifyRetrieveCompletedEvent",
    "SkillifyRetrieveNotReadyEvent",
    "SkillifyRetrieveStartedEvent",
]
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""SkillIndexer -- background loop for embedding and upserting skills (RFC-0004)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import contextlib
|
|
7
|
+
import logging
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from .events import (
|
|
11
|
+
SkillifyIndexFailedEvent,
|
|
12
|
+
SkillifyIndexStartedEvent,
|
|
13
|
+
SkillifyIndexUnchangedEvent,
|
|
14
|
+
SkillifyIndexUpdatedEvent,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
|
|
20
|
+
from langchain_core.embeddings import Embeddings
|
|
21
|
+
|
|
22
|
+
from soothe_sdk.protocols import VectorStoreProtocol
|
|
23
|
+
|
|
24
|
+
from .models import SkillRecord
|
|
25
|
+
from .warehouse import SkillWarehouse
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LazyEmbeddings:
    """Wrapper that creates fresh embedding instances per event loop.

    The wrapped factory is called once for each distinct running event loop
    and the result is cached, so an embeddings client is never shared between
    loops. When no loop is running (the synchronous ``embed_*`` methods called
    from plain code) a single loop-independent instance is used instead of
    raising ``RuntimeError``.

    NOTE(review): instances are keyed by ``id(loop)`` and never evicted, so an
    entry outlives its loop.

    Args:
        factory: Zero-argument callable returning a new embeddings object.
    """

    def __init__(self, factory: Callable[[], Embeddings]) -> None:
        self._factory = factory
        # Key is id() of the running loop, or None when no loop is running.
        self._instances: dict[int | None, Embeddings] = {}

    def _get_instance(self) -> Embeddings:
        """Return the cached instance for the current loop, creating it on first use."""
        try:
            loop_id: int | None = id(asyncio.get_running_loop())
        except RuntimeError:
            # get_running_loop() raises when called outside a running loop;
            # the sync embed_* methods are legitimately used that way.
            loop_id = None
        if loop_id not in self._instances:
            self._instances[loop_id] = self._factory()
        return self._instances[loop_id]

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        """Asynchronously embed a list of documents."""
        return await self._get_instance().aembed_documents(texts)

    async def aembed_query(self, text: str) -> list[float]:
        """Asynchronously embed a single query string."""
        return await self._get_instance().aembed_query(text)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Synchronously embed a list of documents."""
        return self._get_instance().embed_documents(texts)

    def embed_query(self, text: str) -> list[float]:
        """Synchronously embed a single query string."""
        return self._get_instance().embed_query(text)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SkillIndexer:
    """Background indexing loop that keeps the vector store in sync with the warehouse.

    Each pass scans the warehouse, embeds records whose content hash is new or
    changed, deletes records that disappeared from the scan, and remembers the
    hashes it has successfully written so unchanged skills are skipped on the
    next pass.

    Args:
        warehouse: Source of skill records; only ``scan()`` is called on it.
        vector_store: Store that receives vectors and payloads.
        embeddings: An embeddings object, or a zero-argument factory that is
            wrapped in ``LazyEmbeddings``.
        interval_seconds: Sleep between passes of the background loop.
        collection: Collection name reported in the index-started event.
        embedding_dims: Vector size passed to ``create_collection``.
        event_callback: Optional sink that receives emitted event dicts.
    """

    def __init__(
        self,
        warehouse: SkillWarehouse,
        vector_store: VectorStoreProtocol,
        embeddings: Embeddings | Callable[[], Embeddings],
        interval_seconds: int = 300,
        collection: str = "soothe_skillify",
        embedding_dims: int = 1536,
        event_callback: Callable[[dict[str, Any]], None] | None = None,
    ) -> None:
        self._warehouse = warehouse
        self._vector_store = vector_store
        if callable(embeddings):
            self._embeddings: Embeddings | LazyEmbeddings = LazyEmbeddings(embeddings)
        else:
            self._embeddings = embeddings
        self._interval = interval_seconds
        self._collection = collection
        self._embedding_dims = embedding_dims
        # skill id -> content hash of the version known to be in the store.
        self._hash_cache: dict[str, str] = {}
        self._task: asyncio.Task[None] | None = None
        self._start_task: asyncio.Task[None] | None = None
        self._initialized = False
        self._total_indexed = 0
        self._ready_event: asyncio.Event | None = None
        self._event_callback = event_callback
        # True when the most recent pass could not write its pending records.
        self._last_pass_failed = False

    @property
    def total_indexed(self) -> int:
        """Number of skills recorded as indexed after the latest pass."""
        return self._total_indexed

    @property
    def ready_event(self) -> asyncio.Event:
        """Event set once the first index pass has finished (created lazily)."""
        if self._ready_event is None:
            self._ready_event = asyncio.Event()
        return self._ready_event

    @property
    def is_ready(self) -> bool:
        """Whether the ready event exists and has been set."""
        if self._ready_event is None:
            return False
        return self._ready_event.is_set()

    async def start(self) -> None:
        """Prepare the collection and hash cache, then launch the background loop.

        Does nothing when the loop task already exists.
        """
        if self._task is not None:
            return
        await self._ensure_collection()
        await self._bootstrap_hash_cache()
        self._emit(SkillifyIndexStartedEvent(collection=self._collection).to_dict())
        self._task = asyncio.create_task(self._index_loop())
        logger.info("Skillify background indexer started (interval=%ds)", self._interval)

    async def stop(self) -> None:
        """Cancel the background loop and close the vector store if it can be closed."""
        if self._task is None:
            return
        self._task.cancel()
        with contextlib.suppress(asyncio.CancelledError):
            await self._task
        self._task = None

        if hasattr(self._vector_store, "close"):
            try:
                await self._vector_store.close()
            except Exception:
                # Best-effort cleanup: a failing close must not break shutdown.
                logger.debug("Failed to close vector store", exc_info=True)

        logger.info("Skillify background indexer stopped")

    async def run_once(self) -> dict[str, int]:
        """Run a single synchronisation pass.

        Returns:
            Counters ``new``, ``changed`` and ``deleted`` for the changes that
            were applied. When embedding or upserting fails, ``new`` and
            ``changed`` are reported as 0 and the affected records stay out of
            the hash cache so the next pass retries them.
        """
        stats: dict[str, int] = {"new": 0, "changed": 0, "deleted": 0}

        current_records = self._warehouse.scan()
        current_ids = {record.id for record in current_records}

        to_embed: list[SkillRecord] = []
        for record in current_records:
            cached_hash = self._hash_cache.get(record.id)
            if cached_hash is None:
                to_embed.append(record)
                stats["new"] += 1
            elif cached_hash != record.content_hash:
                to_embed.append(record)
                stats["changed"] += 1

        for stale_id in set(self._hash_cache) - current_ids:
            try:
                await self._vector_store.delete(stale_id)
            except Exception:
                logger.warning("Failed to delete stale record %s", stale_id, exc_info=True)
            # Dropped from the cache even if the delete failed, as before.
            self._hash_cache.pop(stale_id, None)
            stats["deleted"] += 1

        upserted = await self._embed_and_upsert(to_embed) if to_embed else True
        if upserted:
            for record in to_embed:
                self._hash_cache[record.id] = record.content_hash
        else:
            # Nothing was written: do not cache the hashes (so the records are
            # retried) and do not report them as applied.
            stats["new"] = 0
            stats["changed"] = 0
        self._last_pass_failed = not upserted

        # After a fully successful pass the cache keys equal the scanned ids.
        self._total_indexed = len(self._hash_cache)
        return stats

    async def _embed_and_upsert(self, records: list[SkillRecord]) -> bool:
        """Embed ``records`` and insert them into the vector store.

        Failures are logged rather than raised.

        Returns:
            True when every record was written (or there was nothing to
            write), False when embedding or insertion failed.
        """
        if not records:
            return True

        texts: list[str] = []
        for index, record in enumerate(records):
            text = self._embedding_text(record)
            if not isinstance(text, str):
                logger.warning("Text %d is not a string (type=%s), converting", index, type(text).__name__)
                text = str(text) if text is not None else "Untitled skill"
            if not text.strip():
                logger.warning("Text %d is empty, using placeholder", index)
                text = "Untitled skill"
            texts.append(text)

        try:
            vectors = await self._embeddings.aembed_documents(texts)
        except Exception:
            logger.exception(
                "Embedding generation failed for %d skills. First text sample: %.200s",
                len(records),
                texts[0],
            )
            return False

        if len(vectors) != len(records):
            # Inserting misaligned vectors would attach them to the wrong skills.
            logger.error("Embedding returned %d vectors for %d skills", len(vectors), len(records))
            return False

        payloads: list[dict[str, Any]] = [
            {
                "skill_id": record.id,
                "name": record.name,
                "description": record.description,
                "path": record.path,
                "tags": record.tags,
                "content_hash": record.content_hash,
            }
            for record in records
        ]
        ids: list[str] = [record.id for record in records]

        try:
            await self._vector_store.insert(vectors=vectors, payloads=payloads, ids=ids)
        except Exception:
            logger.exception("Vector store upsert failed for %d skills", len(records))
            return False
        return True

    @staticmethod
    def _embedding_text(record: SkillRecord) -> str:
        """Build the text to embed from a record's name, description and tags.

        Blank parts are skipped; ``"Untitled skill"`` is returned when nothing
        usable remains.
        """
        parts = []
        if record.name:
            name_str = str(record.name).strip()
            if name_str:
                parts.append(name_str)
        if record.description:
            desc_str = str(record.description).strip()
            if desc_str:
                parts.append(desc_str)
        if record.tags and isinstance(record.tags, (list, tuple)):
            cleaned = (str(tag).strip() for tag in record.tags if tag is not None)
            tag_strs = [tag for tag in cleaned if tag]
            if tag_strs:
                parts.append("Tags: " + ", ".join(tag_strs))

        result = "\n".join(parts)
        return result if result.strip() else "Untitled skill"

    async def _ensure_collection(self) -> None:
        """Create the vector collection once; a creation failure is logged and tolerated."""
        if self._initialized:
            return
        try:
            await self._vector_store.create_collection(
                vector_size=self._embedding_dims,
                distance="cosine",
            )
        except Exception:
            logger.warning("Collection creation failed (may already exist)", exc_info=True)
        # Marked initialised in both cases so creation is attempted only once.
        self._initialized = True

    async def _index_loop(self) -> None:
        """Run index passes forever, sleeping ``interval_seconds`` between them.

        The ready event is set after the first pass whether it succeeded or
        failed, so waiters are never blocked indefinitely.
        """
        first_pass = True
        while True:
            try:
                stats = await self.run_once()
                self._report_pass(stats)
                if first_pass:
                    logger.info("Skillify index ready (total=%d)", self._total_indexed)
            except asyncio.CancelledError:
                raise
            except Exception:
                self._emit(SkillifyIndexFailedEvent().to_dict())
                logger.exception("Skillify index pass failed")

            if first_pass:
                # Go through the property so the event is created when nobody
                # has asked for it yet; otherwise an event requested after the
                # first pass would stay unset forever.
                self.ready_event.set()
                first_pass = False

            await asyncio.sleep(self._interval)

    def _report_pass(self, stats: dict[str, int]) -> None:
        """Emit and log the outcome of one completed pass."""
        if self._last_pass_failed:
            self._emit(SkillifyIndexFailedEvent().to_dict())
            logger.warning("Skillify index pass could not embed/upsert pending skills; will retry")

        total_changes = stats["new"] + stats["changed"] + stats["deleted"]
        if total_changes > 0:
            self._emit(
                SkillifyIndexUpdatedEvent(
                    new=stats["new"],
                    changed=stats["changed"],
                    deleted=stats["deleted"],
                    total=self._total_indexed,
                ).to_dict()
            )
            logger.info(
                "Skillify index pass: new=%d changed=%d deleted=%d total=%d",
                stats["new"],
                stats["changed"],
                stats["deleted"],
                self._total_indexed,
            )
        elif not self._last_pass_failed:
            self._emit(
                SkillifyIndexUnchangedEvent(
                    total=self._total_indexed,
                ).to_dict()
            )
            logger.debug("Skillify index pass: no changes (total=%d)", self._total_indexed)

    async def _bootstrap_hash_cache(self) -> None:
        """Seed the hash cache from records already present in the vector store.

        NOTE(review): at most 10000 records are listed; entries beyond that
        are not seeded.
        """
        try:
            records = await self._vector_store.list_records(limit=10000)
        except Exception:
            logger.debug("Skillify hash cache bootstrap failed", exc_info=True)
            return

        for record in records:
            payload = record.payload or {}
            skill_id = payload.get("skill_id")
            content_hash = payload.get("content_hash")
            # Only non-empty string pairs are trusted.
            if isinstance(skill_id, str) and isinstance(content_hash, str) and skill_id and content_hash:
                self._hash_cache[skill_id] = content_hash

    def _emit(self, event: dict[str, Any]) -> None:
        """Pass ``event`` to the callback, if any; callback errors are logged, not raised."""
        if self._event_callback is None:
            return
        try:
            self._event_callback(event)
        except Exception:
            logger.debug("Skillify event callback failed", exc_info=True)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Skillify data models (RFC-0004)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SkillRecord(BaseModel):
    """Metadata describing one skill tracked by the index."""

    # Required identity and descriptive fields.
    id: str
    name: str
    description: str
    path: str
    # Optional labels; a fresh empty list per instance.
    tags: list[str] = Field(default_factory=list)
    # One of "indexed", "stale" or "error"; defaults to "indexed".
    status: Literal["indexed", "stale", "error"] = "indexed"
    # Timezone-aware UTC timestamp taken when the instance is created.
    indexed_at: datetime = Field(default_factory=lambda: datetime.now(tz=UTC))
    # Hash of the skill content; empty when not supplied.
    content_hash: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SkillSearchResult(BaseModel):
    """One hit returned by a retrieval query: a record and its score."""

    # The matched skill.
    record: SkillRecord
    # Score reported for this match.
    score: float
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SkillBundle(BaseModel):
    """Payload returned for a retrieval request."""

    # Query text the bundle answers.
    query: str
    # Matched skills; a fresh empty list per instance.
    results: list[SkillSearchResult] = Field(default_factory=list)
    # Number of indexed skills reported alongside the results.
    total_indexed: int = 0
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""SkillRetriever -- semantic search over the skill index (RFC-0004)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from .models import SkillBundle, SkillRecord, SkillSearchResult
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
|
|
15
|
+
from langchain_core.embeddings import Embeddings
|
|
16
|
+
|
|
17
|
+
from soothe_sdk.protocols import (
|
|
18
|
+
ActionRequest,
|
|
19
|
+
PermissionSet,
|
|
20
|
+
PolicyContext,
|
|
21
|
+
VectorStoreProtocol,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# Seconds SkillRetriever.retrieve waits for the index-ready event before giving up.
_INDEXING_WAIT_TIMEOUT = 10.0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class LazyEmbeddings:
    """Wrapper that creates fresh embedding instances per event loop.

    The wrapped factory is called once for each distinct running event loop
    and the result is cached, so an embeddings client is never shared between
    loops. When no loop is running (the synchronous ``embed_*`` methods called
    from plain code) a single loop-independent instance is used instead of
    raising ``RuntimeError``.

    NOTE(review): instances are keyed by ``id(loop)`` and never evicted, so an
    entry outlives its loop.

    Args:
        factory: Zero-argument callable returning a new embeddings object.
    """

    def __init__(self, factory: Callable[[], Embeddings]) -> None:
        self._factory = factory
        # Key is id() of the running loop, or None when no loop is running.
        self._instances: dict[int | None, Embeddings] = {}

    def _get_instance(self) -> Embeddings:
        """Return the cached instance for the current loop, creating it on first use."""
        try:
            loop_id: int | None = id(asyncio.get_running_loop())
        except RuntimeError:
            # get_running_loop() raises when called outside a running loop;
            # the sync embed_* methods are legitimately used that way.
            loop_id = None
        if loop_id not in self._instances:
            self._instances[loop_id] = self._factory()
        return self._instances[loop_id]

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        """Asynchronously embed a list of documents."""
        return await self._get_instance().aembed_documents(texts)

    async def aembed_query(self, text: str) -> list[float]:
        """Asynchronously embed a single query string."""
        return await self._get_instance().aembed_query(text)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Synchronously embed a list of documents."""
        return self._get_instance().embed_documents(texts)

    def embed_query(self, text: str) -> list[float]:
        """Synchronously embed a single query string."""
        return self._get_instance().embed_query(text)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SkillRetriever:
|
|
56
|
+
"""Semantic search over the Skillify vector index."""
|
|
57
|
+
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
vector_store: VectorStoreProtocol,
|
|
61
|
+
embeddings: Embeddings | Callable[[], Embeddings],
|
|
62
|
+
top_k: int = 10,
|
|
63
|
+
ready_event: asyncio.Event | None = None,
|
|
64
|
+
policy: Any | None = None,
|
|
65
|
+
policy_profile: str = "standard",
|
|
66
|
+
) -> None:
|
|
67
|
+
self._vector_store = vector_store
|
|
68
|
+
if callable(embeddings):
|
|
69
|
+
self._embeddings: Embeddings | LazyEmbeddings = LazyEmbeddings(embeddings)
|
|
70
|
+
else:
|
|
71
|
+
self._embeddings = embeddings
|
|
72
|
+
self._top_k = top_k
|
|
73
|
+
self._ready_event = ready_event
|
|
74
|
+
self._policy = policy
|
|
75
|
+
self._policy_profile = policy_profile
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
def is_ready(self) -> bool:
|
|
79
|
+
if self._ready_event is None:
|
|
80
|
+
return True
|
|
81
|
+
return self._ready_event.is_set()
|
|
82
|
+
|
|
83
|
+
async def retrieve(self, query: str, top_k: int | None = None) -> SkillBundle:
|
|
84
|
+
self._check_policy(query)
|
|
85
|
+
|
|
86
|
+
if self._ready_event and not self._ready_event.is_set():
|
|
87
|
+
logger.info("Skillify index not ready, waiting up to %.0fs", _INDEXING_WAIT_TIMEOUT)
|
|
88
|
+
try:
|
|
89
|
+
await asyncio.wait_for(self._ready_event.wait(), timeout=_INDEXING_WAIT_TIMEOUT)
|
|
90
|
+
except TimeoutError:
|
|
91
|
+
logger.warning("Skillify index still not ready after %.0fs timeout", _INDEXING_WAIT_TIMEOUT)
|
|
92
|
+
return SkillBundle(
|
|
93
|
+
query="[Indexing in progress] The skill warehouse is still being indexed. Please retry shortly.",
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
k = top_k or self._top_k
|
|
97
|
+
|
|
98
|
+
try:
|
|
99
|
+
vector = await self._embeddings.aembed_query(query)
|
|
100
|
+
except Exception:
|
|
101
|
+
logger.exception("Query embedding failed for: %s", query[:100])
|
|
102
|
+
return SkillBundle(query=query)
|
|
103
|
+
|
|
104
|
+
try:
|
|
105
|
+
records = await self._vector_store.search(
|
|
106
|
+
query=query,
|
|
107
|
+
vector=vector,
|
|
108
|
+
limit=k,
|
|
109
|
+
)
|
|
110
|
+
except Exception:
|
|
111
|
+
logger.exception("Vector store search failed")
|
|
112
|
+
return SkillBundle(query=query)
|
|
113
|
+
|
|
114
|
+
results: list[SkillSearchResult] = []
|
|
115
|
+
for vr in records:
|
|
116
|
+
payload = vr.payload
|
|
117
|
+
record = SkillRecord(
|
|
118
|
+
id=payload.get("skill_id", vr.id),
|
|
119
|
+
name=payload.get("name", "unknown"),
|
|
120
|
+
description=payload.get("description", ""),
|
|
121
|
+
path=payload.get("path", ""),
|
|
122
|
+
tags=payload.get("tags", []),
|
|
123
|
+
status="indexed",
|
|
124
|
+
indexed_at=datetime.now(UTC),
|
|
125
|
+
content_hash=payload.get("content_hash", ""),
|
|
126
|
+
)
|
|
127
|
+
results.append(SkillSearchResult(record=record, score=vr.score or 0.0))
|
|
128
|
+
|
|
129
|
+
total_records = await self._count_indexed()
|
|
130
|
+
|
|
131
|
+
return SkillBundle(
|
|
132
|
+
query=query,
|
|
133
|
+
results=results,
|
|
134
|
+
total_indexed=total_records,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
def _check_policy(self, query: str) -> None:
|
|
138
|
+
if self._policy is None:
|
|
139
|
+
return
|
|
140
|
+
|
|
141
|
+
permissions = PermissionSet(frozenset())
|
|
142
|
+
get_profile = getattr(self._policy, "get_profile", None)
|
|
143
|
+
if callable(get_profile):
|
|
144
|
+
profile = get_profile(self._policy_profile)
|
|
145
|
+
if profile is not None:
|
|
146
|
+
permissions = profile.permissions
|
|
147
|
+
|
|
148
|
+
decision = self._policy.check(
|
|
149
|
+
ActionRequest(
|
|
150
|
+
action_type="skillify_retrieve",
|
|
151
|
+
tool_name="skillify.retrieve",
|
|
152
|
+
tool_args={"query": query[:200]},
|
|
153
|
+
),
|
|
154
|
+
PolicyContext(active_permissions=permissions, thread_id=None),
|
|
155
|
+
)
|
|
156
|
+
if decision.verdict == "deny":
|
|
157
|
+
msg = f"Policy denied skill retrieval: {decision.reason}"
|
|
158
|
+
raise ValueError(msg)
|
|
159
|
+
|
|
160
|
+
async def _count_indexed(self) -> int:
|
|
161
|
+
try:
|
|
162
|
+
all_records = await self._vector_store.list_records(limit=10000)
|
|
163
|
+
return len(all_records)
|
|
164
|
+
except Exception:
|
|
165
|
+
return 0
|