weavert-kit-chat 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weavert_kit_chat-0.1.0/PKG-INFO +53 -0
- weavert_kit_chat-0.1.0/README.md +28 -0
- weavert_kit_chat-0.1.0/pyproject.toml +37 -0
- weavert_kit_chat-0.1.0/setup.cfg +4 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat/__init__.py +110 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat/_builtins.py +371 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat/_tool_impls.py +539 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat.egg-info/PKG-INFO +53 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat.egg-info/SOURCES.txt +10 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat.egg-info/dependency_links.txt +1 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat.egg-info/requires.txt +3 -0
- weavert_kit_chat-0.1.0/src/weavert_kit_chat.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: weavert-kit-chat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Chat-oriented product kit for extracted WeaveRT scenario packs.
|
|
5
|
+
Author: WeaveRT Maintainers
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/xyz2b/weave-ai-runtime
|
|
8
|
+
Project-URL: Documentation, https://github.com/xyz2b/weave-ai-runtime/tree/main/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/xyz2b/weave-ai-runtime
|
|
10
|
+
Project-URL: Issues, https://github.com/xyz2b/weave-ai-runtime/issues
|
|
11
|
+
Keywords: weavert,agents,ai,product-kit,scenario-pack
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: weavert<0.2.0,>=0.1.0
|
|
23
|
+
Requires-Dist: weavert-kit-common-retrieval<0.2.0,>=0.1.0
|
|
24
|
+
Requires-Dist: weavert-kit-common-web<0.2.0,>=0.1.0
|
|
25
|
+
|
|
26
|
+
# Chat Product Kit
|
|
27
|
+
|
|
28
|
+
Canonical import root: `weavert_kit_chat`
|
|
29
|
+
|
|
30
|
+
## What this package owns
|
|
31
|
+
|
|
32
|
+
- the `weavert-scenario-chat` scenario pack
|
|
33
|
+
- chat-oriented product-profile defaults layered on shared grounding packages
|
|
34
|
+
|
|
35
|
+
## Canonical names
|
|
36
|
+
|
|
37
|
+
- install name: `weavert-kit-chat`
|
|
38
|
+
- import root: `weavert_kit_chat`
|
|
39
|
+
- runtime activation: `weavert-scenario-chat`
|
|
40
|
+
|
|
41
|
+
The public install name stays separate from the runtime scenario-pack activation name.
|
|
42
|
+
|
|
43
|
+
## Shared packages it composes
|
|
44
|
+
|
|
45
|
+
- `weavert_kit_common_retrieval`
|
|
46
|
+
- `weavert_kit_common_web`
|
|
47
|
+
|
|
48
|
+
## See also
|
|
49
|
+
|
|
50
|
+
- `../README.md`
|
|
51
|
+
- `../common/README.md`
|
|
52
|
+
- `../../../docs/guides/use-scenario-packs.md`
|
|
53
|
+
- `../../../docs/introduction/use-cases.md`
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Chat Product Kit
|
|
2
|
+
|
|
3
|
+
Canonical import root: `weavert_kit_chat`
|
|
4
|
+
|
|
5
|
+
## What this package owns
|
|
6
|
+
|
|
7
|
+
- the `weavert-scenario-chat` scenario pack
|
|
8
|
+
- chat-oriented product-profile defaults layered on shared grounding packages
|
|
9
|
+
|
|
10
|
+
## Canonical names
|
|
11
|
+
|
|
12
|
+
- install name: `weavert-kit-chat`
|
|
13
|
+
- import root: `weavert_kit_chat`
|
|
14
|
+
- runtime activation: `weavert-scenario-chat`
|
|
15
|
+
|
|
16
|
+
The public install name stays separate from the runtime scenario-pack activation name.
|
|
17
|
+
|
|
18
|
+
## Shared packages it composes
|
|
19
|
+
|
|
20
|
+
- `weavert_kit_common_retrieval`
|
|
21
|
+
- `weavert_kit_common_web`
|
|
22
|
+
|
|
23
|
+
## See also
|
|
24
|
+
|
|
25
|
+
- `../README.md`
|
|
26
|
+
- `../common/README.md`
|
|
27
|
+
- `../../../docs/guides/use-scenario-packs.md`
|
|
28
|
+
- `../../../docs/introduction/use-cases.md`
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
# `license = "Apache-2.0"` below is a PEP 639 SPDX expression; older setuptools
# releases reject a plain-string license, so require a release that supports it.
requires = ["setuptools>=77.0.3", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "weavert-kit-chat"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Chat-oriented product kit for extracted WeaveRT scenario packs."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
authors = [{ name = "WeaveRT Maintainers" }]
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"weavert>=0.1.0,<0.2.0",
|
|
15
|
+
"weavert-kit-common-retrieval>=0.1.0,<0.2.0",
|
|
16
|
+
"weavert-kit-common-web>=0.1.0,<0.2.0",
|
|
17
|
+
]
|
|
18
|
+
keywords = ["weavert", "agents", "ai", "product-kit", "scenario-pack"]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 3 - Alpha",
|
|
21
|
+
"Intended Audience :: Developers",
|
|
22
|
+
"Operating System :: OS Independent",
|
|
23
|
+
"Programming Language :: Python",
|
|
24
|
+
"Programming Language :: Python :: 3",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/xyz2b/weave-ai-runtime"
|
|
32
|
+
Documentation = "https://github.com/xyz2b/weave-ai-runtime/tree/main/docs"
|
|
33
|
+
Repository = "https://github.com/xyz2b/weave-ai-runtime"
|
|
34
|
+
Issues = "https://github.com/xyz2b/weave-ai-runtime/issues"
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["src"]
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from weavert.package_system.protocols import RuntimePackageManifest
|
|
4
|
+
from weavert.extension_contracts.scenario_runtime_packs import (
|
|
5
|
+
ReferenceScenarioPackShape,
|
|
6
|
+
build_reference_scenario_pack_manifest,
|
|
7
|
+
)
|
|
8
|
+
from weavert_kit_common_retrieval import CHAT_RETRIEVAL_TOOLS, reference_shared_package_manifest as retrieval_package_manifest
|
|
9
|
+
from weavert_kit_common_web import CHAT_WEB_TOOLS, reference_shared_package_manifest as web_package_manifest
|
|
10
|
+
|
|
11
|
+
from ._builtins import (
|
|
12
|
+
CHAT_SCENARIO_AGENTS,
|
|
13
|
+
CHAT_SCENARIO_SKILLS,
|
|
14
|
+
chat_scenario_builtin_agents,
|
|
15
|
+
chat_scenario_builtin_skills,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Workflow-control tool names the chat scenario expects in addition to the
# shared retrieval and web grounding tools.
CHAT_WORKFLOW_CONTROL_TOOLS = ("ask_user",)

# Declarative shape of the chat scenario pack. It is handed to
# ``build_reference_scenario_pack_manifest`` by the manifest helper in this module.
REFERENCE_SCENARIO_PACK_SHAPE = ReferenceScenarioPackShape(
    # --- identity -----------------------------------------------------------
    package_name="weavert-scenario-chat",
    profile="chat",
    display_name="AI chat",
    description="Reference scenario pack for read-mostly chat experiences.",
    # --- packaging recommendations and dependencies -------------------------
    recommended_distribution="weavert-core",
    recommended_first_party_packages=("weavert-memory",),
    shared_package_dependencies=("weavert-shared-retrieval", "weavert-bridge-web"),
    # --- expected surfaces --------------------------------------------------
    expected_tools=(
        *CHAT_RETRIEVAL_TOOLS,
        *CHAT_WEB_TOOLS,
        *CHAT_WORKFLOW_CONTROL_TOOLS,
    ),
    expected_agents=CHAT_SCENARIO_AGENTS,
    # "remember" is listed ahead of the skills this pack defines itself.
    expected_skills=("remember", *CHAT_SCENARIO_SKILLS),
    # --- posture and ownership statements -----------------------------------
    default_boundaries=(
        "read-mostly by default",
        "no implicit workspace mutation or shell execution surfaces",
        "retrieval and web grounding stay shared-package concerns while workflow roles stay scenario-pack owned",
    ),
    app_owned_wiring=(
        "model provider selection",
        "session/transcript store selection",
        "host binding for web, mobile, or support surfaces",
        "final permission policy composition",
    ),
    host_assumptions=(
        "host remains lightweight and may only expose notifications or approval prompts",
    ),
    permission_policy_posture=(
        "default to read-only or approval-first policies",
        "treat any write-capable bridge as an app-owned escalation decision",
    ),
    profile_prompt_fragments=(
        "Scenario profile: AI chat.",
        "Preserve read-mostly defaults and avoid implicit workspace mutation or shell execution.",
    ),
    # --- workflow roles owned by this pack ----------------------------------
    workflow_agent_ids=CHAT_SCENARIO_AGENTS,
    workflow_skill_ids=CHAT_SCENARIO_SKILLS,
    notes=(
        "Chat inherits memory and retrieval posture without inheriting coding defaults.",
        "The chat scenario pack owns workflow agents and skills, while shared retrieval/web packages own the grounding tools.",
    ),
)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def reference_scenario_pack_shapes() -> tuple[ReferenceScenarioPackShape, ...]:
    """Return every scenario-pack shape this package publishes.

    The chat kit exposes exactly one shape, so the result is a one-element tuple.
    """
    published_shapes = (REFERENCE_SCENARIO_PACK_SHAPE,)
    return published_shapes
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def reference_scenario_pack_shape(name: str | None = None) -> ReferenceScenarioPackShape:
    """Look up the chat scenario-pack shape by one of its names.

    Args:
        name: The shape's package name, profile, or display name. ``None``
            selects the chat shape directly.

    Returns:
        The module-level ``REFERENCE_SCENARIO_PACK_SHAPE``.

    Raises:
        KeyError: If ``name`` matches none of the accepted names.
    """
    shape = REFERENCE_SCENARIO_PACK_SHAPE
    if name is None:
        return shape

    requested = str(name)
    accepted_names = (shape.package_name, shape.profile, shape.display_name)
    if requested not in accepted_names:
        raise KeyError(f"Unknown chat scenario pack shape: {name}")
    return shape
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def reference_scenario_pack_manifest() -> RuntimePackageManifest:
    """Build the runtime manifest for the chat scenario pack.

    The agent and skill factories are passed as callables (not called here);
    ``build_reference_scenario_pack_manifest`` decides when to invoke them.
    """
    manifest = build_reference_scenario_pack_manifest(
        REFERENCE_SCENARIO_PACK_SHAPE,
        builtin_skills=chat_scenario_builtin_skills,
        builtin_agents=chat_scenario_builtin_agents,
    )
    return manifest
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def reference_scenario_pack_manifests() -> tuple[RuntimePackageManifest, ...]:
    """Return the chat scenario-pack manifest wrapped in a one-element tuple."""
    chat_manifest = reference_scenario_pack_manifest()
    return (chat_manifest,)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def chat_scenario_runtime_pack_manifests() -> tuple[RuntimePackageManifest, ...]:
    """Return every manifest needed to run the chat scenario.

    The order is: shared retrieval package, shared web package, then the chat
    scenario pack itself.
    """
    retrieval_manifest = retrieval_package_manifest()
    web_manifest = web_package_manifest()
    scenario_manifest = reference_scenario_pack_manifest()
    return (retrieval_manifest, web_manifest, scenario_manifest)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
__all__ = [
|
|
101
|
+
"CHAT_SCENARIO_AGENTS",
|
|
102
|
+
"CHAT_SCENARIO_SKILLS",
|
|
103
|
+
"CHAT_WORKFLOW_CONTROL_TOOLS",
|
|
104
|
+
"REFERENCE_SCENARIO_PACK_SHAPE",
|
|
105
|
+
"chat_scenario_runtime_pack_manifests",
|
|
106
|
+
"reference_scenario_pack_manifest",
|
|
107
|
+
"reference_scenario_pack_manifests",
|
|
108
|
+
"reference_scenario_pack_shape",
|
|
109
|
+
"reference_scenario_pack_shapes",
|
|
110
|
+
]
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from weavert.builtins.definition_helpers import static_semantics
|
|
4
|
+
from weavert.definitions import (
|
|
5
|
+
AgentDefinition,
|
|
6
|
+
DefinitionOrigin,
|
|
7
|
+
DefinitionSource,
|
|
8
|
+
MemoryScope,
|
|
9
|
+
PermissionMode,
|
|
10
|
+
SkillDefinition,
|
|
11
|
+
SkillExecutionContext,
|
|
12
|
+
ToolClassifierInput,
|
|
13
|
+
ToolDefinition,
|
|
14
|
+
ToolPresentationEmphasis,
|
|
15
|
+
ToolRiskLevel,
|
|
16
|
+
ToolTraits,
|
|
17
|
+
ToolUsePresentation,
|
|
18
|
+
)
|
|
19
|
+
from ._tool_impls import (
|
|
20
|
+
grounding_web_fetch_tool,
|
|
21
|
+
grounding_web_search_tool,
|
|
22
|
+
prepare_citations_tool,
|
|
23
|
+
retrieve_context_tool,
|
|
24
|
+
validate_grounding_web_fetch,
|
|
25
|
+
validate_grounding_web_search,
|
|
26
|
+
validate_prepare_citations_tool,
|
|
27
|
+
validate_retrieve_context_tool,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Names of the retrieval/citation tools built by chat_shared_retrieval_builtin_tools().
CHAT_RETRIEVAL_TOOLS = ("retrieve_context", "prepare_citations")

# Names of the web grounding tools built by chat_web_grounding_builtin_tools().
CHAT_WEB_TOOLS = ("grounding_web_search", "grounding_web_fetch")

# Names of the agents built by chat_scenario_builtin_agents().
CHAT_SCENARIO_AGENTS = ("researcher", "support-agent", "memory-curator")

# Names of the skills built by chat_scenario_builtin_skills().
CHAT_SCENARIO_SKILLS = (
    "chat-summarize",
    "answer-with-citations",
    "clarify-request",
    "capture-preferences",
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def chat_shared_retrieval_builtin_tools() -> tuple[ToolDefinition, ...]:
    """Build the bundled ``retrieve_context`` and ``prepare_citations`` tools.

    Both tools are declared read-only and concurrency-safe, carry READ risk
    semantics, and describe grounding items with the same item schema object.
    """
    bundled = DefinitionOrigin(DefinitionSource.BUNDLED)

    # Schema of a single grounding item; keys beyond the listed ones are allowed.
    grounding_item = {
        "type": "object",
        "properties": {
            "id": {"type": "string"},
            "title": {"type": "string"},
            "content": {"type": "string"},
            "excerpt": {"type": "string"},
            "url": {"type": "string"},
            "source_kind": {"type": "string"},
            "metadata": {"type": "object"},
        },
        "additionalProperties": True,
    }

    retrieve_context = ToolDefinition(
        name="retrieve_context",
        description="Rank grounding notes, passages, and optional runtime memory for a chat query.",
        input_schema={
            "type": "object",
            "properties": {
                "query": {"type": "string"},
                "items": {"type": "array", "items": grounding_item},
                "limit": {"type": "integer", "minimum": 1, "maximum": 12},
                "include_memory": {"type": "boolean"},
                "memory_scope": {
                    "type": "string",
                    "enum": [
                        MemoryScope.USER.value,
                        MemoryScope.PROJECT.value,
                        MemoryScope.LOCAL.value,
                    ],
                },
            },
            "required": ["query"],
            "additionalProperties": False,
        },
        traits=ToolTraits(read_only=True, concurrency_safe=True),
        semantics=_read_only_tool_semantics(
            title="Retrieve grounding context",
            operation="retrieve_context",
            summary_prefix="Retrieve grounding context",
            subtitle_key="query",
            risk_level=ToolRiskLevel.READ,
            tags=("grounding", "retrieval"),
        ),
        validate_input=validate_retrieve_context_tool,
        execute=retrieve_context_tool,
        origin=bundled,
    )

    prepare_citations = ToolDefinition(
        name="prepare_citations",
        description="Turn retrieved grounding items into a flat citation bundle for chat answers.",
        input_schema={
            "type": "object",
            "properties": {
                "items": {"type": "array", "items": grounding_item},
                "limit": {"type": "integer", "minimum": 1, "maximum": 12},
            },
            "required": ["items"],
            "additionalProperties": False,
        },
        traits=ToolTraits(read_only=True, concurrency_safe=True),
        semantics=_read_only_tool_semantics(
            title="Prepare citations",
            operation="prepare_citations",
            summary_prefix="Prepare citations",
            subtitle_key="limit",
            risk_level=ToolRiskLevel.READ,
            tags=("grounding", "citations"),
        ),
        validate_input=validate_prepare_citations_tool,
        execute=prepare_citations_tool,
        origin=bundled,
    )

    return (retrieve_context, prepare_citations)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def chat_web_grounding_builtin_tools() -> tuple[ToolDefinition, ...]:
    """Build the bundled ``grounding_web_search`` and ``grounding_web_fetch`` tools.

    Both tools are declared read-only and concurrency-safe and use the network
    semantics helper, which reports them at NETWORK risk level.
    """
    bundled = DefinitionOrigin(DefinitionSource.BUNDLED)

    search_tool = ToolDefinition(
        name="grounding_web_search",
        description="Search the web for chat-safe grounding candidates.",
        input_schema={
            "type": "object",
            "properties": {
                "query": {"type": "string"},
                "limit": {"type": "integer", "minimum": 1, "maximum": 8},
            },
            "required": ["query"],
            "additionalProperties": False,
        },
        traits=ToolTraits(read_only=True, concurrency_safe=True),
        semantics=_network_tool_semantics(
            title="Search grounding sources",
            operation="grounding_web_search",
            summary_prefix="Search grounding sources",
            subtitle_key="query",
            tags=("grounding", "web", "search"),
        ),
        validate_input=validate_grounding_web_search,
        execute=grounding_web_search_tool,
        origin=bundled,
    )

    fetch_tool = ToolDefinition(
        name="grounding_web_fetch",
        description="Fetch a remote page and return chat-safe text for grounding.",
        input_schema={
            "type": "object",
            "properties": {
                "url": {"type": "string"},
                "timeout_ms": {"type": "integer", "minimum": 1},
                "max_chars": {"type": "integer", "minimum": 500, "maximum": 32000},
            },
            "required": ["url"],
            "additionalProperties": False,
        },
        traits=ToolTraits(read_only=True, concurrency_safe=True),
        semantics=_network_tool_semantics(
            title="Fetch grounding page",
            operation="grounding_web_fetch",
            summary_prefix="Fetch grounding page",
            subtitle_key="url",
            tags=("grounding", "web", "fetch"),
        ),
        validate_input=validate_grounding_web_fetch,
        execute=grounding_web_fetch_tool,
        origin=bundled,
    )

    return (search_tool, fetch_tool)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def chat_scenario_builtin_agents() -> tuple[AgentDefinition, ...]:
    """Build the three bundled chat agents: researcher, support-agent, memory-curator.

    All three run with ``PermissionMode.DEFAULT`` and project-scoped memory.
    The researcher and support agent get retrieval, web, and ``ask_user``
    tools; the memory curator gets retrieval and ``ask_user`` only.
    """
    bundled = DefinitionOrigin(DefinitionSource.BUNDLED)

    # Tool set shared by the two agents that may consult the web.
    web_grounded_tools = (*CHAT_RETRIEVAL_TOOLS, *CHAT_WEB_TOOLS, "ask_user")
    # The curator works from retrieved material only.
    curator_tools = (*CHAT_RETRIEVAL_TOOLS, "ask_user")

    researcher_prompt = (
        "You are the grounded-chat researcher.\n\n"
        "Workflow contract:\n"
        "1. Start with read-only grounding surfaces.\n"
        "2. Use `grounding_web_search` and `grounding_web_fetch` for fresh external facts when needed.\n"
        "3. Use `retrieve_context` to rank notes, memory, or fetched passages before summarizing.\n"
        "4. Use `prepare_citations` before handing off a final evidence bundle.\n"
        "5. Never imply shell access, workspace mutation, or uninspected sources."
    )
    support_prompt = (
        "You are the grounded-chat support agent.\n\n"
        "Workflow contract:\n"
        "1. Clarify the user's goal when the policy, product, or account scope is ambiguous.\n"
        "2. Prefer cited, read-only answers over unsupported guesses.\n"
        "3. Use retrieval and web grounding surfaces before finalizing an answer.\n"
        "4. Capture durable user preferences only when they are explicit and stable.\n"
        "5. Do not request workspace or shell mutation as part of the default support flow."
    )
    curator_prompt = (
        "You are the grounded-chat memory curator.\n\n"
        "Workflow contract:\n"
        "1. Inspect recent context and retrieved notes before recording anything durable.\n"
        "2. Prefer stable preferences, conventions, and reusable support facts.\n"
        "3. Use `remember` only when the information is explicit, durable, and helpful later.\n"
        "4. Keep the posture read-mostly and never ask for coding-oriented mutation surfaces."
    )

    researcher = AgentDefinition(
        name="researcher",
        description="Gather read-only evidence bundles for grounded chat answers.",
        prompt=researcher_prompt,
        tools=web_grounded_tools,
        skills=("chat-summarize", "answer-with-citations", "clarify-request"),
        permission_mode=PermissionMode.DEFAULT,
        max_turns=6,
        memory=MemoryScope.PROJECT,
        origin=bundled,
    )
    support_agent = AgentDefinition(
        name="support-agent",
        description="Answer user support questions with clarification and citations.",
        prompt=support_prompt,
        tools=web_grounded_tools,
        skills=(
            "chat-summarize",
            "answer-with-citations",
            "clarify-request",
            "capture-preferences",
            "remember",
        ),
        permission_mode=PermissionMode.DEFAULT,
        max_turns=6,
        memory=MemoryScope.PROJECT,
        origin=bundled,
    )
    memory_curator = AgentDefinition(
        name="memory-curator",
        description="Curate durable chat preferences and reusable support facts.",
        prompt=curator_prompt,
        tools=curator_tools,
        skills=("capture-preferences", "remember", "chat-summarize"),
        permission_mode=PermissionMode.DEFAULT,
        max_turns=4,
        memory=MemoryScope.PROJECT,
        origin=bundled,
    )

    return (researcher, support_agent, memory_curator)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def chat_scenario_builtin_skills() -> tuple[SkillDefinition, ...]:
    """Build the four bundled chat skills.

    Every skill uses the INLINE execution context and the same bundled origin;
    they differ only in name, description, and instruction text.
    """
    bundled = DefinitionOrigin(DefinitionSource.BUNDLED)

    def inline_skill(name: str, description: str, content: str) -> SkillDefinition:
        # Fill in the two keyword arguments that are identical for every skill.
        return SkillDefinition(
            name=name,
            description=description,
            content=content,
            execution_context=SkillExecutionContext.INLINE,
            origin=bundled,
        )

    summarize = inline_skill(
        "chat-summarize",
        "Summarize retrieved grounding without dropping important caveats.",
        (
            "Summarize the material already in hand.\n\n"
            "1. Start from retrieved notes, fetched passages, or durable memory.\n"
            "2. Separate confirmed facts from uncertainty.\n"
            "3. Keep the summary concise, user-facing, and faithful to the sources."
        ),
    )
    cite = inline_skill(
        "answer-with-citations",
        "Assemble a grounded answer that cites supporting evidence explicitly.",
        (
            "Answer with visible grounding.\n\n"
            "1. Retrieve or fetch the best evidence first.\n"
            "2. Call `prepare_citations` on the supporting items before drafting the answer.\n"
            "3. Cite only evidence you actually inspected.\n"
            "4. State uncertainty when the grounding is thin or incomplete."
        ),
    )
    clarify = inline_skill(
        "clarify-request",
        "Ask a short clarification before grounding an ambiguous request.",
        (
            "When the request is ambiguous or missing the policy/product scope:\n\n"
            "1. Ask the shortest question that unblocks a grounded answer.\n"
            "2. Prefer one focused clarification over a long questionnaire.\n"
            "3. Resume grounding once the missing detail is provided."
        ),
    )
    capture = inline_skill(
        "capture-preferences",
        "Record durable user preferences and recurring support facts.",
        (
            "Capture only stable preferences or reusable support facts.\n\n"
            "1. Confirm the preference or fact is explicit and durable.\n"
            "2. Use `remember` when the information should survive future turns.\n"
            "3. Skip volatile or one-off details."
        ),
    )

    return (summarize, cite, clarify, capture)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _read_only_tool_semantics(
    *,
    title: str,
    operation: str,
    summary_prefix: str,
    subtitle_key: str,
    risk_level: ToolRiskLevel,
    tags: tuple[str, ...],
):
    """Build static semantics for a read-only, side-effect-free tool.

    Args:
        title: Title shown in the tool-use presentation.
        operation: Operation name reported to the classifier.
        summary_prefix: Text placed before the subject in the classifier summary.
        subtitle_key: Input key whose value is used as the subtitle/subject;
            a missing or falsy value is replaced by ``"grounding"``.
        risk_level: Risk level reported to the classifier.
        tags: Tags reported to the classifier.

    Returns:
        Whatever ``static_semantics`` returns for these callbacks.
    """

    def subject_of(tool_input):
        # Falsy values (missing key, empty string, 0) fall back to a generic label.
        return tool_input.get(subtitle_key) or "grounding"

    def present(tool_input, _context):
        return ToolUsePresentation(
            title=title,
            subtitle=str(subject_of(tool_input)),
            emphasis=ToolPresentationEmphasis.LOW,
        )

    def classify(tool_input, _context):
        return ToolClassifierInput(
            operation=operation,
            summary=f"{summary_prefix}: {subject_of(tool_input)}",
            risk_level=risk_level,
            side_effects=False,
            tags=tags,
        )

    return static_semantics(
        read_only=True,
        concurrency_safe=True,
        tool_use_presentation=present,
        classifier_input=classify,
    )
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _network_tool_semantics(
    *,
    title: str,
    operation: str,
    summary_prefix: str,
    subtitle_key: str,
    tags: tuple[str, ...],
):
    """Build static semantics for a read-only tool that reaches the network.

    The classifier input always carries ``ToolRiskLevel.NETWORK``. Target URLs
    are reported only when ``subtitle_key`` is ``"url"`` and the input holds a
    non-``None`` value for it; otherwise the tuple is empty.

    Args:
        title: Title shown in the tool-use presentation.
        operation: Operation name reported to the classifier.
        summary_prefix: Text placed before the subject in the classifier summary.
        subtitle_key: Input key whose value is used as the subtitle/subject;
            a missing or falsy value is replaced by ``"grounding"``.
        tags: Tags reported to the classifier.

    Returns:
        Whatever ``static_semantics`` returns for these callbacks.
    """

    def present(tool_input, _context):
        subject = tool_input.get(subtitle_key) or "grounding"
        return ToolUsePresentation(
            title=title,
            subtitle=str(subject),
            emphasis=ToolPresentationEmphasis.LOW,
        )

    def classify(tool_input, _context):
        subject = tool_input.get(subtitle_key)
        if subtitle_key == "url" and subject is not None:
            urls = (str(subject),)
        else:
            urls = ()
        return ToolClassifierInput(
            operation=operation,
            summary=f"{summary_prefix}: {subject or 'grounding'}",
            target_urls=urls,
            risk_level=ToolRiskLevel.NETWORK,
            side_effects=False,
            tags=tags,
        )

    return static_semantics(
        read_only=True,
        concurrency_safe=True,
        tool_use_presentation=present,
        classifier_input=classify,
    )
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
__all__ = [
|
|
363
|
+
"CHAT_RETRIEVAL_TOOLS",
|
|
364
|
+
"CHAT_SCENARIO_AGENTS",
|
|
365
|
+
"CHAT_SCENARIO_SKILLS",
|
|
366
|
+
"CHAT_WEB_TOOLS",
|
|
367
|
+
"chat_scenario_builtin_agents",
|
|
368
|
+
"chat_scenario_builtin_skills",
|
|
369
|
+
"chat_shared_retrieval_builtin_tools",
|
|
370
|
+
"chat_web_grounding_builtin_tools",
|
|
371
|
+
]
|
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import html
|
|
5
|
+
import ipaddress
|
|
6
|
+
import re
|
|
7
|
+
import socket
|
|
8
|
+
import urllib.error
|
|
9
|
+
import urllib.parse
|
|
10
|
+
import urllib.request
|
|
11
|
+
from collections.abc import Mapping, Sequence
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from functools import lru_cache
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from weavert.definitions import MemoryScope, ValidationOutcome
|
|
18
|
+
from weavert.tool_runtime import ToolContext
|
|
19
|
+
|
|
20
|
+
# Common English words kept as a mutable set.
# NOTE(review): presumably excluded when tokenizing queries; the consumer is
# defined elsewhere in this module, so confirm there.
_STOPWORDS = {
    "about", "after", "again", "also", "been", "from", "have", "into",
    "just", "more", "that", "their", "them", "then", "they", "this",
    "what", "when", "where", "which", "with", "would",
}

# HTML helpers: <title> capture, whole <script>/<style> elements, any tag, and
# runs of whitespace.
_HTML_TITLE_RE = re.compile(
    r"<title[^>]*>(?P<title>.*?)</title>",
    re.IGNORECASE | re.DOTALL,
)
_HTML_SCRIPT_STYLE_RE = re.compile(r"<(script|style)[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL)
_HTML_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")

# Fetch size limits (bytes read and default characters returned, going by the names).
_MAX_FETCH_BYTES = 256_000
_DEFAULT_FETCH_CHARS = 12_000

# Search endpoint and the hosts treated as its redirectors.
_GROUNDING_SEARCH_BASE_URL = "https://duckduckgo.com/html/"
_GROUNDING_REDIRECT_HOSTS = frozenset({"duckduckgo.com", "www.duckduckgo.com"})

# Host names and host-name suffixes on the block list.
# NOTE(review): looks like a guard against fetching local/internal addresses;
# the enforcing code is outside this span, so confirm there.
_GROUNDING_BLOCKED_HOSTS = frozenset(
    {"localhost", "localhost.localdomain", "metadata.google.internal"}
)
_GROUNDING_BLOCKED_HOST_SUFFIXES = (
    ".localhost",
    ".localdomain",
    ".local",
    ".internal",
    ".home.arpa",
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(frozen=True, slots=True)
|
|
72
|
+
class _GroundingCandidate:
|
|
73
|
+
candidate_id: str
|
|
74
|
+
title: str
|
|
75
|
+
content: str
|
|
76
|
+
url: str | None
|
|
77
|
+
source_kind: str
|
|
78
|
+
metadata: dict[str, Any]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def validate_retrieve_context_tool(tool_input: dict[str, Any], _: ToolContext) -> ValidationOutcome:
    """Check ``retrieve_context`` input before execution.

    Rejects, in this order: a blank ``query``; ``items`` that is present but
    not a list; no items while ``include_memory`` is falsy; a ``memory_scope``
    that is not the value of any ``MemoryScope`` member.

    Args:
        tool_input: Raw tool arguments.
        _: Tool context (unused).

    Returns:
        A passing ``ValidationOutcome``, or a failing one carrying the reason.
    """
    if not str(tool_input.get("query") or "").strip():
        return ValidationOutcome(False, "query must be non-empty")

    supplied_items = tool_input.get("items")
    if not (supplied_items is None or isinstance(supplied_items, list)):
        return ValidationOutcome(False, "items must be an array when provided")

    # Memory is included unless the caller opts out; without it the caller
    # must supply at least one item.
    wants_memory = tool_input.get("include_memory", True)
    if not (wants_memory or supplied_items):
        return ValidationOutcome(False, "items are required when include_memory is false")

    scope_name = str(tool_input.get("memory_scope") or "").strip()
    if scope_name:
        if scope_name not in {member.value for member in MemoryScope}:
            return ValidationOutcome(False, f"Unsupported memory_scope: {scope_name}")

    return ValidationOutcome(True)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
async def retrieve_context_tool(tool_input: dict[str, Any], context: ToolContext) -> dict[str, Any]:
    """Score grounding candidates against a query and return the best matches.

    Candidates come from the inline ``items`` and, unless ``include_memory``
    is falsy, from ``_memory_candidates``. Only candidates with a positive
    score are kept. They are ordered by descending score, then by lower-cased
    title, then by id, and cut to ``limit``.

    Args:
        tool_input: Tool arguments. ``query`` is required. ``limit`` defaults
            to 5 (also when passed as ``None``) and is clamped to 1..12.
            ``items``, ``include_memory`` and ``memory_scope`` are optional.
        context: Tool context, forwarded to the memory candidate lookup.

    Returns:
        A dict with ``query``, ``results`` (one dict per kept candidate), and
        ``sources`` (counts of returned results whose ``source_kind`` is
        ``"external"`` or ``"memory"``).

    Raises:
        KeyError: If ``query`` is missing.
        ValueError, TypeError: If ``limit`` is neither ``None`` nor
            convertible by ``int``.
    """
    query = str(tool_input["query"]).strip()

    # An explicit null limit means "use the default" instead of crashing in int(None).
    raw_limit = tool_input.get("limit")
    limit = max(1, min(5 if raw_limit is None else int(raw_limit), 12))

    include_memory = tool_input.get("include_memory", True)
    candidates = list(_inline_candidates(tool_input.get("items")))
    if include_memory:
        candidates.extend(_memory_candidates(context, tool_input.get("memory_scope")))

    if not candidates:
        return {"query": query, "results": [], "sources": {"external": 0, "memory": 0}}

    # Tokenize only once there is something to score.
    query_tokens = _tokenize(query)

    scored: list[dict[str, Any]] = []
    for candidate in candidates:
        score = _candidate_score(query_tokens, candidate)
        if score <= 0:
            continue
        scored.append(
            {
                "id": candidate.candidate_id,
                "title": candidate.title,
                "excerpt": _best_excerpt(query_tokens, candidate.content),
                "content": _truncate_text(candidate.content, 600),
                "score": round(score, 3),
                "url": candidate.url,
                "source_kind": candidate.source_kind,
                "metadata": dict(candidate.metadata),
            }
        )

    # Title and id break ties so equal scores always come back in the same order.
    scored.sort(key=lambda item: (-float(item["score"]), str(item["title"]).lower(), str(item["id"])))
    results = scored[:limit]
    return {
        "query": query,
        "results": results,
        "sources": {
            "external": sum(1 for item in results if item["source_kind"] == "external"),
            "memory": sum(1 for item in results if item["source_kind"] == "memory"),
        },
    }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def validate_prepare_citations_tool(tool_input: dict[str, Any], _: ToolContext) -> ValidationOutcome:
    """Require ``items`` to be a non-empty list.

    Args:
        tool_input: Raw tool arguments.
        _: Tool context (unused).

    Returns:
        A passing ``ValidationOutcome`` when ``items`` is a list with at least
        one element, otherwise a failing one carrying the reason.
    """
    candidates = tool_input.get("items")
    if isinstance(candidates, list) and candidates:
        return ValidationOutcome(True)
    return ValidationOutcome(False, "items must contain at least one citation candidate")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
async def prepare_citations_tool(tool_input: dict[str, Any], _: ToolContext) -> dict[str, Any]:
    """Build numbered, de-duplicated citations from raw candidate items.

    Reads ``items`` and ``limit`` from ``tool_input``. ``limit`` defaults to 5
    and is clamped to the range 1..12. Entries that are not mappings are
    ignored. Two entries count as duplicates when they share the same id
    (falling back to the title), url, and full excerpt text.

    Returns:
        A dict with ``citations`` (citation dicts labelled ``[1]``, ``[2]``,
        ... in input order) and ``citation_block`` (one rendered line per
        citation, joined with newlines).
    """
    limit = max(1, min(int(tool_input.get("limit", 5)), 12))
    citations: list[dict[str, Any]] = []
    seen: set[tuple[str, str, str]] = set()
    for raw in tool_input.get("items") or ():
        if not isinstance(raw, Mapping):
            continue
        title = str(raw.get("title") or raw.get("id") or "Untitled source").strip() or "Untitled source"
        excerpt = str(raw.get("excerpt") or raw.get("content") or "").strip()
        url = _normalize_optional_string(raw.get("url"))
        citation_id = str(raw.get("id") or title).strip() or title
        key = (citation_id, url or "", excerpt)
        if key in seen:
            continue
        seen.add(key)
        try:
            metadata = dict(raw.get("metadata") or {})
        except (TypeError, ValueError):
            # Metadata that cannot be turned into a dict (for example a plain
            # string) is dropped instead of aborting the whole request.
            metadata = {}
        citations.append(
            {
                "label": f"[{len(citations) + 1}]",
                "id": citation_id,
                "title": title,
                "excerpt": _truncate_text(excerpt, 240),
                "url": url,
                "source_kind": str(raw.get("source_kind") or "external"),
                "metadata": metadata,
            }
        )
        if len(citations) >= limit:
            break
    citation_block = "\n".join(_render_citation(citation) for citation in citations)
    return {"citations": citations, "citation_block": citation_block}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def validate_grounding_web_search(tool_input: dict[str, Any], _: ToolContext) -> ValidationOutcome:
    """Reject the input unless ``query`` contains non-whitespace text."""
    query_text = str(tool_input.get("query") or "").strip()
    if query_text:
        return ValidationOutcome(True)
    return ValidationOutcome(False, "query must be non-empty")
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
async def grounding_web_search_tool(tool_input: dict[str, Any], _: ToolContext) -> dict[str, Any]:
    """Query the grounding search endpoint and return the usable result links.

    ``limit`` defaults to 5 and is clamped to 1..8. The blocking HTTP request
    and the HTML scraping run in a worker thread. Links whose URL cannot be
    normalized, or that fail grounding URL validation, are left out.

    Returns:
        ``{"query": ..., "results": [{"title": ..., "url": ...}, ...]}``.
    """
    query = str(tool_input["query"]).strip()
    limit = max(1, min(int(tool_input.get("limit", 5)), 8))
    query_string = urllib.parse.urlencode({"q": query})
    url = f"{_GROUNDING_SEARCH_BASE_URL}?{query_string}"

    def search() -> dict[str, Any]:
        request = urllib.request.Request(url, headers={"User-Agent": "weavert/0.1"})
        with _grounding_urlopen(request, timeout=10) as response:
            page = response.read().decode("utf-8", errors="replace")
        hits: list[dict[str, Any]] = []
        anchors = re.finditer(
            r'<a[^>]*class="result__a"[^>]*href="(?P<href>[^"]+)"[^>]*>(?P<title>.*?)</a>',
            page,
        )
        for anchor in anchors:
            target = _normalize_grounding_url(anchor.group("href"))
            if target is None or _grounding_url_validation_error(target) is not None:
                continue
            # The anchor text may contain nested markup; keep only its text.
            label = html.unescape(_HTML_TAG_RE.sub("", anchor.group("title"))).strip()
            hits.append({"title": label or target, "url": target})
            if len(hits) >= limit:
                break
        return {"query": query, "results": hits}

    return await asyncio.to_thread(search)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def validate_grounding_web_fetch(tool_input: dict[str, Any], _: ToolContext) -> ValidationOutcome:
    """Check that ``url`` normalizes to an http(s) URL that passes grounding validation."""
    normalized = _normalize_grounding_url(tool_input.get("url"))
    if normalized is None:
        return ValidationOutcome(False, "Only http:// and https:// URLs are supported")
    problem = _grounding_url_validation_error(normalized)
    if problem is None:
        return ValidationOutcome(True)
    return ValidationOutcome(False, problem)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
async def grounding_web_fetch_tool(tool_input: dict[str, Any], _: ToolContext) -> dict[str, Any]:
    """Fetch a web page and return its text, trimmed to a character budget.

    Inputs read from ``tool_input``:
        url: Target address; must normalize to http(s) and pass grounding
            URL validation.
        timeout_ms: Request timeout in milliseconds (default 10000, minimum 1).
        max_chars: Character budget for ``content``, clamped to 500..32000.

    Returns:
        A dict with ``url`` (the response URL when it normalizes, otherwise the
        requested one), ``status``, ``content_type``, ``title`` (HTML only),
        ``content`` and ``truncated``.

    Raises:
        ValueError: If the URL is unsupported or fails validation.
    """
    timeout = max(1, int(tool_input.get("timeout_ms", 10_000))) / 1000
    max_chars = max(500, min(int(tool_input.get("max_chars", _DEFAULT_FETCH_CHARS)), 32_000))
    url = _normalize_grounding_url(tool_input.get("url"))
    if url is None:
        raise ValueError("Only http:// and https:// URLs are supported")
    validation_error = _grounding_url_validation_error(url)
    if validation_error is not None:
        raise ValueError(validation_error)

    def fetch() -> dict[str, Any]:
        request = urllib.request.Request(url, headers={"User-Agent": "weavert/0.1"})
        with _grounding_urlopen(request, timeout=timeout) as response:
            # One byte beyond the cap is requested purely to detect overflow.
            raw = response.read(_MAX_FETCH_BYTES + 1)
            oversized = len(raw) > _MAX_FETCH_BYTES
            payload = raw[:_MAX_FETCH_BYTES]
            content_type = response.headers.get_content_type()
            # Honor the charset declared in Content-Type; fall back to UTF-8
            # when none is declared or the codec name is unknown.
            charset_getter = getattr(response.headers, "get_content_charset", None)
            charset = charset_getter() if callable(charset_getter) else None
            try:
                body = payload.decode(charset or "utf-8", errors="replace")
            except LookupError:
                body = payload.decode("utf-8", errors="replace")
            normalized = _normalize_remote_text(body, content_type=content_type)
            truncated = oversized or len(normalized) > max_chars
            title = _extract_html_title(body) if "html" in content_type else None
            resolved_url = _normalize_grounding_url(_response_url(response)) or url
            return {
                "url": resolved_url,
                "status": getattr(response, "status", 200),
                "content_type": content_type,
                "title": title,
                "content": _truncate_text(normalized, max_chars),
                "truncated": truncated,
            }

    return await asyncio.to_thread(fetch)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _inline_candidates(items: Any) -> tuple[_GroundingCandidate, ...]:
    """Convert caller-supplied ``items`` into grounding candidates.

    Anything other than a list yields an empty tuple. Entries that are not
    mappings, or that have neither ``content`` nor ``excerpt`` text, are
    skipped. Missing titles and ids fall back to ``Source N`` / ``source-N``,
    where N is the entry's 1-based position in ``items``.
    """
    if not isinstance(items, list):
        return ()
    candidates: list[_GroundingCandidate] = []
    for index, raw in enumerate(items, start=1):
        if not isinstance(raw, Mapping):
            continue
        content = str(raw.get("content") or raw.get("excerpt") or "").strip()
        if not content:
            continue
        title = str(raw.get("title") or raw.get("id") or f"Source {index}").strip() or f"Source {index}"
        candidate_id = str(raw.get("id") or f"source-{index}").strip() or f"source-{index}"
        try:
            metadata = dict(raw.get("metadata") or {})
        except (TypeError, ValueError):
            # Metadata that cannot be turned into a dict (for example a plain
            # string) is dropped so one malformed entry cannot abort the call.
            metadata = {}
        candidates.append(
            _GroundingCandidate(
                candidate_id=candidate_id,
                title=title,
                content=content,
                url=_normalize_optional_string(raw.get("url")),
                source_kind=str(raw.get("source_kind") or "external"),
                metadata=metadata,
            )
        )
    return tuple(candidates)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _memory_candidates(context: ToolContext, scope_value: Any) -> tuple[_GroundingCandidate, ...]:
    """Collect grounding candidates from the memory service, best effort.

    Returns an empty tuple when runtime services or the memory service are
    missing, when the service lacks a callable ``context_for_scope`` or a
    provider exposing ``list_documents``, or when resolving the scope or
    listing documents raises.
    """
    runtime = context.runtime_services
    if runtime is None:
        return ()
    try:
        memory = runtime.resolve_memory_service()
    except Exception:
        return ()
    if memory is None:
        return ()

    scope_resolver = getattr(memory, "context_for_scope", None)
    manager = getattr(memory, "manager", None)
    provider = getattr(manager, "provider", None)
    usable = callable(scope_resolver) and provider is not None and hasattr(provider, "list_documents")
    if not usable:
        return ()

    try:
        resolved_scope = scope_resolver(
            session_id=context.session_id,
            scope=_coerce_memory_scope(scope_value),
            cwd=context.cwd,
        )
        documents = provider.list_documents(resolved_scope)
    except Exception:
        return ()

    found: list[_GroundingCandidate] = []
    for document in documents:
        if _looks_like_memory_document(document):
            found.append(_candidate_from_memory_document(document, resolved_scope.memory_root))
    return tuple(found)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _looks_like_memory_document(document: Any) -> bool:
|
|
311
|
+
scope = getattr(document, "scope", None)
|
|
312
|
+
return (
|
|
313
|
+
hasattr(document, "path")
|
|
314
|
+
and hasattr(document, "title")
|
|
315
|
+
and hasattr(document, "metadata")
|
|
316
|
+
and hasattr(document, "kind")
|
|
317
|
+
and hasattr(scope, "value")
|
|
318
|
+
and bool(str(getattr(document, "content", "") or "").strip())
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _candidate_from_memory_document(document: Any, memory_root: Path) -> _GroundingCandidate:
    """Wrap a memory document as a ``memory``-sourced grounding candidate.

    The candidate id is the document path relative to ``memory_root`` in POSIX
    form, or just the file name when the path is not under that root. The
    document's metadata is copied, and ``memory_scope``, ``memory_path`` and
    ``memory_kind`` are filled in only where not already present.
    """
    try:
        relative_path = document.path.relative_to(memory_root).as_posix()
    except ValueError:
        relative_path = document.path.name

    metadata = dict(document.metadata)
    defaults = (
        ("memory_scope", document.scope.value),
        ("memory_path", relative_path),
        ("memory_kind", document.kind),
    )
    for key, fallback in defaults:
        metadata.setdefault(key, fallback)

    return _GroundingCandidate(
        candidate_id=relative_path,
        title=document.title,
        content=document.content,
        url=None,
        source_kind="memory",
        metadata=metadata,
    )
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _coerce_memory_scope(value: Any) -> MemoryScope:
    """Map ``value`` to a ``MemoryScope``, defaulting to ``MemoryScope.PROJECT``.

    Falsy, blank, or unrecognized values all resolve to the project scope.
    """
    default_scope = MemoryScope.PROJECT
    requested = str(value or default_scope.value).strip()
    try:
        return MemoryScope(requested or default_scope.value)
    except ValueError:
        return default_scope
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _candidate_score(query_tokens: set[str], candidate: _GroundingCandidate) -> float:
    """Score how well ``candidate`` matches the query tokens.

    The score is the number of query tokens found in the title plus content,
    plus 0.5 per query token in the title, 0.25 per query token in the
    ``tags`` metadata, and a flat 0.25 for memory-sourced candidates. An empty
    query scores 0.0. With no token overlap, the candidate scores 0.0 unless
    the sorted, space-joined query tokens occur verbatim in the lowercased
    title plus content.
    """
    haystack = f"{candidate.title} {candidate.content}"
    haystack_tokens = _tokenize(haystack)
    if not query_tokens:
        return 0.0
    shared = len(query_tokens & haystack_tokens)
    if shared <= 0:
        phrase = " ".join(sorted(query_tokens))
        if phrase and phrase not in haystack.lower():
            return 0.0
    in_title = len(query_tokens & _tokenize(candidate.title))
    tag_text = " ".join(_string_values(candidate.metadata.get("tags")))
    in_tags = len(query_tokens & _tokenize(tag_text))
    memory_bonus = 0.25 if candidate.source_kind == "memory" else 0.0
    return float(shared) + (0.5 * float(in_title)) + (0.25 * float(in_tags)) + memory_bonus
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _best_excerpt(query_tokens: set[str], content: str, *, limit: int = 280) -> str:
    """Return the first sentence or line of ``content`` that mentions a query token.

    The text is split after sentence-ending punctuation and at line breaks.
    The first piece containing any query token as a case-insensitive substring
    is returned, shortened to ``limit`` characters. When no piece matches, the
    start of the whole text is returned instead. Blank content yields ``""``.
    """
    stripped = content.strip()
    if not stripped:
        return ""
    # Split the raw text: collapsing whitespace first would erase the line
    # breaks that the pattern's newline alternative is meant to split on.
    for chunk in re.split(r"(?<=[.!?])\s+|\n+", stripped):
        candidate = " ".join(chunk.split())
        if not candidate:
            continue
        lowered = candidate.lower()
        if any(token in lowered for token in query_tokens):
            return _truncate_text(candidate, limit)
    return _truncate_text(" ".join(stripped.split()), limit)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _render_citation(citation: Mapping[str, Any]) -> str:
    """Format one citation as ``<label> <title>[ — <url>][: <excerpt>]``.

    The title falls back to the id and then to ``Untitled source``. The URL
    and excerpt parts are omitted when empty. ``citation["label"]`` is
    required.
    """
    title = str(citation.get("title") or citation.get("id") or "Untitled source").strip() or "Untitled source"
    excerpt = str(citation.get("excerpt") or "").strip()
    url = _normalize_optional_string(citation.get("url"))
    suffix = f" — {url}" if url else ""
    head = f"{citation['label']} {title}{suffix}"
    return f"{head}: {excerpt}" if excerpt else head
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _normalize_grounding_url(value: Any) -> str | None:
    """Return a cleaned absolute http(s) URL, or ``None`` if ``value`` is unusable.

    Steps: trim the value, decode HTML entities, expand scheme-relative
    (``//host/...``) references to https and root-relative (``/path``)
    references against the search base URL, require an http/https scheme with
    a network location, unwrap ``/l`` redirect links on the configured
    redirect hosts via their ``uddg`` query parameter, and drop the fragment.
    """
    candidate = _normalize_optional_string(value)
    if candidate is None:
        return None
    candidate = html.unescape(candidate)
    try:
        if candidate.startswith("//"):
            candidate = f"https:{candidate}"
        elif candidate.startswith("/"):
            candidate = urllib.parse.urljoin(_GROUNDING_SEARCH_BASE_URL, candidate)
        parsed = urllib.parse.urlparse(candidate)
    except ValueError:
        # urllib rejects some malformed inputs outright (for example unmatched
        # "[" / "]" in the host); report them as unusable instead of raising.
        return None
    scheme = parsed.scheme.lower()
    if scheme not in {"http", "https"} or not parsed.netloc:
        return None
    hostname = (parsed.hostname or "").rstrip(".").lower()
    if hostname in _GROUNDING_REDIRECT_HOSTS and parsed.path in {"/l", "/l/"}:
        redirect_targets = urllib.parse.parse_qs(parsed.query).get("uddg")
        if redirect_targets:
            # The real destination is carried in the query string; normalize
            # it with the same rules.
            return _normalize_grounding_url(redirect_targets[0])
    return urllib.parse.urlunparse(parsed._replace(fragment=""))
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _grounding_url_validation_error(url: str) -> str | None:
    """Return why ``url`` may not be fetched, or ``None`` when it is acceptable.

    A URL is rejected when it has no hostname, embeds credentials, names a
    blocked host or host suffix, is an IP literal that is not globally
    routable, is a dot-less hostname, or is a hostname known to resolve to a
    non-global address. A hostname whose resolution is undetermined is
    accepted.
    """
    not_public = "Grounding fetch only supports public web hosts"
    parts = urllib.parse.urlparse(url)
    host = (parts.hostname or "").rstrip(".").lower()
    if not host:
        return "Grounding fetch requires a public web hostname"
    if not (parts.username is None and parts.password is None):
        return "Grounding fetch does not allow embedded URL credentials"
    if host in _GROUNDING_BLOCKED_HOSTS:
        return not_public
    for suffix in _GROUNDING_BLOCKED_HOST_SUFFIXES:
        if host.endswith(suffix):
            return not_public

    try:
        literal = ipaddress.ip_address(host)
    except ValueError:
        literal = None

    if literal is not None:
        # IP literal: judged directly, no name resolution involved.
        return None if literal.is_global else not_public
    if "." not in host:
        return not_public
    if _grounding_hostname_resolves_publicly(host) is False:
        return not_public
    return None
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
@lru_cache(maxsize=256)
|
|
436
|
+
def _grounding_hostname_resolves_publicly(hostname: str) -> bool | None:
|
|
437
|
+
try:
|
|
438
|
+
resolutions = socket.getaddrinfo(
|
|
439
|
+
hostname,
|
|
440
|
+
None,
|
|
441
|
+
type=socket.SOCK_STREAM,
|
|
442
|
+
proto=socket.IPPROTO_TCP,
|
|
443
|
+
)
|
|
444
|
+
except OSError:
|
|
445
|
+
return None
|
|
446
|
+
saw_address = False
|
|
447
|
+
for _family, _kind, _proto, _canonname, sockaddr in resolutions:
|
|
448
|
+
if not sockaddr:
|
|
449
|
+
continue
|
|
450
|
+
try:
|
|
451
|
+
address = ipaddress.ip_address(str(sockaddr[0]).strip())
|
|
452
|
+
except ValueError:
|
|
453
|
+
continue
|
|
454
|
+
saw_address = True
|
|
455
|
+
if not address.is_global:
|
|
456
|
+
return False
|
|
457
|
+
return True if saw_address else None
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
class _SafeGroundingRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that applies the grounding URL checks to every hop."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Follow the redirect only if ``newurl`` normalizes and validates.

        Raises ``urllib.error.HTTPError`` carrying the rejection reason when
        the target is unsupported or fails validation.
        """
        target = _normalize_grounding_url(newurl)
        if target is None:
            raise urllib.error.HTTPError(newurl, code, "Only http:// and https:// URLs are supported", headers, fp)
        problem = _grounding_url_validation_error(target)
        if problem is None:
            return super().redirect_request(req, fp, code, msg, headers, target)
        raise urllib.error.HTTPError(target, code, problem, headers, fp)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _grounding_urlopen(request: urllib.request.Request, *, timeout: float | int):
    """Open ``request`` through a fresh opener that uses the safe redirect handler."""
    redirect_guard = _SafeGroundingRedirectHandler()
    return urllib.request.build_opener(redirect_guard).open(request, timeout=timeout)
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _response_url(response: Any) -> str | None:
|
|
477
|
+
resolver = getattr(response, "geturl", None)
|
|
478
|
+
if callable(resolver):
|
|
479
|
+
return _normalize_optional_string(resolver())
|
|
480
|
+
return getattr(response, "url", None)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _normalize_remote_text(body: str, *, content_type: str) -> str:
    """Reduce a fetched body to whitespace-collapsed text.

    For content types containing ``html``, script/style blocks and remaining
    tags are replaced by spaces and HTML entities are decoded. Other content
    only has its whitespace collapsed and its ends trimmed.
    """
    text = body
    if "html" in content_type:
        without_scripts = _HTML_SCRIPT_STYLE_RE.sub(" ", text)
        without_tags = _HTML_TAG_RE.sub(" ", without_scripts)
        text = html.unescape(without_tags)
    collapsed = _WHITESPACE_RE.sub(" ", text)
    return collapsed.strip()
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _extract_html_title(body: str) -> str | None:
    """Return the page title found in ``body`` as plain text, or ``None``.

    Nested tags are removed and HTML entities decoded. A missing or blank
    title gives ``None``.
    """
    found = _HTML_TITLE_RE.search(body)
    if found is None:
        return None
    inner = _HTML_TAG_RE.sub("", found.group("title"))
    cleaned = html.unescape(inner).strip()
    return cleaned if cleaned else None
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _truncate_text(value: str, limit: int) -> str:
|
|
501
|
+
normalized = " ".join(value.strip().split())
|
|
502
|
+
if len(normalized) <= limit:
|
|
503
|
+
return normalized
|
|
504
|
+
return normalized[: limit - 3].rstrip() + "..."
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def _normalize_optional_string(value: Any) -> str | None:
|
|
508
|
+
if value is None:
|
|
509
|
+
return None
|
|
510
|
+
normalized = str(value).strip()
|
|
511
|
+
return normalized or None
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _string_values(value: Any) -> tuple[str, ...]:
|
|
515
|
+
if isinstance(value, str):
|
|
516
|
+
return (value,)
|
|
517
|
+
if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)):
|
|
518
|
+
return tuple(str(item) for item in value if str(item).strip())
|
|
519
|
+
return ()
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _tokenize(text: str) -> set[str]:
|
|
523
|
+
return {
|
|
524
|
+
token
|
|
525
|
+
for token in re.findall(r"[a-z0-9]+", text.lower())
|
|
526
|
+
if len(token) > 2 and token not in _STOPWORDS
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
# Names exported by ``from <module> import *``: the tool entry points and
# their input validators. Underscore-prefixed helpers are not exported.
__all__ = [
    "grounding_web_fetch_tool",
    "grounding_web_search_tool",
    "prepare_citations_tool",
    "retrieve_context_tool",
    "validate_grounding_web_fetch",
    "validate_grounding_web_search",
    "validate_prepare_citations_tool",
    "validate_retrieve_context_tool",
]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: weavert-kit-chat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Chat-oriented product kit for extracted WeaveRT scenario packs.
|
|
5
|
+
Author: WeaveRT Maintainers
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/xyz2b/weave-ai-runtime
|
|
8
|
+
Project-URL: Documentation, https://github.com/xyz2b/weave-ai-runtime/tree/main/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/xyz2b/weave-ai-runtime
|
|
10
|
+
Project-URL: Issues, https://github.com/xyz2b/weave-ai-runtime/issues
|
|
11
|
+
Keywords: weavert,agents,ai,product-kit,scenario-pack
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: weavert<0.2.0,>=0.1.0
|
|
23
|
+
Requires-Dist: weavert-kit-common-retrieval<0.2.0,>=0.1.0
|
|
24
|
+
Requires-Dist: weavert-kit-common-web<0.2.0,>=0.1.0
|
|
25
|
+
|
|
26
|
+
# Chat Product Kit
|
|
27
|
+
|
|
28
|
+
Canonical import root: `weavert_kit_chat`
|
|
29
|
+
|
|
30
|
+
## What this package owns
|
|
31
|
+
|
|
32
|
+
- the `weavert-scenario-chat` scenario pack
|
|
33
|
+
- chat-oriented product-profile defaults layered on shared grounding packages
|
|
34
|
+
|
|
35
|
+
## Canonical names
|
|
36
|
+
|
|
37
|
+
- install name: `weavert-kit-chat`
|
|
38
|
+
- import root: `weavert_kit_chat`
|
|
39
|
+
- runtime activation: `weavert-scenario-chat`
|
|
40
|
+
|
|
41
|
+
The public install name stays separate from the runtime scenario-pack activation name.
|
|
42
|
+
|
|
43
|
+
## Shared packages it composes
|
|
44
|
+
|
|
45
|
+
- `weavert_kit_common_retrieval`
|
|
46
|
+
- `weavert_kit_common_web`
|
|
47
|
+
|
|
48
|
+
## See also
|
|
49
|
+
|
|
50
|
+
- `../README.md`
|
|
51
|
+
- `../common/README.md`
|
|
52
|
+
- `../../../docs/guides/use-scenario-packs.md`
|
|
53
|
+
- `../../../docs/introduction/use-cases.md`
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/weavert_kit_chat/__init__.py
|
|
4
|
+
src/weavert_kit_chat/_builtins.py
|
|
5
|
+
src/weavert_kit_chat/_tool_impls.py
|
|
6
|
+
src/weavert_kit_chat.egg-info/PKG-INFO
|
|
7
|
+
src/weavert_kit_chat.egg-info/SOURCES.txt
|
|
8
|
+
src/weavert_kit_chat.egg-info/dependency_links.txt
|
|
9
|
+
src/weavert_kit_chat.egg-info/requires.txt
|
|
10
|
+
src/weavert_kit_chat.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
weavert_kit_chat
|