teddy-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. teddy_cli-0.1.0.dist-info/LICENSE +677 -0
  2. teddy_cli-0.1.0.dist-info/METADATA +33 -0
  3. teddy_cli-0.1.0.dist-info/RECORD +143 -0
  4. teddy_cli-0.1.0.dist-info/WHEEL +4 -0
  5. teddy_cli-0.1.0.dist-info/entry_points.txt +3 -0
  6. teddy_executor/__init__.py +1 -0
  7. teddy_executor/__main__.py +335 -0
  8. teddy_executor/adapters/__init__.py +0 -0
  9. teddy_executor/adapters/inbound/__init__.py +0 -0
  10. teddy_executor/adapters/inbound/cli_formatter.py +107 -0
  11. teddy_executor/adapters/inbound/cli_helpers.py +249 -0
  12. teddy_executor/adapters/inbound/console_plan_reviewer.py +69 -0
  13. teddy_executor/adapters/inbound/session_cli_handlers.py +366 -0
  14. teddy_executor/adapters/inbound/textual_plan_reviewer.py +78 -0
  15. teddy_executor/adapters/inbound/textual_plan_reviewer_app.py +367 -0
  16. teddy_executor/adapters/inbound/textual_plan_reviewer_editor.py +281 -0
  17. teddy_executor/adapters/inbound/textual_plan_reviewer_execution.py +213 -0
  18. teddy_executor/adapters/inbound/textual_plan_reviewer_helpers.py +308 -0
  19. teddy_executor/adapters/inbound/textual_plan_reviewer_logic.py +345 -0
  20. teddy_executor/adapters/inbound/textual_plan_reviewer_previews.py +227 -0
  21. teddy_executor/adapters/inbound/textual_plan_reviewer_widgets.py +246 -0
  22. teddy_executor/adapters/outbound/__init__.py +7 -0
  23. teddy_executor/adapters/outbound/console_interactor.py +212 -0
  24. teddy_executor/adapters/outbound/console_interactor_ask_loop.py +121 -0
  25. teddy_executor/adapters/outbound/console_interactor_helpers.py +95 -0
  26. teddy_executor/adapters/outbound/console_tooling.py +62 -0
  27. teddy_executor/adapters/outbound/filesystem_helpers.py +61 -0
  28. teddy_executor/adapters/outbound/litellm_adapter.py +462 -0
  29. teddy_executor/adapters/outbound/local_file_system_adapter.py +300 -0
  30. teddy_executor/adapters/outbound/local_repo_tree_generator.py +96 -0
  31. teddy_executor/adapters/outbound/openrouter_hydrator.py +89 -0
  32. teddy_executor/adapters/outbound/shell_adapter.py +344 -0
  33. teddy_executor/adapters/outbound/shell_command_builder.py +105 -0
  34. teddy_executor/adapters/outbound/system_environment_adapter.py +62 -0
  35. teddy_executor/adapters/outbound/system_environment_inspector.py +54 -0
  36. teddy_executor/adapters/outbound/system_time_adapter.py +22 -0
  37. teddy_executor/adapters/outbound/web_scraper_adapter.py +346 -0
  38. teddy_executor/adapters/outbound/web_searcher_adapter.py +122 -0
  39. teddy_executor/adapters/outbound/yaml_config_adapter.py +105 -0
  40. teddy_executor/container.py +333 -0
  41. teddy_executor/core/__init__.py +0 -0
  42. teddy_executor/core/domain/__init__.py +0 -0
  43. teddy_executor/core/domain/models/__init__.py +44 -0
  44. teddy_executor/core/domain/models/action_ports.py +28 -0
  45. teddy_executor/core/domain/models/change_set.py +10 -0
  46. teddy_executor/core/domain/models/exceptions.py +40 -0
  47. teddy_executor/core/domain/models/execution_report.py +65 -0
  48. teddy_executor/core/domain/models/orchestrator_ports.py +26 -0
  49. teddy_executor/core/domain/models/plan.py +85 -0
  50. teddy_executor/core/domain/models/planning_ports.py +43 -0
  51. teddy_executor/core/domain/models/project_context.py +56 -0
  52. teddy_executor/core/domain/models/report_assembly_data.py +18 -0
  53. teddy_executor/core/domain/models/session.py +17 -0
  54. teddy_executor/core/domain/models/shell_output.py +12 -0
  55. teddy_executor/core/domain/models/web_search_results.py +26 -0
  56. teddy_executor/core/ports/__init__.py +0 -0
  57. teddy_executor/core/ports/inbound/__init__.py +0 -0
  58. teddy_executor/core/ports/inbound/edit_simulator.py +33 -0
  59. teddy_executor/core/ports/inbound/get_context_use_case.py +32 -0
  60. teddy_executor/core/ports/inbound/init.py +15 -0
  61. teddy_executor/core/ports/inbound/plan_parser.py +52 -0
  62. teddy_executor/core/ports/inbound/plan_reviewer.py +44 -0
  63. teddy_executor/core/ports/inbound/plan_validator.py +26 -0
  64. teddy_executor/core/ports/inbound/planning_use_case.py +30 -0
  65. teddy_executor/core/ports/inbound/run_plan_use_case.py +60 -0
  66. teddy_executor/core/ports/outbound/__init__.py +34 -0
  67. teddy_executor/core/ports/outbound/config_service.py +29 -0
  68. teddy_executor/core/ports/outbound/environment_inspector.py +30 -0
  69. teddy_executor/core/ports/outbound/execution_report_assembler.py +19 -0
  70. teddy_executor/core/ports/outbound/file_system_manager.py +131 -0
  71. teddy_executor/core/ports/outbound/llm_client.py +90 -0
  72. teddy_executor/core/ports/outbound/markdown_report_formatter.py +26 -0
  73. teddy_executor/core/ports/outbound/prompt_manager.py +55 -0
  74. teddy_executor/core/ports/outbound/repo_tree_generator.py +17 -0
  75. teddy_executor/core/ports/outbound/session_loop_guard.py +16 -0
  76. teddy_executor/core/ports/outbound/session_manager.py +97 -0
  77. teddy_executor/core/ports/outbound/session_repository.py +65 -0
  78. teddy_executor/core/ports/outbound/shell_executor.py +24 -0
  79. teddy_executor/core/ports/outbound/system_environment.py +25 -0
  80. teddy_executor/core/ports/outbound/time_service.py +28 -0
  81. teddy_executor/core/ports/outbound/user_interactor.py +126 -0
  82. teddy_executor/core/ports/outbound/web_scraper.py +24 -0
  83. teddy_executor/core/ports/outbound/web_searcher.py +25 -0
  84. teddy_executor/core/services/__init__.py +0 -0
  85. teddy_executor/core/services/action_changeset_builder.py +90 -0
  86. teddy_executor/core/services/action_diff_manager.py +110 -0
  87. teddy_executor/core/services/action_dispatcher.py +142 -0
  88. teddy_executor/core/services/action_executor.py +209 -0
  89. teddy_executor/core/services/action_factory.py +197 -0
  90. teddy_executor/core/services/action_parser_complex.py +216 -0
  91. teddy_executor/core/services/action_parser_strategies.py +84 -0
  92. teddy_executor/core/services/context_service.py +437 -0
  93. teddy_executor/core/services/edit_simulator.py +128 -0
  94. teddy_executor/core/services/execution_orchestrator.py +295 -0
  95. teddy_executor/core/services/execution_report_assembler.py +62 -0
  96. teddy_executor/core/services/init_service.py +80 -0
  97. teddy_executor/core/services/markdown_plan_parser.py +309 -0
  98. teddy_executor/core/services/markdown_report_formatter.py +143 -0
  99. teddy_executor/core/services/parser_infrastructure.py +222 -0
  100. teddy_executor/core/services/parser_metadata.py +153 -0
  101. teddy_executor/core/services/parser_reporting.py +267 -0
  102. teddy_executor/core/services/plan_validator.py +82 -0
  103. teddy_executor/core/services/planning_service.py +242 -0
  104. teddy_executor/core/services/prompt_manager.py +146 -0
  105. teddy_executor/core/services/session_lifecycle_manager.py +228 -0
  106. teddy_executor/core/services/session_loop_guard.py +46 -0
  107. teddy_executor/core/services/session_orchestrator.py +538 -0
  108. teddy_executor/core/services/session_planner.py +43 -0
  109. teddy_executor/core/services/session_pruning_service.py +438 -0
  110. teddy_executor/core/services/session_replanner.py +105 -0
  111. teddy_executor/core/services/session_repository.py +194 -0
  112. teddy_executor/core/services/session_service.py +529 -0
  113. teddy_executor/core/services/templates/execution_report.md.j2 +290 -0
  114. teddy_executor/core/services/validation_rules/__init__.py +4 -0
  115. teddy_executor/core/services/validation_rules/edit.py +207 -0
  116. teddy_executor/core/services/validation_rules/edit_matcher.py +247 -0
  117. teddy_executor/core/services/validation_rules/edit_matcher_heuristics.py +84 -0
  118. teddy_executor/core/services/validation_rules/execute.py +37 -0
  119. teddy_executor/core/services/validation_rules/filesystem.py +73 -0
  120. teddy_executor/core/services/validation_rules/helpers.py +178 -0
  121. teddy_executor/core/services/validation_rules/message.py +29 -0
  122. teddy_executor/core/utils/__init__.py +1 -0
  123. teddy_executor/core/utils/diff.py +57 -0
  124. teddy_executor/core/utils/io.py +75 -0
  125. teddy_executor/core/utils/markdown.py +131 -0
  126. teddy_executor/core/utils/serialization.py +39 -0
  127. teddy_executor/core/utils/string.py +351 -0
  128. teddy_executor/prompts.py +45 -0
  129. teddy_executor/registries/__init__.py +1 -0
  130. teddy_executor/registries/infrastructure.py +147 -0
  131. teddy_executor/registries/reviewer.py +57 -0
  132. teddy_executor/registries/validators.py +47 -0
  133. teddy_executor/resources/__init__.py +1 -0
  134. teddy_executor/resources/config/.gitignore +2 -0
  135. teddy_executor/resources/config/__init__.py +1 -0
  136. teddy_executor/resources/config/config.yaml +49 -0
  137. teddy_executor/resources/config/init.context +5 -0
  138. teddy_executor/resources/config/prompts/architect.xml +462 -0
  139. teddy_executor/resources/config/prompts/assistant.xml +336 -0
  140. teddy_executor/resources/config/prompts/debugger.xml +456 -0
  141. teddy_executor/resources/config/prompts/developer.xml +481 -0
  142. teddy_executor/resources/config/prompts/pathfinder.xml +502 -0
  143. teddy_executor/resources/config/prompts/prototyper.xml +425 -0
@@ -0,0 +1,346 @@
1
+ from teddy_executor.core.ports.outbound.web_scraper import WebScraper
2
+ from teddy_executor.core.ports.outbound.config_service import IConfigService
3
+
4
+
5
# Minimum text length (in characters) that a GitHub ".markdown-body" block must
# exceed to be kept by the CSS-based fallback scraper.
MIN_GITHUB_CONTENT_LENGTH = 10
# HTTP status codes referenced by the fetch/retry logic in this module.
HTTP_BAD_REQUEST = 400  # lower bound of the 4xx range
HTTP_FORBIDDEN = 403
HTTP_NOT_ACCEPTABLE = 406
HTTP_TOO_MANY_REQUESTS = 429
HTTP_INTERNAL_SERVER_ERROR = 500  # lower bound of the 5xx range
11
+
12
+
13
class WebScraperAdapter(WebScraper):
    """
    An adapter that implements the WebScraper port using requests and trafilatura.

    Third-party modules (requests, trafilatura, bs4) are imported inside the
    methods that use them so that importing this module stays cheap.
    """

    # "__typename" values in GitHub's embedded JSON that are rendered as comments.
    _GITHUB_COMMENT_TYPES = (
        "IssueComment",
        "PullRequestReview",
        "PullRequestReviewComment",
    )
    # Attempts per request when "research.max_scraper_retries" is unset/invalid.
    _DEFAULT_MAX_RETRIES = 3

    def __init__(self, config_service: IConfigService | None = None):
        """
        Args:
            config_service: Optional settings provider. Without it the retry
                count defaults to 3 and extracted content is never truncated.
        """
        self._config_service = config_service

    def _get_trafilatura(self):
        """Lazy-load trafilatura to keep CLI startup fast."""
        import trafilatura

        return trafilatura

    def _get_bs4(self):
        """Lazy-load BeautifulSoup to keep CLI startup fast."""
        from bs4 import BeautifulSoup

        return BeautifulSoup

    def _get_max_retries(self) -> int:
        """
        Returns the number of attempts per request.

        Reads "research.max_scraper_retries". A missing, null, or non-numeric
        value falls back to the default, and the result is clamped to at
        least 1 so that a request is always attempted.
        """
        if not self._config_service:
            return self._DEFAULT_MAX_RETRIES

        val = self._config_service.get_setting(
            "research.max_scraper_retries", self._DEFAULT_MAX_RETRIES
        )
        if val is None:
            return self._DEFAULT_MAX_RETRIES
        try:
            return max(1, int(val))
        except (TypeError, ValueError):
            return self._DEFAULT_MAX_RETRIES

    def _extract_github_conversation(self, html: str) -> str:
        """
        Extracts issue or pull request content and comments from GitHub HTML.

        Uses a hybrid strategy: JSON-embedded data (primary) and CSS selectors
        (fallback).

        Returns:
            Markdown text, or an empty string when neither strategy finds
            any content.
        """
        import json

        soup = self._get_bs4()(html, "html.parser")

        # 1. Primary: Try high-fidelity extraction from embedded JSON data
        for script in soup.find_all("script", type="application/json"):
            if not script.string:
                continue
            try:
                result = self._parse_github_json(json.loads(script.string))
            except (json.JSONDecodeError, TypeError, AttributeError):
                # Malformed or unexpectedly shaped payload: try the next one.
                continue
            if result:
                return result

        # 2. Fallback: CSS-based scraping
        return self._scrape_github_html(soup)

    def _find_key_recursive(self, obj, target_key):
        """
        Recursively search for a key in a nested dictionary/list.

        A dict that directly contains ``target_key`` returns that value as-is.
        Values found deeper are only returned when truthy; falsy hits are
        skipped and the search continues with the next sibling.

        Returns:
            The first matching value in depth-first order, or None.
        """
        if isinstance(obj, dict):
            if target_key in obj:
                return obj[target_key]
            children = obj.values()
        elif isinstance(obj, list):
            children = obj
        else:
            return None

        for child in children:
            found = self._find_key_recursive(child, target_key)
            if found:
                return found
        return None

    def _gather_edges_recursive(self, obj, edges_out: list):
        """Recursively gather all 'edges' lists into the output list (in place)."""
        if isinstance(obj, dict):
            edges = obj.get("edges")
            if isinstance(edges, list):
                edges_out.extend(edges)
            for value in obj.values():
                self._gather_edges_recursive(value, edges_out)
        elif isinstance(obj, list):
            for item in obj:
                self._gather_edges_recursive(item, edges_out)

    def _parse_github_json(self, data: dict) -> str | None:
        """
        Helper to recursively find and parse issue/PR data from JSON.

        Returns:
            Markdown with the title, description and de-duplicated comments,
            or None when no "issue"/"pullRequest" object is present.
        """
        container = self._find_key_recursive(data, "issue") or self._find_key_recursive(
            data, "pullRequest"
        )
        if not container or not isinstance(container, dict):
            return None

        title = (
            container.get("title")
            or container.get("titleHtml")
            or container.get("titleText")
            or "Unknown Title"
        )
        body = container.get("body") or container.get("bodyHTML", "")

        all_edges: list[dict] = []
        self._gather_edges_recursive(data, all_edges)

        comments = []
        seen_ids = set()
        for edge in all_edges:
            # "node" may be missing or null; skip it rather than raising and
            # losing the whole extraction.
            node = edge.get("node") if isinstance(edge, dict) else None
            if not isinstance(node, dict):
                continue

            node_id = node.get("id")
            if not node_id or node_id in seen_ids:
                continue

            typename = node.get("__typename")
            if typename not in self._GITHUB_COMMENT_TYPES:
                continue

            seen_ids.add(node_id)
            # "author" may be null as well; fall back to "unknown".
            author_info = node.get("author")
            author = (
                author_info.get("login") if isinstance(author_info, dict) else None
            ) or "unknown"
            c_body = node.get("body") or node.get("bodyHTML") or ""
            comments.append(f"### {typename} by {author}\n{c_body}\n\n")

        return f"# {title}\n\n## Description\n{body}\n\n" + "".join(comments)

    def _scrape_github_html(self, soup) -> str:
        """
        Helper for CSS-based fallback scraping.

        Returns:
            Markdown built from the ".markdown-body" blocks, or an empty string
            when no block has enough text, so that the caller can fall back to
            generic extraction.
        """
        title_elem = soup.select_one(".markdown-title") or soup.select_one(
            ".gh-header-title"
        )
        title = title_elem.get_text(strip=True) if title_elem else "GitHub Content"

        content_blocks = []
        for i, block in enumerate(soup.select(".markdown-body")):
            text = block.get_text(separator="\n", strip=True)
            if len(text) > MIN_GITHUB_CONTENT_LENGTH:
                label = "Description" if i == 0 else f"Comment {i}"
                content_blocks.append(f"## {label}\n{text}\n\n")

        if not content_blocks:
            return ""

        return f"# {title}\n\n" + "".join(content_blocks)

    def _is_retryable_error(self, status_code: int | None) -> bool:
        """Determines if an HTTP error is transient and should be retried."""
        if not status_code:
            # No status available: treat as transient.
            return True
        # Retry on 5xx (server errors) or 429 (Too Many Requests)
        return (
            status_code >= HTTP_INTERNAL_SERVER_ERROR
            or status_code == HTTP_TOO_MANY_REQUESTS
        )

    def _fetch_with_ua(self, url: str, ua: str, max_retries: int) -> str | None:
        """
        Internal helper to attempt fetch with a specific User-Agent and retries.

        Returns:
            The response text, or None when the caller should move on to the
            next User-Agent (403/406, exhausted connection errors/timeouts, or
            any unexpected exception).

        Raises:
            requests.exceptions.HTTPError: for non-retryable statuses, or for
                retryable ones once the attempts are exhausted.
        """
        import requests
        import time

        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            # NOTE(review): "br" is advertised here; confirm a brotli decoder
            # is installed wherever this runs.
            "Accept-Encoding": "gzip, deflate, br",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
        }

        for attempt in range(max_retries):
            is_last_attempt = attempt >= max_retries - 1
            try:
                response = requests.get(url, headers=headers, timeout=20)
                response.raise_for_status()
                return response.text
            except requests.exceptions.HTTPError as e:
                status_code = getattr(e.response, "status_code", None)

                # 403/406 signal a need for UA rotation, not same-UA retry
                if status_code in (HTTP_FORBIDDEN, HTTP_NOT_ACCEPTABLE):
                    return None

                if self._is_retryable_error(status_code) and not is_last_attempt:
                    time.sleep(2**attempt)  # exponential backoff: 1s, 2s, 4s, ...
                    continue
                raise
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                if not is_last_attempt:
                    time.sleep(2**attempt)
                    continue
                return None
            except Exception:
                # Deliberate best-effort: any other failure just means
                # "this User-Agent did not work".
                return None
        return None

    def _fetch_with_rotation(self, url: str) -> str | None:
        """
        Attempts to fetch HTML content using a rotating pool of User-Agents and retries.

        Falls back to trafilatura's own fetcher when every User-Agent fails.

        Raises:
            Exception: the last error raised by a User-Agent attempt, when the
                trafilatura fallback also returns nothing.
        """
        user_agents = [
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, Gecko) Chrome/124.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, Gecko) Version/17.4.1 Mobile/15E148 Safari/604.1",
        ]

        max_retries = self._get_max_retries()
        last_error: Exception | None = None

        for ua in user_agents:
            try:
                html_content = self._fetch_with_ua(url, ua, max_retries)
            except Exception as e:
                last_error = e
                continue
            if html_content:
                return html_content

        # Final Resort: Trafilatura's internal fetcher
        html_content = self._get_trafilatura().fetch_url(url)
        if not html_content and last_error:
            raise last_error

        return html_content

    def _handle_github_raw(self, url: str) -> str | None:
        """
        Handles specialized fetching for GitHub raw content with retries.

        ``https://github.com/<owner>/<repo>/blob/<ref>/<path>`` URLs are
        rewritten to their raw.githubusercontent.com equivalent. Only the host
        and the first "/blob/" segment are rewritten, so file paths that
        themselves contain "/blob/" or "github.com" are left intact.

        Returns:
            The file text, or None when ``url`` is not a GitHub raw/blob URL.

        Raises:
            requests.exceptions.RequestException: on a 4xx other than 403/429,
                or when all attempts fail.
        """
        raw_prefix = "https://raw.githubusercontent.com/"
        site_prefix = "https://github.com/"

        is_raw_github = url.startswith(raw_prefix)
        is_github_blob = url.startswith(site_prefix) and "/blob/" in url
        if not (is_raw_github or is_github_blob):
            return None

        # Imported only once we know a request will be made.
        import requests
        import time

        if is_github_blob:
            repo_path = url[len(site_prefix):].replace("/blob/", "/", 1)
            target_url = raw_prefix + repo_path
        else:
            target_url = url

        max_retries = self._get_max_retries()

        for attempt in range(max_retries):
            try:
                response = requests.get(
                    target_url,
                    headers={
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, Gecko) Chrome/124.0.0.0 Safari/537.36"
                    },
                    timeout=30,
                )
                response.raise_for_status()
                return response.text
            except (
                requests.exceptions.HTTPError,
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
            ) as e:
                if attempt >= max_retries - 1:
                    raise
                status_code = getattr(getattr(e, "response", None), "status_code", None)
                # Don't retry on most 4xx
                if (
                    status_code
                    and HTTP_BAD_REQUEST <= status_code < HTTP_INTERNAL_SERVER_ERROR
                    and status_code not in (HTTP_FORBIDDEN, HTTP_TOO_MANY_REQUESTS)
                ):
                    raise
                time.sleep(2**attempt)
        return None

    def _truncate_content(self, markdown_content: str) -> str:
        """
        Truncates markdown content based on read.max_lines configuration.

        The content is returned unchanged when there is no config service, the
        setting is unset, or its value cannot be converted to an int.
        """
        if not self._config_service or not markdown_content:
            return markdown_content

        max_lines = self._config_service.get_setting("read.max_lines", None)
        if max_lines is None:
            return markdown_content

        try:
            max_lines_int = int(max_lines)
        except (ValueError, TypeError):
            return markdown_content

        lines = markdown_content.splitlines()
        if len(lines) > max_lines_int:
            return "\n".join(lines[:max_lines_int])
        return markdown_content

    def _is_github_host(self, url: str) -> bool:
        """
        Returns True when the URL's host is github.com, one of its subdomains,
        or a loopback host (localhost / 127.0.0.1).
        """
        from urllib.parse import urlparse

        try:
            hostname = (urlparse(url).hostname or "").lower()
        except ValueError:
            return False
        return (
            hostname == "github.com"
            or hostname.endswith(".github.com")
            or hostname in ("localhost", "127.0.0.1")
        )

    def get_content(self, url: str, **_kwargs) -> str:
        """
        Fetches and extracts the content from the given URL.
        Employs a multi-stage stealth rotation to bypass automated blocking.

        Args:
            url: The URL to fetch content from.
            **_kwargs: Optional extraction hints (currently unused).

        Returns:
            The extracted text content, or an empty string when nothing could
            be fetched or extracted. Only the generic trafilatura result is
            subject to ``read.max_lines`` truncation.
        """
        # 1. Specialized handling for GitHub raw content
        raw_github_content = self._handle_github_raw(url)
        if raw_github_content is not None:
            return raw_github_content

        # 2. Multi-stage Stealth Rotation for general URLs
        html_content = self._fetch_with_rotation(url)
        if not html_content:
            return ""

        # 3. Routing: Use specialized extractor for GitHub Issues and Pull Requests
        if self._is_github_host(url) and ("/issues/" in url or "/pull/" in url):
            github_content = self._extract_github_conversation(html_content)
            if github_content:
                return github_content

        # 4. Generic extraction
        markdown_content = self._get_trafilatura().extract(
            html_content,
            output_format="markdown",
            include_links=True,
            include_formatting=True,
            favor_recall=False,
            include_comments=False,
            include_tables=True,
        )

        if not markdown_content:
            return ""

        return self._truncate_content(markdown_content)
@@ -0,0 +1,122 @@
1
+ import logging
2
+ from typing import Any, Callable, List, Optional
3
+ from teddy_executor.core.domain.models import (
4
+ QueryResult,
5
+ SearchResult,
6
+ WebSearchError,
7
+ WebSearchResults,
8
+ )
9
+ from teddy_executor.core.ports.outbound import IConfigService, IWebSearcher
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class WebSearcherAdapter(IWebSearcher):
    """
    An adapter that uses the ddgs library to perform web searches.
    """

    def __init__(
        self,
        config_service: IConfigService,
        ddgs_factory: Optional[Callable[..., Any]] = None,
    ):
        """
        Args:
            config_service: Source of the "research.max_results" setting.
            ddgs_factory: Optional zero-argument callable returning a context
                manager that yields a search client; defaults to ``ddgs.DDGS``.
        """
        self._config_service = config_service
        self._ddgs_factory = ddgs_factory

    def _apply_ddgs_monkeypatch(self) -> None:
        """
        Applies a structural patch to DDGS to preserve word boundaries.

        The replacement is installed on ``BaseSearchEngine`` itself, so it
        affects every engine instance in the process.
        """
        from ddgs.base import BaseSearchEngine

        def patched_extract_results(self, html_text: str):
            html_text = self.pre_process_html(html_text)
            tree = self.extract_tree(html_text)
            items = tree.xpath(self.items_xpath)
            results = []
            for item in items:
                result = self.result_type()
                for key, value in self.elements_xpath.items():
                    parts = (x.strip() for x in item.xpath(value))
                    # JOIN WITH SPACE instead of empty string to preserve
                    # boundaries, then collapse runs of whitespace.
                    data = " ".join(" ".join(parts).split())
                    setattr(result, key, data)
                results.append(result)
            return results

        # Apply the structural patch to the base class
        BaseSearchEngine.extract_results = patched_extract_results  # type: ignore[method-assign]

    def _clean_snippet(self, text: str) -> str:
        """
        Fixes missing spaces after punctuation in raw text.

        NOTE(review): the pattern also matches inside tokens such as
        "example.com" or "e.g." and will split them; confirm that is acceptable.
        """
        import re

        if not text:
            return ""
        # Fix missing space after period, comma, or colon followed by a letter
        return re.sub(r"([.,:])([A-Za-z])", r"\1 \2", text)

    @staticmethod
    def _log_warning_unsuppressed(message: str, *args: Any) -> None:
        """
        Emits a warning even while ``search()`` has logging globally disabled.

        The active disable threshold is lifted for this one record and then
        put back.
        """
        active_level = logging.root.manager.disable
        logging.disable(logging.NOTSET)
        try:
            logger.warning(message, *args)
        finally:
            logging.disable(active_level)

    def _execute_single_query(
        self, ddgs_client: Any, query: str, total_queries: int
    ) -> QueryResult:
        """
        Executes a single search query and maps results.

        Args:
            ddgs_client: Client exposing ``text(query, max_results=...)``.
            query: The search string.
            total_queries: Number of queries in the current batch.

        Returns:
            The query with its mapped results; the result list is empty when
            the query failed and other queries exist in the batch.

        Raises:
            WebSearchError: when the query fails and it is the only one.
        """
        try:
            max_results = self._config_service.get_setting("research.max_results", 5)
            # DDGS.text returns a generator, so we convert it to a list
            results = list(ddgs_client.text(query, max_results=max_results))

            search_results_for_query: List[SearchResult] = []
            for res in results:
                # Map library 'body' to our 'description'
                item: SearchResult = {
                    "title": res.get("title", ""),
                    "href": res.get("href", ""),
                    "description": self._clean_snippet(res.get("body", "")),
                }
                search_results_for_query.append(item)

            return {
                "query": query,
                "results": search_results_for_query,
            }
        except Exception as e:
            # Log the individual query failure but continue with other queries.
            # This prevents one failing query from sabotaging the entire action.
            # search() disables logging while the client is active, so the
            # suppression has to be lifted for this message to be emitted.
            self._log_warning_unsuppressed("Search query '%s' failed: %s", query, e)

            # If this is the ONLY query, we still want to raise the error
            # to maintain failure transparency (Stop the Line).
            if total_queries == 1:
                raise WebSearchError(f"Failed to execute search: {e}") from e

            return {
                "query": query,
                "results": [],
            }

    def search(self, queries: List[str]) -> WebSearchResults:
        """
        Performs a web search for each query and maps the results.

        Raises:
            WebSearchError: when ``queries`` holds a single query and it fails.
        """
        from ddgs import DDGS

        self._apply_ddgs_monkeypatch()
        all_query_results: List[QueryResult] = []
        factory = self._ddgs_factory or DDGS

        # Globally disable logging (CRITICAL and below) to silence noisy
        # third-party HTTP clients (urllib3, httpx, curl_cffi) used by DDGS.
        # Remember the previous threshold so it is restored afterwards and not
        # reset to NOTSET.
        previous_disable_level = logging.root.manager.disable
        logging.disable(logging.CRITICAL)
        try:
            with factory() as ddgs_client:
                for query in queries:
                    result = self._execute_single_query(
                        ddgs_client, query, len(queries)
                    )
                    all_query_results.append(result)

            return {"query_results": all_query_results}
        finally:
            logging.disable(previous_disable_level)
@@ -0,0 +1,105 @@
1
+ import os
2
+ from importlib import resources
3
+ from typing import Any, Dict, Optional
4
+ import yaml
5
+ from teddy_executor.core.ports.outbound.config_service import IConfigService
6
+
7
+
8
class YamlConfigAdapter(IConfigService):
    """
    IConfigService implementation backed by YAML.

    The configuration is built once, at construction time, by loading the
    baseline bundled with the package and layering the user's file on top.
    """

    def __init__(
        self, config_path: str = ".teddy/config.yaml", root_dir: Optional[str] = None
    ):
        """
        Args:
            config_path: Location of the user's config file.
            root_dir: When given, ``config_path`` is joined onto this directory.
        """
        self._config_path = (
            os.path.join(root_dir, config_path) if root_dir else config_path
        )
        self._config: Dict[str, Any] = self._load_layered_config()

    def _load_layered_config(self) -> Dict[str, Any]:
        """Returns the bundled baseline with the user's overrides merged in."""
        merged = self._load_baseline()
        self._merge_dicts(merged, self._load_user_config())
        return merged

    def _load_baseline(self) -> Dict[str, Any]:
        """
        Reads ``config.yaml`` from the ``teddy_executor.resources.config`` package.

        Returns an empty dict when the resource cannot be read or parsed, or
        when its top level is not a mapping.
        """
        try:
            baseline_file = resources.files(
                "teddy_executor.resources.config"
            ).joinpath("config.yaml")
            with baseline_file.open("r", encoding="utf-8") as handle:
                parsed = yaml.safe_load(handle)
        except (yaml.YAMLError, OSError, ImportError, AttributeError):
            return {}
        return parsed if isinstance(parsed, dict) else {}

    def _load_user_config(self) -> Dict[str, Any]:
        """
        Reads the user's YAML file.

        Returns an empty dict when the file is absent, unreadable, not valid
        YAML, or when its top level is not a mapping.
        """
        if not os.path.exists(self._config_path):
            return {}

        try:
            with open(self._config_path, "r", encoding="utf-8") as handle:
                parsed = yaml.safe_load(handle)
        except (yaml.YAMLError, OSError):
            return {}
        return parsed if isinstance(parsed, dict) else {}

    def _merge_dicts(self, base: Dict[str, Any], overrides: Dict[str, Any]) -> None:
        """
        Merges ``overrides`` into ``base`` in place, recursing into mappings.

        An override of None removes the key from ``base``. A mapping override
        replaces a non-mapping base value with a fresh dict before merging.
        """
        for name, incoming in overrides.items():
            if incoming is None:
                # Explicit null in the overrides means "drop this setting".
                base.pop(name, None)
                continue

            if not isinstance(incoming, dict):
                base[name] = incoming
                continue

            if not isinstance(base.get(name), dict):
                base[name] = {}
            self._merge_dicts(base[name], incoming)

    def get_setting(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
        """
        Looks up a configuration value.

        A top-level entry whose name equals ``key`` exactly wins; otherwise
        ``key`` is split on "." and resolved through nested mappings
        (e.g. 'outer.inner'). ``default`` is returned for an empty key or when
        the nested lookup yields nothing.
        """
        if not key:
            return default

        # Exact top-level match takes priority over dotted traversal.
        if key in self._config:
            return self._config[key]

        resolved = self._resolve_nested(key.split("."))
        return default if resolved is None else resolved

    def get_config_path(self) -> str:
        """Returns the path to the configuration file."""
        return self._config_path

    def _resolve_nested(self, parts: list[str]) -> Optional[Any]:
        """Walks the config one segment at a time; None if any segment is missing."""
        node: Any = self._config
        for segment in parts:
            if not (isinstance(node, dict) and segment in node):
                return None
            node = node[segment]
        return node