PyPI - teddy-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

teddy-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (143) hide show

teddy_cli-0.1.0.dist-info/LICENSE +677 -0
teddy_cli-0.1.0.dist-info/METADATA +33 -0
teddy_cli-0.1.0.dist-info/RECORD +143 -0
teddy_cli-0.1.0.dist-info/WHEEL +4 -0
teddy_cli-0.1.0.dist-info/entry_points.txt +3 -0
teddy_executor/__init__.py +1 -0
teddy_executor/__main__.py +335 -0
teddy_executor/adapters/__init__.py +0 -0
teddy_executor/adapters/inbound/__init__.py +0 -0
teddy_executor/adapters/inbound/cli_formatter.py +107 -0
teddy_executor/adapters/inbound/cli_helpers.py +249 -0
teddy_executor/adapters/inbound/console_plan_reviewer.py +69 -0
teddy_executor/adapters/inbound/session_cli_handlers.py +366 -0
teddy_executor/adapters/inbound/textual_plan_reviewer.py +78 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_app.py +367 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_editor.py +281 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_execution.py +213 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_helpers.py +308 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_logic.py +345 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_previews.py +227 -0
teddy_executor/adapters/inbound/textual_plan_reviewer_widgets.py +246 -0
teddy_executor/adapters/outbound/__init__.py +7 -0
teddy_executor/adapters/outbound/console_interactor.py +212 -0
teddy_executor/adapters/outbound/console_interactor_ask_loop.py +121 -0
teddy_executor/adapters/outbound/console_interactor_helpers.py +95 -0
teddy_executor/adapters/outbound/console_tooling.py +62 -0
teddy_executor/adapters/outbound/filesystem_helpers.py +61 -0
teddy_executor/adapters/outbound/litellm_adapter.py +462 -0
teddy_executor/adapters/outbound/local_file_system_adapter.py +300 -0
teddy_executor/adapters/outbound/local_repo_tree_generator.py +96 -0
teddy_executor/adapters/outbound/openrouter_hydrator.py +89 -0
teddy_executor/adapters/outbound/shell_adapter.py +344 -0
teddy_executor/adapters/outbound/shell_command_builder.py +105 -0
teddy_executor/adapters/outbound/system_environment_adapter.py +62 -0
teddy_executor/adapters/outbound/system_environment_inspector.py +54 -0
teddy_executor/adapters/outbound/system_time_adapter.py +22 -0
teddy_executor/adapters/outbound/web_scraper_adapter.py +346 -0
teddy_executor/adapters/outbound/web_searcher_adapter.py +122 -0
teddy_executor/adapters/outbound/yaml_config_adapter.py +105 -0
teddy_executor/container.py +333 -0
teddy_executor/core/__init__.py +0 -0
teddy_executor/core/domain/__init__.py +0 -0
teddy_executor/core/domain/models/__init__.py +44 -0
teddy_executor/core/domain/models/action_ports.py +28 -0
teddy_executor/core/domain/models/change_set.py +10 -0
teddy_executor/core/domain/models/exceptions.py +40 -0
teddy_executor/core/domain/models/execution_report.py +65 -0
teddy_executor/core/domain/models/orchestrator_ports.py +26 -0
teddy_executor/core/domain/models/plan.py +85 -0
teddy_executor/core/domain/models/planning_ports.py +43 -0
teddy_executor/core/domain/models/project_context.py +56 -0
teddy_executor/core/domain/models/report_assembly_data.py +18 -0
teddy_executor/core/domain/models/session.py +17 -0
teddy_executor/core/domain/models/shell_output.py +12 -0
teddy_executor/core/domain/models/web_search_results.py +26 -0
teddy_executor/core/ports/__init__.py +0 -0
teddy_executor/core/ports/inbound/__init__.py +0 -0
teddy_executor/core/ports/inbound/edit_simulator.py +33 -0
teddy_executor/core/ports/inbound/get_context_use_case.py +32 -0
teddy_executor/core/ports/inbound/init.py +15 -0
teddy_executor/core/ports/inbound/plan_parser.py +52 -0
teddy_executor/core/ports/inbound/plan_reviewer.py +44 -0
teddy_executor/core/ports/inbound/plan_validator.py +26 -0
teddy_executor/core/ports/inbound/planning_use_case.py +30 -0
teddy_executor/core/ports/inbound/run_plan_use_case.py +60 -0
teddy_executor/core/ports/outbound/__init__.py +34 -0
teddy_executor/core/ports/outbound/config_service.py +29 -0
teddy_executor/core/ports/outbound/environment_inspector.py +30 -0
teddy_executor/core/ports/outbound/execution_report_assembler.py +19 -0
teddy_executor/core/ports/outbound/file_system_manager.py +131 -0
teddy_executor/core/ports/outbound/llm_client.py +90 -0
teddy_executor/core/ports/outbound/markdown_report_formatter.py +26 -0
teddy_executor/core/ports/outbound/prompt_manager.py +55 -0
teddy_executor/core/ports/outbound/repo_tree_generator.py +17 -0
teddy_executor/core/ports/outbound/session_loop_guard.py +16 -0
teddy_executor/core/ports/outbound/session_manager.py +97 -0
teddy_executor/core/ports/outbound/session_repository.py +65 -0
teddy_executor/core/ports/outbound/shell_executor.py +24 -0
teddy_executor/core/ports/outbound/system_environment.py +25 -0
teddy_executor/core/ports/outbound/time_service.py +28 -0
teddy_executor/core/ports/outbound/user_interactor.py +126 -0
teddy_executor/core/ports/outbound/web_scraper.py +24 -0
teddy_executor/core/ports/outbound/web_searcher.py +25 -0
teddy_executor/core/services/__init__.py +0 -0
teddy_executor/core/services/action_changeset_builder.py +90 -0
teddy_executor/core/services/action_diff_manager.py +110 -0
teddy_executor/core/services/action_dispatcher.py +142 -0
teddy_executor/core/services/action_executor.py +209 -0
teddy_executor/core/services/action_factory.py +197 -0
teddy_executor/core/services/action_parser_complex.py +216 -0
teddy_executor/core/services/action_parser_strategies.py +84 -0
teddy_executor/core/services/context_service.py +437 -0
teddy_executor/core/services/edit_simulator.py +128 -0
teddy_executor/core/services/execution_orchestrator.py +295 -0
teddy_executor/core/services/execution_report_assembler.py +62 -0
teddy_executor/core/services/init_service.py +80 -0
teddy_executor/core/services/markdown_plan_parser.py +309 -0
teddy_executor/core/services/markdown_report_formatter.py +143 -0
teddy_executor/core/services/parser_infrastructure.py +222 -0
teddy_executor/core/services/parser_metadata.py +153 -0
teddy_executor/core/services/parser_reporting.py +267 -0
teddy_executor/core/services/plan_validator.py +82 -0
teddy_executor/core/services/planning_service.py +242 -0
teddy_executor/core/services/prompt_manager.py +146 -0
teddy_executor/core/services/session_lifecycle_manager.py +228 -0
teddy_executor/core/services/session_loop_guard.py +46 -0
teddy_executor/core/services/session_orchestrator.py +538 -0
teddy_executor/core/services/session_planner.py +43 -0
teddy_executor/core/services/session_pruning_service.py +438 -0
teddy_executor/core/services/session_replanner.py +105 -0
teddy_executor/core/services/session_repository.py +194 -0
teddy_executor/core/services/session_service.py +529 -0
teddy_executor/core/services/templates/execution_report.md.j2 +290 -0
teddy_executor/core/services/validation_rules/__init__.py +4 -0
teddy_executor/core/services/validation_rules/edit.py +207 -0
teddy_executor/core/services/validation_rules/edit_matcher.py +247 -0
teddy_executor/core/services/validation_rules/edit_matcher_heuristics.py +84 -0
teddy_executor/core/services/validation_rules/execute.py +37 -0
teddy_executor/core/services/validation_rules/filesystem.py +73 -0
teddy_executor/core/services/validation_rules/helpers.py +178 -0
teddy_executor/core/services/validation_rules/message.py +29 -0
teddy_executor/core/utils/__init__.py +1 -0
teddy_executor/core/utils/diff.py +57 -0
teddy_executor/core/utils/io.py +75 -0
teddy_executor/core/utils/markdown.py +131 -0
teddy_executor/core/utils/serialization.py +39 -0
teddy_executor/core/utils/string.py +351 -0
teddy_executor/prompts.py +45 -0
teddy_executor/registries/__init__.py +1 -0
teddy_executor/registries/infrastructure.py +147 -0
teddy_executor/registries/reviewer.py +57 -0
teddy_executor/registries/validators.py +47 -0
teddy_executor/resources/__init__.py +1 -0
teddy_executor/resources/config/.gitignore +2 -0
teddy_executor/resources/config/__init__.py +1 -0
teddy_executor/resources/config/config.yaml +49 -0
teddy_executor/resources/config/init.context +5 -0
teddy_executor/resources/config/prompts/architect.xml +462 -0
teddy_executor/resources/config/prompts/assistant.xml +336 -0
teddy_executor/resources/config/prompts/debugger.xml +456 -0
teddy_executor/resources/config/prompts/developer.xml +481 -0
teddy_executor/resources/config/prompts/pathfinder.xml +502 -0
teddy_executor/resources/config/prompts/prototyper.xml +425 -0

teddy_executor/core/services/markdown_plan_parser.py ADDED Viewed

@@ -0,0 +1,309 @@
+from __future__ import annotations
+import os
+import re
+from typing import TYPE_CHECKING, Any, List, Optional
+if TYPE_CHECKING:
+    from mistletoe.block_token import (
+        Document,
+    )
+from teddy_executor.core.domain.models import ActionData, Plan, ActionType
+from teddy_executor.core.ports.inbound.plan_parser import IPlanParser, InvalidPlanError
+from teddy_executor.core.services.parser_infrastructure import (
+    H1_LEVEL,
+    H2_LEVEL,
+    _FencePreProcessor,
+    _PeekableStream,
+    get_child_text,
+    get_action_heading,
+    consume_content_until_next_action,
+    normalize_headings,
+    print_ast,
+)
+from teddy_executor.core.services.parser_reporting import (
+    format_structural_mismatch_msg,
+    validate_plan_structure,
+)
+from teddy_executor.core.services.parser_metadata import parse_plan_metadata
+from teddy_executor.core.services.action_parser_strategies import (
+    parse_create_action,
+    parse_read_action,
+)
+from teddy_executor.core.services.action_parser_complex import (
+    parse_edit_action,
+    parse_execute_action,
+    parse_research_action,
+    parse_message_action,
+)
+class MarkdownPlanParser(IPlanParser):
+    """
+    A service that parses a Markdown plan string into a `Plan` domain object using a
+    single-pass AST stream.
+    """
+    def __init__(self):
+        self._preprocessor = _FencePreProcessor()
+        self._valid_actions = {action.value for action in ActionType}
+        self._dispatch_map = {
+            "CREATE": parse_create_action,
+            "READ": parse_read_action,
+            "EDIT": lambda s, node=None: parse_edit_action(
+                s, self._valid_actions, node=node
+            ),
+            "EXECUTE": parse_execute_action,
+            "RESEARCH": lambda s, node=None: parse_research_action(
+                s, self._valid_actions, node=node
+            ),
+        }
+    def parse(self, plan_content: str, plan_path: Optional[str] = None) -> Plan:
+        """
+        Parses the specified Markdown plan string into a structured Plan object.
+        """
+        from mistletoe.block_token import (
+            Document,
+        )
+        # Trim trailing whitespace to prevent mistletoe from
+        # interpreting trailing indentation as an unexpected code block.
+        # We keep leading whitespace for potential Markdown significance (though rare at top-level).
+        clean_content = plan_content.rstrip()
+        if not clean_content:
+            raise InvalidPlanError("Plan content cannot be empty.")
+        # Strip preamble (text before the first # heading at start of a line)
+        # Use MULTILINE so ^ matches start of any line. Allow optional leading whitespace
+        # before # because Markdown permits up to 3 spaces before heading markers.
+        h1_match = re.search(r"^[ \t]*#", clean_content, re.MULTILINE)
+        if h1_match and h1_match.start() > 0:
+            clean_content = clean_content[h1_match.start() :]
+        # Normalize H1 heading on the first line (e.g., #Title -> # Title)
+        # This runs after preamble stripping so it always targets the heading line
+        clean_content = normalize_headings(clean_content)
+        processed_content = self._preprocessor.process(clean_content)
+        doc = Document(processed_content)
+        if os.environ.get("TEDDY_DEBUG"):
+            print_ast(doc)
+        stream = _PeekableStream(iter(doc.children or []))
+        try:
+            title, rationale, metadata, section_heading = self._parse_strict_top_level(
+                stream, doc
+            )
+            self._validate_mutual_exclusivity(doc)
+            actions = self._parse_section_content(
+                stream, clean_content, section_heading, doc
+            )
+            is_session = False
+            if plan_path:
+                normalized_path = plan_path.replace("\\", "/").lower()
+                is_session = ".teddy/sessions/" in normalized_path
+            plan = Plan(
+                title=title,
+                rationale=rationale,
+                actions=actions,
+                metadata=metadata,
+                source_doc=doc,
+                is_session=is_session,
+                plan_path=plan_path,
+                raw_content=clean_content,
+            )
+            # Write corrected content back to source file if it came from a session file path
+            if plan_path and is_session:
+                from pathlib import Path
+                path_obj = Path(plan_path)
+                try:
+                    current_disk = path_obj.read_text(encoding="utf-8")
+                except Exception:
+                    current_disk = None
+                if current_disk is not None and current_disk.rstrip() != clean_content:
+                    path_obj.write_text(clean_content, encoding="utf-8")
+            return plan
+        except InvalidPlanError as e:
+            if "### Expected Response Structure (MRP) " in str(e):
+                raise e
+            # Re-format the error using the shared infrastructure to always include AST
+            e_nodes = getattr(e, "offending_nodes", [])
+            rich_msg = format_structural_mismatch_msg(
+                doc, str(e).splitlines()[0], -1, e_nodes
+            )
+            raise InvalidPlanError(rich_msg, offending_nodes=e_nodes) from e
+    def _raise_structural_error(
+        self, doc: Document, expected_name: str, mismatch_idx: int, actual_node: Any
+    ):
+        """Constructs and raises a detailed structural validation error."""
+        offending_nodes = [actual_node] if actual_node else []
+        raise InvalidPlanError(
+            self._format_structural_mismatch_msg(
+                doc, expected_name, mismatch_idx, actual_node
+            ),
+            offending_nodes=offending_nodes,
+        )
+    def _format_structural_mismatch_msg(
+        self, doc: Document, expected: str, mismatch_idx: int, actual_node: Any
+    ) -> str:
+        """Wrapper for infrastructure helper to maintain internal API for tests."""
+        offending_nodes = (
+            actual_node if isinstance(actual_node, list) else [actual_node]
+        )
+        return format_structural_mismatch_msg(
+            doc, expected, mismatch_idx, offending_nodes
+        )
+    def _consume_mandatory_node(
+        self, stream: _PeekableStream, doc: Document, idx: int, expected: str, predicate
+    ) -> Any:
+        node = stream.peek()
+        if not node or not predicate(node):
+            self._raise_structural_error(doc, expected, idx, node)
+        return stream.next()
+    def _parse_strict_top_level(
+        self, stream: _PeekableStream, doc: Document
+    ) -> tuple[str, str, dict[str, str], Any]:
+        from mistletoe.block_token import Heading
+        # 0: Find H1 Title. Must be at index 0 per Rule 3.1.
+        node = stream.peek()
+        start_idx = 0
+        if not node or not (isinstance(node, Heading) and node.level == H1_LEVEL):
+            offending_nodes = [node] if node else []
+            rich_msg = format_structural_mismatch_msg(
+                doc, "a Level 1 Heading (Title)", 0, offending_nodes
+            )
+            raise InvalidPlanError(rich_msg, offending_nodes=offending_nodes)
+        title = get_child_text(node).strip()
+        validate_plan_structure(doc, start_idx)
+        # If we got here, the structure is correct. Consume nodes and extract data.
+        stream.next()  # Title (already used)
+        metadata_list_node = stream.next()
+        if not metadata_list_node:
+            raise InvalidPlanError(
+                "Plan parsing failed: Expected metadata list missing."
+            )
+        metadata = parse_plan_metadata(metadata_list_node)
+        stream.next()  # H2 Rationale
+        rationale_node = stream.next()
+        rationale = get_child_text(rationale_node).strip()
+        section_heading = stream.next()  # H2 Action Plan or Message
+        return title, rationale, metadata, section_heading
+    def _validate_mutual_exclusivity(self, doc: "Document") -> None:
+        """Validates that the document does not contain both ## Action Plan and ## Message."""
+        from mistletoe.block_token import Heading
+        doc_children = doc.children or []
+        h2_headings = [
+            n for n in doc_children if isinstance(n, Heading) and n.level == H2_LEVEL
+        ]
+        h2_texts = [get_child_text(h) for h in h2_headings]
+        if "Action Plan" in h2_texts and "Message" in h2_texts:
+            raise InvalidPlanError(
+                "Plan cannot contain both '## Action Plan' and '## Message'. Mutual exclusivity is required."
+            )
+    def _parse_section_content(
+        self,
+        stream: _PeekableStream,
+        clean_content: str,
+        section_heading: Any,
+        doc: Document,
+    ) -> List[ActionData]:
+        """Parses the content of either a ## Message or ## Action Plan section."""
+        section_name = get_child_text(section_heading).strip()
+        if "Message" in section_name:
+            raw_content = None
+            start_line = getattr(section_heading, "line_number", None)
+            if start_line is not None and start_line > 0:
+                lines = clean_content.splitlines(keepends=True)
+                if start_line < len(lines):
+                    raw_content = "".join(lines[start_line:]).lstrip("\n")
+            actions = [
+                parse_message_action(
+                    stream, node=section_heading, raw_content=raw_content
+                )
+            ]
+        else:
+            actions = self._parse_actions(stream, doc)
+        return actions
+    def _parse_actions(
+        self, stream: _PeekableStream, doc: Document
+    ) -> List[ActionData]:
+        from mistletoe.block_token import BlockCode, CodeFence, ThematicBreak
+        actions: List[ActionData] = []
+        # 'Action Plan' heading is already consumed by _parse_strict_top_level.
+        # Parse all subsequent actions
+        while stream.has_next():
+            node = stream.peek()
+            action_heading = get_action_heading(node, self._valid_actions)
+            if not action_heading:
+                # Skip code blocks and thematic breaks that can appear between
+                # action blocks due to formatting or trailing content.
+                if isinstance(node, (BlockCode, CodeFence, ThematicBreak)):
+                    stream.next()
+                    continue
+                # Accumulate offending node and raise structural error
+                offending_nodes = consume_content_until_next_action(
+                    stream, self._valid_actions
+                )
+                raise InvalidPlanError(
+                    format_structural_mismatch_msg(
+                        doc, "a Level 3 Action Heading", -1, offending_nodes
+                    ),
+                    offending_nodes=offending_nodes,
+                )
+            stream.next()  # Consume action heading
+            action_type_str = get_child_text(action_heading).strip().replace("`", "")
+            # Guard: MESSAGE under ## Action Plan must produce a clear mutual exclusivity error
+            if action_type_str == "MESSAGE":
+                raise InvalidPlanError(
+                    "MESSAGE action is not allowed under '## Action Plan'. "
+                    "Use '## Message' section instead. Mutual exclusivity is required.",
+                    offending_nodes=[action_heading],
+                )
+            if action_type_str not in self._dispatch_map:
+                raise InvalidPlanError(
+                    f"Unknown action type: {action_type_str}",
+                    offending_nodes=[action_heading],
+                )
+            parse_method = self._dispatch_map[action_type_str]
+            actions.append(parse_method(stream, node=action_heading))
+        return actions
+    # Structural formatting logic moved to parser_infrastructure.py

teddy_executor/core/services/markdown_report_formatter.py ADDED Viewed

@@ -0,0 +1,143 @@
+import os
+from datetime import timezone
+from typing import Any
+from teddy_executor.core.domain.models import ExecutionReport
+from teddy_executor.core.ports.outbound.markdown_report_formatter import (
+    IMarkdownReportFormatter,
+)
+from teddy_executor.core.utils.markdown import (
+    get_fence_for_content,
+    get_language_from_path,
+)
+class MarkdownReportFormatter(IMarkdownReportFormatter):
+    """
+    Implements IMarkdownReportFormatter using the Jinja2 template engine.
+    """
+    _cached_env = None
+    _cached_template = None
+    @classmethod
+    def _reset_singleton(cls):
+        """Internal helper for test isolation."""
+        cls._cached_env = None
+        cls._cached_template = None
+    def __init__(self):
+        from jinja2 import Environment, PackageLoader
+        if MarkdownReportFormatter._cached_env is None:
+            env = Environment(
+                loader=PackageLoader("teddy_executor.core.services", "templates"),
+                trim_blocks=True,
+                lstrip_blocks=True,
+                autoescape=False,  # nosec B701
+            )
+            env.filters["basename"] = os.path.basename
+            env.filters["fence"] = get_fence_for_content
+            env.filters["language_from_path"] = get_language_from_path
+            MarkdownReportFormatter._cached_env = env
+            MarkdownReportFormatter._cached_template = env.get_template(
+                "execution_report.md.j2"
+            )
+        self.env = MarkdownReportFormatter._cached_env
+        self.template = MarkdownReportFormatter._cached_template
+    def _prepare_context(self, report: ExecutionReport) -> dict[str, Any]:
+        """Prepares the report data for rendering."""
+        def format_datetime(dt):
+            if not dt:
+                return ""
+            if dt.tzinfo is None:
+                dt = dt.replace(tzinfo=timezone.utc)
+            return dt.isoformat()
+        plan_title: str = "Untitled Plan"
+        if hasattr(report, "plan_title"):
+            val = getattr(report, "plan_title")
+            plan_title = str(val) if val is not None else "Untitled Plan"
+        elif isinstance(report, dict):
+            plan_title = str(report.get("plan_title", "Untitled Plan"))
+        is_session = False
+        if hasattr(report, "is_session"):
+            is_session = bool(getattr(report, "is_session"))
+        elif isinstance(report, dict):
+            is_session = bool(report.get("is_session", False))
+        return {
+            "report": report,
+            "is_session": is_session,
+            "plan_title": plan_title,
+            "format_datetime": format_datetime,
+        }
+    def format(self, report: ExecutionReport) -> str:
+        """Renders the execution report to a Markdown string."""
+        from teddy_executor.core.utils.serialization import (
+            scrub_dict_for_serialization,
+        )
+        # 1. Prepare context with real objects to support attribute access in Python
+        context = self._prepare_context(report)
+        # 2. Scrub the report data specifically to neutralize mocks for Jinja2
+        report_data = (
+            report.__dict__
+            if hasattr(report, "__dict__")
+            else (report if isinstance(report, dict) else {})
+        )
+        context["report"] = scrub_dict_for_serialization(report_data)
+        # 3. Render with scrubbed data but real functions
+        rendered = self.template.render(context)
+        # Post-process for whitespace sanitization
+        lines = [line.rstrip() for line in rendered.splitlines()]
+        sanitized_lines = []
+        in_fence = False
+        consecutive_blanks = 0
+        for line in lines:
+            # Track code block state
+            if line.strip().startswith("```"):
+                in_fence = not in_fence
+            if in_fence:
+                # Inside code block: preserve all whitespace and newlines
+                sanitized_lines.append(line)
+                consecutive_blanks = 0
+            # Outside code block: apply sanitization rules
+            elif not line:
+                consecutive_blanks += 1
+                # Only allow one consecutive blank line (max 2 newlines)
+                if consecutive_blanks <= 1:
+                    sanitized_lines.append(line)
+            else:
+                # If the line starts with a bullet point, prevent a blank line before it
+                # unless it's the very first bullet in a list after a header.
+                # This ensures density for list items.
+                if (
+                    line.strip().startswith("- ")
+                    and sanitized_lines
+                    and not sanitized_lines[-1].strip()
+                ):
+                    # If the previous line was blank and we are starting a bullet,
+                    # check if the line before THAT was also a bullet.
+                    if len(sanitized_lines) > 1 and sanitized_lines[
+                        -2
+                    ].strip().startswith("- "):
+                        sanitized_lines.pop()  # Remove the blank line between bullets
+                consecutive_blanks = 0
+                sanitized_lines.append(line)
+        sanitized = "\n".join(sanitized_lines).strip()
+        return sanitized

teddy_executor/core/services/parser_infrastructure.py ADDED Viewed

@@ -0,0 +1,222 @@
+import os
+import re
+from typing import Any, List, Optional, Iterator, TYPE_CHECKING
+# Insert a space after `#` on the first line if missing (e.g., `#Title` -> `# Title`)
+# Only normalizes the first line to avoid corrupting code fences or shebangs.
+def normalize_headings(content: str) -> str:
+    """Insert a space after `#` if missing on the first line (the H1 title)."""
+    first_newline = content.find("\n")
+    if first_newline == -1:
+        first_line = content
+        rest = ""
+    else:
+        first_line = content[:first_newline]
+        rest = content[first_newline:]
+    if re.match(r"^#[^ #\t\n]", first_line):
+        first_line = "# " + first_line[1:]
+    return first_line + rest
+if TYPE_CHECKING:
+    from mistletoe.block_token import Heading
+# Constants for Markdown structure: heading depths compared against a
+# heading node's `level` attribute.
+H1_LEVEL = 1
+H2_LEVEL = 2
+H3_LEVEL = 3
+# Constant for parsing key-value pairs
+# NOTE(review): not referenced in the visible code; presumably the number of
+# parts a "key: value" split should yield - confirm against callers.
+EXPECTED_KV_PARTS = 2
+class _FencePreProcessor:
+    """
+    A utility to pre-process raw LLM Markdown output to ensure all code fences are valid
+    before parsing. This is a crucial safety net.
+    """
+    def process(self, content: str) -> str:
+        """
+        Pre-process raw Markdown content to normalize code fences.
+        Currently handles:
+        - Stripping trailing non-whitespace content on fence lines with 6+
+          consecutive backticks or tildes (e.g., ``~~~~~~ trailing text`` → ``~~~~~~``).
+        """
+        lines = content.split("\n")
+        result = []
+        # Pattern: optional leading whitespace, then 6+ consecutive pure tildes
+        # OR 6+ consecutive pure backticks, then any trailing content.
+        pattern = re.compile(r"^(\s*)(\~{6,}|\`{6,})(.*)$")
+        for line in lines:
+            match = pattern.match(line)
+            if match:
+                trailing = match.group(3)
+                # Only strip trailing content if it does NOT contain any backtick
+                # or tilde.  This prevents corrupting lines like
+                # "~~~~~~` trailing" where fence characters appear in content.
+                if trailing is not None and trailing.strip():
+                    if not any(c in trailing for c in ("`", "~")):
+                        line = match.group(1) + match.group(2)
+                # If trailing is empty/whitespace or contains fence chars,
+                # keep original line unchanged.
+            result.append(line)
+        return "\n".join(result)
+class _PeekableStream:
+    """A wrapper for an iterator to allow peeking at the next item."""
+    def __init__(self, iterator: Iterator[Any]):
+        self._iterator = iterator
+        self._next_item: Optional[Any] = None
+        self._fetch_next()
+    def _fetch_next(self):
+        try:
+            self._next_item = next(self._iterator)
+        except StopIteration:
+            self._next_item = None
+    def has_next(self) -> bool:
+        return self._next_item is not None
+    def peek(self) -> Optional[Any]:
+        return self._next_item
+    def next(self) -> Optional[Any]:
+        current_item = self._next_item
+        if current_item is not None:
+            self._fetch_next()
+        return current_item
+def normalize_path(path: str) -> str:
+    return path.replace("\\", "/")
+def normalize_link_target(target: str) -> str:
+    if target.startswith(("http://", "https://")):
+        return target
+    is_abs = os.path.isabs(target)
+    is_likely_true_absolute = False
+    if os.name == "nt":
+        has_drive, _ = os.path.splitdrive(target)
+        if is_abs and has_drive:
+            is_likely_true_absolute = True
+    elif os.name == "posix" and is_abs:
+        common_roots = ("/tmp", "/etc", "/home", "/var", "/usr", "/root")  # nosec B108
+        if target.startswith(common_roots):
+            is_likely_true_absolute = True
+    if target.startswith("/") and not is_likely_true_absolute:
+        return target.lstrip("/")
+    return target
+def find_node_in_tree(node: Any, node_type: type) -> Optional[Any]:
+    if isinstance(node, node_type):
+        return node
+    if hasattr(node, "children") and node.children is not None:
+        for child in node.children:
+            found = find_node_in_tree(child, node_type)
+            if found:
+                return found
+    return None
+def get_child_text(node: Any) -> str:
+    if hasattr(node, "children") and node.children is not None:
+        return "".join([get_child_text(child) for child in node.children])
+    return getattr(node, "content", "")
+def get_action_heading(node: Any, valid_actions: set[str]) -> "Optional[Heading]":
+    """Checks if a node is a valid H3 action heading."""
+    from mistletoe.block_token import Heading
+    from mistletoe.span_token import InlineCode
+    if isinstance(node, Heading) and node.level == H3_LEVEL:
+        text = get_child_text(node).strip()
+        potential_type = text.split(":")[0].strip().replace("`", "")
+        if potential_type in valid_actions:
+            return node
+        # Allow unknown actions if they are formatted like `ACTION` to fail later
+        children = list(node.children) if node.children else []
+        if children and isinstance(children[0], InlineCode):
+            return node
+    return None
+def consume_content_until_next_action(
+    stream: _PeekableStream, valid_actions: set[str]
+) -> List[Any]:
+    """Consumes nodes from the stream until the next H3 action heading or H1/H2."""
+    from mistletoe.block_token import Heading
+    content_nodes = []
+    while stream.has_next():
+        node = stream.peek()
+        if isinstance(node, Heading):
+            if node.level <= H2_LEVEL:
+                break
+            if get_action_heading(node, valid_actions):
+                break
+        content_nodes.append(stream.next())
+    return content_nodes
+def print_ast(token: Any, indent: int = 0):
+    """Recursively prints the AST in a readable format for debugging."""
+    prefix = "  " * indent
+    print(f"{prefix}- {type(token).__name__}")
+    content_attr = getattr(token, "content", None)
+    if content_attr is not None:
+        first_line = (
+            str(content_attr).splitlines()[0]
+            if "\n" in str(content_attr)
+            else str(content_attr)
+        )
+        print(f'{prefix}  Content: "{first_line[:80]}"')
+    children_attr = getattr(token, "children", None)
+    if children_attr is not None:
+        for child in children_attr:
+            print_ast(child, indent + 1)
+def translate_setup_commands(
+    setup_str: str,
+    initial_cwd: Optional[str] = None,
+    initial_env: Optional[dict[str, str]] = None,
+) -> tuple[Optional[str], Optional[dict[str, str]]]:
+    """
+    Translates a chained setup string (e.g. 'cd dir && export FOO=bar')
+    into cwd and env parameters.
+    """
+    cwd = initial_cwd
+    env = initial_env
+    parts = [p.strip() for p in setup_str.split("&&")]
+    for part in parts:
+        if part.startswith("cd "):
+            cwd = part[3:].strip()
+        elif part.startswith("export "):
+            if env is None:
+                env = {}
+            kv_part = part[7:].strip()
+            if "=" in kv_part:
+                key, value = kv_part.split("=", 1)
+                key = key.strip()
+                value = value.strip()
+                if (value.startswith('"') and value.endswith('"')) or (
+                    value.startswith("'") and value.endswith("'")
+                ):
+                    value = value[1:-1]
+                env[key] = value
+    return cwd, env