yamlgraph-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yamlgraph might be problematic.
- examples/__init__.py +1 -0
- examples/storyboard/__init__.py +1 -0
- examples/storyboard/generate_videos.py +335 -0
- examples/storyboard/nodes/__init__.py +10 -0
- examples/storyboard/nodes/animated_character_node.py +248 -0
- examples/storyboard/nodes/animated_image_node.py +138 -0
- examples/storyboard/nodes/character_node.py +162 -0
- examples/storyboard/nodes/image_node.py +118 -0
- examples/storyboard/nodes/replicate_tool.py +238 -0
- examples/storyboard/retry_images.py +118 -0
- tests/__init__.py +1 -0
- tests/conftest.py +178 -0
- tests/integration/__init__.py +1 -0
- tests/integration/test_animated_storyboard.py +63 -0
- tests/integration/test_cli_commands.py +242 -0
- tests/integration/test_map_demo.py +50 -0
- tests/integration/test_memory_demo.py +281 -0
- tests/integration/test_pipeline_flow.py +105 -0
- tests/integration/test_providers.py +163 -0
- tests/integration/test_resume.py +75 -0
- tests/unit/__init__.py +1 -0
- tests/unit/test_agent_nodes.py +200 -0
- tests/unit/test_checkpointer.py +212 -0
- tests/unit/test_cli.py +121 -0
- tests/unit/test_cli_package.py +81 -0
- tests/unit/test_compile_graph_map.py +132 -0
- tests/unit/test_conditions_routing.py +253 -0
- tests/unit/test_config.py +93 -0
- tests/unit/test_conversation_memory.py +270 -0
- tests/unit/test_database.py +145 -0
- tests/unit/test_deprecation.py +104 -0
- tests/unit/test_executor.py +60 -0
- tests/unit/test_executor_async.py +179 -0
- tests/unit/test_export.py +150 -0
- tests/unit/test_expressions.py +178 -0
- tests/unit/test_format_prompt.py +145 -0
- tests/unit/test_generic_report.py +200 -0
- tests/unit/test_graph_commands.py +327 -0
- tests/unit/test_graph_loader.py +299 -0
- tests/unit/test_graph_schema.py +193 -0
- tests/unit/test_inline_schema.py +151 -0
- tests/unit/test_issues.py +164 -0
- tests/unit/test_jinja2_prompts.py +85 -0
- tests/unit/test_langsmith.py +319 -0
- tests/unit/test_llm_factory.py +109 -0
- tests/unit/test_llm_factory_async.py +118 -0
- tests/unit/test_loops.py +403 -0
- tests/unit/test_map_node.py +144 -0
- tests/unit/test_no_backward_compat.py +56 -0
- tests/unit/test_node_factory.py +225 -0
- tests/unit/test_prompts.py +166 -0
- tests/unit/test_python_nodes.py +198 -0
- tests/unit/test_reliability.py +298 -0
- tests/unit/test_result_export.py +234 -0
- tests/unit/test_router.py +296 -0
- tests/unit/test_sanitize.py +99 -0
- tests/unit/test_schema_loader.py +295 -0
- tests/unit/test_shell_tools.py +229 -0
- tests/unit/test_state_builder.py +331 -0
- tests/unit/test_state_builder_map.py +104 -0
- tests/unit/test_state_config.py +197 -0
- tests/unit/test_template.py +190 -0
- tests/unit/test_tool_nodes.py +129 -0
- yamlgraph/__init__.py +35 -0
- yamlgraph/builder.py +110 -0
- yamlgraph/cli/__init__.py +139 -0
- yamlgraph/cli/__main__.py +6 -0
- yamlgraph/cli/commands.py +232 -0
- yamlgraph/cli/deprecation.py +92 -0
- yamlgraph/cli/graph_commands.py +382 -0
- yamlgraph/cli/validators.py +37 -0
- yamlgraph/config.py +67 -0
- yamlgraph/constants.py +66 -0
- yamlgraph/error_handlers.py +226 -0
- yamlgraph/executor.py +275 -0
- yamlgraph/executor_async.py +122 -0
- yamlgraph/graph_loader.py +337 -0
- yamlgraph/map_compiler.py +138 -0
- yamlgraph/models/__init__.py +36 -0
- yamlgraph/models/graph_schema.py +141 -0
- yamlgraph/models/schemas.py +124 -0
- yamlgraph/models/state_builder.py +236 -0
- yamlgraph/node_factory.py +240 -0
- yamlgraph/routing.py +87 -0
- yamlgraph/schema_loader.py +160 -0
- yamlgraph/storage/__init__.py +17 -0
- yamlgraph/storage/checkpointer.py +72 -0
- yamlgraph/storage/database.py +320 -0
- yamlgraph/storage/export.py +269 -0
- yamlgraph/tools/__init__.py +1 -0
- yamlgraph/tools/agent.py +235 -0
- yamlgraph/tools/nodes.py +124 -0
- yamlgraph/tools/python_tool.py +178 -0
- yamlgraph/tools/shell.py +205 -0
- yamlgraph/utils/__init__.py +47 -0
- yamlgraph/utils/conditions.py +157 -0
- yamlgraph/utils/expressions.py +111 -0
- yamlgraph/utils/langsmith.py +308 -0
- yamlgraph/utils/llm_factory.py +118 -0
- yamlgraph/utils/llm_factory_async.py +105 -0
- yamlgraph/utils/logging.py +127 -0
- yamlgraph/utils/prompts.py +116 -0
- yamlgraph/utils/sanitize.py +98 -0
- yamlgraph/utils/template.py +102 -0
- yamlgraph/utils/validators.py +181 -0
- yamlgraph-0.1.1.dist-info/METADATA +854 -0
- yamlgraph-0.1.1.dist-info/RECORD +111 -0
- yamlgraph-0.1.1.dist-info/WHEEL +5 -0
- yamlgraph-0.1.1.dist-info/entry_points.txt +2 -0
- yamlgraph-0.1.1.dist-info/licenses/LICENSE +21 -0
- yamlgraph-0.1.1.dist-info/top_level.txt +3 -0
tests/unit/test_issues.py
@@ -0,0 +1,164 @@
+"""Tests for issues that were identified and fixed.
+
+These tests verify the fixes for issues documented in docs/open-issues.md.
+"""
+
+from unittest.mock import patch
+
+import pytest
+
+from tests.conftest import FixtureAnalysis, FixtureGeneratedContent
+from yamlgraph.builder import build_resume_graph
+from yamlgraph.graph_loader import load_graph_config
+from yamlgraph.models import create_initial_state
+
+# =============================================================================
+# Issue 1: Resume Logic - FIXED: skip_if_exists behavior
+# =============================================================================
+
+
+class TestResumeStartFromParameter:
+    """Issue 1: Resume should skip nodes whose output already exists."""
+
+    @patch("yamlgraph.node_factory.execute_prompt")
+    def test_resume_from_analyze_skips_generate(self, mock_execute):
+        """When state has 'generated', generate node should be skipped.
+
+        Resume works via skip_if_exists: if output already in state, skip LLM call.
+        """
+        # State with generated content already present
+        state = create_initial_state(topic="test", thread_id="issue1")
+        state["generated"] = FixtureGeneratedContent(
+            title="Already Generated",
+            content="This was generated in a previous run",
+            word_count=10,
+            tags=[],
+        )
+
+        # Only mock analyze and summarize - generate should be skipped
+        mock_analysis = FixtureAnalysis(
+            summary="Analysis",
+            key_points=["Point"],
+            sentiment="neutral",
+            confidence=0.8,
+        )
+        mock_execute.side_effect = [mock_analysis, "Final summary"]
+
+        graph = build_resume_graph().compile()
+        result = graph.invoke(state)
+
+        # Expected: 2 calls (analyze, summarize) - generate skipped
+        assert mock_execute.call_count == 2, (
+            f"Expected 2 LLM calls (analyze, summarize), "
+            f"but got {mock_execute.call_count}. "
+            f"Generate should be skipped when 'generated' exists!"
+        )
+        # Original generated content should be preserved
+        assert result["generated"].title == "Already Generated"
+
+    @patch("yamlgraph.node_factory.execute_prompt")
+    def test_resume_from_summarize_skips_generate_and_analyze(self, mock_execute):
+        """When state has 'generated' and 'analysis', only summarize runs."""
+        state = create_initial_state(topic="test", thread_id="issue1b")
+        state["generated"] = FixtureGeneratedContent(
+            title="Done",
+            content="Content",
+            word_count=5,
+            tags=[],
+        )
+        state["analysis"] = FixtureAnalysis(
+            summary="Done",
+            key_points=["Point"],
+            sentiment="positive",
+            confidence=0.9,
+        )
+
+        mock_execute.return_value = "Final summary"
+
+        graph = build_resume_graph().compile()
+        result = graph.invoke(state)
+
+        # Expected: 1 call (summarize only)
+        assert mock_execute.call_count == 1, (
+            f"Expected 1 LLM call (summarize only), "
+            f"but got {mock_execute.call_count}. "
+            f"Generate and analyze should be skipped!"
+        )
+        # Original content should be preserved
+        assert result["generated"].title == "Done"
+        assert result["analysis"].summary == "Done"
+
+    def test_resume_preserves_existing_generated_content(self):
+        """Resuming should NOT overwrite already-generated content."""
+        # Covered by test_resume_from_analyze_skips_generate
+        pass
+
+
+# =============================================================================
+# Issue 2: Conditions Block is Dead Config
+# =============================================================================
+
+
+class TestConditionsFromYAML:
+    """Issue 2: Conditions block was dead config - now uses expression routing."""
+
+    def test_conditions_block_not_in_schema(self):
+        """GraphConfig no longer parses conditions block."""
+        from yamlgraph.config import DEFAULT_GRAPH
+
+        config = load_graph_config(DEFAULT_GRAPH)
+
+        # conditions attribute should not exist
+        assert not hasattr(
+            config, "conditions"
+        ), "GraphConfig should not have 'conditions' attribute - it's dead config"
+
+
+# =============================================================================
+# Issue 5: _entry_point hack
+# =============================================================================
+
+
+class TestEntryPointHack:
+    """Issue 5: Using private _entry_point is fragile."""
+
+    @pytest.fixture
+    def simple_yaml(self, tmp_path):
+        """Minimal YAML for testing."""
+        yaml_content = """
+version: "1.0"
+name: test
+nodes:
+  first:
+    type: llm
+    prompt: generate
+    output_model: yamlgraph.models.GenericReport
+    state_key: generated
+edges:
+  - from: START
+    to: first
+  - from: first
+    to: END
+"""
+        yaml_file = tmp_path / "test.yaml"
+        yaml_file.write_text(yaml_content)
+        return yaml_file
+
+    def test_entry_point_accessible_via_behavior(self, simple_yaml):
+        """Entry point should be testable via graph behavior, not private attrs.
+
+        Currently graph_loader.py sets graph._entry_point for testing.
+        This test shows how to test entry point via behavior instead.
+        """
+        from yamlgraph.graph_loader import load_and_compile
+
+        graph = load_and_compile(simple_yaml)
+        _ = graph.compile()  # Verify it compiles
+
+        # Get the graph structure - this is the proper way
+        # The first node after START should be 'first'
+        nodes = list(graph.nodes.keys())
+        assert "first" in nodes
+
+        # We can also check by looking at edges from __start__
+        # But testing via invocation is more robust
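The resume tests above pin down the skip_if_exists contract: a node whose state_key already holds a value returns without calling the LLM, so a resumed run only re-executes the steps that are still missing. A minimal sketch of that pattern, with hypothetical names (make_node, run_llm); the real wrapper lives in yamlgraph/node_factory.py, whose body is not shown in this diff:

def make_node(state_key, run_llm):
    """Sketch of the skip_if_exists pattern; hypothetical, not yamlgraph's actual code."""
    def node(state):
        if state.get(state_key) is not None:
            return {}  # output already in state: skip the LLM call
        return {state_key: run_llm(state)}  # otherwise compute and store it
    return node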
tests/unit/test_jinja2_prompts.py
@@ -0,0 +1,85 @@
+"""Integration test for Jinja2 prompt templates."""
+
+from yamlgraph.executor import format_prompt, load_prompt
+
+
+def test_jinja2_analyze_list_prompt():
+    """Test the analyze_list prompt with Jinja2 features."""
+    prompt = load_prompt("analyze_list")
+
+    # Test data
+    variables = {
+        "items": [
+            {
+                "title": "Introduction to AI",
+                "topic": "Artificial Intelligence",
+                "word_count": 500,
+                "tags": ["AI", "machine learning", "technology"],
+                "content": "Artificial intelligence is transforming how we interact with technology...",
+            },
+            {
+                "title": "Machine Learning Basics",
+                "topic": "ML Fundamentals",
+                "word_count": 750,
+                "tags": ["ML", "algorithms", "data"],
+                "content": "Machine learning involves training models on data to make predictions...",
+            },
+        ],
+        "min_confidence": 0.8,
+    }
+
+    # Format the template field
+    result = format_prompt(prompt["template"], variables)
+
+    # Verify Jinja2 features are working
+    assert "2 items" in result  # {{ items|length }} filter
+    assert "1. Introduction to AI" in result  # {{ loop.index }}
+    assert "2. Machine Learning Basics" in result
+    assert "**Tags**: AI, machine learning, technology" in result  # join filter
+    assert "**Tags**: ML, algorithms, data" in result
+    assert "confidence >= 0.8" in result  # conditional rendering
+    assert "**Content**:" in result  # if/else conditional
+
+    # Verify loop counter
+    assert "### 1." in result
+    assert "### 2." in result
+
+
+def test_jinja2_prompt_with_empty_list():
+    """Test analyze_list prompt with empty items."""
+    prompt = load_prompt("analyze_list")
+
+    variables = {"items": [], "min_confidence": None}
+
+    result = format_prompt(prompt["template"], variables)
+
+    # Should handle empty list gracefully
+    assert "0 items" in result
+    assert "### 1." not in result  # No items to iterate
+
+
+def test_jinja2_prompt_without_optional_fields():
+    """Test analyze_list prompt without optional fields."""
+    prompt = load_prompt("analyze_list")
+
+    variables = {
+        "items": [
+            {
+                "title": "Short Content",
+                "topic": "Brief",
+                "word_count": 100,
+                "tags": [],  # Empty tags
+                "content": "Short content without tags",
+            },
+        ],
+    }
+
+    result = format_prompt(prompt["template"], variables)
+
+    # Should handle missing/empty optional fields
+    assert "1 items" in result
+    assert "Short Content" in result
+    # Should not show tags section if empty
+    assert "**Tags**:" not in result or "**Tags**: \n" in result
+    # Should not show min_confidence note if not provided
+    assert "confidence >=" not in result
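The assertions above fix the observable behavior of the analyze_list prompt without showing the prompt file itself. A Jinja2 template consistent with every assertion might look like the following sketch (the template string is an assumption for illustration; only jinja2's Template API is real, and yamlgraph's shipped prompt may differ):

from jinja2 import Template

# Hypothetical analyze_list template; consistent with the assertions above
# but not necessarily the prompt file yamlgraph actually ships.
TEMPLATE = Template(
    "Analyze the following {{ items|length }} items:\n"
    "{% for item in items %}"
    "### {{ loop.index }}. {{ item.title }}\n"
    "{% if item.tags %}**Tags**: {{ item.tags|join(', ') }}\n{% endif %}"
    "**Content**: {{ item.content }}\n"
    "{% endfor %}"
    "{% if min_confidence %}Only include points with confidence >= {{ min_confidence }}.{% endif %}"
)

rendered = TEMPLATE.render(items=[], min_confidence=None)
assert "0 items" in rendered and "### 1." not in rendered  # matches the empty-list test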
tests/unit/test_langsmith.py
@@ -0,0 +1,319 @@
+"""Unit tests for LangSmith utilities.
+
+Tests for:
+- share_run() - Create public share links
+- read_run_shared_link() - Get existing share links
+- get_client() - Client creation with env var handling
+- is_tracing_enabled() - Tracing detection
+"""
+
+import os
+from unittest.mock import MagicMock, patch
+
+from yamlgraph.utils.langsmith import (
+    get_client,
+    get_latest_run_id,
+    get_project_name,
+    is_tracing_enabled,
+    read_run_shared_link,
+    share_run,
+)
+
+# =============================================================================
+# is_tracing_enabled() tests
+# =============================================================================
+
+
+class TestIsTracingEnabled:
+    """Tests for is_tracing_enabled()."""
+
+    def test_enabled_with_langchain_tracing_v2_true(self):
+        """LANGCHAIN_TRACING_V2=true enables tracing."""
+        with patch.dict(os.environ, {"LANGCHAIN_TRACING_V2": "true"}, clear=False):
+            # Need to remove LANGSMITH_TRACING if set
+            env = dict(os.environ)
+            env.pop("LANGSMITH_TRACING", None)
+            with patch.dict(os.environ, env, clear=True):
+                os.environ["LANGCHAIN_TRACING_V2"] = "true"
+                assert is_tracing_enabled() is True
+
+    def test_enabled_with_langsmith_tracing_true(self):
+        """LANGSMITH_TRACING=true enables tracing."""
+        with patch.dict(os.environ, {"LANGSMITH_TRACING": "true"}, clear=True):
+            assert is_tracing_enabled() is True
+
+    def test_disabled_when_no_env_vars(self):
+        """No tracing vars means disabled."""
+        with patch.dict(os.environ, {}, clear=True):
+            assert is_tracing_enabled() is False
+
+    def test_disabled_with_false_value(self):
+        """Explicit false value disables tracing."""
+        with patch.dict(os.environ, {"LANGCHAIN_TRACING_V2": "false"}, clear=True):
+            assert is_tracing_enabled() is False
+
+    def test_case_insensitive(self):
+        """TRUE, True, true all work."""
+        with patch.dict(os.environ, {"LANGSMITH_TRACING": "TRUE"}, clear=True):
+            assert is_tracing_enabled() is True
+
+
+# =============================================================================
+# get_project_name() tests
+# =============================================================================
+
+
+class TestGetProjectName:
+    """Tests for get_project_name()."""
+
+    def test_langchain_project(self):
+        """Returns LANGCHAIN_PROJECT when set."""
+        with patch.dict(os.environ, {"LANGCHAIN_PROJECT": "my-project"}, clear=True):
+            assert get_project_name() == "my-project"
+
+    def test_langsmith_project(self):
+        """Returns LANGSMITH_PROJECT when set."""
+        with patch.dict(os.environ, {"LANGSMITH_PROJECT": "other-project"}, clear=True):
+            assert get_project_name() == "other-project"
+
+    def test_langchain_takes_precedence(self):
+        """LANGCHAIN_PROJECT takes precedence over LANGSMITH_PROJECT."""
+        with patch.dict(
+            os.environ,
+            {"LANGCHAIN_PROJECT": "first", "LANGSMITH_PROJECT": "second"},
+            clear=True,
+        ):
+            assert get_project_name() == "first"
+
+    def test_default_value(self):
+        """Returns default when no env vars."""
+        with patch.dict(os.environ, {}, clear=True):
+            assert get_project_name() == "yamlgraph"
+
+
+# =============================================================================
+# get_client() tests
+# =============================================================================
+
+
+class TestGetClient:
+    """Tests for get_client()."""
+
+    def test_returns_none_without_api_key(self):
+        """No API key means no client."""
+        with patch.dict(os.environ, {}, clear=True):
+            assert get_client() is None
+
+    def test_creates_client_with_langchain_key(self):
+        """Creates client with LANGCHAIN_API_KEY."""
+        with patch.dict(
+            os.environ,
+            {"LANGCHAIN_API_KEY": "lsv2_test_key"},
+            clear=True,
+        ):
+            with patch("langsmith.Client") as mock_client:
+                result = get_client()
+                mock_client.assert_called_once()
+                assert result is not None
+
+    def test_creates_client_with_langsmith_key(self):
+        """Creates client with LANGSMITH_API_KEY."""
+        with patch.dict(
+            os.environ,
+            {"LANGSMITH_API_KEY": "lsv2_test_key"},
+            clear=True,
+        ):
+            with patch("langsmith.Client") as mock_client:
+                result = get_client()
+                mock_client.assert_called_once()
+                assert result is not None
+
+    def test_uses_custom_endpoint(self):
+        """Uses LANGSMITH_ENDPOINT if set."""
+        with patch.dict(
+            os.environ,
+            {
+                "LANGSMITH_API_KEY": "key",
+                "LANGSMITH_ENDPOINT": "https://eu.smith.langchain.com",
+            },
+            clear=True,
+        ):
+            with patch("langsmith.Client") as mock_client:
+                get_client()
+                mock_client.assert_called_with(
+                    api_url="https://eu.smith.langchain.com",
+                    api_key="key",
+                )
+
+    def test_returns_none_on_import_error(self):
+        """Returns None if langsmith not installed."""
+        # Verify graceful handling when Client constructor fails
+        with patch.dict(os.environ, {"LANGSMITH_API_KEY": "key"}, clear=True):
+            with patch("langsmith.Client", side_effect=ImportError("No module")):
+                # Should catch ImportError and return None
+                result = get_client()
+                assert result is None
+
+
+# =============================================================================
+# share_run() tests
+# =============================================================================
+
+
+class TestShareRun:
+    """Tests for share_run()."""
+
+    def test_returns_none_when_no_client(self):
+        """Returns None when client unavailable."""
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=None):
+            result = share_run("test-run-id")
+            assert result is None
+
+    def test_shares_provided_run_id(self):
+        """Shares the provided run ID."""
+        mock_client = MagicMock()
+        mock_client.share_run.return_value = "https://smith.langchain.com/public/abc123"
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            result = share_run("my-run-id")
+
+        mock_client.share_run.assert_called_once_with("my-run-id")
+        assert result == "https://smith.langchain.com/public/abc123"
+
+    def test_uses_latest_run_when_no_id(self):
+        """Gets latest run ID when not provided."""
+        mock_client = MagicMock()
+        mock_client.share_run.return_value = "https://share.url"
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            with patch(
+                "yamlgraph.utils.langsmith.get_latest_run_id",
+                return_value="latest-id",
+            ):
+                result = share_run()
+
+        mock_client.share_run.assert_called_once_with("latest-id")
+        assert result == "https://share.url"
+
+    def test_returns_none_when_no_latest_run(self):
+        """Returns None when no latest run found."""
+        mock_client = MagicMock()
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            with patch(
+                "yamlgraph.utils.langsmith.get_latest_run_id",
+                return_value=None,
+            ):
+                result = share_run()
+                assert result is None
+
+    def test_handles_exception_gracefully(self):
+        """Returns None on error (logs warning to stderr)."""
+        mock_client = MagicMock()
+        mock_client.share_run.side_effect = Exception("API error")
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            result = share_run("test-id")
+            assert result is None
+
+
+# =============================================================================
+# read_run_shared_link() tests
+# =============================================================================
+
+
+class TestReadRunSharedLink:
+    """Tests for read_run_shared_link()."""
+
+    def test_returns_none_when_no_client(self):
+        """Returns None when client unavailable."""
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=None):
+            result = read_run_shared_link("test-run-id")
+            assert result is None
+
+    def test_returns_existing_link(self):
+        """Returns existing share link."""
+        mock_client = MagicMock()
+        mock_client.read_run_shared_link.return_value = "https://existing.url"
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            result = read_run_shared_link("my-run-id")
+
+        mock_client.read_run_shared_link.assert_called_once_with("my-run-id")
+        assert result == "https://existing.url"
+
+    def test_returns_none_when_not_shared(self):
+        """Returns None when run not shared (exception)."""
+        mock_client = MagicMock()
+        mock_client.read_run_shared_link.side_effect = Exception("Not found")
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            result = read_run_shared_link("test-id")
+            assert result is None
+
+
+# =============================================================================
+# get_latest_run_id() tests
+# =============================================================================
+
+
+class TestGetLatestRunId:
+    """Tests for get_latest_run_id()."""
+
+    def test_returns_none_when_no_client(self):
+        """Returns None when client unavailable."""
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=None):
+            result = get_latest_run_id()
+            assert result is None
+
+    def test_returns_latest_run_id(self):
+        """Returns ID of most recent run."""
+        mock_run = MagicMock()
+        mock_run.id = "abc-123"
+
+        mock_client = MagicMock()
+        mock_client.list_runs.return_value = [mock_run]
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            with patch(
+                "yamlgraph.utils.langsmith.get_project_name",
+                return_value="test-project",
+            ):
+                result = get_latest_run_id()
+
+        mock_client.list_runs.assert_called_once_with(
+            project_name="test-project", limit=1
+        )
+        assert result == "abc-123"
+
+    def test_returns_none_when_no_runs(self):
+        """Returns None when no runs found."""
+        mock_client = MagicMock()
+        mock_client.list_runs.return_value = []
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            result = get_latest_run_id()
+            assert result is None
+
+    def test_uses_provided_project_name(self):
+        """Uses provided project name."""
+        mock_run = MagicMock()
+        mock_run.id = "run-id"
+        mock_client = MagicMock()
+        mock_client.list_runs.return_value = [mock_run]
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            get_latest_run_id(project_name="custom-project")
+
+        mock_client.list_runs.assert_called_once_with(
+            project_name="custom-project", limit=1
+        )
+
+    def test_handles_exception_gracefully(self):
+        """Returns None on error (logs warning to stderr)."""
+        mock_client = MagicMock()
+        mock_client.list_runs.side_effect = Exception("API error")
+
+        with patch("yamlgraph.utils.langsmith.get_client", return_value=mock_client):
+            result = get_latest_run_id()
+            assert result is None
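Taken together, these tests specify the environment-variable contract: tracing is enabled when either LANGCHAIN_TRACING_V2 or LANGSMITH_TRACING equals "true" (case-insensitive), and the project name prefers LANGCHAIN_PROJECT over LANGSMITH_PROJECT with "yamlgraph" as the default. A sketch consistent with the tests, assuming nothing beyond them (not necessarily the package's actual source):

import os

def is_tracing_enabled() -> bool:
    # Either tracing variable set to "true" (any casing) enables tracing.
    return any(
        os.environ.get(var, "").lower() == "true"
        for var in ("LANGCHAIN_TRACING_V2", "LANGSMITH_TRACING")
    )

def get_project_name() -> str:
    # LANGCHAIN_PROJECT wins over LANGSMITH_PROJECT; "yamlgraph" is the fallback.
    return (
        os.environ.get("LANGCHAIN_PROJECT")
        or os.environ.get("LANGSMITH_PROJECT")
        or "yamlgraph"
    )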
tests/unit/test_llm_factory.py
@@ -0,0 +1,109 @@
+"""Unit tests for LLM factory module."""
+
+import os
+from unittest.mock import patch
+
+import pytest
+from langchain_anthropic import ChatAnthropic
+
+from yamlgraph.utils.llm_factory import clear_cache, create_llm
+
+
+class TestCreateLLM:
+    """Test the create_llm factory function."""
+
+    def setup_method(self):
+        """Clear cache and environment before each test."""
+        clear_cache()
+
+    def test_default_provider_is_anthropic(self):
+        """Should use Anthropic by default."""
+        # Clear PROVIDER from environment to ensure default behavior
+        with patch.dict(os.environ, {"PROVIDER": ""}, clear=False):
+            llm = create_llm(temperature=0.7)
+            assert isinstance(llm, ChatAnthropic)
+            assert llm.temperature == 0.7
+
+    def test_explicit_anthropic_provider(self):
+        """Should create Anthropic LLM when provider='anthropic'."""
+        llm = create_llm(provider="anthropic", temperature=0.5)
+        assert isinstance(llm, ChatAnthropic)
+        assert llm.temperature == 0.5
+
+    def test_mistral_provider(self):
+        """Should create Mistral LLM when provider='mistral'."""
+        with patch.dict(os.environ, {"MISTRAL_API_KEY": "test-key"}):
+            llm = create_llm(provider="mistral", temperature=0.8)
+            # Check it's the right class (will import on first call)
+            assert llm.__class__.__name__ == "ChatMistralAI"
+            assert llm.temperature == 0.8
+
+    def test_openai_provider(self):
+        """Should create OpenAI LLM when provider='openai'."""
+        with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+            llm = create_llm(provider="openai", temperature=0.6)
+            assert llm.__class__.__name__ == "ChatOpenAI"
+            assert llm.temperature == 0.6
+
+    def test_provider_from_environment(self):
+        """Should use PROVIDER env var when no provider specified."""
+        with patch.dict(
+            os.environ, {"PROVIDER": "mistral", "MISTRAL_API_KEY": "test-key"}
+        ):
+            llm = create_llm(temperature=0.7)
+            assert llm.__class__.__name__ == "ChatMistralAI"
+
+    def test_custom_model(self):
+        """Should use custom model when specified."""
+        with patch.dict(os.environ, {"PROVIDER": ""}, clear=False):
+            llm = create_llm(model="claude-opus-4", temperature=0.5)
+            assert isinstance(llm, ChatAnthropic)
+            assert llm.model == "claude-opus-4"
+
+    def test_model_override_parameter(self):
+        """Should prefer model parameter over default."""
+        llm = create_llm(provider="anthropic", model="claude-sonnet-4", temperature=0.7)
+        assert llm.model == "claude-sonnet-4"
+
+    def test_default_models(self):
+        """Should use correct default models for each provider."""
+        # Anthropic default
+        llm_anthropic = create_llm(provider="anthropic", temperature=0.7)
+        assert llm_anthropic.model == "claude-haiku-4-5"
+
+        # Mistral default
+        with patch.dict(os.environ, {"MISTRAL_API_KEY": "test-key"}):
+            llm_mistral = create_llm(provider="mistral", temperature=0.7)
+            assert llm_mistral.model == "mistral-large-latest"
+
+        # OpenAI default (uses model_name attribute)
+        with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+            llm_openai = create_llm(provider="openai", temperature=0.7)
+            assert llm_openai.model_name == "gpt-4o"
+
+    def test_invalid_provider(self):
+        """Should raise error for invalid provider."""
+        with pytest.raises((ValueError, KeyError)):
+            create_llm(provider="invalid-provider", temperature=0.7)
+
+    def test_caching(self):
+        """Should cache LLM instances for same parameters."""
+        llm1 = create_llm(provider="anthropic", temperature=0.7)
+        llm2 = create_llm(provider="anthropic", temperature=0.7)
+        assert llm1 is llm2
+
+        # Different temperature = different instance
+        llm3 = create_llm(provider="anthropic", temperature=0.5)
+        assert llm1 is not llm3
+
+    def test_cache_key_includes_all_params(self):
+        """Cache should differentiate on provider, model, temperature."""
+        llm1 = create_llm(
+            provider="anthropic", model="claude-haiku-4-5", temperature=0.7
+        )
+        llm2 = create_llm(provider="anthropic", model="claude-opus-4", temperature=0.7)
+        assert llm1 is not llm2
+
+        with patch.dict(os.environ, {"MISTRAL_API_KEY": "test-key"}):
+            llm3 = create_llm(provider="mistral", temperature=0.7)
+            assert llm1 is not llm3
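test_caching and test_cache_key_includes_all_params pin down the factory's caching contract: an identical (provider, model, temperature) triple returns the same instance, and changing any element misses the cache. A minimal sketch of that contract, with FakeLLM standing in for the real provider classes (ChatAnthropic, ChatMistralAI, ChatOpenAI); illustrative only, not yamlgraph's actual factory:

from dataclasses import dataclass

@dataclass
class FakeLLM:
    # Stand-in for the real provider classes; illustrative only.
    provider: str
    model: str
    temperature: float

_cache: dict[tuple[str, str, float], FakeLLM] = {}

def create_llm(provider: str, model: str, temperature: float) -> FakeLLM:
    key = (provider, model, temperature)
    if key not in _cache:  # build once per distinct parameter triple
        _cache[key] = FakeLLM(provider, model, temperature)
    return _cache[key]

def clear_cache() -> None:
    _cache.clear()

assert create_llm("anthropic", "claude-haiku-4-5", 0.7) is create_llm(
    "anthropic", "claude-haiku-4-5", 0.7
)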