spark-advisor-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ CLAUDE.md
2
+ .claude/
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *.pyo
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+
12
+ # Virtual environment
13
+ .venv/
14
+ .envrc
15
+
16
+ # uv
17
+ uv.lock
18
+
19
+ # Testing
20
+ .coverage
21
+ .pytest_cache/
22
+ htmlcov/
23
+
24
+ # Type checking
25
+ .mypy_cache/
26
+
27
+ # Ruff
28
+ .ruff_cache/
29
+
30
+ # IDE
31
+ .idea/
32
+ .vscode/
33
+ *.swp
34
+ *.swo
35
+ *~
36
+
37
+ # OS
38
+ .DS_Store
39
+ Thumbs.db
40
+ /.claude/
41
+ tasks
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: spark-advisor-cli
3
+ Version: 0.1.0
4
+ Summary: AI-powered Apache Spark job analyzer and configuration advisor
5
+ Project-URL: Homepage, https://github.com/pstysz/spark-advisor
6
+ Project-URL: Repository, https://github.com/pstysz/spark-advisor
7
+ Project-URL: Issues, https://github.com/pstysz/spark-advisor/issues
8
+ Project-URL: Documentation, https://github.com/pstysz/spark-advisor/blob/main/docs/architecture.md
9
+ Author: Pawel Stysz
10
+ License-Expression: Apache-2.0
11
+ Keywords: ai,apache-spark,claude,mcp,optimization,performance,spark
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Quality Assurance
21
+ Classifier: Topic :: System :: Monitoring
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.12
24
+ Requires-Dist: anthropic>=0.52
25
+ Requires-Dist: httpx>=0.28
26
+ Requires-Dist: orjson>=3.10
27
+ Requires-Dist: rich>=14
28
+ Requires-Dist: spark-advisor-analyzer
29
+ Requires-Dist: spark-advisor-hs-connector
30
+ Requires-Dist: spark-advisor-models
31
+ Requires-Dist: spark-advisor-rules
32
+ Requires-Dist: typer>=0.15
33
+ Description-Content-Type: text/markdown
34
+
35
+ # spark-advisor
36
+
37
+ AI-powered Apache Spark job analyzer and configuration advisor.
38
+
39
+ **Stop guessing Spark configs. Let data and AI tell you what's wrong.**
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install spark-advisor-cli
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ```bash
50
+ # Analyze from event log file (rules-only, free)
51
+ spark-advisor analyze /path/to/event-log.json.gz --no-ai
52
+
53
+ # Analyze with AI recommendations
54
+ export ANTHROPIC_API_KEY=sk-ant-...
55
+ spark-advisor analyze /path/to/event-log.json.gz
56
+
57
+ # Analyze from History Server
58
+ spark-advisor analyze app-20250101120000-0001 -hs http://yarn:18080
59
+
60
+ # Agent mode (multi-turn AI analysis)
61
+ spark-advisor analyze /path/to/event-log.json.gz --agent
62
+
63
+ # Scan recent jobs
64
+ spark-advisor scan -hs http://yarn:18080 --limit 20
65
+ ```
66
+
67
+ ## What it detects
68
+
69
+ 11 deterministic rules: data skew, disk spill, GC pressure, shuffle partitions, executor idle, task failures, small files, broadcast join threshold, serializer choice, dynamic allocation, memory overhead.
70
+
71
+ ## Links
72
+
73
+ - [Full documentation and architecture](https://github.com/pstysz/spark-advisor)
74
+ - [MCP Server setup (Claude Desktop / Cursor)](https://github.com/pstysz/spark-advisor/blob/main/docs/mcp-setup.md)
75
+ - [Contributing](https://github.com/pstysz/spark-advisor/blob/main/CONTRIBUTING.md)
76
+
77
+ ## License
78
+
79
+ Apache 2.0
@@ -0,0 +1,45 @@
1
+ # spark-advisor
2
+
3
+ AI-powered Apache Spark job analyzer and configuration advisor.
4
+
5
+ **Stop guessing Spark configs. Let data and AI tell you what's wrong.**
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install spark-advisor-cli
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # Analyze from event log file (rules-only, free)
17
+ spark-advisor analyze /path/to/event-log.json.gz --no-ai
18
+
19
+ # Analyze with AI recommendations
20
+ export ANTHROPIC_API_KEY=sk-ant-...
21
+ spark-advisor analyze /path/to/event-log.json.gz
22
+
23
+ # Analyze from History Server
24
+ spark-advisor analyze app-20250101120000-0001 -hs http://yarn:18080
25
+
26
+ # Agent mode (multi-turn AI analysis)
27
+ spark-advisor analyze /path/to/event-log.json.gz --agent
28
+
29
+ # Scan recent jobs
30
+ spark-advisor scan -hs http://yarn:18080 --limit 20
31
+ ```
32
+
33
+ ## What it detects
34
+
35
+ 11 deterministic rules: data skew, disk spill, GC pressure, shuffle partitions, executor idle, task failures, small files, broadcast join threshold, serializer choice, dynamic allocation, memory overhead.
36
+
37
+ ## Links
38
+
39
+ - [Full documentation and architecture](https://github.com/pstysz/spark-advisor)
40
+ - [MCP Server setup (Claude Desktop / Cursor)](https://github.com/pstysz/spark-advisor/blob/main/docs/mcp-setup.md)
41
+ - [Contributing](https://github.com/pstysz/spark-advisor/blob/main/CONTRIBUTING.md)
42
+
43
+ ## License
44
+
45
+ Apache 2.0
@@ -0,0 +1,96 @@
1
+ [project]
2
+ name = "spark-advisor-cli"
3
+ version = "0.1.0"
4
+ description = "AI-powered Apache Spark job analyzer and configuration advisor"
5
+ readme = "README.md"
6
+ license = "Apache-2.0"
7
+ requires-python = ">=3.12"
8
+ authors = [
9
+ { name = "Pawel Stysz" },
10
+ ]
11
+ keywords = ["spark", "apache-spark", "performance", "optimization", "ai", "claude", "mcp"]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Environment :: Console",
15
+ "Intended Audience :: Developers",
16
+ "Intended Audience :: System Administrators",
17
+ "License :: OSI Approved :: Apache Software License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Topic :: System :: Monitoring",
22
+ "Topic :: Software Development :: Quality Assurance",
23
+ "Typing :: Typed",
24
+ ]
25
+ dependencies = [
26
+ "spark-advisor-models",
27
+ "spark-advisor-rules",
28
+ "spark-advisor-hs-connector",
29
+ "spark-advisor-analyzer",
30
+ "typer>=0.15",
31
+ "rich>=14",
32
+ "httpx>=0.28",
33
+ "orjson>=3.10",
34
+ "anthropic>=0.52",
35
+ ]
36
+
37
+ [tool.uv.sources]
38
+ spark-advisor-models = { workspace = true }
39
+ spark-advisor-rules = { workspace = true }
40
+ spark-advisor-hs-connector = { workspace = true }
41
+ spark-advisor-analyzer = { workspace = true }
42
+
43
+ [project.scripts]
44
+ spark-advisor = "spark_advisor_cli.app:main"
45
+
46
+ [project.urls]
47
+ Homepage = "https://github.com/pstysz/spark-advisor"
48
+ Repository = "https://github.com/pstysz/spark-advisor"
49
+ Issues = "https://github.com/pstysz/spark-advisor/issues"
50
+ Documentation = "https://github.com/pstysz/spark-advisor/blob/main/docs/architecture.md"
51
+
52
+ [dependency-groups]
53
+ dev = [
54
+ "pytest>=8.3",
55
+ "pytest-cov>=6.1",
56
+ "mypy>=1.15",
57
+ "ruff>=0.11",
58
+ "respx>=0.22",
59
+ ]
60
+
61
+ [build-system]
62
+ requires = ["hatchling"]
63
+ build-backend = "hatchling.build"
64
+
65
+ [tool.hatch.build.targets.wheel]
66
+ packages = ["src/spark_advisor_cli"]
67
+
68
+ [tool.pytest.ini_options]
69
+ testpaths = ["tests"]
70
+ pythonpath = ["src"]
71
+ addopts = [
72
+ "-v",
73
+ "--strict-markers",
74
+ "--tb=short",
75
+ ]
76
+
77
+ [tool.ruff]
78
+ target-version = "py312"
79
+ line-length = 120
80
+ src = ["src", "tests"]
81
+
82
+ [tool.ruff.lint]
83
+ select = ["E", "W", "F", "I", "UP", "B", "SIM", "TCH", "RUF"]
84
+
85
+ [tool.ruff.lint.flake8-type-checking]
86
+ runtime-evaluated-base-classes = ["pydantic.BaseModel", "pydantic_settings.BaseSettings"]
87
+
88
+ [tool.ruff.lint.isort]
89
+ known-first-party = ["spark_advisor_cli", "spark_advisor_models", "spark_advisor_rules", "spark_advisor_hs_connector", "spark_advisor_analyzer"]
90
+
91
+ [tool.mypy]
92
+ python_version = "3.12"
93
+ strict = true
94
+ warn_return_any = true
95
+ warn_unused_configs = true
96
+ plugins = ["pydantic.mypy"]
@@ -0,0 +1 @@
1
+ """CLI tool for Spark job analysis."""
@@ -0,0 +1,19 @@
1
+ import typer
2
+
3
+ from spark_advisor_cli.commands.analyze import analyze
4
+ from spark_advisor_cli.commands.scan import scan
5
+ from spark_advisor_cli.commands.version import version
6
+
7
# Root Typer application; `no_args_is_help` makes bare invocation print usage.
app = typer.Typer(
    name="spark-advisor",
    help="AI-powered Apache Spark job analyzer and configuration advisor",
    no_args_is_help=True,
)

# Register every sub-command under its function name.
for _command in (analyze, scan, version):
    app.command()(_command)


def main() -> None:
    """Console-script entry point (wired up via [project.scripts] in pyproject.toml)."""
    app()
@@ -0,0 +1,158 @@
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Annotated
5
+
6
+ import typer
7
+ from rich.console import Console
8
+
9
+ from spark_advisor_cli.event_log.parser import parse_event_log
10
+ from spark_advisor_cli.output.console import print_analysis_result, print_job_overview, print_stage_breakdown
11
+ from spark_advisor_models.config import AiSettings, Thresholds
12
+ from spark_advisor_models.model import AnalysisResult, JobAnalysis
13
+ from spark_advisor_models.model.output import AnalysisMode, OutputFormat
14
+ from spark_advisor_rules import StaticAnalysisService, rules_for_threshold
15
+
16
# Module-level Rich console shared by every command in this module for styled output.
console = Console()
17
+
18
+
19
+ def _load_job(source: str, history_server: str | None) -> JobAnalysis:
20
+ if history_server:
21
+ return _fetch_from_history_server(source, history_server)
22
+ path = Path(source)
23
+ if not path.exists():
24
+ raise FileNotFoundError(f"Event log file not found: {source}")
25
+ return parse_event_log(path)
26
+
27
+
28
def _fetch_from_history_server(app_id: str, history_server_url: str) -> JobAnalysis:
    """Download and assemble analysis data for ``app_id`` from a Spark History Server.

    The connector is imported lazily so file-based analysis never needs it.
    """
    from spark_advisor_hs_connector.history_server_client import HistoryServerClient
    from spark_advisor_hs_connector.hs_fetcher import fetch_job_analysis

    with HistoryServerClient(history_server_url) as hs_client:
        return fetch_job_analysis(hs_client, app_id)
34
+
35
+
36
+ def _resolve_ai_enabled(no_ai: bool) -> bool:
37
+ if no_ai:
38
+ return False
39
+ return bool(os.environ.get("ANTHROPIC_API_KEY"))
40
+
41
+
42
def _run_analysis(
    job: JobAnalysis,
    thresholds: Thresholds,
    *,
    use_ai: bool,
    mode: AnalysisMode,
    model: str,
    ai_timeout: float,
) -> AnalysisResult:
    """Run the deterministic rule engine, optionally augmented with AI analysis.

    Without AI this is pure local computation; with AI an Anthropic client is
    opened for either single-shot LLM analysis or multi-turn agent mode.
    """
    static = StaticAnalysisService(rules_for_threshold(thresholds))

    if not use_ai:
        # Rules-only path: no network calls, no AI report attached.
        return AnalysisResult(
            app_id=job.app_id,
            job=job,
            rule_results=static.analyze(job),
            ai_report=None,
        )

    # AI dependencies are imported lazily so the no-AI path stays lightweight.
    from spark_advisor_analyzer.ai.client import AnthropicClient
    from spark_advisor_analyzer.ai.service import LlmAnalysisService
    from spark_advisor_analyzer.orchestrator import AdviceOrchestrator

    settings = AiSettings(model=model, api_timeout=ai_timeout)
    with AnthropicClient(timeout=settings.api_timeout) as ai_client:
        llm: LlmAnalysisService | None = None
        agent_orchestrator = None

        if mode == AnalysisMode.AGENT:
            from spark_advisor_analyzer.agent.orchestrator import AgentOrchestrator

            agent_orchestrator = AgentOrchestrator(ai_client, static, settings)
        else:
            llm = LlmAnalysisService(ai_client, settings, thresholds)

        return AdviceOrchestrator(static, llm, agent_orchestrator).run(job, mode=mode)
75
+
76
+
77
def analyze(
    source: Annotated[
        str,
        typer.Argument(help="App ID (with --history-server) or path to event log file (.json or .json.gz)"),
    ],
    history_server: Annotated[
        str | None,
        typer.Option("--history-server", "-hs", help="Spark History Server URL (e.g. http://yarn:18080)"),
    ] = None,
    no_ai: Annotated[
        bool,
        typer.Option("--no-ai", help="Disable AI analysis (rules only)"),
    ] = False,
    agent: Annotated[
        bool,
        typer.Option("--agent", help="Use agent mode (multi-turn AI analysis with tool use)"),
    ] = False,
    model: Annotated[
        str,
        typer.Option("--model", "-m", help="Claude model for AI analysis"),
    ] = "claude-sonnet-4-6",
    output: Annotated[
        Path | None,
        typer.Option("--output", "-o", help="Write suggested config to file (default console)"),
    ] = None,
    output_format: Annotated[
        OutputFormat,
        typer.Option("--format", "-f", help="Output format: text or json"),
    ] = OutputFormat.TEXT,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Show per-stage breakdown"),
    ] = False,
) -> None:
    """Analyze a Spark job and get optimization recommendations.

    Loads job data (local event log, or History Server when --history-server
    is given), runs the deterministic rules and optionally AI analysis, then
    renders the result as rich text or JSON. Exits with code 1 on any error.
    """
    # --agent implies AI, so it conflicts with --no-ai and requires an API key.
    if agent and no_ai:
        console.print("[red]Error: --agent requires AI (cannot use with --no-ai)[/]")
        raise typer.Exit(code=1)

    if agent and not os.environ.get("ANTHROPIC_API_KEY"):
        console.print("[red]Error: --agent requires ANTHROPIC_API_KEY environment variable[/]")
        raise typer.Exit(code=1)

    with console.status("[bold blue]Loading job data...[/]"):
        try:
            job = _load_job(source, history_server)
        except FileNotFoundError as e:
            # Bad local path: short message without the "fetching" wording.
            console.print(f"[red]Error: {e}[/]")
            raise typer.Exit(code=1) from e
        except Exception as e:
            # Anything else (network, parse, History Server) reported uniformly.
            console.print(f"[red]Error fetching job data: {e}[/]")
            raise typer.Exit(code=1) from e

    # Default thresholds; AI is silently skipped if no API key is present
    # (unless --agent was requested, which was validated above).
    thresholds = Thresholds()
    use_ai = _resolve_ai_enabled(no_ai)
    analysis_mode = AnalysisMode.AGENT if agent else AnalysisMode.STANDARD

    # Pick a status line that reflects what is actually about to run.
    if analysis_mode == AnalysisMode.AGENT:
        status_msg = "[bold blue]Running agent analysis (multi-turn AI)...[/]"
    elif use_ai:
        status_msg = "[bold blue]Running analysis (rules + AI)...[/]"
    else:
        status_msg = "[bold blue]Running analysis...[/]"

    with console.status(status_msg):
        try:
            result = _run_analysis(
                job, thresholds, use_ai=use_ai, mode=analysis_mode, model=model, ai_timeout=90.0,
            )
        except Exception as e:
            console.print(f"[red]Analysis error: {e}[/]")
            raise typer.Exit(code=1) from e

    if output_format == OutputFormat.JSON:
        # Raw JSON goes to stdout so it can be piped; rich output would mangle it.
        sys.stdout.write(result.model_dump_json(indent=2) + "\n")
    else:
        print_job_overview(console, job)
        if verbose:
            print_stage_breakdown(console, job)
        print_analysis_result(
            console, result, use_ai=use_ai or analysis_mode == AnalysisMode.AGENT, output_config=output,
        )
@@ -0,0 +1,57 @@
1
+ from typing import Annotated
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+
7
# Module-level Rich console used for the status spinner, errors, and the table.
console = Console()
8
+
9
+
10
def scan(
    history_server: Annotated[
        str,
        typer.Option("--history-server", "-hs", help="Spark History Server URL (e.g. http://yarn:18080)"),
    ],
    limit: Annotated[
        int,
        typer.Option("--limit", "-l", help="Maximum number of applications to list"),
    ] = 20,
) -> None:
    """List recent Spark applications from History Server."""
    # Lazy import: the connector is only needed when this command runs.
    from spark_advisor_hs_connector.history_server_client import HistoryServerClient

    with console.status("[bold blue]Fetching applications...[/]"):
        try:
            with HistoryServerClient(history_server) as hs_client:
                apps = hs_client.list_applications(limit=limit)
        except Exception as e:
            console.print(f"[red]Error connecting to History Server: {e}[/]")
            raise typer.Exit(code=1) from e

    if not apps:
        console.print("[yellow]No applications found.[/]")
        return

    listing = Table(title=f"Recent Spark Applications ({len(apps)})")
    listing.add_column("App ID", style="bold")
    listing.add_column("Name")
    listing.add_column("Duration", justify="right")
    listing.add_column("Status")
    listing.add_column("Spark Version")

    for application in apps:
        # Show details from the most recent attempt, if the app has any.
        attempt = application.attempts[-1] if application.attempts else None
        duration_text, status_text, version_text = "-", "", ""

        if attempt:
            if attempt.duration > 0:
                duration_text = f"{attempt.duration / 60_000:.1f} min"
            status_text = "[green]completed[/]" if attempt.completed else "[yellow]running[/]"
            version_text = attempt.appSparkVersion

        listing.add_row(application.id, application.name, duration_text, status_text, version_text)

    console.print(listing)
@@ -0,0 +1,11 @@
1
+ from importlib.metadata import version as pkg_version
2
+
3
+ from rich.console import Console
4
+
5
# Module-level Rich console used to print the version string.
console = Console()
6
+
7
+
8
def version() -> None:
    """Print the installed spark-advisor-cli version."""
    # Read the version from installed package metadata so it never drifts
    # from what pip actually installed.
    installed = pkg_version("spark-advisor-cli")
    console.print(f"spark-advisor [bold]v{installed}[/]")