specscore 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clitic/__init__.py ADDED
File without changes
clitic/analyzer.py ADDED
@@ -0,0 +1,158 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import shutil
5
+ import subprocess
6
+ import time
7
+ from dataclasses import dataclass, field
8
+
9
# Wall-clock limit, in seconds, applied to every individual probe subprocess.
TIMEOUT = 5.0
10
+
11
+
12
@dataclass
class RunResult:
    """Captured outcome of one subprocess invocation made while probing a tool."""

    # Exact argv that was executed (executable first).
    args: list[str]
    # Process return code; -1 is used when the run timed out or failed to start.
    exit_code: int
    # Captured standard output as text ("" when nothing was captured).
    stdout: str
    # Captured standard error as text ("" when nothing was captured).
    stderr: str
    # Wall-clock duration of the attempt, in seconds.
    elapsed: float
    # True when the run was aborted because it exceeded the probe timeout.
    timed_out: bool = False
    # Stringified exception when the process could not be run at all.
    error: str | None = None
21
+
22
+
23
@dataclass
class ToolProbe:
    """Everything collected about one CLI tool across all probe invocations."""

    # Name the caller asked for.
    tool_name: str
    # Resolved executable path, or None when the tool was not found on PATH.
    tool_path: str | None

    # Result of `tool --help` (or of the `-h` fallback when that one succeeded).
    help_result: RunResult | None = None
    # Result of `tool --version` (or of the `-V` fallback when that one succeeded).
    version_result: RunResult | None = None
    # Result of running the tool with no arguments.
    no_args_result: RunResult | None = None
    # Result of running the tool with a deliberately unknown flag.
    bad_args_result: RunResult | None = None
    # Results of the JSON-output probes (--json / --output json / etc.).
    json_results: list[RunResult] = field(default_factory=list)
    # Subcommand names heuristically extracted from the help text.
    subcommand_names: list[str] = field(default_factory=list)
    # (subcommand, result of `tool <subcommand> --help`) pairs.
    subcommand_help_results: list[tuple[str, RunResult]] = field(default_factory=list)
35
+
36
+
37
def _run(cmd: list[str]) -> RunResult:
    """Execute *cmd* once and describe what happened, without ever raising.

    Args:
        cmd: Full argv to execute (no shell is involved).

    Returns:
        A ``RunResult``. On a normal run it carries the real exit code and the
        captured output. If the process exceeds ``TIMEOUT`` the result has
        ``exit_code=-1`` and ``timed_out=True``. If the process cannot be run
        at all (missing executable, permission problem, ...) the result has
        ``exit_code=-1`` and ``error`` set to the exception text.
    """
    t0 = time.monotonic()
    try:
        proc = subprocess.run(
            cmd,
            # Never let the probed tool read from our own stdin: an interactive
            # tool would otherwise block until the timeout, or swallow input
            # that was piped to the caller.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # Output that is not valid in the locale encoding must not be
            # reported as a failure to run the tool.
            errors="replace",
            timeout=TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        return RunResult(
            args=cmd,
            exit_code=-1,
            stdout="",
            stderr="",
            elapsed=time.monotonic() - t0,
            timed_out=True,
        )
    except Exception as exc:
        # Deliberately broad: probing is best-effort, and any failure to launch
        # the process (FileNotFoundError, PermissionError, ...) is reported
        # through the ``error`` field instead of propagating.
        return RunResult(
            args=cmd,
            exit_code=-1,
            stdout="",
            stderr="",
            elapsed=time.monotonic() - t0,
            error=str(exc),
        )

    return RunResult(
        args=cmd,
        exit_code=proc.returncode,
        stdout=proc.stdout,
        stderr=proc.stderr,
        elapsed=time.monotonic() - t0,
    )
63
+
64
+
65
def _extract_subcommands(help_text: str) -> list[str]:
    """Heuristically extract subcommand names from help text.

    Two strategies are tried in order:

    1. A dedicated section introduced by a header such as ``Commands:``,
       ``Available Commands:`` or ``Subcommands:``; the first word of each
       indented line in that section is taken as a subcommand.
    2. Failing that, any indented ``word  Description`` line anywhere in the
       text (git-style listings).

    Args:
        help_text: Raw help output of the tool.

    Returns:
        At most eight distinct names, in order of first appearance. Names that
        are really section or flag words (``usage``, ``options``, ``help``, ...)
        are ignored. Returns an empty list when nothing is recognized.
    """
    lines = help_text.splitlines()
    skip_words = {"usage", "options", "arguments", "flags", "help", "version", "global", "topics"}

    header_re = re.compile(
        r"^(commands?|subcommands?|available commands?|sub-commands?)\s*:?\s*$",
        re.IGNORECASE,
    )
    section_entry_re = re.compile(r"^\s{1,8}([a-z][\w-]{0,30})\b")
    listing_entry_re = re.compile(r"^\s{2,8}([a-z][\w-]{1,20})\s{2,}[A-Za-z]")

    def _distinct_head(names: list[str]) -> list[str]:
        # Drop repeats (order-preserving) before truncating, so a name listed
        # twice is neither probed twice nor allowed to crowd out other names.
        return list(dict.fromkeys(names))[:8]

    # Strategy 1: Named section headers (Commands:, Available Commands:, Subcommands:)
    in_commands_section = False
    section_subcommands: list[str] = []

    for line in lines:
        stripped = line.strip()
        if header_re.match(stripped):
            in_commands_section = True
            continue

        if not in_commands_section:
            continue

        if not stripped:
            # A blank line ends the section, but only once something was
            # collected; blank lines right after the header are skipped.
            if section_subcommands:
                break
            continue

        # A new, unindented "Something:" header ends the commands section.
        if not line[0].isspace() and stripped.endswith(":"):
            break

        match = section_entry_re.match(line)
        if match:
            candidate = match.group(1)
            if candidate.lower() not in skip_words:
                section_subcommands.append(candidate)

    if section_subcommands:
        return _distinct_head(section_subcommands)

    # Strategy 2: Indented "word  Description" lines (git-style)
    pattern_subcommands: list[str] = []
    for line in lines:
        match = listing_entry_re.match(line)
        if match:
            candidate = match.group(1)
            if candidate.lower() not in skip_words:
                pattern_subcommands.append(candidate)

    return _distinct_head(pattern_subcommands)
111
+
112
+
113
def probe(tool_name: str) -> ToolProbe:
    """Run all probes for a CLI tool and return a ToolProbe.

    The tool is resolved on PATH first. When it cannot be found, the returned
    ``ToolProbe`` has ``tool_path=None`` and every result field keeps its
    default. Otherwise the probes run in this order: help, version, no
    arguments, unknown flag, JSON-output flags, and ``--help`` for the first
    three discovered subcommands.
    """
    resolved = shutil.which(tool_name)
    report = ToolProbe(tool_name=tool_name, tool_path=resolved)
    if resolved is None:
        return report  # tool not found; all results will be None

    base = [resolved]

    def _probe_with_fallback(primary_flag: str, fallback_flag: str) -> RunResult:
        # Try the long flag; the short flag is attempted only when the long one
        # ran but exited non-zero, and it replaces the result only on success.
        first = _run(base + [primary_flag])
        if first.exit_code == 0 or first.error:
            return first
        second = _run(base + [fallback_flag])
        return second if second.exit_code == 0 else first

    # help and version probes
    report.help_result = _probe_with_fallback("--help", "-h")
    report.version_result = _probe_with_fallback("--version", "-V")

    # no-args probe
    report.no_args_result = _run(base)

    # bad args probe — unique flag an agent would never pass intentionally
    report.bad_args_result = _run(base + ["--xxxxclitictest-unknown"])

    # JSON output probes (try common patterns); runs that could not be
    # executed at all are left out.
    json_flag_sets = [["--json"], ["--output", "json"], ["-o", "json"], ["--format", "json"]]
    json_runs = (_run(base + flags) for flags in json_flag_sets)
    report.json_results.extend(run for run in json_runs if not run.error)

    # Subcommand discovery from the combined help output
    help_out = report.help_result.stdout or ""
    help_err = report.help_result.stderr or ""
    report.subcommand_names = _extract_subcommands(help_out + help_err)

    # Probe first few subcommands' help
    report.subcommand_help_results.extend(
        (sub, _run(base + [sub, "--help"])) for sub in report.subcommand_names[:3]
    )

    return report
clitic/cli.py ADDED
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+
5
+ import typer
6
+ from typing_extensions import Annotated
7
+
8
+ from .analyzer import probe
9
+ from .scorer import run
10
+ from . import report as reporter
11
+
12
# Root Typer application; the commands below register themselves on it.
clitic_app = typer.Typer(
    add_completion=False,
    name="clitic",
    help="Score a CLI tool for AI-agent readiness — the CLI Intelligence & Compliance Tester.",
)

# Tool scored by the `demo` command.
_DEMO_TOOL = "git"
19
+
20
+
21
def _score_tool(tool_name: str, as_json: bool) -> None:
    """Probe *tool_name*, score it, and print the result.

    Args:
        tool_name: Tool name or path to score.
        as_json: When true, probe without the animated progress display and
            print the result as JSON; otherwise print the regular report.

    Raises:
        typer.Exit: With code 1 when the tool cannot be found, and with code 2
            when the overall score is below 60.
    """
    if as_json:
        tool_probe = probe(tool_name)
    else:
        # The reporter drives the probe itself so it can animate progress.
        tool_probe = reporter.animate_probe(tool_name, probe, tool_name)

    not_found = tool_probe is None or tool_probe.tool_path is None
    if not_found:
        typer.echo(f"Error: '{tool_name}' not found in PATH", err=True)
        raise typer.Exit(1)

    result = run(tool_probe)

    emit = reporter.print_json if as_json else reporter.print_report
    emit(result)

    if result.overall_score < 60:
        raise typer.Exit(2)
44
+
45
+
46
@clitic_app.command()
def score(
    tool: Annotated[str, typer.Argument(help="CLI tool name or path (e.g. 'git', 'gh', 'curl')")],
    json: Annotated[bool, typer.Option("--json", help="Output results as JSON")] = False,
) -> None:
    """Score a CLI tool for AI-agent readiness across 5 dimensions."""
    # Thin CLI wrapper: all of the work happens in _score_tool.
    _score_tool(tool_name=tool, as_json=json)
53
+
54
+
55
@clitic_app.command()
def demo(
    json: Annotated[bool, typer.Option("--json", help="Output results as JSON")] = False,
) -> None:
    """Score 'git' as a demo — see how a real-world CLI tool fares for AI agents."""
    # Check up front so the user gets a demo-specific hint instead of the
    # generic "not found" message from _score_tool.
    demo_tool_available = shutil.which(_DEMO_TOOL) is not None
    if not demo_tool_available:
        typer.echo(f"Error: '{_DEMO_TOOL}' not found in PATH — install git to run the demo.", err=True)
        raise typer.Exit(1)
    _score_tool(tool_name=_DEMO_TOOL, as_json=json)
File without changes
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from ..models import DimensionResult, Issue
6
+ from ..analyzer import ToolProbe
7
+
8
# Display name of this scoring dimension; passed as DimensionResult.name.
NAME = "Argument & Interface Design"
9
+
10
+
11
def _strip_ansi(text: str) -> str:
    """Return *text* with ANSI CSI escape sequences removed.

    Covers every ``ESC [ ... <final byte>`` control sequence — colour/style
    (``m``) as well as cursor movement and erase sequences — so that terminal
    control codes emitted by the tool do not leak into the text analysis.
    """
    # CSI = ESC '[' + parameter bytes (0x30-0x3F) + intermediate bytes
    # (0x20-0x2F) + one final byte (0x40-0x7E).
    return re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", text)
13
+
14
+
15
def score(probe: ToolProbe) -> DimensionResult:
    """Score the tool's argument and interface design from its help output.

    Up to 100 points are awarded across five checks: GNU-style long flags
    (25), kebab-case flag names (20), presence of standard flags such as
    --verbose/--help/--quiet (25), availability of both short and long flag
    forms (15), and any mention of config files or environment variables (15).

    Args:
        probe: Probe data for the tool; only ``help_result`` is read.

    Returns:
        A ``DimensionResult`` with the rounded score and the issues found.
        When no help result exists, the score is 0 with a single error issue.
    """
    issues: list[Issue] = []
    total_score = 0.0

    hr = probe.help_result
    if hr is None:
        return DimensionResult(name=NAME, score=0, issues=[
            Issue(severity="error", message="Tool not found — cannot assess argument design", location="tool")
        ])

    help_text = _strip_ansi((hr.stdout or "") + (hr.stderr or ""))
    help_lower = help_text.lower()

    # Distinct long flags, in order of first appearance. A flag must start with
    # a letter and must not be glued to a preceding word character or hyphen,
    # so "-----" separator rules are not mistaken for flags, and a flag that is
    # mentioned several times (usage line + option list) is counted once.
    long_flags = list(dict.fromkeys(re.findall(r"(?<![\w-])--[A-Za-z][\w-]*", help_text)))
    short_flags = re.findall(r"(?<!\-)(?<!\w)-[a-zA-Z]\b", help_text)

    # 1. GNU-style long flags (--flag) present — 25 pts
    if len(long_flags) >= 2:
        total_score += 25
    elif len(long_flags) == 1:
        total_score += 12
        issues.append(Issue(
            severity="warning",
            message="Only one long flag detected — prefer GNU-style --flags for agent-readable invocations",
            location="flags",
        ))
    else:
        issues.append(Issue(
            severity="warning",
            message="No GNU-style long flags (--flag) detected — agents benefit from descriptive flag names",
            location="flags",
        ))

    # 2. Flags are kebab-case, not camelCase — 20 pts
    camel_flags = [f for f in long_flags if re.search(r"--[a-z]+[A-Z]", f)]
    if not camel_flags:
        total_score += 20
    else:
        issues.append(Issue(
            severity="warning",
            message=f"camelCase flags detected ({', '.join(camel_flags[:3])}) — prefer kebab-case (--my-flag) by convention",
            location="flags",
        ))

    # 3. Standard flags present (--verbose, --help, --quiet) — 25 pts
    # Each entry maps a display label to the substrings that count as a hit.
    standard = {
        "--verbose / -v": ["--verbose", "-v ", "--debug"],
        "--help / -h": ["--help", "-h "],
        "--quiet / -q": ["--quiet", "-q ", "--silent"],
    }
    missing = [
        label for label, variants in standard.items()
        if not any(v in help_lower for v in variants)
    ]
    found = len(standard) - len(missing)
    total_score += (found / len(standard)) * 25
    if found < 2:
        issues.append(Issue(
            severity="info",
            message=f"Missing standard flags: {', '.join(missing)} — standard flags aid agent discoverability",
            location="flags",
        ))

    # 4. Both short and long flag forms — 15 pts
    if long_flags and short_flags:
        total_score += 15
    elif long_flags:
        total_score += 8
        issues.append(Issue(
            severity="info",
            message="No short flag aliases (-x) detected — short aliases improve usability in agent-generated commands",
            location="flags",
        ))

    # 5. Config / environment variable documentation — 15 pts
    config_keywords = ["config", "env", "environment", ".env", "configuration", "settings", "$ "]
    if any(kw in help_lower for kw in config_keywords):
        total_score += 15
    else:
        issues.append(Issue(
            severity="info",
            message="No mention of config files or environment variables — agents benefit from knowing all configuration methods",
            location="--help output",
        ))

    return DimensionResult(name=NAME, score=round(min(total_score, 100), 1), issues=issues)
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from ..models import DimensionResult, Issue
6
+ from ..analyzer import ToolProbe
7
+
8
# Display name of this scoring dimension; passed as DimensionResult.name.
NAME = "Help & Discoverability"
9
+
10
+
11
def score(probe: ToolProbe) -> DimensionResult:
    """Score how well the tool can be discovered through its help output.

    Up to 100 points: --help works (30), help text is substantial (20),
    subcommands are listed (20), --version works (15), and subcommand --help
    works (15). Without a help result the score is 0 with one error issue.
    """
    issues: list[Issue] = []

    def flag(severity: str, message: str, location: str) -> None:
        # Single place where findings are recorded.
        issues.append(Issue(severity=severity, message=message, location=location))

    hr = probe.help_result
    if hr is None:
        flag("error", "Tool not found in PATH", "tool")
        return DimensionResult(name=NAME, score=0, issues=issues)

    points = 0.0
    raw_help = (hr.stdout or "") + (hr.stderr or "")

    # 1. --help works (exit 0) — 30 pts
    if hr.timed_out:
        flag(
            "error",
            "--help timed out — tool hangs on --help, agents cannot introspect it",
            "--help",
        )
    elif hr.exit_code == 0:
        points += 30
    elif len(raw_help.strip()) > 50:
        points += 15  # help text exists but exit code is wrong
        flag(
            "warning",
            f"--help exits {hr.exit_code} instead of 0 — agents may interpret this as failure",
            "--help",
        )
    else:
        flag("error", "--help produced no useful output or failed entirely", "--help")

    # 2. Help text is substantial — 20 pts (measured without colour codes)
    help_clean = re.sub(r"\x1b\[[0-9;]*m", "", raw_help).strip()
    clean_len = len(help_clean)
    if clean_len >= 100:
        points += 20
    elif clean_len >= 40:
        points += 10
        flag(
            "warning",
            f"Help text is very short ({clean_len} chars) — agents need context to invoke the tool correctly",
            "--help output",
        )
    else:
        flag("error", "Help text is minimal or absent", "--help output")

    # 3. Subcommands listed — 20 pts
    if probe.subcommand_names:
        points += 20
    elif clean_len > 200:
        points += 10  # likely a focused single-command tool
        flag(
            "info",
            "No subcommands detected — if this is a multi-command tool, explicit subcommand listing helps agents discover capabilities",
            "--help output",
        )
    else:
        flag("info", "No subcommands detected in help output", "--help output")

    # 4. --version works — 15 pts
    vr = probe.version_result
    if vr and vr.exit_code == 0 and (vr.stdout or vr.stderr).strip():
        points += 15
    elif vr and not vr.error:
        flag(
            "warning",
            "--version flag missing or non-functional — agents cannot verify tool version for compatibility checks",
            "--version",
        )

    # 5. Subcommand --help works — 15 pts, pro-rated by the share that exit 0
    sub_runs = probe.subcommand_help_results
    if sub_runs:
        attempted = len(sub_runs)
        working = sum(1 for _, run in sub_runs if run.exit_code == 0)
        ratio = working / attempted
        points += 15 * ratio
        if ratio < 1.0:
            flag(
                "warning",
                f"{attempted - working}/{attempted} subcommands don't respond to --help",
                "<subcommand> --help",
            )
    else:
        points += 15  # no subcommands to test — not penalized

    return DimensionResult(name=NAME, score=round(min(points, 100), 1), issues=issues)
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from ..models import DimensionResult, Issue
6
+ from ..analyzer import ToolProbe
7
+
8
# Display name of this scoring dimension; passed as DimensionResult.name.
NAME = "Error Handling"
9
+
10
+
11
def _strip_ansi(text: str) -> str:
    """Return *text* with ANSI CSI escape sequences removed.

    Covers every ``ESC [ ... <final byte>`` control sequence — colour/style
    (``m``) as well as cursor movement and erase sequences — so that terminal
    control codes emitted by the tool do not leak into the error analysis.
    """
    # CSI = ESC '[' + parameter bytes (0x30-0x3F) + intermediate bytes
    # (0x20-0x2F) + one final byte (0x40-0x7E).
    return re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", text)
13
+
14
+
15
def _looks_like_traceback(text: str) -> bool:
    """Return True when *text* appears to contain a stack trace or crash dump.

    Recognizes Python tracebacks, JS/Java stack frames and ``Exception in
    thread`` headers, ``SomethingError:`` exception lines, Go panics, and Rust
    panics. Patterns anchored with ``^`` are matched per line.
    """
    patterns = [
        r"Traceback \(most recent call last\)",  # Python
        r'^\s+File ".*", line \d+',  # Python frame line
        r"^\s+at \w+[\.\w]+ \(",  # JS/Java
        r"\w+Error:",  # exception class name followed by its message
        r"Exception in thread",  # Java
        r"^panic: ",  # Go panic header
        r"^goroutine \d+ \[",  # Go goroutine dump
        r"thread '.*' panicked at",  # Rust
    ]
    return any(re.search(p, text, re.MULTILINE) for p in patterns)
24
+
25
+
26
def _error_is_informative(text: str) -> bool:
    """Return True when an error message looks helpful to its reader.

    The text is stripped of ANSI codes, lower-cased and trimmed. It counts as
    informative when it is at least 20 characters long and contains any of a
    fixed set of guidance words (matched as plain substrings).
    """
    normalized = _strip_ansi(text).lower().strip()
    if len(normalized) < 20:
        return False

    hints = (
        "usage", "try", "see", "run", "--help", "expected", "unknown",
        "unrecognized", "invalid", "error:", "did you mean",
    )
    for hint in hints:
        if hint in normalized:
            return True
    return False
33
+
34
+
35
def score(probe: ToolProbe) -> DimensionResult:
    """Score how the tool reacts to an unknown flag.

    Up to 100 points: error output goes to stderr (30), the message is
    informative (40), and no stack trace is exposed (30). When the bad-args
    probe is missing or could not be executed, a neutral score of 50 is
    returned with a single info issue.
    """
    br = probe.bad_args_result
    if br is None or br.error:
        unavailable = Issue(
            severity="info",
            message="Could not probe error handling — tool unavailable",
            location="error probe",
        )
        return DimensionResult(name=NAME, score=50, issues=[unavailable])

    issues: list[Issue] = []

    def flag(severity: str, message: str, location: str) -> None:
        # Single place where findings are recorded.
        issues.append(Issue(severity=severity, message=message, location=location))

    points = 0.0
    stdout_clean = _strip_ansi(br.stdout or "")
    stderr_clean = _strip_ansi(br.stderr or "")
    combined = stdout_clean + stderr_clean
    wrote_stderr = bool(stderr_clean.strip())
    wrote_stdout = bool(stdout_clean.strip())

    # 1. Error output goes to stderr — 30 pts (only judged on a non-zero exit)
    if br.exit_code != 0:
        if wrote_stderr:
            points += 30
        elif wrote_stdout:
            points += 15
            flag(
                "warning",
                "Error message printed to stdout instead of stderr — agents parsing stdout may misinterpret it as valid output",
                "stderr",
            )
        else:
            flag("warning", "Bad args produced no error output at all", "stderr")

    # 2. Error message is informative — 40 pts
    if _error_is_informative(combined):
        points += 40
    elif combined.strip():
        points += 20
        flag(
            "warning",
            "Error message for unknown flag is minimal — agents need clear guidance on what went wrong",
            "error message",
        )
    else:
        flag(
            "error",
            "No error message for invalid flag — agents cannot diagnose the failure",
            "error message",
        )

    # 3. No stack trace exposed — 30 pts
    if _looks_like_traceback(combined):
        flag(
            "error",
            "Stack trace exposed on invalid input — leaks internals and confuses agents parsing errors",
            "stderr",
        )
    else:
        points += 30

    return DimensionResult(name=NAME, score=round(min(points, 100), 1), issues=issues)
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ from ..models import DimensionResult, Issue
4
+ from ..analyzer import ToolProbe
5
+
6
# Display name of this scoring dimension; passed as DimensionResult.name.
NAME = "Exit Code Semantics"
7
+
8
+
9
def score(probe: ToolProbe) -> DimensionResult:
    """Score whether the tool's exit codes carry the expected meaning.

    Up to 100 points: --help exits 0 (25), --version exits 0 (20, or 10 when
    the version probe is unusable), an unknown flag exits non-zero (35), and
    running with no arguments behaves intentionally (20).
    """
    issues: list[Issue] = []

    def flag(severity: str, message: str, location: str) -> None:
        # Single place where findings are recorded.
        issues.append(Issue(severity=severity, message=message, location=location))

    def usable(run) -> bool:
        # A probe counts only if it exists, finished in time and actually ran.
        return bool(run) and not run.timed_out and not run.error

    points = 0.0

    # 1. --help exits 0 — 25 pts
    hr = probe.help_result
    if usable(hr):
        if hr.exit_code == 0:
            points += 25
        else:
            flag(
                "warning",
                f"--help exits {hr.exit_code} instead of 0 — agents interpret non-zero as failure",
                "--help exit code",
            )

    # 2. --version exits 0 — 20 pts
    vr = probe.version_result
    if not usable(vr):
        points += 10  # --version absent — partial credit, not a fatal flaw
    elif vr.exit_code == 0:
        points += 20
    else:
        flag("warning", f"--version exits {vr.exit_code} instead of 0", "--version exit code")

    # 3. Unknown flag exits non-zero — 35 pts
    br = probe.bad_args_result
    if usable(br):
        if br.exit_code != 0:
            points += 35
        else:
            flag(
                "error",
                "Unknown flag '--xxxxclitictest-unknown' returned exit 0 — agents cannot detect invalid invocations",
                "bad args exit code",
            )

    # 4. No-args behavior is intentional — 20 pts
    nar = probe.no_args_result
    if usable(nar):
        produced_output = bool(((nar.stdout or "") + (nar.stderr or "")).strip())
        if produced_output:
            points += 20  # produces output (help or usage error) — intentional
        elif nar.exit_code == 0:
            points += 10
            flag(
                "info",
                "Running with no args produces no output and exits 0 — consider showing usage or help",
                "no-args exit code",
            )
        else:
            flag(
                "warning",
                "No-args invocation is silent with non-zero exit — agents cannot tell what went wrong",
                "no-args exit code",
            )

    return DimensionResult(name=NAME, score=round(min(points, 100), 1), issues=issues)