testsmith-ai 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
testsmith/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0"
testsmith/cli.py ADDED
@@ -0,0 +1,191 @@
1
+ """testsmith CLI entrypoint."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ import typer
11
+ from rich.console import Console
12
+
13
+ from .csv_writer import write_csv
14
+ from .generator import generate_test_cases
15
+ from .interview import run_interview
16
+ from .loaders import build_context
17
+ from .providers import get_provider
18
+
19
+ app = typer.Typer(
20
+ add_completion=False, help="Generate QA test cases from text and documents."
21
+ )
22
+ console = Console()
23
+
24
+
25
@app.command()
def generate(
    prompt: Optional[str] = typer.Option(
        None, "--prompt", "-p", help="Plain text prompt / feature description."
    ),
    file: list[str] = typer.Option(
        [],
        "--file",
        "-f",
        help="Input source: local file (PDF/DOCX/MD/TXT) or URL. Repeatable.",
    ),
    out: Optional[Path] = typer.Option(
        None,
        "--out",
        "-o",
        help="Output CSV path. If omitted, a name is suggested by the LLM.",
    ),
    provider: Optional[str] = typer.Option(
        None,
        "--provider",
        help="LLM provider: 'anthropic' or 'gemini'. Auto-detected from env if omitted.",
    ),
    model: Optional[str] = typer.Option(
        None,
        "--model",
        "-m",
        help="LLM model name (e.g. 'claude-sonnet-4-6', 'gemini-2.5-flash'). Defaults per provider.",
    ),
    temperature: Optional[float] = typer.Option(
        None,
        "--temperature",
        "-t",
        help="Sampling temperature (0.0–2.0). Lower = more deterministic.",
    ),
    top_p: Optional[float] = typer.Option(
        None,
        "--top-p",
        help="Nucleus sampling top-p (0.0–1.0).",
    ),
    system_prompt: Optional[str] = typer.Option(
        None,
        "--system",
        "-s",
        help="Custom system prompt. Inline text or @path/to/file.txt. Replaces the default.",
    ),
    append_system: bool = typer.Option(
        False,
        "--append-system",
        help="Append --system to the default system prompt instead of replacing it.",
    ),
    user_template: Optional[str] = typer.Option(
        None,
        "--user-template",
        "-u",
        help="Custom user prompt template. Inline text or @path/to/file.txt. Use {context} as a placeholder.",
    ),
    fmt: str = typer.Option(
        "steps",
        "--format",
        help="Test step format: 'steps' (numbered steps) or 'bdd' (Given/When/Then, business-focused).",
    ),
    trace: bool = typer.Option(
        False,
        "--trace",
        help="Add source traceability columns (document, section, quote, derivation) for debugging.",
    ),
    max_tokens: int = typer.Option(
        16384,
        "--max-tokens",
        help="Maximum output tokens for LLM response. Increase for large prompts or thinking models.",
    ),
    interactive: bool = typer.Option(
        False,
        "--interactive",
        "-i",
        help="Let the LLM ask clarifying questions before generating test cases.",
    ),
):
    """Generate test cases and write them to a CSV file."""
    # NOTE: Typer renders the docstring above as the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Exit codes used below: 2 for invalid input/configuration, 1 for a failure
    # while generating or writing the test cases.
    if fmt not in ("steps", "bdd"):
        console.print("[red]Error:[/red] --format must be 'steps' or 'bdd'.")
        raise typer.Exit(code=2)

    # '@path' arguments are read from disk; a missing or undecodable file is a
    # usage error and should be reported as one rather than as a traceback.
    try:
        system_prompt = _resolve_text_arg(system_prompt)
        user_template = _resolve_text_arg(user_template)
    except (OSError, UnicodeDecodeError) as e:
        console.print(f"[red]Error:[/red] could not read prompt file: {e}")
        raise typer.Exit(code=2) from e

    # With no explicit input, fall back to text piped on stdin.
    if not prompt and not file and not sys.stdin.isatty():
        prompt = sys.stdin.read().strip() or None

    if not prompt and not file:
        console.print(
            "[red]Error:[/red] provide --prompt and/or --file (or pipe text via stdin)."
        )
        raise typer.Exit(code=2)

    console.print(f"[cyan]Loading context[/cyan] ({len(file)} source(s))...")
    context, load_errors = build_context(prompt, list(file))
    for err in load_errors:
        console.print(f"[yellow]Warning:[/yellow] failed to load source: {err}")

    # build_context records one error per failed source and also embeds an
    # error placeholder in the context, so the emptiness check below cannot
    # detect "nothing usable was loaded". Catch that case explicitly.
    if not prompt and file and len(load_errors) >= len(file):
        console.print(
            "[red]Error:[/red] none of the sources could be loaded and no prompt was given."
        )
        raise typer.Exit(code=2)

    if not context.strip():
        console.print("[red]Error:[/red] context is empty after loading.")
        raise typer.Exit(code=2)

    try:
        llm = get_provider(provider, model=model, temperature=temperature, top_p=top_p)
    except Exception as e:
        console.print(f"[red]Provider error:[/red] {e}")
        raise typer.Exit(code=2) from e

    if interactive:
        # The interview prompts on stdin, which is unavailable when piped.
        if not sys.stdin.isatty():
            console.print(
                "[yellow]Stdin is piped — skipping interactive mode.[/yellow]"
            )
        else:
            context = run_interview(context, provider=llm, console=console)

    console.print(f"[cyan]Generating test cases via {llm.name} ({llm.model})...[/cyan]")
    try:
        rows, suggested = generate_test_cases(
            context,
            provider=llm,
            system_prompt=system_prompt,
            user_template=user_template,
            append_system=append_system,
            fmt=fmt,
            max_tokens=max_tokens,
            trace=trace,
        )
    except Exception as e:
        console.print(f"[red]Generation failed:[/red] {e}")
        raise typer.Exit(code=1) from e

    if out is None:
        out = _resolve_output_path(suggested)

    # The LLM call has already been paid for at this point; report a write
    # failure cleanly instead of crashing with a traceback.
    try:
        count = write_csv(rows, out, extra_columns=trace)
    except Exception as e:
        console.print(f"[red]Failed to write CSV:[/red] {out}: {e}")
        raise typer.Exit(code=1) from e
    console.print(f"[green]Wrote {count} test case(s) to[/green] {out}")
163
+
164
+
165
# Any run of characters outside [a-z0-9] collapses into a single hyphen.
_SLUG_RE = re.compile(r"[^a-z0-9]+")


def _slugify(value: str) -> str:
    """Return a lowercase, hyphen-separated slug of at most 60 characters.

    Runs of characters other than ``a-z`` and ``0-9`` become a single ``-``
    and leading/trailing hyphens are removed. If nothing usable remains, the
    fallback name ``"test-cases"`` is returned.
    """
    slug = _SLUG_RE.sub("-", value.lower()).strip("-")
    # Truncation can cut right after a separator, so strip again to avoid
    # file names such as "some-feature-.csv".
    return slug[:60].rstrip("-") or "test-cases"
171
+
172
+
173
def _resolve_output_path(suggested: str | None) -> Path:
    """Pick a CSV path in the current directory that does not exist yet.

    The stem is the slugified ``suggested`` name, or ``"test-cases"`` when no
    name is given. If ``<stem>.csv`` already exists, ``<stem>_2.csv``,
    ``<stem>_3.csv``, ... are tried in turn.
    """
    stem = "test-cases" if not suggested else _slugify(suggested)
    candidate = Path(f"{stem}.csv")
    suffix = 1
    while candidate.exists():
        suffix += 1
        candidate = Path(f"{stem}_{suffix}.csv")
    return candidate
181
+
182
+
183
def _resolve_text_arg(value: Optional[str]) -> Optional[str]:
    """Return ``value`` itself, or the contents of the file named by '@path'.

    A value starting with ``@`` is treated as a path (``~`` is expanded) and
    read as UTF-8 text. Anything else, including ``None`` and the empty
    string, is returned unchanged.
    """
    if not value or not value.startswith("@"):
        return value
    source = Path(value[1:]).expanduser()
    return source.read_text(encoding="utf-8")
188
+
189
+
190
+ if __name__ == "__main__":
191
+ app()
testsmith/csv_writer.py ADDED
@@ -0,0 +1,58 @@
1
+ """Write test cases to CSV."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ from pathlib import Path
7
+
8
+ from .generator import CSV_COLUMNS
9
+
10
+
11
def _flatten(row: dict, parent_key: str = "") -> dict:
    """Collapse nested dicts into a single level with dot-joined keys.

    Example: {"source": {"document": "PRD"}} → {"source.document": "PRD"}

    Key order follows a depth-first walk of ``row``. A nested dict that is
    empty contributes no keys.
    """
    flat: dict = {}
    for name, val in row.items():
        path = f"{parent_key}.{name}" if parent_key else name
        if not isinstance(val, dict):
            flat[path] = val
            continue
        # Recurse with the accumulated path as the new prefix.
        for nested_key, nested_val in _flatten(val, path).items():
            flat[nested_key] = nested_val
    return flat
24
+
25
+
26
def write_csv(rows: list[dict], out_path: Path, extra_columns: bool = False) -> int:
    """Write ``rows`` to ``out_path`` as UTF-8 CSV and return ``len(rows)``.

    Parent directories are created as needed. Nested dicts in a row are
    flattened to dot-separated column names first. The header is
    ``CSV_COLUMNS``; with ``extra_columns=True`` any other keys found in the
    rows are appended in order of first appearance, otherwise they are
    dropped. Missing values are written as empty strings.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Flatten nested objects (e.g. source.document, source.section).
    flattened = [_flatten(record) for record in rows]

    header = list(CSV_COLUMNS)
    if extra_columns:
        # Collect non-standard columns, keeping the order they first show up in.
        known = set(CSV_COLUMNS)
        for record in flattened:
            for column in record:
                if column in known:
                    continue
                known.add(column)
                header.append(column)

    with out_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=header, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(
            {column: _stringify(record.get(column, "")) for column in header}
            for record in flattened
        )
    return len(rows)
51
+
52
+
53
def _stringify(value) -> str:
    """Render a cell value as text for the CSV.

    Lists are joined with ``" | "``, ``None`` becomes the empty string, and
    everything else goes through ``str()``.
    """
    if isinstance(value, list):
        return " | ".join(map(str, value))
    return "" if value is None else str(value)
testsmith/generator.py ADDED
@@ -0,0 +1,168 @@
1
+ """Call the configured LLM provider to generate test cases as structured JSON."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+
8
+ from .providers import LLMProvider, get_provider
9
+
10
+ CSV_COLUMNS = [
11
+ "ID",
12
+ "Title",
13
+ "Preconditions",
14
+ "Steps",
15
+ "Expected Result",
16
+ "Priority",
17
+ "Type",
18
+ ]
19
+
20
+ _STEPS_GUIDANCE_DEFAULT = '- Steps: numbered steps, each on its own line (use "\\n" inside the JSON string). Example: "1. Open app\\n2. Click login\\n3. Enter credentials".'
21
+
22
+ _STEPS_GUIDANCE_BDD = """\
23
+ - Steps: write in BDD format using Given / When / Then keywords, each on its own line (use "\\n" inside the JSON string).
24
+ Each step MUST start with one of: "Given", "When", "Then", "And", "But".
25
+ Example: "Given user has an active subscription\\nWhen the subscription renewal date arrives\\nThen the subscription is renewed automatically\\nAnd the user receives a confirmation email"
26
+
27
+ CRITICAL — Business-focused language rules for BDD steps:
28
+ • Steps MUST describe business intent, outcomes, and domain actions — NOT UI interactions.
29
+ • NEVER use UI-action words: click, tap, press, scroll, hover, swipe, drag, select (dropdown),
30
+ type, enter (into field), navigate, open, close, toggle, check (checkbox), uncheck,
31
+ fill in, submit (button), expand, collapse.
32
+ • INSTEAD of "When user clicks the checkout button" → "When user initiates checkout"
33
+ • INSTEAD of "Given user navigates to profile page" → "Given user is viewing their profile"
34
+ • INSTEAD of "When user types email in the login field" → "When user provides login credentials"
35
+ • INSTEAD of "Then user scrolls to the bottom" → "Then user reviews the full content"
36
+ • "Given" sets up the business state or context (not the UI state).
37
+ • "When" describes the business action or event (not the UI gesture).
38
+ • "Then" asserts the business outcome or side-effect (not what appears on screen).
39
+ • If a verification is about data, say what the DATA should be — not what the SCREEN shows."""
40
+
41
+
42
+ def _build_output_contract(fmt: str = "steps", trace: bool = False) -> str:
43
+ steps_guidance = _STEPS_GUIDANCE_BDD if fmt == "bdd" else _STEPS_GUIDANCE_DEFAULT
44
+ trace_guidance = _TRACE_GUIDANCE_TEXT if trace else ""
45
+ return f"""Return ONLY a JSON object (no prose, no markdown fences) with EXACTLY these keys:
46
+ - "suggested_filename": a short, descriptive, kebab-case filename (no extension, no path,
47
+ max 60 chars) reflecting the feature under test. Examples: "login-social-auth",
48
+ "checkout-guest-flow", "password-reset-email".
49
+ - "test_cases": a JSON array where each element is an object with AT LEAST these keys:
50
+ {json.dumps(CSV_COLUMNS)}
51
+ (Additional keys are allowed and will be preserved in the JSON but omitted from the CSV.)
52
+
53
+ Field guidance for each test case:
54
+ - ID: "TC-001", "TC-002", ... sequential.
55
+ - Title: short imperative summary.
56
+ - Preconditions: setup/state required; use "None" if not applicable.
57
+ {steps_guidance}
58
+ - Expected Result: the observable outcome.
59
+ - Priority: one of P0, P1, P2, P3.
60
+ - Type: one of Functional, Negative, Edge, UI, Integration, Performance, Security, Accessibility.
61
+ {trace_guidance}"""
62
+
63
+
64
+ _TRACE_GUIDANCE_TEXT = """
65
+ IMPORTANT — Source traceability (required):
66
+ Each test case MUST also include a "source" object with these keys:
67
+ - "document": which source document or design file the test was derived from
68
+ - "section": specific section, heading, rule ID, or component/screen name
69
+ - "quote": verbatim excerpt (≤ 50 words) from the source that justifies this test.
70
+ For design sources (e.g. Figma) where no text is quotable, describe the visual element
71
+ or interaction pattern instead (e.g. "Toggle switch for Delivery option in Deal Method section").
72
+ - "derivation": one sentence explaining how the test was derived (e.g. boundary test, negative case, happy path)"""
73
+
74
+
75
+ # Default contract for backward compatibility
76
+ OUTPUT_CONTRACT = _build_output_contract("steps")
77
+
78
+ DEFAULT_SYSTEM_PROMPT = f"""You are a senior QA engineer. Given product context (requirements, design docs,
79
+ user prompts), produce a comprehensive set of test cases covering happy paths,
80
+ edge cases, negative tests, and non-functional concerns where relevant.
81
+
82
+ {OUTPUT_CONTRACT}
83
+ """
84
+
85
+
86
def _build_default_system_prompt(fmt: str = "steps", trace: bool = False) -> str:
    """Return the built-in system prompt followed by the output contract.

    Args:
        fmt: Step format passed on to the output contract ("steps" or "bdd").
        trace: When true, the contract also includes the source-traceability
            instructions.
    """
    contract = _build_output_contract(fmt, trace=trace)
    return (
        "You are a senior QA engineer. Given product context (requirements, design docs,\n"
        "user prompts), produce a comprehensive set of test cases covering happy paths,\n"
        "edge cases, negative tests, and non-functional concerns where relevant.\n\n"
        f"{contract}\n"
    )


# The closing instruction asks for the JSON *object* so that it agrees with the
# output contract in the system prompt (which carries "suggested_filename");
# asking for a bare array here invited responses without a file name.
DEFAULT_USER_TEMPLATE = (
    "Product context:\n\n{context}\n\n"
    "Generate the test cases now as the JSON object specified in the instructions."
)


def build_system_prompt(
    custom: str | None,
    append: bool = False,
    fmt: str = "steps",
    trace: bool = False,
) -> str:
    """Assemble the system prompt sent to the model.

    Args:
        custom: Optional user-supplied system prompt text.
        append: If true, ``custom`` is added after the default prompt instead
            of replacing it.
        fmt: Step format for the output contract ("steps" or "bdd").
        trace: Whether the contract must include the traceability section.

    Returns:
        The default prompt when ``custom`` is empty; the default prompt plus
        ``custom`` when ``append`` is set; otherwise ``custom`` followed by
        the output contract.
    """
    if not custom:
        # ``trace`` must reach the default prompt too; otherwise the
        # traceability instructions are dropped on the most common path.
        return _build_default_system_prompt(fmt, trace=trace)
    if append:
        default = _build_default_system_prompt(fmt, trace=trace)
        return f"{default}\n\nAdditional instructions:\n{custom}"
    # Custom replaces default, but we always enforce the output contract
    # so the CSV stays parseable.
    contract = _build_output_contract(fmt, trace=trace)
    return f"{custom}\n\n{contract}"
116
+
117
+
118
def build_user_prompt(context: str, template: str | None) -> str:
    """Build the user message from ``context`` and an optional template.

    Args:
        context: The combined product context.
        template: Custom template text; ``DEFAULT_USER_TEMPLATE`` is used when
            this is empty or ``None``.

    Returns:
        The template with every literal ``{context}`` replaced by ``context``.
        If the template has no ``{context}`` placeholder, the context is
        appended under a "Product context:" heading instead.
    """
    tmpl = template or DEFAULT_USER_TEMPLATE
    if "{context}" in tmpl:
        # Plain substitution rather than str.format(): custom templates often
        # contain other braces (e.g. JSON examples), which str.format() would
        # reject with KeyError/ValueError.
        return tmpl.replace("{context}", context)
    return f"{tmpl}\n\nProduct context:\n\n{context}"
123
+
124
+
125
def generate_test_cases(
    context: str,
    provider: LLMProvider | None = None,
    system_prompt: str | None = None,
    user_template: str | None = None,
    append_system: bool = False,
    fmt: str = "steps",
    max_tokens: int = 16384,
    trace: bool = False,
) -> tuple[list[dict], str | None]:
    """Ask the provider for test cases and parse its reply.

    The system and user prompts are built from the given options, sent through
    ``provider.complete`` (falling back to ``get_provider()`` when no provider
    is passed), and the response text is handed to ``_parse_response``.

    Returns:
        ``(rows, suggested_filename)`` as produced by ``_parse_response``.
    """
    if not provider:
        provider = get_provider()
    reply = provider.complete(
        system=build_system_prompt(
            system_prompt, append=append_system, fmt=fmt, trace=trace
        ),
        user=build_user_prompt(context, user_template),
        max_tokens=max_tokens,
    )
    return _parse_response(reply)
142
+
143
+
144
def _decode_json_at(text: str, start: int):
    """Decode the first JSON value starting at ``text[start]``, ignoring trailing text."""
    try:
        value, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError as err:
        # Same exception type as before, with a hint about the usual cause.
        raise json.JSONDecodeError(
            f"{err.msg} (model output is not valid JSON; it may have been "
            "truncated - try a higher max_tokens)",
            err.doc,
            err.pos,
        ) from err
    return value


def _parse_response(text: str) -> tuple[list[dict], str | None]:
    """Extract the test-case rows and suggested file name from a model reply.

    Accepted shapes, after stripping an optional Markdown code fence:

    * an object ``{"suggested_filename": ..., "test_cases": [...]}``
    * a bare array of test cases (back-compat; no file name)

    Prose before or after the JSON payload is tolerated.

    Returns:
        ``(rows, name)`` where ``name`` is ``None`` unless the object carries
        a non-blank string ``suggested_filename``.

    Raises:
        json.JSONDecodeError: The payload is not valid JSON.
        ValueError: The payload is valid JSON but not in an accepted shape,
            or a test case entry is not an object.
    """
    text = text.strip()
    # Strip accidental code fences.
    fence = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    if fence:
        text = fence.group(1).strip()

    if text[:1] in ("{", "["):
        data = _decode_json_at(text, 0)
    else:
        # Prose-wrapped reply. Prefer a full object (keeps the file name) ...
        data = None
        brace = text.find("{")
        if brace != -1:
            try:
                candidate = _decode_json_at(text, brace)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and isinstance(
                candidate.get("test_cases"), list
            ):
                data = candidate
        if data is None:
            # ... otherwise fall back to the first array, as before.
            data = _decode_json_at(text, max(text.find("["), 0))

    if isinstance(data, dict):
        rows = data.get("test_cases")
        if not isinstance(rows, list):
            raise ValueError("Model response missing 'test_cases' array")
        name = data.get("suggested_filename")
        suggested = name if isinstance(name, str) and name.strip() else None
    elif isinstance(data, list):
        rows, suggested = data, None
    else:
        raise ValueError("Model did not return a JSON array or object")

    # Rows are later flattened as dicts; reject anything else here so the
    # failure is reported as a generation error rather than a crash on write.
    for index, row in enumerate(rows, start=1):
        if not isinstance(row, dict):
            raise ValueError(
                f"Test case #{index} in the model response is not a JSON object"
            )
    return rows, suggested
testsmith/interview.py ADDED
@@ -0,0 +1,130 @@
1
+ """Adaptive interview: LLM asks clarifying questions one at a time, only when needed."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+
8
+ from rich.console import Console
9
+ from rich.prompt import Prompt
10
+
11
+ from .providers import LLMProvider
12
+
13
+ INTERVIEW_SYSTEM_PROMPT = """You are a senior QA engineer preparing to write test cases.
14
+ You will review product context and decide whether you need a clarification from the
15
+ user before you can write high-quality test cases.
16
+
17
+ Rules:
18
+ - Ask a clarifying question ONLY if the answer would MEANINGFULLY change the test cases.
19
+ Do NOT ask about things you can reasonably assume or that are already clear.
20
+ - Ask about things like: user roles, platforms, acceptance criteria, edge cases,
21
+ out-of-scope items, non-functional concerns (a11y, perf, security), integrations.
22
+ - Ask ONE focused question at a time. Do not batch multiple questions.
23
+ - Stop asking as soon as you have enough to write solid test cases. It is perfectly
24
+ fine — and often correct — to ask zero questions.
25
+
26
+ Return ONLY a JSON object (no prose, no markdown fences) with EXACTLY these keys:
27
+ - "need_clarification": boolean
28
+ - "question": string (the next question to ask, or "" if need_clarification is false)
29
+ - "reason": string (short rationale; why you need this, or why you are ready to proceed)
30
+ """
31
+
32
+
33
def run_interview(
    context: str,
    provider: LLMProvider,
    console: Console,
    max_turns: int = 5,
) -> str:
    """Let the model ask up to ``max_turns`` clarifying questions, one per turn.

    Each turn sends the context (plus answers gathered so far) to the provider
    and parses its decision. The loop stops when the model needs nothing more,
    the check fails, the model repeats or omits a question, the user types
    ``done`` or interrupts the prompt, or the turn limit is reached. Blank or
    ``skip`` answers are recorded as skipped.

    Returns:
        The original context, extended with the collected question/answer
        pairs if there are any.
    """
    console.print(
        "[cyan]Checking context for ambiguity...[/cyan] "
        "[dim](type [cyan]done[/cyan] at any prompt to stop early)[/dim]"
    )

    answers: list[tuple[str, str]] = []
    asked: set[str] = set()

    for turn in range(1, max_turns + 1):
        enriched = _build_context_with_answers(context, answers)
        request = (
            f"Product context:\n\n{enriched}\n\n"
            "Decide if you need one more clarifying question. "
            "Return the JSON object now."
        )
        try:
            raw = provider.complete(
                system=INTERVIEW_SYSTEM_PROMPT,
                user=request,
                max_tokens=4096,
            )
            if not (raw and raw.strip()):
                raise ValueError("empty response from model")
            decision = _parse_decision(raw)
        except Exception as e:
            # Best effort: a failed check must not block generation.
            console.print(
                f"[yellow]Clarification check failed ({e}); proceeding.[/yellow]"
            )
            break

        if not decision.get("need_clarification"):
            verdict = (
                "Context looks clear — no questions needed."
                if turn == 1
                else "Enough context gathered — generating now."
            )
            console.print(f"[green]{verdict}[/green]")
            break

        question = (decision.get("question") or "").strip()
        # An empty or repeated question means the model has nothing new to ask.
        if question == "" or question in asked:
            break
        asked.add(question)

        try:
            reply = Prompt.ask(
                f"[green]?[/green] {question}", default="", show_default=False
            )
        except (EOFError, KeyboardInterrupt):
            console.print(
                "\n[yellow]Interview aborted — generating with current answers.[/yellow]"
            )
            break

        reply = reply.strip()
        keyword = reply.lower()
        if keyword == "done":
            break
        # Record the skip so the model doesn't re-ask the same thing.
        skipped = not reply or keyword == "skip"
        answers.append((question, "(user skipped)" if skipped else reply))
    else:
        console.print(
            f"[yellow]Reached max {max_turns} questions — proceeding.[/yellow]"
        )

    return _build_context_with_answers(context, answers)
109
+
110
+
111
def _build_context_with_answers(context: str, answers: list[tuple[str, str]]) -> str:
    """Append the collected Q/A pairs to ``context``.

    With no answers the context is returned unchanged; otherwise a
    "Clarifications from the user" section is added after a ``---`` rule.
    """
    if answers:
        qa_blocks = [f"Q: {question}\nA: {reply}" for question, reply in answers]
        addendum = "\n\n".join(qa_blocks)
        return f"{context}\n\n---\nClarifications from the user:\n\n{addendum}"
    return context
116
+
117
+
118
def _parse_decision(text: str) -> dict:
    """Parse the interview decision object out of a model reply.

    An optional Markdown code fence is stripped, then the first JSON value
    starting at the first ``{`` is decoded. Text before or after that object
    is ignored, so a reply such as ``{...} Let me know!`` still parses.

    Raises:
        json.JSONDecodeError: No valid JSON could be decoded.
        ValueError: The decoded value is not a JSON object.
    """
    text = text.strip()
    fence = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    if fence:
        text = fence.group(1).strip()

    start = text.find("{")
    if start == -1:
        # No object at all: let json report the problem (or decode a
        # non-object value, which is rejected below).
        data = json.loads(text)
    else:
        # raw_decode stops at the end of the first complete value, which
        # tolerates trailing commentary that json.loads would reject.
        data, _ = json.JSONDecoder().raw_decode(text, start)

    if not isinstance(data, dict):
        raise ValueError("Expected a JSON object")
    return data
testsmith/loaders.py ADDED
@@ -0,0 +1,27 @@
1
+ """Build the combined context string from a prompt and a list of references.
2
+
3
+ Loading logic lives in `testsmith.sources`. This module only composes
4
+ `LoadedDoc`s into the final context passed to the LLM.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .sources import SourceError, load
10
+
11
# Horizontal-rule divider placed between the sections of the context.
_SEPARATOR = "\n\n---\n\n"


def build_context(prompt: str | None, refs: list[str]) -> tuple[str, list[str]]:
    """Combine the prompt and every loaded reference into one context string.

    Each part becomes a ``## <title>`` section. A reference that raises
    ``SourceError`` is kept as a section holding an error placeholder, and a
    ``"<ref>: <error>"`` message is collected for the caller.

    Returns:
        ``(context, errors)``: the sections joined by ``_SEPARATOR`` and the
        list of load-error messages.
    """
    sections: list[str] = [f"## User Prompt\n{prompt}"] if prompt else []
    failures: list[str] = []
    for reference in refs:
        try:
            loaded = load(reference)
            section = f"## {loaded.title}\n{loaded.text}"
        except SourceError as exc:
            section = f"## {reference}\n[ERROR loading source: {exc}]"
            failures.append(f"{reference}: {exc}")
        sections.append(section)
    return _SEPARATOR.join(sections), failures