socratic-method 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. socratic_method-0.1.0/.github/workflows/ci.yml +56 -0
  2. socratic_method-0.1.0/.github/workflows/release.yml +75 -0
  3. socratic_method-0.1.0/.github/workflows/tag-release.yml +38 -0
  4. socratic_method-0.1.0/.gitignore +9 -0
  5. socratic_method-0.1.0/.pre-commit-config.yaml +35 -0
  6. socratic_method-0.1.0/LICENSE +21 -0
  7. socratic_method-0.1.0/PKG-INFO +170 -0
  8. socratic_method-0.1.0/README.md +148 -0
  9. socratic_method-0.1.0/evals/fixtures/tech-talk-series-20260704.md +55 -0
  10. socratic_method-0.1.0/evals/graders.py +402 -0
  11. socratic_method-0.1.0/evals/judge-rubric.md +48 -0
  12. socratic_method-0.1.0/evals/run_eval.py +320 -0
  13. socratic_method-0.1.0/evals/scenarios/e1-stress-stop.yaml +29 -0
  14. socratic_method-0.1.0/evals/scenarios/e2-disputed-restatement.yaml +37 -0
  15. socratic_method-0.1.0/evals/scenarios/n1-stress-contradiction.yaml +44 -0
  16. socratic_method-0.1.0/evals/scenarios/n2-develop-aporia.yaml +38 -0
  17. socratic_method-0.1.0/evals/scenarios/n3-develop-quick.yaml +30 -0
  18. socratic_method-0.1.0/evals/scenarios/o1-out-of-scope.yaml +37 -0
  19. socratic_method-0.1.0/pyproject.toml +49 -0
  20. socratic_method-0.1.0/src/socratic_method/__init__.py +11 -0
  21. socratic_method-0.1.0/src/socratic_method/assets/SKILL.md +240 -0
  22. socratic_method-0.1.0/src/socratic_method/assets/idea-brief-v1.schema.json +97 -0
  23. socratic_method-0.1.0/src/socratic_method/assets/references/example-session.md +155 -0
  24. socratic_method-0.1.0/src/socratic_method/cli.py +151 -0
  25. socratic_method-0.1.0/src/socratic_method/installer.py +263 -0
  26. socratic_method-0.1.0/src/socratic_method/validator.py +113 -0
  27. socratic_method-0.1.0/tests/test_cli.py +71 -0
  28. socratic_method-0.1.0/tests/test_detection.py +60 -0
  29. socratic_method-0.1.0/tests/test_installer.py +113 -0
  30. socratic_method-0.1.0/tests/test_validator.py +68 -0
@@ -0,0 +1,56 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [master, main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ lint:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: astral-sh/setup-uv@v5
14
+ - name: pre-commit (all files)
15
+ run: uvx pre-commit run --all-files --show-diff-on-failure
16
+
17
+ test:
18
+ runs-on: ubuntu-latest
19
+ strategy:
20
+ matrix:
21
+ python-version: ["3.11", "3.12", "3.13"]
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+ - uses: astral-sh/setup-uv@v5
25
+ with:
26
+ python-version: ${{ matrix.python-version }}
27
+ - name: Sync (with dev group)
28
+ run: uv sync
29
+ - name: Tests
30
+ run: uv run pytest -q
31
+ - name: CLI smoke
32
+ run: |
33
+ uv run socratic-method --version
34
+ uv run socratic-method setup all --dry-run --root "$RUNNER_TEMP"
35
+ uv run socratic-method status --root "$RUNNER_TEMP"
36
+ uv run socratic-method validate evals/fixtures/tech-talk-series-20260704.md
37
+
38
+ build:
39
+ runs-on: ubuntu-latest
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+ - uses: astral-sh/setup-uv@v5
43
+ - name: Build sdist + wheel
44
+ run: uv build
45
+ - name: Check metadata
46
+ run: uvx twine check dist/*
47
+ - name: Install from wheel and smoke it
48
+ run: |
49
+ uv venv /tmp/wheel-venv
50
+ VIRTUAL_ENV=/tmp/wheel-venv uv pip install dist/*.whl
51
+ /tmp/wheel-venv/bin/socratic-method --version
52
+ /tmp/wheel-venv/bin/socratic-method setup all --dry-run --root "$RUNNER_TEMP"
53
+ - uses: actions/upload-artifact@v4
54
+ with:
55
+ name: dist
56
+ path: dist/
@@ -0,0 +1,75 @@
1
+ # Tag-driven release: push a tag vX.Y.Z (matching src/socratic_method/__init__.py)
2
+ # and this workflow builds, creates the GitHub Release with the dist files attached,
3
+ # and publishes to PyPI via Trusted Publishing (OIDC) — no API token stored in the repo.
4
+ #
5
+ # One-time setup on pypi.org (repo owner):
6
+ # PyPI → Your account → Publishing → Add a new pending publisher:
7
+ # project: socratic-method
8
+ # owner: grammy-jiang
9
+ # repository: socratic-method
10
+ # workflow: release.yml
11
+ # environment: pypi
12
+
13
+ name: Release
14
+
15
+ on:
16
+ push:
17
+ tags:
18
+ - "v*"
19
+ # Fallback for tags created by the "Tag release" helper workflow: tags pushed
20
+ # with the Actions GITHUB_TOKEN do not fire tag-push events, so dispatch this
21
+ # workflow manually with ref = the new tag (e.g. v0.1.0).
22
+ workflow_dispatch:
23
+
24
+ jobs:
25
+ build:
26
+ runs-on: ubuntu-latest
27
+ steps:
28
+ - uses: actions/checkout@v4
29
+ - uses: astral-sh/setup-uv@v5
30
+ - name: Verify tag matches package version
31
+ run: |
32
+ PKG_VERSION=$(uv run python -c "import socratic_method; print(socratic_method.__version__)")
33
+ TAG="${GITHUB_REF_NAME}"
34
+ if [ "v${PKG_VERSION}" != "${TAG}" ]; then
35
+ echo "Tag ${TAG} does not match package version v${PKG_VERSION}" >&2
36
+ exit 1
37
+ fi
38
+ - name: Build sdist + wheel
39
+ run: uv build
40
+ - name: Check metadata
41
+ run: uvx twine check dist/*
42
+ - uses: actions/upload-artifact@v4
43
+ with:
44
+ name: dist
45
+ path: dist/
46
+
47
+ github-release:
48
+ needs: build
49
+ runs-on: ubuntu-latest
50
+ permissions:
51
+ contents: write # required to create the GitHub Release
52
+ steps:
53
+ - uses: actions/download-artifact@v4
54
+ with:
55
+ name: dist
56
+ path: dist/
57
+ - uses: softprops/action-gh-release@v2
58
+ with:
59
+ files: dist/*
60
+ generate_release_notes: true
61
+
62
+ pypi:
63
+ needs: build
64
+ runs-on: ubuntu-latest
65
+ environment:
66
+ name: pypi
67
+ url: https://pypi.org/p/socratic-method
68
+ permissions:
69
+ id-token: write # required for PyPI Trusted Publishing (OIDC)
70
+ steps:
71
+ - uses: actions/download-artifact@v4
72
+ with:
73
+ name: dist
74
+ path: dist/
75
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,38 @@
1
+ # Helper for environments that cannot push tags directly (e.g. remote sandboxes
2
+ # whose git proxy rejects refs/tags pushes). Creates the annotated release tag
3
+ # at the dispatched ref's HEAD after checking it matches the package version.
4
+ #
5
+ # Note: tags pushed with the Actions GITHUB_TOKEN do not trigger tag-push
6
+ # workflows, so after this succeeds, dispatch the Release workflow with
7
+ # ref = the new tag.
8
+
9
+ name: Tag release
10
+
11
+ on:
12
+ workflow_dispatch:
13
+ inputs:
14
+ tag:
15
+ description: "Tag to create (vX.Y.Z, must match src/socratic_method/__init__.py)"
16
+ required: true
17
+ default: "v0.1.0"
18
+
19
+ jobs:
20
+ tag:
21
+ runs-on: ubuntu-latest
22
+ permissions:
23
+ contents: write # required to push the tag
24
+ env:
25
+ TAG: ${{ inputs.tag }}
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - name: Create and push annotated tag
29
+ run: |
30
+ PKG_VERSION=$(sed -n 's/^__version__ = "\(.*\)"$/\1/p' src/socratic_method/__init__.py)
31
+ if [ "v${PKG_VERSION}" != "${TAG}" ]; then
32
+ echo "Tag ${TAG} does not match package version v${PKG_VERSION}" >&2
33
+ exit 1
34
+ fi
35
+ git config user.name "github-actions[bot]"
36
+ git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
37
+ git tag -a "${TAG}" -m "socratic-method ${TAG#v} — agent skill, installer CLI, validator, evals"
38
+ git push origin "refs/tags/${TAG}"
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ dist/
5
+ *.egg-info/
6
+ .pytest_cache/
7
+ uv.lock
8
+ evals/reports/*/*/workdir/
9
+ notes/
@@ -0,0 +1,35 @@
1
+ # One tool per file type, deliberately — stacked linters/formatters on the same type
2
+ # fight each other:
3
+ # Python -> ruff (lint + format in ONE tool; no black/isort/flake8 alongside)
4
+ # YAML -> check-yaml (syntax only; no style formatter)
5
+ # GitHub workflow -> actionlint (the workflow dialect, distinct from plain YAML)
6
+ # JSON -> check-json
7
+ # TOML -> check-toml
8
+ # Markdown -> whitespace hygiene only: a formatter would reflow
9
+ # src/socratic_method/assets/SKILL.md, the shipped skill asset,
10
+ # whose prose shape is deliberate.
11
+ repos:
12
+ - repo: https://github.com/pre-commit/pre-commit-hooks
13
+ rev: v6.0.0
14
+ hooks:
15
+ - id: trailing-whitespace
16
+ - id: end-of-file-fixer
17
+ - id: mixed-line-ending
18
+ args: [--fix=lf]
19
+ - id: check-yaml
20
+ - id: check-json
21
+ - id: check-toml
22
+ - id: check-added-large-files
23
+ - id: check-merge-conflict
24
+
25
+ - repo: https://github.com/astral-sh/ruff-pre-commit
26
+ rev: v0.15.20
27
+ hooks:
28
+ - id: ruff-check
29
+ args: [--fix]
30
+ - id: ruff-format
31
+
32
+ - repo: https://github.com/rhysd/actionlint
33
+ rev: v1.7.12
34
+ hooks:
35
+ - id: actionlint
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Grammy Jiang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: socratic-method
3
+ Version: 0.1.0
4
+ Summary: A Socratic questioning skill for coding agents (Claude Code, Codex, Copilot), with a one-command installer and an idea-brief validator.
5
+ Project-URL: Homepage, https://github.com/grammy-jiang/socratic-method
6
+ Project-URL: Issues, https://github.com/grammy-jiang/socratic-method/issues
7
+ Author: Grammy Jiang
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: agent-skills,claude-code,codex,copilot,socratic-method
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Quality Assurance
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: jsonschema>=4.0
20
+ Requires-Dist: pyyaml>=6.0
21
+ Description-Content-Type: text/markdown
22
+
23
+ # socratic-method
24
+
25
+ A **Socratic questioning skill for coding agents** — Claude Code, OpenAI Codex, and
26
+ GitHub Copilot — packaged with a one-command installer and a deterministic artifact
27
+ validator.
28
+
29
+ Invoke the skill before real work starts (writing software, drafting a plan or document,
30
+ making a decision) and the agent becomes a disciplined questioner: it steelmans your idea,
31
+ interrogates it one question at a time (classic elenchus — six Socratic question types,
32
+ counterexamples, contradiction-surfacing by quoting your own words), and ends with an
33
+ honest verdict — **sharpened**, **aporia** (a genuinely unresolved hole, treated as a
34
+ finding), or **refuted** (only ever out of your own mouth). The result is written down as
35
+ a machine-validatable **idea brief** (`idea-brief-v1`) that downstream work can consume:
36
+ open questions become a research agenda, unvalidated assumptions become a validation
37
+ worklist.
38
+
39
+ The skill was authored and hardened in
40
+ [subagent-factory](https://github.com/grammy-jiang/subagent-factory): seven rounds of
41
+ grounded review plus a six-cell adversarial behavioral eval (examiner vs. scripted user
42
+ simulator, deterministic graders + independent judge), which caught and fixed real
43
+ behavior gaps static review missed. The eval harness ships in this repo under `evals/`.
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ pip install git+https://github.com/grammy-jiang/socratic-method
49
+ # or, without installing:
50
+ uvx --from git+https://github.com/grammy-jiang/socratic-method socratic-method --help
51
+ ```
52
+
53
+ ## Set up the skill for your agents
54
+
55
+ ```bash
56
+ # auto-detect which agents are installed on this machine and configure those
57
+ socratic-method setup
58
+
59
+ # or name platforms explicitly / force all three
60
+ socratic-method setup claude codex
61
+ socratic-method setup all
62
+
63
+ # install into your user home instead of the current project
64
+ socratic-method setup claude --scope user
65
+
66
+ # see what would happen first / check current state / remove
67
+ socratic-method setup --dry-run
68
+ socratic-method status
69
+ socratic-method uninstall claude
70
+ ```
71
+
72
+ With no targets, `setup` **auto-detects** installed agents and configures only those,
73
+ printing the concrete evidence for each detection (never a bare claim):
74
+
75
+ | Agent | Detection signals, in order |
76
+ |---|---|
77
+ | Claude Code | `claude` CLI on PATH; else `~/.claude/` config directory |
78
+ | OpenAI Codex | `codex` CLI on PATH; else `~/.codex/` config directory |
79
+ | GitHub Copilot | `copilot` CLI on PATH; else `gh-copilot` extension; else a `github.copilot*` VS Code extension |
80
+
81
+ If nothing is detected, `setup` installs nothing and tells you how to name targets
82
+ explicitly. `setup all` bypasses detection.
83
+
84
+ `setup` is **idempotent** (content-hash comparison; an identical install reports
85
+ "up to date"), refuses to overwrite locally modified files without `--force`, and after
86
+ every write **reads the files back from disk before reporting success** — the skill's own
87
+ "verify before claiming" rule, applied to its installer.
88
+
89
+ ### Where the skill lands
90
+
91
+ | Platform | `--scope project` (default) | `--scope user` |
92
+ |---|---|---|
93
+ | Claude Code | `<root>/.claude/skills/socratic-method/` | `~/.claude/skills/socratic-method/` |
94
+ | OpenAI Codex | `<root>/.agents/skills/socratic-method/` | `~/.agents/skills/socratic-method/` |
95
+ | GitHub Copilot | `<root>/.github/skills/socratic-method/` | — |
96
+
97
+ Copilot also reads a repo's `.claude/skills/`, so if the Claude project install is already
98
+ present, the Copilot step reports "already covered" and skips (a duplicate would trigger
99
+ twice); `--force` installs to `.github/skills/` anyway. Paths live in one data-driven
100
+ registry (`installer.py`) — if a platform moves its skills directory, the fix is one line.
101
+
102
+ ## Use the skill
103
+
104
+ In any agent with the skill installed:
105
+
106
+ > stress-test this plan: …
107
+ > help me think through …
108
+ > poke holes in this idea: …
109
+
110
+ Or invoke directly: `/socratic-method <idea> [--mode stress|develop] [--depth quick|standard|deep]`.
111
+ The session ends with the brief saved to `notes/idea-briefs/<slug>-YYYYMMDD.md`.
112
+
113
+ ## Validate a brief
114
+
115
+ ```bash
116
+ socratic-method validate notes/idea-briefs/my-idea-20260704.md
117
+ ```
118
+
119
+ Checks the YAML frontmatter against the packaged `idea-brief-v1` JSON schema plus the
120
+ cross-field rules a schema can't express (e.g. `verdict: refuted` requires the two
121
+ colliding claims verbatim in the body; `verdict: aporia` requires open questions).
122
+
123
+ ## Behavioral eval harness (`evals/`)
124
+
125
+ The six-cell regression matrix that hardened the skill: normal cells (planted
126
+ contradiction → refuted; genuine unknowns → aporia; quick-depth cadence), edge cells
127
+ (mid-session stop; disputed restatement), and an out-of-scope cell (fully specified plan →
128
+ decline). Each cell runs a live examiner against a scripted user simulator, then grades
129
+ the transcript with deterministic graders and an independent model judge.
130
+
131
+ ```bash
132
+ python evals/run_eval.py --dry-run # list cells, no calls
133
+ python evals/run_eval.py --cell O1 # one cell
134
+ python evals/run_eval.py # full matrix — spawns ~30-60 headless `claude` calls
135
+ ```
136
+
137
+ Requires the `claude` CLI and real tokens; run cells individually while iterating. A cell
138
+ passes only when **all** deterministic graders pass AND the judge confirms the expected
139
+ behavior with no fabrication.
140
+
141
+ ## Development
142
+
143
+ ```bash
144
+ uv sync # or: pip install -e . && pip install pytest
145
+ uv run pytest # validator negatives, installer + detection behavior, CLI smoke
146
+ uv run pre-commit install # one tool per file type: ruff (py), syntax checks (yaml/json/toml), actionlint
147
+ ```
148
+
149
+ CI (`.github/workflows/ci.yml`) runs pre-commit on all files, runs the test suite on
150
+ Python 3.11–3.13, builds the sdist/wheel, checks metadata, and smoke-tests the CLI installed from the built wheel.
151
+
152
+ ## Releasing to PyPI
153
+
154
+ Publishing uses **PyPI Trusted Publishing** (OIDC) — no API token lives in the repo.
155
+ One-time setup on pypi.org: *Account → Publishing → Add a new pending publisher* with
156
+ project `socratic-method`, owner `grammy-jiang`, repository `socratic-method`, workflow
157
+ `release.yml`, environment `pypi`. Then, to release:
158
+
159
+ 1. Bump `__version__` in `src/socratic_method/__init__.py`.
160
+ 2. Push a tag `vX.Y.Z` (it must match `__version__` — the workflow verifies this and fails otherwise); the workflow creates the GitHub Release itself.
161
+ 3. `release.yml` builds, checks, and publishes; the package appears at
162
+ `pypi.org/project/socratic-method/`, after which `pip install socratic-method` works.
163
+
164
+ Maintenance rule inherited from the skill's history: a behavioral failure is not fixed
165
+ until something durable changes — a grader, a scenario, or a rail in `SKILL.md` — never a
166
+ prose spot-fix alone.
167
+
168
+ ## License
169
+
170
+ MIT
@@ -0,0 +1,148 @@
1
+ # socratic-method
2
+
3
+ A **Socratic questioning skill for coding agents** — Claude Code, OpenAI Codex, and
4
+ GitHub Copilot — packaged with a one-command installer and a deterministic artifact
5
+ validator.
6
+
7
+ Invoke the skill before real work starts (writing software, drafting a plan or document,
8
+ making a decision) and the agent becomes a disciplined questioner: it steelmans your idea,
9
+ interrogates it one question at a time (classic elenchus — six Socratic question types,
10
+ counterexamples, contradiction-surfacing by quoting your own words), and ends with an
11
+ honest verdict — **sharpened**, **aporia** (a genuinely unresolved hole, treated as a
12
+ finding), or **refuted** (only ever out of your own mouth). The result is written down as
13
+ a machine-validatable **idea brief** (`idea-brief-v1`) that downstream work can consume:
14
+ open questions become a research agenda, unvalidated assumptions become a validation
15
+ worklist.
16
+
17
+ The skill was authored and hardened in
18
+ [subagent-factory](https://github.com/grammy-jiang/subagent-factory): seven rounds of
19
+ grounded review plus a six-cell adversarial behavioral eval (examiner vs. scripted user
20
+ simulator, deterministic graders + independent judge), which caught and fixed real
21
+ behavior gaps static review missed. The eval harness ships in this repo under `evals/`.
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install git+https://github.com/grammy-jiang/socratic-method
27
+ # or, without installing:
28
+ uvx --from git+https://github.com/grammy-jiang/socratic-method socratic-method --help
29
+ ```
30
+
31
+ ## Set up the skill for your agents
32
+
33
+ ```bash
34
+ # auto-detect which agents are installed on this machine and configure those
35
+ socratic-method setup
36
+
37
+ # or name platforms explicitly / force all three
38
+ socratic-method setup claude codex
39
+ socratic-method setup all
40
+
41
+ # install into your user home instead of the current project
42
+ socratic-method setup claude --scope user
43
+
44
+ # see what would happen first / check current state / remove
45
+ socratic-method setup --dry-run
46
+ socratic-method status
47
+ socratic-method uninstall claude
48
+ ```
49
+
50
+ With no targets, `setup` **auto-detects** installed agents and configures only those,
51
+ printing the concrete evidence for each detection (never a bare claim):
52
+
53
+ | Agent | Detection signals, in order |
54
+ |---|---|
55
+ | Claude Code | `claude` CLI on PATH; else `~/.claude/` config directory |
56
+ | OpenAI Codex | `codex` CLI on PATH; else `~/.codex/` config directory |
57
+ | GitHub Copilot | `copilot` CLI on PATH; else `gh-copilot` extension; else a `github.copilot*` VS Code extension |
58
+
59
+ If nothing is detected, `setup` installs nothing and tells you how to name targets
60
+ explicitly. `setup all` bypasses detection.
61
+
62
+ `setup` is **idempotent** (content-hash comparison; an identical install reports
63
+ "up to date"), refuses to overwrite locally modified files without `--force`, and after
64
+ every write **reads the files back from disk before reporting success** — the skill's own
65
+ "verify before claiming" rule, applied to its installer.
66
+
67
+ ### Where the skill lands
68
+
69
+ | Platform | `--scope project` (default) | `--scope user` |
70
+ |---|---|---|
71
+ | Claude Code | `<root>/.claude/skills/socratic-method/` | `~/.claude/skills/socratic-method/` |
72
+ | OpenAI Codex | `<root>/.agents/skills/socratic-method/` | `~/.agents/skills/socratic-method/` |
73
+ | GitHub Copilot | `<root>/.github/skills/socratic-method/` | — |
74
+
75
+ Copilot also reads a repo's `.claude/skills/`, so if the Claude project install is already
76
+ present, the Copilot step reports "already covered" and skips (a duplicate would trigger
77
+ twice); `--force` installs to `.github/skills/` anyway. Paths live in one data-driven
78
+ registry (`installer.py`) — if a platform moves its skills directory, the fix is one line.
79
+
80
+ ## Use the skill
81
+
82
+ In any agent with the skill installed:
83
+
84
+ > stress-test this plan: …
85
+ > help me think through …
86
+ > poke holes in this idea: …
87
+
88
+ Or invoke directly: `/socratic-method <idea> [--mode stress|develop] [--depth quick|standard|deep]`.
89
+ The session ends with the brief saved to `notes/idea-briefs/<slug>-YYYYMMDD.md`.
90
+
91
+ ## Validate a brief
92
+
93
+ ```bash
94
+ socratic-method validate notes/idea-briefs/my-idea-20260704.md
95
+ ```
96
+
97
+ Checks the YAML frontmatter against the packaged `idea-brief-v1` JSON schema plus the
98
+ cross-field rules a schema can't express (e.g. `verdict: refuted` requires the two
99
+ colliding claims verbatim in the body; `verdict: aporia` requires open questions).
100
+
101
+ ## Behavioral eval harness (`evals/`)
102
+
103
+ The six-cell regression matrix that hardened the skill: normal cells (planted
104
+ contradiction → refuted; genuine unknowns → aporia; quick-depth cadence), edge cells
105
+ (mid-session stop; disputed restatement), and an out-of-scope cell (fully specified plan →
106
+ decline). Each cell runs a live examiner against a scripted user simulator, then grades
107
+ the transcript with deterministic graders and an independent model judge.
108
+
109
+ ```bash
110
+ python evals/run_eval.py --dry-run # list cells, no calls
111
+ python evals/run_eval.py --cell O1 # one cell
112
+ python evals/run_eval.py # full matrix — spawns ~30-60 headless `claude` calls
113
+ ```
114
+
115
+ Requires the `claude` CLI and real tokens; run cells individually while iterating. A cell
116
+ passes only when **all** deterministic graders pass AND the judge confirms the expected
117
+ behavior with no fabrication.
118
+
119
+ ## Development
120
+
121
+ ```bash
122
+ uv sync # or: pip install -e . && pip install pytest
123
+ uv run pytest # validator negatives, installer + detection behavior, CLI smoke
124
+ uv run pre-commit install # one tool per file type: ruff (py), syntax checks (yaml/json/toml), actionlint
125
+ ```
126
+
127
+ CI (`.github/workflows/ci.yml`) runs pre-commit on all files, runs the test suite on
128
+ Python 3.11–3.13, builds the sdist/wheel, checks metadata, and smoke-tests the CLI installed from the built wheel.
129
+
130
+ ## Releasing to PyPI
131
+
132
+ Publishing uses **PyPI Trusted Publishing** (OIDC) — no API token lives in the repo.
133
+ One-time setup on pypi.org: *Account → Publishing → Add a new pending publisher* with
134
+ project `socratic-method`, owner `grammy-jiang`, repository `socratic-method`, workflow
135
+ `release.yml`, environment `pypi`. Then, to release:
136
+
137
+ 1. Bump `__version__` in `src/socratic_method/__init__.py`.
138
+ 2. Push a tag `vX.Y.Z` (it must match `__version__` — the workflow verifies this and fails otherwise); the workflow creates the GitHub Release itself.
139
+ 3. `release.yml` builds, checks, and publishes; the package appears at
140
+ `pypi.org/project/socratic-method/`, after which `pip install socratic-method` works.
141
+
142
+ Maintenance rule inherited from the skill's history: a behavioral failure is not fixed
143
+ until something durable changes — a grader, a scenario, or a rail in `SKILL.md` — never a
144
+ prose spot-fix alone.
145
+
146
+ ## License
147
+
148
+ MIT
@@ -0,0 +1,55 @@
1
+ ---
2
+ schema: idea-brief-v1
3
+ idea: tech-talk-series
4
+ date: 2026-07-04
5
+ mode: stress
6
+ depth: standard
7
+ verdict: sharpened
8
+ thesis_final: "A monthly 30-minute internal tech talk, seeded with 3 pre-committed speakers,
9
+ to improve cross-team visibility — piloted for a quarter before any cadence increase."
10
+ questions_asked: 9
11
+ types_used: [clarification, assumptions, evidence, implications]
12
+ assumptions:
13
+ - text: "Cross-team visibility is a widely felt problem, not just the proposer's"
14
+ status: unvalidated
15
+ - text: "Enough engineers will volunteer to speak at monthly cadence"
16
+ status: risky
17
+ - text: "30 minutes is short enough that attendance survives busy weeks"
18
+ status: unvalidated
19
+ open_questions:
20
+ - "Do at least 5 other people report being surprised by another team's work in the last quarter?"
21
+ - "Who are speakers 1-3, by name, before anything is announced?"
22
+ constraints:
23
+ - "No budget; must run on existing meeting infrastructure"
24
+ - "Cannot be mandatory (user: 'the moment it's mandatory it's dead')"
25
+ next_step: "Ask the last 5 people who joined what they wish they'd known — validates the
26
+ visibility assumption before committing anyone's Friday."
27
+ ---
28
+
29
+ # Idea brief: internal tech-talk series
30
+
31
+ ## What changed under questioning
32
+ Initial: weekly talks to fix visibility and grow juniors, weekly cadence essential.
33
+ Final: monthly pilot with pre-committed speakers; the junior-growth goal was dropped as a
34
+ separate untested claim riding along with the visibility one.
35
+
36
+ ## Scope
37
+ For: engineers across teams at the user's company. Out of scope: external audience,
38
+ recorded-content library, mandatory attendance.
39
+
40
+ ## Assumptions surfaced
41
+ The visibility problem rests on a sample of one (the user's own migration surprise). Speaker
42
+ supply was the load-bearing weakness: the user's own words — "I'd be begging people" —
43
+ forced the cadence change.
44
+
45
+ ## Contradictions & how resolved
46
+ "Weekly, otherwise it loses momentum" vs "most engineers here hate presenting" — resolved by
47
+ the user: cadence yielded ("monthly with strong talks beats weekly with filler").
48
+
49
+ ## Open questions (aporia)
50
+ Whether the visibility problem is real beyond the user, and who the first three named
51
+ speakers are. Both must be answered before announcing anything.
52
+
53
+ ## Suggested next step
54
+ Interview the last 5 joiners about what they wish they'd known. Cheap, fast, and directly
55
+ tests the founding assumption.