socratic-method 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- socratic_method-0.1.0/.github/workflows/ci.yml +56 -0
- socratic_method-0.1.0/.github/workflows/release.yml +75 -0
- socratic_method-0.1.0/.github/workflows/tag-release.yml +38 -0
- socratic_method-0.1.0/.gitignore +9 -0
- socratic_method-0.1.0/.pre-commit-config.yaml +35 -0
- socratic_method-0.1.0/LICENSE +21 -0
- socratic_method-0.1.0/PKG-INFO +170 -0
- socratic_method-0.1.0/README.md +148 -0
- socratic_method-0.1.0/evals/fixtures/tech-talk-series-20260704.md +55 -0
- socratic_method-0.1.0/evals/graders.py +402 -0
- socratic_method-0.1.0/evals/judge-rubric.md +48 -0
- socratic_method-0.1.0/evals/run_eval.py +320 -0
- socratic_method-0.1.0/evals/scenarios/e1-stress-stop.yaml +29 -0
- socratic_method-0.1.0/evals/scenarios/e2-disputed-restatement.yaml +37 -0
- socratic_method-0.1.0/evals/scenarios/n1-stress-contradiction.yaml +44 -0
- socratic_method-0.1.0/evals/scenarios/n2-develop-aporia.yaml +38 -0
- socratic_method-0.1.0/evals/scenarios/n3-develop-quick.yaml +30 -0
- socratic_method-0.1.0/evals/scenarios/o1-out-of-scope.yaml +37 -0
- socratic_method-0.1.0/pyproject.toml +49 -0
- socratic_method-0.1.0/src/socratic_method/__init__.py +11 -0
- socratic_method-0.1.0/src/socratic_method/assets/SKILL.md +240 -0
- socratic_method-0.1.0/src/socratic_method/assets/idea-brief-v1.schema.json +97 -0
- socratic_method-0.1.0/src/socratic_method/assets/references/example-session.md +155 -0
- socratic_method-0.1.0/src/socratic_method/cli.py +151 -0
- socratic_method-0.1.0/src/socratic_method/installer.py +263 -0
- socratic_method-0.1.0/src/socratic_method/validator.py +113 -0
- socratic_method-0.1.0/tests/test_cli.py +71 -0
- socratic_method-0.1.0/tests/test_detection.py +60 -0
- socratic_method-0.1.0/tests/test_installer.py +113 -0
- socratic_method-0.1.0/tests/test_validator.py +68 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master, main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
lint:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: astral-sh/setup-uv@v5
|
|
14
|
+
- name: pre-commit (all files)
|
|
15
|
+
run: uvx pre-commit run --all-files --show-diff-on-failure
|
|
16
|
+
|
|
17
|
+
test:
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
strategy:
|
|
20
|
+
matrix:
|
|
21
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
- uses: astral-sh/setup-uv@v5
|
|
25
|
+
with:
|
|
26
|
+
python-version: ${{ matrix.python-version }}
|
|
27
|
+
- name: Sync (with dev group)
|
|
28
|
+
run: uv sync
|
|
29
|
+
- name: Tests
|
|
30
|
+
run: uv run pytest -q
|
|
31
|
+
- name: CLI smoke
|
|
32
|
+
run: |
|
|
33
|
+
uv run socratic-method --version
|
|
34
|
+
uv run socratic-method setup all --dry-run --root "$RUNNER_TEMP"
|
|
35
|
+
uv run socratic-method status --root "$RUNNER_TEMP"
|
|
36
|
+
uv run socratic-method validate evals/fixtures/tech-talk-series-20260704.md
|
|
37
|
+
|
|
38
|
+
build:
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v4
|
|
42
|
+
- uses: astral-sh/setup-uv@v5
|
|
43
|
+
- name: Build sdist + wheel
|
|
44
|
+
run: uv build
|
|
45
|
+
- name: Check metadata
|
|
46
|
+
run: uvx twine check dist/*
|
|
47
|
+
- name: Install from wheel and smoke it
|
|
48
|
+
run: |
|
|
49
|
+
uv venv /tmp/wheel-venv
|
|
50
|
+
VIRTUAL_ENV=/tmp/wheel-venv uv pip install dist/*.whl
|
|
51
|
+
/tmp/wheel-venv/bin/socratic-method --version
|
|
52
|
+
/tmp/wheel-venv/bin/socratic-method setup all --dry-run --root "$RUNNER_TEMP"
|
|
53
|
+
- uses: actions/upload-artifact@v4
|
|
54
|
+
with:
|
|
55
|
+
name: dist
|
|
56
|
+
path: dist/
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Tag-driven release: push a tag vX.Y.Z (matching src/socratic_method/__init__.py)
|
|
2
|
+
# and this workflow builds, creates the GitHub Release with the dist files attached,
|
|
3
|
+
# and publishes to PyPI via Trusted Publishing (OIDC) — no API token stored in the repo.
|
|
4
|
+
#
|
|
5
|
+
# One-time setup on pypi.org (repo owner):
|
|
6
|
+
# PyPI → Your account → Publishing → Add a new pending publisher:
|
|
7
|
+
# project: socratic-method
|
|
8
|
+
# owner: grammy-jiang
|
|
9
|
+
# repository: socratic-method
|
|
10
|
+
# workflow: release.yml
|
|
11
|
+
# environment: pypi
|
|
12
|
+
|
|
13
|
+
name: Release
|
|
14
|
+
|
|
15
|
+
on:
|
|
16
|
+
push:
|
|
17
|
+
tags:
|
|
18
|
+
- "v*"
|
|
19
|
+
# Fallback for tags created by the "Tag release" helper workflow: tags pushed
|
|
20
|
+
# with the Actions GITHUB_TOKEN do not fire tag-push events, so dispatch this
|
|
21
|
+
# workflow manually with ref = the new tag (e.g. v0.1.0).
|
|
22
|
+
workflow_dispatch:
|
|
23
|
+
|
|
24
|
+
jobs:
|
|
25
|
+
build:
|
|
26
|
+
runs-on: ubuntu-latest
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
- uses: astral-sh/setup-uv@v5
|
|
30
|
+
- name: Verify tag matches package version
|
|
31
|
+
run: |
|
|
32
|
+
PKG_VERSION=$(uv run python -c "import socratic_method; print(socratic_method.__version__)")
|
|
33
|
+
TAG="${GITHUB_REF_NAME}"
|
|
34
|
+
if [ "v${PKG_VERSION}" != "${TAG}" ]; then
|
|
35
|
+
echo "Tag ${TAG} does not match package version v${PKG_VERSION}" >&2
|
|
36
|
+
exit 1
|
|
37
|
+
fi
|
|
38
|
+
- name: Build sdist + wheel
|
|
39
|
+
run: uv build
|
|
40
|
+
- name: Check metadata
|
|
41
|
+
run: uvx twine check dist/*
|
|
42
|
+
- uses: actions/upload-artifact@v4
|
|
43
|
+
with:
|
|
44
|
+
name: dist
|
|
45
|
+
path: dist/
|
|
46
|
+
|
|
47
|
+
github-release:
|
|
48
|
+
needs: build
|
|
49
|
+
runs-on: ubuntu-latest
|
|
50
|
+
permissions:
|
|
51
|
+
contents: write # required to create the GitHub Release
|
|
52
|
+
steps:
|
|
53
|
+
- uses: actions/download-artifact@v4
|
|
54
|
+
with:
|
|
55
|
+
name: dist
|
|
56
|
+
path: dist/
|
|
57
|
+
- uses: softprops/action-gh-release@v2
|
|
58
|
+
with:
|
|
59
|
+
files: dist/*
|
|
60
|
+
generate_release_notes: true
|
|
61
|
+
|
|
62
|
+
pypi:
|
|
63
|
+
needs: build
|
|
64
|
+
runs-on: ubuntu-latest
|
|
65
|
+
environment:
|
|
66
|
+
name: pypi
|
|
67
|
+
url: https://pypi.org/p/socratic-method
|
|
68
|
+
permissions:
|
|
69
|
+
id-token: write # required for PyPI Trusted Publishing (OIDC)
|
|
70
|
+
steps:
|
|
71
|
+
- uses: actions/download-artifact@v4
|
|
72
|
+
with:
|
|
73
|
+
name: dist
|
|
74
|
+
path: dist/
|
|
75
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Helper for environments that cannot push tags directly (e.g. remote sandboxes
|
|
2
|
+
# whose git proxy rejects refs/tags pushes). Creates the annotated release tag
|
|
3
|
+
# at the dispatched ref's HEAD after checking it matches the package version.
|
|
4
|
+
#
|
|
5
|
+
# Note: tags pushed with the Actions GITHUB_TOKEN do not trigger tag-push
|
|
6
|
+
# workflows, so after this succeeds, dispatch the Release workflow with
|
|
7
|
+
# ref = the new tag.
|
|
8
|
+
|
|
9
|
+
name: Tag release
|
|
10
|
+
|
|
11
|
+
on:
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
inputs:
|
|
14
|
+
tag:
|
|
15
|
+
description: "Tag to create (vX.Y.Z, must match src/socratic_method/__init__.py)"
|
|
16
|
+
required: true
|
|
17
|
+
default: "v0.1.0"
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
tag:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
permissions:
|
|
23
|
+
contents: write # required to push the tag
|
|
24
|
+
env:
|
|
25
|
+
TAG: ${{ inputs.tag }}
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- name: Create and push annotated tag
|
|
29
|
+
run: |
|
|
30
|
+
PKG_VERSION=$(sed -n 's/^__version__ = "\(.*\)"$/\1/p' src/socratic_method/__init__.py)
|
|
31
|
+
if [ "v${PKG_VERSION}" != "${TAG}" ]; then
|
|
32
|
+
echo "Tag ${TAG} does not match package version v${PKG_VERSION}" >&2
|
|
33
|
+
exit 1
|
|
34
|
+
fi
|
|
35
|
+
git config user.name "github-actions[bot]"
|
|
36
|
+
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
|
37
|
+
git tag -a "${TAG}" -m "socratic-method ${TAG#v} — agent skill, installer CLI, validator, evals"
|
|
38
|
+
git push origin "refs/tags/${TAG}"
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# One tool per file type, deliberately — stacked linters/formatters on the same type
|
|
2
|
+
# fight each other:
|
|
3
|
+
# Python -> ruff (lint + format in ONE tool; no black/isort/flake8 alongside)
|
|
4
|
+
# YAML -> check-yaml (syntax only; no style formatter)
|
|
5
|
+
# GitHub workflow -> actionlint (the workflow dialect, distinct from plain YAML)
|
|
6
|
+
# JSON -> check-json
|
|
7
|
+
# TOML -> check-toml
|
|
8
|
+
# Markdown -> whitespace hygiene only: a formatter would reflow
|
|
9
|
+
# src/socratic_method/assets/SKILL.md, the shipped skill asset,
|
|
10
|
+
# whose prose shape is deliberate.
|
|
11
|
+
repos:
|
|
12
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
13
|
+
rev: v6.0.0
|
|
14
|
+
hooks:
|
|
15
|
+
- id: trailing-whitespace
|
|
16
|
+
- id: end-of-file-fixer
|
|
17
|
+
- id: mixed-line-ending
|
|
18
|
+
args: [--fix=lf]
|
|
19
|
+
- id: check-yaml
|
|
20
|
+
- id: check-json
|
|
21
|
+
- id: check-toml
|
|
22
|
+
- id: check-added-large-files
|
|
23
|
+
- id: check-merge-conflict
|
|
24
|
+
|
|
25
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
26
|
+
rev: v0.15.20
|
|
27
|
+
hooks:
|
|
28
|
+
- id: ruff-check
|
|
29
|
+
args: [--fix]
|
|
30
|
+
- id: ruff-format
|
|
31
|
+
|
|
32
|
+
- repo: https://github.com/rhysd/actionlint
|
|
33
|
+
rev: v1.7.12
|
|
34
|
+
hooks:
|
|
35
|
+
- id: actionlint
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Grammy Jiang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: socratic-method
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Socratic questioning skill for coding agents (Claude Code, Codex, Copilot), with a one-command installer and an idea-brief validator.
|
|
5
|
+
Project-URL: Homepage, https://github.com/grammy-jiang/socratic-method
|
|
6
|
+
Project-URL: Issues, https://github.com/grammy-jiang/socratic-method/issues
|
|
7
|
+
Author: Grammy Jiang
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agent-skills,claude-code,codex,copilot,socratic-method
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: jsonschema>=4.0
|
|
20
|
+
Requires-Dist: pyyaml>=6.0
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# socratic-method
|
|
24
|
+
|
|
25
|
+
A **Socratic questioning skill for coding agents** — Claude Code, OpenAI Codex, and
|
|
26
|
+
GitHub Copilot — packaged with a one-command installer and a deterministic artifact
|
|
27
|
+
validator.
|
|
28
|
+
|
|
29
|
+
Invoke the skill before real work starts (writing software, drafting a plan or document,
|
|
30
|
+
making a decision) and the agent becomes a disciplined questioner: it steelmans your idea,
|
|
31
|
+
interrogates it one question at a time (classic elenchus — six Socratic question types,
|
|
32
|
+
counterexamples, contradiction-surfacing by quoting your own words), and ends with an
|
|
33
|
+
honest verdict — **sharpened**, **aporia** (a genuinely unresolved hole, treated as a
|
|
34
|
+
finding), or **refuted** (only ever out of your own mouth). The result is written down as
|
|
35
|
+
a machine-validatable **idea brief** (`idea-brief-v1`) that downstream work can consume:
|
|
36
|
+
open questions become a research agenda, unvalidated assumptions become a validation
|
|
37
|
+
worklist.
|
|
38
|
+
|
|
39
|
+
The skill was authored and hardened in
|
|
40
|
+
[subagent-factory](https://github.com/grammy-jiang/subagent-factory): seven rounds of
|
|
41
|
+
grounded review plus a six-cell adversarial behavioral eval (examiner vs. scripted user
|
|
42
|
+
simulator, deterministic graders + independent judge), which caught and fixed real
|
|
43
|
+
behavior gaps static review missed. The eval harness ships in this repo under `evals/`.
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install git+https://github.com/grammy-jiang/socratic-method
|
|
49
|
+
# or, without installing:
|
|
50
|
+
uvx --from git+https://github.com/grammy-jiang/socratic-method socratic-method --help
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Set up the skill for your agents
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# auto-detect which agents are installed on this machine and configure those
|
|
57
|
+
socratic-method setup
|
|
58
|
+
|
|
59
|
+
# or name platforms explicitly / force all three
|
|
60
|
+
socratic-method setup claude codex
|
|
61
|
+
socratic-method setup all
|
|
62
|
+
|
|
63
|
+
# install into your user home instead of the current project
|
|
64
|
+
socratic-method setup claude --scope user
|
|
65
|
+
|
|
66
|
+
# see what would happen first / check current state / remove
|
|
67
|
+
socratic-method setup --dry-run
|
|
68
|
+
socratic-method status
|
|
69
|
+
socratic-method uninstall claude
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
With no targets, `setup` **auto-detects** installed agents and configures only those,
|
|
73
|
+
printing the concrete evidence for each detection (never a bare claim):
|
|
74
|
+
|
|
75
|
+
| Agent | Detection signals, in order |
|
|
76
|
+
|---|---|
|
|
77
|
+
| Claude Code | `claude` CLI on PATH; else `~/.claude/` config directory |
|
|
78
|
+
| OpenAI Codex | `codex` CLI on PATH; else `~/.codex/` config directory |
|
|
79
|
+
| GitHub Copilot | `copilot` CLI on PATH; else `gh-copilot` extension; else a `github.copilot*` VS Code extension |
|
|
80
|
+
|
|
81
|
+
If nothing is detected, `setup` installs nothing and tells you how to name targets
|
|
82
|
+
explicitly. `setup all` bypasses detection.
|
|
83
|
+
|
|
84
|
+
`setup` is **idempotent** (content-hash comparison; an identical install reports
|
|
85
|
+
"up to date"), refuses to overwrite locally modified files without `--force`, and after
|
|
86
|
+
every write **reads the files back from disk before reporting success** — the skill's own
|
|
87
|
+
"verify before claiming" rule, applied to its installer.
|
|
88
|
+
|
|
89
|
+
### Where the skill lands
|
|
90
|
+
|
|
91
|
+
| Platform | `--scope project` (default) | `--scope user` |
|
|
92
|
+
|---|---|---|
|
|
93
|
+
| Claude Code | `<root>/.claude/skills/socratic-method/` | `~/.claude/skills/socratic-method/` |
|
|
94
|
+
| OpenAI Codex | `<root>/.agents/skills/socratic-method/` | `~/.agents/skills/socratic-method/` |
|
|
95
|
+
| GitHub Copilot | `<root>/.github/skills/socratic-method/` | — |
|
|
96
|
+
|
|
97
|
+
Copilot also reads a repo's `.claude/skills/`, so if the Claude project install is already
|
|
98
|
+
present, the Copilot step reports "already covered" and skips (a duplicate would trigger
|
|
99
|
+
twice); `--force` installs to `.github/skills/` anyway. Paths live in one data-driven
|
|
100
|
+
registry (`installer.py`) — if a platform moves its skills directory, the fix is one line.
|
|
101
|
+
|
|
102
|
+
## Use the skill
|
|
103
|
+
|
|
104
|
+
In any agent with the skill installed:
|
|
105
|
+
|
|
106
|
+
> stress-test this plan: …
|
|
107
|
+
> help me think through …
|
|
108
|
+
> poke holes in this idea: …
|
|
109
|
+
|
|
110
|
+
Or invoke directly: `/socratic-method <idea> [--mode stress|develop] [--depth quick|standard|deep]`.
|
|
111
|
+
The session ends with the brief saved to `notes/idea-briefs/<slug>-YYYYMMDD.md`.
|
|
112
|
+
|
|
113
|
+
## Validate a brief
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
socratic-method validate notes/idea-briefs/my-idea-20260704.md
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Checks the YAML frontmatter against the packaged `idea-brief-v1` JSON schema plus the
|
|
120
|
+
cross-field rules a schema can't express (e.g. `verdict: refuted` requires the two
|
|
121
|
+
colliding claims verbatim in the body; `verdict: aporia` requires open questions).
|
|
122
|
+
|
|
123
|
+
## Behavioral eval harness (`evals/`)
|
|
124
|
+
|
|
125
|
+
The six-cell regression matrix that hardened the skill: normal cells (planted
|
|
126
|
+
contradiction → refuted; genuine unknowns → aporia; quick-depth cadence), edge cells
|
|
127
|
+
(mid-session stop; disputed restatement), and an out-of-scope cell (fully specified plan →
|
|
128
|
+
decline). Each cell runs a live examiner against a scripted user simulator, then grades
|
|
129
|
+
the transcript with deterministic graders and an independent model judge.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
python evals/run_eval.py --dry-run # list cells, no calls
|
|
133
|
+
python evals/run_eval.py --cell O1 # one cell
|
|
134
|
+
python evals/run_eval.py # full matrix — spawns ~30-60 headless `claude` calls
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Requires the `claude` CLI and real tokens; run cells individually while iterating. A cell
|
|
138
|
+
passes only when **all** deterministic graders pass AND the judge confirms the expected
|
|
139
|
+
behavior with no fabrication.
|
|
140
|
+
|
|
141
|
+
## Development
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
uv sync # or: pip install -e . && pip install pytest
|
|
145
|
+
uv run pytest # validator negatives, installer + detection behavior, CLI smoke
|
|
146
|
+
uv run pre-commit install # one tool per file type: ruff (py), syntax checks (yaml/json/toml), actionlint
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
CI (`.github/workflows/ci.yml`) runs the test suite on Python 3.11–3.13, builds the
|
|
150
|
+
sdist/wheel, checks metadata, and smoke-tests the CLI installed from the built wheel.
|
|
151
|
+
|
|
152
|
+
## Releasing to PyPI
|
|
153
|
+
|
|
154
|
+
Publishing uses **PyPI Trusted Publishing** (OIDC) — no API token lives in the repo.
|
|
155
|
+
One-time setup on pypi.org: *Account → Publishing → Add a new pending publisher* with
|
|
156
|
+
project `socratic-method`, owner `grammy-jiang`, repository `socratic-method`, workflow
|
|
157
|
+
`release.yml`, environment `pypi`. Then, to release:
|
|
158
|
+
|
|
159
|
+
1. Bump `__version__` in `src/socratic_method/__init__.py`.
|
|
160
|
+
2. Push a tag `vX.Y.Z` (must match — the workflow verifies, then creates the GitHub Release).
|
|
161
|
+
3. `release.yml` builds, checks, and publishes; the package appears at
|
|
162
|
+
`pypi.org/project/socratic-method/`, after which `pip install socratic-method` works.
|
|
163
|
+
|
|
164
|
+
Maintenance rule inherited from the skill's history: a behavioral failure is not fixed
|
|
165
|
+
until something durable changes — a grader, a scenario, or a rail in `SKILL.md` — never a
|
|
166
|
+
prose spot-fix alone.
|
|
167
|
+
|
|
168
|
+
## License
|
|
169
|
+
|
|
170
|
+
MIT
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# socratic-method
|
|
2
|
+
|
|
3
|
+
A **Socratic questioning skill for coding agents** — Claude Code, OpenAI Codex, and
|
|
4
|
+
GitHub Copilot — packaged with a one-command installer and a deterministic artifact
|
|
5
|
+
validator.
|
|
6
|
+
|
|
7
|
+
Invoke the skill before real work starts (writing software, drafting a plan or document,
|
|
8
|
+
making a decision) and the agent becomes a disciplined questioner: it steelmans your idea,
|
|
9
|
+
interrogates it one question at a time (classic elenchus — six Socratic question types,
|
|
10
|
+
counterexamples, contradiction-surfacing by quoting your own words), and ends with an
|
|
11
|
+
honest verdict — **sharpened**, **aporia** (a genuinely unresolved hole, treated as a
|
|
12
|
+
finding), or **refuted** (only ever out of your own mouth). The result is written down as
|
|
13
|
+
a machine-validatable **idea brief** (`idea-brief-v1`) that downstream work can consume:
|
|
14
|
+
open questions become a research agenda, unvalidated assumptions become a validation
|
|
15
|
+
worklist.
|
|
16
|
+
|
|
17
|
+
The skill was authored and hardened in
|
|
18
|
+
[subagent-factory](https://github.com/grammy-jiang/subagent-factory): seven rounds of
|
|
19
|
+
grounded review plus a six-cell adversarial behavioral eval (examiner vs. scripted user
|
|
20
|
+
simulator, deterministic graders + independent judge), which caught and fixed real
|
|
21
|
+
behavior gaps static review missed. The eval harness ships in this repo under `evals/`.
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install git+https://github.com/grammy-jiang/socratic-method
|
|
27
|
+
# or, without installing:
|
|
28
|
+
uvx --from git+https://github.com/grammy-jiang/socratic-method socratic-method --help
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Set up the skill for your agents
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# auto-detect which agents are installed on this machine and configure those
|
|
35
|
+
socratic-method setup
|
|
36
|
+
|
|
37
|
+
# or name platforms explicitly / force all three
|
|
38
|
+
socratic-method setup claude codex
|
|
39
|
+
socratic-method setup all
|
|
40
|
+
|
|
41
|
+
# install into your user home instead of the current project
|
|
42
|
+
socratic-method setup claude --scope user
|
|
43
|
+
|
|
44
|
+
# see what would happen first / check current state / remove
|
|
45
|
+
socratic-method setup --dry-run
|
|
46
|
+
socratic-method status
|
|
47
|
+
socratic-method uninstall claude
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
With no targets, `setup` **auto-detects** installed agents and configures only those,
|
|
51
|
+
printing the concrete evidence for each detection (never a bare claim):
|
|
52
|
+
|
|
53
|
+
| Agent | Detection signals, in order |
|
|
54
|
+
|---|---|
|
|
55
|
+
| Claude Code | `claude` CLI on PATH; else `~/.claude/` config directory |
|
|
56
|
+
| OpenAI Codex | `codex` CLI on PATH; else `~/.codex/` config directory |
|
|
57
|
+
| GitHub Copilot | `copilot` CLI on PATH; else `gh-copilot` extension; else a `github.copilot*` VS Code extension |
|
|
58
|
+
|
|
59
|
+
If nothing is detected, `setup` installs nothing and tells you how to name targets
|
|
60
|
+
explicitly. `setup all` bypasses detection.
|
|
61
|
+
|
|
62
|
+
`setup` is **idempotent** (content-hash comparison; an identical install reports
|
|
63
|
+
"up to date"), refuses to overwrite locally modified files without `--force`, and after
|
|
64
|
+
every write **reads the files back from disk before reporting success** — the skill's own
|
|
65
|
+
"verify before claiming" rule, applied to its installer.
|
|
66
|
+
|
|
67
|
+
### Where the skill lands
|
|
68
|
+
|
|
69
|
+
| Platform | `--scope project` (default) | `--scope user` |
|
|
70
|
+
|---|---|---|
|
|
71
|
+
| Claude Code | `<root>/.claude/skills/socratic-method/` | `~/.claude/skills/socratic-method/` |
|
|
72
|
+
| OpenAI Codex | `<root>/.agents/skills/socratic-method/` | `~/.agents/skills/socratic-method/` |
|
|
73
|
+
| GitHub Copilot | `<root>/.github/skills/socratic-method/` | — |
|
|
74
|
+
|
|
75
|
+
Copilot also reads a repo's `.claude/skills/`, so if the Claude project install is already
|
|
76
|
+
present, the Copilot step reports "already covered" and skips (a duplicate would trigger
|
|
77
|
+
twice); `--force` installs to `.github/skills/` anyway. Paths live in one data-driven
|
|
78
|
+
registry (`installer.py`) — if a platform moves its skills directory, the fix is one line.
|
|
79
|
+
|
|
80
|
+
## Use the skill
|
|
81
|
+
|
|
82
|
+
In any agent with the skill installed:
|
|
83
|
+
|
|
84
|
+
> stress-test this plan: …
|
|
85
|
+
> help me think through …
|
|
86
|
+
> poke holes in this idea: …
|
|
87
|
+
|
|
88
|
+
Or invoke directly: `/socratic-method <idea> [--mode stress|develop] [--depth quick|standard|deep]`.
|
|
89
|
+
The session ends with the brief saved to `notes/idea-briefs/<slug>-YYYYMMDD.md`.
|
|
90
|
+
|
|
91
|
+
## Validate a brief
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
socratic-method validate notes/idea-briefs/my-idea-20260704.md
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Checks the YAML frontmatter against the packaged `idea-brief-v1` JSON schema plus the
|
|
98
|
+
cross-field rules a schema can't express (e.g. `verdict: refuted` requires the two
|
|
99
|
+
colliding claims verbatim in the body; `verdict: aporia` requires open questions).
|
|
100
|
+
|
|
101
|
+
## Behavioral eval harness (`evals/`)
|
|
102
|
+
|
|
103
|
+
The six-cell regression matrix that hardened the skill: normal cells (planted
|
|
104
|
+
contradiction → refuted; genuine unknowns → aporia; quick-depth cadence), edge cells
|
|
105
|
+
(mid-session stop; disputed restatement), and an out-of-scope cell (fully specified plan →
|
|
106
|
+
decline). Each cell runs a live examiner against a scripted user simulator, then grades
|
|
107
|
+
the transcript with deterministic graders and an independent model judge.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
python evals/run_eval.py --dry-run # list cells, no calls
|
|
111
|
+
python evals/run_eval.py --cell O1 # one cell
|
|
112
|
+
python evals/run_eval.py # full matrix — spawns ~30-60 headless `claude` calls
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Requires the `claude` CLI and real tokens; run cells individually while iterating. A cell
|
|
116
|
+
passes only when **all** deterministic graders pass AND the judge confirms the expected
|
|
117
|
+
behavior with no fabrication.
|
|
118
|
+
|
|
119
|
+
## Development
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
uv sync # or: pip install -e . && pip install pytest
|
|
123
|
+
uv run pytest # validator negatives, installer + detection behavior, CLI smoke
|
|
124
|
+
uv run pre-commit install # one tool per file type: ruff (py), syntax checks (yaml/json/toml), actionlint
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
CI (`.github/workflows/ci.yml`) runs the test suite on Python 3.11–3.13, builds the
|
|
128
|
+
sdist/wheel, checks metadata, and smoke-tests the CLI installed from the built wheel.
|
|
129
|
+
|
|
130
|
+
## Releasing to PyPI
|
|
131
|
+
|
|
132
|
+
Publishing uses **PyPI Trusted Publishing** (OIDC) — no API token lives in the repo.
|
|
133
|
+
One-time setup on pypi.org: *Account → Publishing → Add a new pending publisher* with
|
|
134
|
+
project `socratic-method`, owner `grammy-jiang`, repository `socratic-method`, workflow
|
|
135
|
+
`release.yml`, environment `pypi`. Then, to release:
|
|
136
|
+
|
|
137
|
+
1. Bump `__version__` in `src/socratic_method/__init__.py`.
|
|
138
|
+
2. Push a tag `vX.Y.Z` (must match — the workflow verifies, then creates the GitHub Release).
|
|
139
|
+
3. `release.yml` builds, checks, and publishes; the package appears at
|
|
140
|
+
`pypi.org/project/socratic-method/`, after which `pip install socratic-method` works.
|
|
141
|
+
|
|
142
|
+
Maintenance rule inherited from the skill's history: a behavioral failure is not fixed
|
|
143
|
+
until something durable changes — a grader, a scenario, or a rail in `SKILL.md` — never a
|
|
144
|
+
prose spot-fix alone.
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
---
|
|
2
|
+
schema: idea-brief-v1
|
|
3
|
+
idea: tech-talk-series
|
|
4
|
+
date: 2026-07-04
|
|
5
|
+
mode: stress
|
|
6
|
+
depth: standard
|
|
7
|
+
verdict: sharpened
|
|
8
|
+
thesis_final: "A monthly 30-minute internal tech talk, seeded with 3 pre-committed speakers,
|
|
9
|
+
to improve cross-team visibility — piloted for a quarter before any cadence increase."
|
|
10
|
+
questions_asked: 9
|
|
11
|
+
types_used: [clarification, assumptions, evidence, implications]
|
|
12
|
+
assumptions:
|
|
13
|
+
- text: "Cross-team visibility is a widely felt problem, not just the proposer's"
|
|
14
|
+
status: unvalidated
|
|
15
|
+
- text: "Enough engineers will volunteer to speak at monthly cadence"
|
|
16
|
+
status: risky
|
|
17
|
+
- text: "30 minutes is short enough that attendance survives busy weeks"
|
|
18
|
+
status: unvalidated
|
|
19
|
+
open_questions:
|
|
20
|
+
- "Do at least 5 other people report being surprised by another team's work in the last quarter?"
|
|
21
|
+
- "Who are speakers 1-3, by name, before anything is announced?"
|
|
22
|
+
constraints:
|
|
23
|
+
- "No budget; must run on existing meeting infrastructure"
|
|
24
|
+
- "Cannot be mandatory (user: 'the moment it's mandatory it's dead')"
|
|
25
|
+
next_step: "Ask the last 5 people who joined what they wish they'd known — validates the
|
|
26
|
+
visibility assumption before committing anyone's Friday."
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
# Idea brief: internal tech-talk series
|
|
30
|
+
|
|
31
|
+
## What changed under questioning
|
|
32
|
+
Initial: weekly talks to fix visibility and grow juniors, weekly cadence essential.
|
|
33
|
+
Final: monthly pilot with pre-committed speakers; the junior-growth goal was dropped as a
|
|
34
|
+
separate untested claim riding along with the visibility one.
|
|
35
|
+
|
|
36
|
+
## Scope
|
|
37
|
+
For: engineers across teams at the user's company. Out of scope: external audience,
|
|
38
|
+
recorded-content library, mandatory attendance.
|
|
39
|
+
|
|
40
|
+
## Assumptions surfaced
|
|
41
|
+
The visibility problem rests on a sample of one (the user's own migration surprise). Speaker
|
|
42
|
+
supply was the load-bearing weakness: the user's own words — "I'd be begging people" —
|
|
43
|
+
forced the cadence change.
|
|
44
|
+
|
|
45
|
+
## Contradictions & how resolved
|
|
46
|
+
"Weekly, otherwise it loses momentum" vs "most engineers here hate presenting" — resolved by
|
|
47
|
+
the user: cadence yielded ("monthly with strong talks beats weekly with filler").
|
|
48
|
+
|
|
49
|
+
## Open questions (aporia)
|
|
50
|
+
Whether the visibility problem is real beyond the user, and who the first three named
|
|
51
|
+
speakers are. Both must be answered before announcing anything.
|
|
52
|
+
|
|
53
|
+
## Suggested next step
|
|
54
|
+
Interview the last 5 joiners about what they wish they'd known. Cheap, fast, and directly
|
|
55
|
+
tests the founding assumption.
|