teich 0.1.1a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teich-0.1.1a1/.github/workflows/# Agent Traces on the Hub.md +67 -0
- teich-0.1.1a1/.github/workflows/test.yml +81 -0
- teich-0.1.1a1/.gitignore +22 -0
- teich-0.1.1a1/PKG-INFO +132 -0
- teich-0.1.1a1/README.md +112 -0
- teich-0.1.1a1/ROADMAP.md +165 -0
- teich-0.1.1a1/docker/codex-runtime.Dockerfile +52 -0
- teich-0.1.1a1/prompts.txt +498 -0
- teich-0.1.1a1/pyproject.toml +41 -0
- teich-0.1.1a1/scripts/integration_test.py +277 -0
- teich-0.1.1a1/src/agentic_datagen/__init__.py +18 -0
- teich-0.1.1a1/src/agentic_datagen/__main__.py +4 -0
- teich-0.1.1a1/src/agentic_datagen/cli.py +333 -0
- teich-0.1.1a1/src/agentic_datagen/config.py +289 -0
- teich-0.1.1a1/src/agentic_datagen/converter.py +647 -0
- teich-0.1.1a1/src/agentic_datagen/formatter.py +792 -0
- teich-0.1.1a1/src/agentic_datagen/loader.py +49 -0
- teich-0.1.1a1/src/agentic_datagen/runner.py +1525 -0
- teich-0.1.1a1/src/agentic_datagen/trace_readme.py +214 -0
- teich-0.1.1a1/src/teich/__init__.py +36 -0
- teich-0.1.1a1/src/teich/__main__.py +4 -0
- teich-0.1.1a1/test.csv +2 -0
- teich-0.1.1a1/tests/conftest.py +8 -0
- teich-0.1.1a1/tests/test_cli.py +153 -0
- teich-0.1.1a1/tests/test_config.py +192 -0
- teich-0.1.1a1/tests/test_converter.py +354 -0
- teich-0.1.1a1/tests/test_formatter.py +594 -0
- teich-0.1.1a1/tests/test_integration.py +247 -0
- teich-0.1.1a1/tests/test_loader.py +137 -0
- teich-0.1.1a1/tests/test_runner.py +1153 -0
- teich-0.1.1a1/tests/test_trace_readme.py +27 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Agent Traces on the Hub
|
|
2
|
+
|
|
3
|
+
[Hugging Face](https://huggingface.co/)
|
|
4
|
+
- [Models](https://huggingface.co/models)
|
|
5
|
+
- [Datasets](https://huggingface.co/datasets)
|
|
6
|
+
- [Spaces](https://huggingface.co/spaces)
|
|
7
|
+
- [Buckets new](https://huggingface.co/storage)
|
|
8
|
+
- [Docs](https://huggingface.co/docs)
|
|
9
|
+
- [Enterprise](https://huggingface.co/enterprise)
|
|
10
|
+
- [Pricing](https://huggingface.co/pricing)
|
|
11
|
+
-
|
|
12
|
+
-
|
|
13
|
+
- [Log In](https://huggingface.co/login)
|
|
14
|
+
- [Sign Up](https://huggingface.co/join)
|
|
15
|
+
[Models](https://huggingface.co/models)
|
|
16
|
+
[Datasets](https://huggingface.co/datasets)
|
|
17
|
+
[Spaces](https://huggingface.co/spaces)
|
|
18
|
+
[Buckets new](https://huggingface.co/storage)
|
|
19
|
+
[Docs](https://huggingface.co/docs)
|
|
20
|
+
[Enterprise](https://huggingface.co/enterprise)
|
|
21
|
+
[Pricing](https://huggingface.co/pricing)
|
|
22
|
+
[Log In](https://huggingface.co/login)
|
|
23
|
+
[Sign Up](https://huggingface.co/join)
|
|
24
|
+
[Back to Changelog](https://huggingface.co/changelog)
|
|
25
|
+
[Upvote 98](https://huggingface.co/login?next=%2Fchangelog%2Fagent-trace-viewer)
|
|
26
|
+
- https://huggingface.co/julien-c
|
|
27
|
+
- https://huggingface.co/clem
|
|
28
|
+
- https://huggingface.co/lhoestq
|
|
29
|
+
- https://huggingface.co/thomasgauthier
|
|
30
|
+
- https://huggingface.co/lewtun
|
|
31
|
+
- +93
|
|
32
|
+
|
|
33
|
+
# Agent Traces on the Hub
|
|
34
|
+
[Upvote 98](https://huggingface.co/login?next=%2Fchangelog%2Fagent-trace-viewer)
|
|
35
|
+
- https://huggingface.co/julien-c
|
|
36
|
+
- https://huggingface.co/clem
|
|
37
|
+
- https://huggingface.co/lhoestq
|
|
38
|
+
- https://huggingface.co/thomasgauthier
|
|
39
|
+
- https://huggingface.co/lewtun
|
|
40
|
+
- +93
|
|
41
|
+
You can now upload traces from your agents (Claude Code, Codex, Pi) directly to Hugging Face Datasets. The Hub auto-detects trace formats and tags your dataset as [Traces](https://huggingface.co/datasets?format=format:agent-traces&sort=trending), with a dedicated viewer for browsing sessions, turns, tool calls, and model responses.
|
|
42
|
+
No preprocessing needed, just upload the JSONL files from your local session directories as-is:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
~/.claude/projects
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
~/.codex/sessions
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
~/.pi/agent/sessions
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Useful for sharing debugging workflows, benchmarking agent behavior across models, or building training data from real coding sessions.
|
|
59
|
+
[TOS](https://huggingface.co/terms-of-service)
|
|
60
|
+
[Privacy](https://huggingface.co/privacy)
|
|
61
|
+
[About](https://huggingface.co/huggingface)
|
|
62
|
+
[Careers](https://apply.workable.com/huggingface/)
|
|
63
|
+
[Models](https://huggingface.co/models)
|
|
64
|
+
[Datasets](https://huggingface.co/datasets)
|
|
65
|
+
[Spaces](https://huggingface.co/spaces)
|
|
66
|
+
[Pricing](https://huggingface.co/pricing)
|
|
67
|
+
[Docs](https://huggingface.co/docs)
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main, master]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
unit-tests:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: '3.12'
|
|
19
|
+
|
|
20
|
+
- name: Install uv
|
|
21
|
+
run: |
|
|
22
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
23
|
+
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: |
|
|
27
|
+
cd v2
|
|
28
|
+
uv pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Run unit tests
|
|
31
|
+
run: |
|
|
32
|
+
cd v2
|
|
33
|
+
pytest tests/ -v -m "not integration" --tb=short
|
|
34
|
+
|
|
35
|
+
docker-build:
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
steps:
|
|
38
|
+
- uses: actions/checkout@v4
|
|
39
|
+
|
|
40
|
+
- name: Set up Docker Buildx
|
|
41
|
+
uses: docker/setup-buildx-action@v3
|
|
42
|
+
|
|
43
|
+
- name: Build Docker image
|
|
44
|
+
run: |
|
|
45
|
+
cd v2
|
|
46
|
+
docker build -f docker/codex-runtime.Dockerfile -t agentic-datagen-codex:v2 .
|
|
47
|
+
|
|
48
|
+
- name: Test Codex CLI in container
|
|
49
|
+
run: |
|
|
50
|
+
docker run --rm agentic-datagen-codex:v2 codex --version
|
|
51
|
+
|
|
52
|
+
integration-tests:
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
needs: [unit-tests, docker-build]
|
|
55
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
56
|
+
# Only run on main to avoid burning API credits on every PR
|
|
57
|
+
environment: integration-tests
|
|
58
|
+
steps:
|
|
59
|
+
- uses: actions/checkout@v4
|
|
60
|
+
|
|
61
|
+
- name: Set up Python
|
|
62
|
+
uses: actions/setup-python@v5
|
|
63
|
+
with:
|
|
64
|
+
python-version: '3.12'
|
|
65
|
+
|
|
66
|
+
- name: Install uv
|
|
67
|
+
run: |
|
|
68
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
69
|
+
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
|
70
|
+
|
|
71
|
+
- name: Install dependencies
|
|
72
|
+
run: |
|
|
73
|
+
cd v2
|
|
74
|
+
uv pip install -e ".[dev]"
|
|
75
|
+
|
|
76
|
+
- name: Run integration tests
|
|
77
|
+
env:
|
|
78
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
79
|
+
run: |
|
|
80
|
+
cd v2
|
|
81
|
+
pytest tests/test_integration.py -v --tb=short
|
teich-0.1.1a1/.gitignore
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*.pyo
|
|
4
|
+
*.pyd
|
|
5
|
+
*.so
|
|
6
|
+
.venv/
|
|
7
|
+
venv/
|
|
8
|
+
.pytest_cache/
|
|
9
|
+
.benchmarks/
|
|
10
|
+
.deepeval/
|
|
11
|
+
.mypy_cache/
|
|
12
|
+
.ruff_cache/
|
|
13
|
+
output/
|
|
14
|
+
sandbox/
|
|
15
|
+
test_run/
|
|
16
|
+
outputs/
|
|
17
|
+
qwen_lora/
|
|
18
|
+
traces-test/
|
|
19
|
+
*.env
|
|
20
|
+
*.env.*
|
|
21
|
+
.DS_Store
|
|
22
|
+
Thumbs.db
|
teich-0.1.1a1/PKG-INFO
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: teich
|
|
3
|
+
Version: 0.1.1a1
|
|
4
|
+
Summary: Generate agent training data from Codex and Pi traces
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: datasets>=2.19.0
|
|
8
|
+
Requires-Dist: docker>=7.0
|
|
9
|
+
Requires-Dist: huggingface-hub>=0.23.0
|
|
10
|
+
Requires-Dist: pydantic>=2.0
|
|
11
|
+
Requires-Dist: pyyaml>=6.0
|
|
12
|
+
Requires-Dist: rich>=13.0
|
|
13
|
+
Requires-Dist: typer>=0.12
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: respx>=0.22; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# Teich
|
|
22
|
+
|
|
23
|
+
`v2/` is the experimental trace-first package for collecting raw agent sessions and converting them into training-ready data.
|
|
24
|
+
|
|
25
|
+
## What it does today
|
|
26
|
+
|
|
27
|
+
- Runs Codex and Pi in a shared Docker runtime with `uv`, `npm`, `@openai/codex`, and `@mariozechner/pi-coding-agent`
|
|
28
|
+
- Configures Codex through a mounted `CODEX_HOME/config.toml`
|
|
29
|
+
- Configures Pi through an isolated mounted `~/.pi/agent/settings.json`
|
|
30
|
+
- Exports raw session traces from mounted Codex and Pi session directories
|
|
31
|
+
- Writes a trace-folder `README.md` for upload
|
|
32
|
+
- Exposes Python conversion helpers for training data preparation
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Initialize a project
|
|
38
|
+
uvx teich init my-project
|
|
39
|
+
cd my-project
|
|
40
|
+
|
|
41
|
+
# Run with the configured agent provider and model settings
|
|
42
|
+
uvx teich generate -c config.yaml
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Local OSS providers
|
|
46
|
+
|
|
47
|
+
If you want Codex to talk to a local provider like LM Studio or Ollama, set the provider in config or env:
|
|
48
|
+
|
|
49
|
+
```powershell
|
|
50
|
+
$env:TEICH_PROVIDER='LMstudio'
|
|
51
|
+
$env:TEICH_MODEL='gemma-4'
|
|
52
|
+
$env:TEICH_API_KEY='llm'
|
|
53
|
+
$env:TEICH_BASE_URL='http://localhost:1234/v1'
|
|
54
|
+
python -m teich generate -c test_run/config.yaml
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`v2` maps `LMstudio` and `ollama` onto Codex's native `--oss --local-provider ...` flow.
|
|
58
|
+
|
|
59
|
+
## Configuration model
|
|
60
|
+
|
|
61
|
+
Important fields in `config.yaml`:
|
|
62
|
+
|
|
63
|
+
```yaml
|
|
64
|
+
agent:
|
|
65
|
+
provider: codex # or pi
|
|
66
|
+
|
|
67
|
+
model:
|
|
68
|
+
model: codex-mini-latest
|
|
69
|
+
approval_policy: never
|
|
70
|
+
sandbox: danger-full-access
|
|
71
|
+
reasoning_effort: null
|
|
72
|
+
|
|
73
|
+
api:
|
|
74
|
+
provider: openai
|
|
75
|
+
base_url: null
|
|
76
|
+
api_key: null
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Legacy `model.approval_mode` is still accepted and normalized internally.
|
|
80
|
+
|
|
81
|
+
## Python conversion API
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from pathlib import Path
|
|
85
|
+
from teich import convert_traces_to_training_data
|
|
86
|
+
|
|
87
|
+
examples = convert_traces_to_training_data(Path("./output"))
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The converter currently maps example-style raw traces into message/tool records with:
|
|
91
|
+
|
|
92
|
+
- system/developer instructions
|
|
93
|
+
- user messages
|
|
94
|
+
- assistant messages
|
|
95
|
+
- `reasoning_content`
|
|
96
|
+
- tool calls
|
|
97
|
+
- tool results
|
|
98
|
+
|
|
99
|
+
## Development
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
uv pip install -e ".[dev]"
|
|
103
|
+
pytest tests/test_config.py tests/test_cli.py tests/test_runner.py -q
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Architecture
|
|
107
|
+
|
|
108
|
+
- **Shared Docker runtime**: container image includes Node.js, `uv`, `uvx`, `@openai/codex`, and `@mariozechner/pi-coding-agent`
|
|
109
|
+
- **Isolated Pi config**: Pi runs with a mounted per-run `~/.pi/agent` directory inside the container
|
|
110
|
+
- **Codex config**: generated `config.toml` under a mounted `CODEX_HOME`
|
|
111
|
+
- **Session export**: raw JSONL sessions are copied from mounted Codex or Pi session storage into the user output directory
|
|
112
|
+
- **Upload-first output**: traces are preserved in raw form before later conversion
|
|
113
|
+
- **Provider-aware boundary**: `agent.provider` selects either the Codex or Pi raw-trace path
|
|
114
|
+
|
|
115
|
+
## Project Structure
|
|
116
|
+
|
|
117
|
+
```text
|
|
118
|
+
v2/
|
|
119
|
+
├── docker/
|
|
120
|
+
│ └── codex-runtime.Dockerfile
|
|
121
|
+
├── src/teich/
|
|
122
|
+
│ ├── __init__.py
|
|
123
|
+
│ ├── __main__.py
|
|
124
|
+
│ ├── cli.py
|
|
125
|
+
│ ├── config.py
|
|
126
|
+
│ ├── converter.py
|
|
127
|
+
│ ├── runner.py
|
|
128
|
+
│ └── trace_readme.py
|
|
129
|
+
└── tests/
|
|
130
|
+
├── test_cli.py
|
|
131
|
+
├── test_config.py
|
|
132
|
+
└── test_runner.py
|
teich-0.1.1a1/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Teich
|
|
2
|
+
|
|
3
|
+
`v2/` is the experimental trace-first package for collecting raw agent sessions and converting them into training-ready data.
|
|
4
|
+
|
|
5
|
+
## What it does today
|
|
6
|
+
|
|
7
|
+
- Runs Codex and Pi in a shared Docker runtime with `uv`, `npm`, `@openai/codex`, and `@mariozechner/pi-coding-agent`
|
|
8
|
+
- Configures Codex through a mounted `CODEX_HOME/config.toml`
|
|
9
|
+
- Configures Pi through an isolated mounted `~/.pi/agent/settings.json`
|
|
10
|
+
- Exports raw session traces from mounted Codex and Pi session directories
|
|
11
|
+
- Writes a trace-folder `README.md` for upload
|
|
12
|
+
- Exposes Python conversion helpers for training data preparation
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Initialize a project
|
|
18
|
+
uvx teich init my-project
|
|
19
|
+
cd my-project
|
|
20
|
+
|
|
21
|
+
# Run with the configured agent provider and model settings
|
|
22
|
+
uvx teich generate -c config.yaml
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Local OSS providers
|
|
26
|
+
|
|
27
|
+
If you want Codex to talk to a local provider like LM Studio or Ollama, set the provider in config or env:
|
|
28
|
+
|
|
29
|
+
```powershell
|
|
30
|
+
$env:TEICH_PROVIDER='LMstudio'
|
|
31
|
+
$env:TEICH_MODEL='gemma-4'
|
|
32
|
+
$env:TEICH_API_KEY='llm'
|
|
33
|
+
$env:TEICH_BASE_URL='http://localhost:1234/v1'
|
|
34
|
+
python -m teich generate -c test_run/config.yaml
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
`v2` maps `LMstudio` and `ollama` onto Codex's native `--oss --local-provider ...` flow.
|
|
38
|
+
|
|
39
|
+
## Configuration model
|
|
40
|
+
|
|
41
|
+
Important fields in `config.yaml`:
|
|
42
|
+
|
|
43
|
+
```yaml
|
|
44
|
+
agent:
|
|
45
|
+
provider: codex # or pi
|
|
46
|
+
|
|
47
|
+
model:
|
|
48
|
+
model: codex-mini-latest
|
|
49
|
+
approval_policy: never
|
|
50
|
+
sandbox: danger-full-access
|
|
51
|
+
reasoning_effort: null
|
|
52
|
+
|
|
53
|
+
api:
|
|
54
|
+
provider: openai
|
|
55
|
+
base_url: null
|
|
56
|
+
api_key: null
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Legacy `model.approval_mode` is still accepted and normalized internally.
|
|
60
|
+
|
|
61
|
+
## Python conversion API
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from pathlib import Path
|
|
65
|
+
from teich import convert_traces_to_training_data
|
|
66
|
+
|
|
67
|
+
examples = convert_traces_to_training_data(Path("./output"))
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
The converter currently maps example-style raw traces into message/tool records with:
|
|
71
|
+
|
|
72
|
+
- system/developer instructions
|
|
73
|
+
- user messages
|
|
74
|
+
- assistant messages
|
|
75
|
+
- `reasoning_content`
|
|
76
|
+
- tool calls
|
|
77
|
+
- tool results
|
|
78
|
+
|
|
79
|
+
## Development
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
uv pip install -e ".[dev]"
|
|
83
|
+
pytest tests/test_config.py tests/test_cli.py tests/test_runner.py -q
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Architecture
|
|
87
|
+
|
|
88
|
+
- **Shared Docker runtime**: container image includes Node.js, `uv`, `uvx`, `@openai/codex`, and `@mariozechner/pi-coding-agent`
|
|
89
|
+
- **Isolated Pi config**: Pi runs with a mounted per-run `~/.pi/agent` directory inside the container
|
|
90
|
+
- **Codex config**: generated `config.toml` under a mounted `CODEX_HOME`
|
|
91
|
+
- **Session export**: raw JSONL sessions are copied from mounted Codex or Pi session storage into the user output directory
|
|
92
|
+
- **Upload-first output**: traces are preserved in raw form before later conversion
|
|
93
|
+
- **Provider-aware boundary**: `agent.provider` selects either the Codex or Pi raw-trace path
|
|
94
|
+
|
|
95
|
+
## Project Structure
|
|
96
|
+
|
|
97
|
+
```text
|
|
98
|
+
v2/
|
|
99
|
+
├── docker/
|
|
100
|
+
│ └── codex-runtime.Dockerfile
|
|
101
|
+
├── src/teich/
|
|
102
|
+
│ ├── __init__.py
|
|
103
|
+
│ ├── __main__.py
|
|
104
|
+
│ ├── cli.py
|
|
105
|
+
│ ├── config.py
|
|
106
|
+
│ ├── converter.py
|
|
107
|
+
│ ├── runner.py
|
|
108
|
+
│ └── trace_readme.py
|
|
109
|
+
└── tests/
|
|
110
|
+
├── test_cli.py
|
|
111
|
+
├── test_config.py
|
|
112
|
+
└── test_runner.py
|
teich-0.1.1a1/ROADMAP.md
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Teich - Roadmap
|
|
2
|
+
|
|
3
|
+
## Current Status: Functional Experimental Baseline
|
|
4
|
+
- Docker runtime with Codex, uv, npm
|
|
5
|
+
- Configuration system with YAML models and MCP server definitions
|
|
6
|
+
- `python -m teich` and CLI init/generate commands
|
|
7
|
+
- Raw session extraction from mounted `CODEX_HOME/sessions`
|
|
8
|
+
- Auto-generated trace-folder README
|
|
9
|
+
- Importable converter for raw Codex traces to training-style messages/tools
|
|
10
|
+
- Focused unit coverage for config, CLI, and runner behavior
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Phase 1: Testing & Hardening (Next)
|
|
15
|
+
|
|
16
|
+
### 1.1 Integration Testing
|
|
17
|
+
- [x] Test actual Docker image build
|
|
18
|
+
- [ ] Validate live `codex exec` end-to-end against installed Codex CLI versions
|
|
19
|
+
- [ ] Verify session files are extracted correctly with real sessions
|
|
20
|
+
- [ ] Verify trace format matches HF expectations on actual generated traces
|
|
21
|
+
- [ ] Test MCP server configuration with real servers
|
|
22
|
+
- [ ] Validate LM Studio / Ollama local-provider runs through Codex OSS mode
|
|
23
|
+
|
|
24
|
+
### 1.2 Error Handling
|
|
25
|
+
- [ ] Handle Docker not installed/running
|
|
26
|
+
- [ ] Handle invalid OpenAI API key or unavailable local provider
|
|
27
|
+
- [x] Handle network timeouts during Codex execution
|
|
28
|
+
- [x] Handle session extraction failures
|
|
29
|
+
- [ ] Retry logic for failed prompts
|
|
30
|
+
|
|
31
|
+
### 1.3 Output Format Validation
|
|
32
|
+
- [x] Validate trace JSONL structure against example traces
|
|
33
|
+
- [ ] Ensure HF trace viewer compatibility
|
|
34
|
+
- [x] Generate README for trace upload directories
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Phase 2: Training Data Conversion
|
|
39
|
+
|
|
40
|
+
### 2.1 Converter Module
|
|
41
|
+
Implemented `src/teich/converter.py`:
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from teich import convert_traces_to_training_data, TrainingExample
|
|
45
|
+
|
|
46
|
+
examples = convert_traces_to_training_data(
|
|
47
|
+
traces_dir=Path("./output")
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### 2.2 Supported Formats
|
|
52
|
+
- [x] **OpenAI-style chat/message format** (primary internal output):
|
|
53
|
+
```json
|
|
54
|
+
{
|
|
55
|
+
"messages": [
|
|
56
|
+
{"role": "system", "content": "..."},
|
|
57
|
+
{"role": "user", "content": "..."},
|
|
58
|
+
{"role": "assistant", "content": "...", "reasoning_content": "..."},
|
|
59
|
+
{"role": "assistant", "tool_calls": [...]},
|
|
60
|
+
{"role": "tool", "tool_call_id": "...", "content": "..."}
|
|
61
|
+
]
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
- [ ] **Anthropic Messages API**
|
|
65
|
+
- [ ] **Gemini Format**
|
|
66
|
+
|
|
67
|
+
### 2.3 Field Mapping
|
|
68
|
+
From Codex traces to training examples:
|
|
69
|
+
- [x] Extract system prompts from session init / developer messages
|
|
70
|
+
- [x] Map `message` events → user/assistant messages
|
|
71
|
+
- [x] Extract `reasoning_content` from reasoning summary events
|
|
72
|
+
- [x] Map `function_call` → assistant message with tool_calls
|
|
73
|
+
- [x] Map `function_call_output` → tool message
|
|
74
|
+
- [ ] Extract tool schemas when present in raw traces
|
|
75
|
+
- [x] Handle multi-turn conversations correctly
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Phase 3: Advanced Features
|
|
80
|
+
|
|
81
|
+
### 3.1 Parallel Execution
|
|
82
|
+
- [ ] Run multiple prompts concurrently
|
|
83
|
+
- [ ] Configurable max_workers
|
|
84
|
+
- [ ] Progress tracking for batch jobs
|
|
85
|
+
|
|
86
|
+
### 3.2 Session Resumption
|
|
87
|
+
- [ ] Save progress checkpoint
|
|
88
|
+
- [ ] Resume interrupted runs
|
|
89
|
+
- [ ] Skip already-completed prompts
|
|
90
|
+
|
|
91
|
+
### 3.3 Output Formats
|
|
92
|
+
- [ ] Hugging Face datasets integration
|
|
93
|
+
- [ ] Parquet output option
|
|
94
|
+
- [ ] Train/validation split generation
|
|
95
|
+
|
|
96
|
+
### 3.4 Quality Filtering
|
|
97
|
+
- [ ] Filter empty/short sessions
|
|
98
|
+
- [ ] Detect failed/error sessions
|
|
99
|
+
- [ ] Workspace artifact validation
|
|
100
|
+
- [ ] Configurable quality thresholds
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Phase 4: Extended Model Support
|
|
105
|
+
|
|
106
|
+
### 4.1 OpenRouter/OpenAI-Compatible APIs
|
|
107
|
+
Since Codex CLI behavior varies by provider, keep exploring alternatives:
|
|
108
|
+
- [ ] Research: Can we use `aider` or similar tools?
|
|
109
|
+
- [ ] Custom runner for generic OpenAI-compatible APIs
|
|
110
|
+
- [x] OpenRouter/config override path in current Codex runner
|
|
111
|
+
- [ ] Harden compatibility for non-OpenAI endpoints under real runs
|
|
112
|
+
|
|
113
|
+
### 4.2 Multi-Provider Support
|
|
114
|
+
- [x] Modular config boundary with `agent.provider`
|
|
115
|
+
- [ ] Pi agent runner
|
|
116
|
+
- [ ] Anthropic Claude runner
|
|
117
|
+
- [ ] Google Gemini runner
|
|
118
|
+
- [ ] Ollama/local model runner beyond Codex OSS mode
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Phase 5: Production Polish
|
|
123
|
+
|
|
124
|
+
### 5.1 Documentation
|
|
125
|
+
- [ ] Full API documentation
|
|
126
|
+
- [ ] Tutorial: Creating your first dataset
|
|
127
|
+
- [ ] Tutorial: Fine-tuning with generated data
|
|
128
|
+
- [ ] Example configs for common use cases
|
|
129
|
+
|
|
130
|
+
### 5.2 CLI Improvements
|
|
131
|
+
- [ ] `validate` command to check config
|
|
132
|
+
- [ ] `preview` command to see what would be generated
|
|
133
|
+
- [ ] `status` command to check previous runs
|
|
134
|
+
- [ ] Rich progress bars and logging
|
|
135
|
+
|
|
136
|
+
### 5.3 Testing
|
|
137
|
+
- [ ] Integration tests that exercise the real API call paths against mocked responses
|
|
138
|
+
- [ ] Docker build tests in CI
|
|
139
|
+
- [ ] Format validation tests
|
|
140
|
+
- [ ] End-to-end workflow tests
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Immediate Next Steps
|
|
145
|
+
|
|
146
|
+
1. **Validate live Codex execution with the installed CLI**:
|
|
147
|
+
```bash
|
|
148
|
+
cd v2
|
|
149
|
+
python -m teich generate -c test_run/config.yaml
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
2. **Verify trace files** are generated and match the example-style raw session format
|
|
153
|
+
|
|
154
|
+
3. **Inspect an actual generated trace** and tighten converter field mapping if needed
|
|
155
|
+
|
|
156
|
+
4. **Prototype Pi-agent trace ingestion** behind the same conversion/export interfaces
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Open Questions
|
|
161
|
+
|
|
162
|
+
1. Which Codex CLI versions should `v2` explicitly support for non-interactive runs?
|
|
163
|
+
2. Should LM Studio and Ollama stay routed through Codex OSS mode, or get their own non-Codex runner?
|
|
164
|
+
3. What quality metrics should we filter on?
|
|
165
|
+
4. How should we handle tool schemas that vary between providers?
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1
|
|
2
|
+
FROM node:22-slim
|
|
3
|
+
|
|
4
|
+
# Install system dependencies with cache mount and minimal packages
|
|
5
|
+
# NOTE: build-essential is still installed below even though it is only needed for compiling, not at runtime
|
|
6
|
+
# Added: --no-install-recommends to skip extra packages
|
|
7
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
8
|
+
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
|
9
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
10
|
+
build-essential \
|
|
11
|
+
git \
|
|
12
|
+
curl \
|
|
13
|
+
ca-certificates \
|
|
14
|
+
python3 \
|
|
15
|
+
python3-dev \
|
|
16
|
+
python3-minimal \
|
|
17
|
+
python3-pip \
|
|
18
|
+
python3-venv \
|
|
19
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
20
|
+
|
|
21
|
+
RUN ln -sf /usr/bin/python3 /usr/local/bin/python && \
|
|
22
|
+
python3 -m venv /opt/venv && \
|
|
23
|
+
/opt/venv/bin/python -m pip install --upgrade pip setuptools wheel && \
|
|
24
|
+
ln -sf /opt/venv/bin/pip /usr/local/bin/pip && \
|
|
25
|
+
ln -sf /opt/venv/bin/pip3 /usr/local/bin/pip3
|
|
26
|
+
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
|
27
|
+
ENV VIRTUAL_ENV=/opt/venv
|
|
28
|
+
|
|
29
|
+
# Install Astral uv, @openai/codex, @mariozechner/pi-coding-agent, and Playwright (with Chromium) in one layer
|
|
30
|
+
# Use npm cache mount for faster installs
|
|
31
|
+
RUN --mount=type=cache,target=/root/.npm \
|
|
32
|
+
mkdir -p ${PLAYWRIGHT_BROWSERS_PATH} && \
|
|
33
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
|
34
|
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
|
35
|
+
mv /root/.local/bin/uvx /usr/local/bin/uvx && \
|
|
36
|
+
npm install -g @openai/codex @mariozechner/pi-coding-agent playwright && \
|
|
37
|
+
npx playwright install --with-deps chromium && \
|
|
38
|
+
node --version && npm --version && npx --version && uv --version && uvx --version && python --version && python3 --version && pip --version && pip3 --version && codex --version && pi --version
|
|
39
|
+
|
|
40
|
+
# Create working directory and user in one layer
|
|
41
|
+
WORKDIR /workspace
|
|
42
|
+
RUN useradd -m -s /bin/bash codex && \
|
|
43
|
+
mkdir -p /home/codex/.codex/sessions && \
|
|
44
|
+
chown -R codex:codex /home/codex /workspace ${PLAYWRIGHT_BROWSERS_PATH} ${VIRTUAL_ENV}
|
|
45
|
+
|
|
46
|
+
USER codex
|
|
47
|
+
ENV CODEX_HOME=/home/codex
|
|
48
|
+
ENV HOME=/home/codex
|
|
49
|
+
ENV NODE_PATH="/usr/local/lib/node_modules"
|
|
50
|
+
ENV PATH="/opt/venv/bin:/usr/local/bin:$PATH"
|
|
51
|
+
|
|
52
|
+
CMD ["bash"]
|