uscient-mboxer 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uscient_mboxer-0.1.5/.agents/SKILL.md +32 -0
- uscient_mboxer-0.1.5/.claude/README.md +9 -0
- uscient_mboxer-0.1.5/.claude/commands/mboxer-final-readiness.md +25 -0
- uscient_mboxer-0.1.5/.claude/commands/mboxer-lineage-hardening.md +19 -0
- uscient_mboxer-0.1.5/.claude/commands/mboxer-producer-seam.md +24 -0
- uscient_mboxer-0.1.5/.claude/commands/mboxer-readiness-audit.md +21 -0
- uscient_mboxer-0.1.5/.claude/commands/mboxer-security-boundary.md +18 -0
- uscient_mboxer-0.1.5/.codex/README.md +11 -0
- uscient_mboxer-0.1.5/.codex/prompts/01-readiness-audit.md +35 -0
- uscient_mboxer-0.1.5/.codex/prompts/02-manifest-lineage-hardening.md +43 -0
- uscient_mboxer-0.1.5/.codex/prompts/03-producer-event-seam.md +51 -0
- uscient_mboxer-0.1.5/.codex/prompts/04-security-export-boundary-audit.md +45 -0
- uscient_mboxer-0.1.5/.codex/prompts/05-final-readiness-audit.md +33 -0
- uscient_mboxer-0.1.5/.codex/templates/task-report.md +15 -0
- uscient_mboxer-0.1.5/.env.example +7 -0
- uscient_mboxer-0.1.5/.github/dependabot.yml +33 -0
- uscient_mboxer-0.1.5/.github/workflows/bandit.yml +52 -0
- uscient_mboxer-0.1.5/.github/workflows/ci.yml +66 -0
- uscient_mboxer-0.1.5/.github/workflows/dependency-review.yml +15 -0
- uscient_mboxer-0.1.5/.github/workflows/publish.yml +60 -0
- uscient_mboxer-0.1.5/.github/workflows/schema.yml +21 -0
- uscient_mboxer-0.1.5/.gitignore +40 -0
- uscient_mboxer-0.1.5/AGENTS.md +185 -0
- uscient_mboxer-0.1.5/APPLY.md +41 -0
- uscient_mboxer-0.1.5/CLAUDE.md +11 -0
- uscient_mboxer-0.1.5/HOWTO.md +84 -0
- uscient_mboxer-0.1.5/LICENSE +21 -0
- uscient_mboxer-0.1.5/PKG-INFO +654 -0
- uscient_mboxer-0.1.5/README.md +618 -0
- uscient_mboxer-0.1.5/config/mboxer.example.yaml +208 -0
- uscient_mboxer-0.1.5/data/attachments/.gitkeep +0 -0
- uscient_mboxer-0.1.5/data/mboxes/.gitkeep +0 -0
- uscient_mboxer-0.1.5/docs/architecture.md +109 -0
- uscient_mboxer-0.1.5/docs/naming-conventions.md +84 -0
- uscient_mboxer-0.1.5/docs/notebooklm-limits.md +67 -0
- uscient_mboxer-0.1.5/docs/security-roadmap.md +65 -0
- uscient_mboxer-0.1.5/docs/sqlite-schema.md +49 -0
- uscient_mboxer-0.1.5/exports/notebooklm/.gitkeep +0 -0
- uscient_mboxer-0.1.5/exports/rag/.gitkeep +0 -0
- uscient_mboxer-0.1.5/mboxer-current-config.yaml +190 -0
- uscient_mboxer-0.1.5/prompts/claude-code-mboxer-current.md +370 -0
- uscient_mboxer-0.1.5/pyproject.toml +79 -0
- uscient_mboxer-0.1.5/schema_ingest_tracking.sql +98 -0
- uscient_mboxer-0.1.5/setup.cfg +4 -0
- uscient_mboxer-0.1.5/src/mboxer/__init__.py +6 -0
- uscient_mboxer-0.1.5/src/mboxer/__main__.py +3 -0
- uscient_mboxer-0.1.5/src/mboxer/accounts.py +145 -0
- uscient_mboxer-0.1.5/src/mboxer/attachments.py +168 -0
- uscient_mboxer-0.1.5/src/mboxer/classify.py +447 -0
- uscient_mboxer-0.1.5/src/mboxer/cli.py +491 -0
- uscient_mboxer-0.1.5/src/mboxer/config.py +85 -0
- uscient_mboxer-0.1.5/src/mboxer/db/__init__.py +3 -0
- uscient_mboxer-0.1.5/src/mboxer/db/migrations/001_initial.sql +224 -0
- uscient_mboxer-0.1.5/src/mboxer/db/migrations/002_multi_account.sql +163 -0
- uscient_mboxer-0.1.5/src/mboxer/db/migrations/__init__.py +0 -0
- uscient_mboxer-0.1.5/src/mboxer/db/schema.py +78 -0
- uscient_mboxer-0.1.5/src/mboxer/db/schema.sql +329 -0
- uscient_mboxer-0.1.5/src/mboxer/exporters/__init__.py +1 -0
- uscient_mboxer-0.1.5/src/mboxer/exporters/jsonl.py +293 -0
- uscient_mboxer-0.1.5/src/mboxer/exporters/manifest.py +393 -0
- uscient_mboxer-0.1.5/src/mboxer/exporters/notebooklm.py +496 -0
- uscient_mboxer-0.1.5/src/mboxer/exporters/projection.py +56 -0
- uscient_mboxer-0.1.5/src/mboxer/ingest.py +513 -0
- uscient_mboxer-0.1.5/src/mboxer/limits.py +130 -0
- uscient_mboxer-0.1.5/src/mboxer/naming.py +42 -0
- uscient_mboxer-0.1.5/src/mboxer/normalize.py +173 -0
- uscient_mboxer-0.1.5/src/mboxer/security/__init__.py +1 -0
- uscient_mboxer-0.1.5/src/mboxer/security/policy.py +35 -0
- uscient_mboxer-0.1.5/src/mboxer/security/scan.py +111 -0
- uscient_mboxer-0.1.5/src/mboxer/security/scrub.py +40 -0
- uscient_mboxer-0.1.5/src/mboxer/taxonomy.py +199 -0
- uscient_mboxer-0.1.5/src/uscient_mboxer.egg-info/PKG-INFO +654 -0
- uscient_mboxer-0.1.5/src/uscient_mboxer.egg-info/SOURCES.txt +93 -0
- uscient_mboxer-0.1.5/src/uscient_mboxer.egg-info/dependency_links.txt +1 -0
- uscient_mboxer-0.1.5/src/uscient_mboxer.egg-info/entry_points.txt +2 -0
- uscient_mboxer-0.1.5/src/uscient_mboxer.egg-info/requires.txt +11 -0
- uscient_mboxer-0.1.5/src/uscient_mboxer.egg-info/top_level.txt +1 -0
- uscient_mboxer-0.1.5/tests/fixtures/.gitkeep +0 -0
- uscient_mboxer-0.1.5/tests/fixtures/make_synthetic.py +105 -0
- uscient_mboxer-0.1.5/tests/test_accounts.py +113 -0
- uscient_mboxer-0.1.5/tests/test_classify.py +167 -0
- uscient_mboxer-0.1.5/tests/test_config.py +26 -0
- uscient_mboxer-0.1.5/tests/test_db.py +36 -0
- uscient_mboxer-0.1.5/tests/test_export.py +217 -0
- uscient_mboxer-0.1.5/tests/test_first_run.py +270 -0
- uscient_mboxer-0.1.5/tests/test_ingest.py +706 -0
- uscient_mboxer-0.1.5/tests/test_limits.py +25 -0
- uscient_mboxer-0.1.5/tests/test_manifest.py +793 -0
- uscient_mboxer-0.1.5/tests/test_migration.py +95 -0
- uscient_mboxer-0.1.5/tests/test_naming.py +33 -0
- uscient_mboxer-0.1.5/tests/test_normalize.py +51 -0
- uscient_mboxer-0.1.5/tests/test_scrub_export.py +1002 -0
- uscient_mboxer-0.1.5/tests/test_taxonomy.py +254 -0
- uscient_mboxer-0.1.5/tests/test_thread_classify.py +618 -0
- uscient_mboxer-0.1.5/var/log/.gitkeep +0 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# mboxer Operating Skill Guide
|
|
2
|
+
|
|
3
|
+
This document outlines the standard operating procedures, core competencies, and execution rules for AI agents, LLMs, or advanced users interacting with the `mboxer` environment.
|
|
4
|
+
|
|
5
|
+
## 1. System Understanding
|
|
6
|
+
An agent utilizing `mboxer` must understand that it is operating a **local-first** and **privacy-conscious** data pipeline. `mboxer` assumes mail archives contain sensitive material, so raw exports are local-only by default. The system does *not* upload raw email archives by default, and it is *not* a cloud-first archive processor.
|
|
7
|
+
|
|
8
|
+
## 2. Core Execution Workflow
|
|
9
|
+
When instructed to process an email archive, the agent should follow this exact sequence of operations:
|
|
10
|
+
1. **Initialize:** Ensure the SQLite database is ready (`mboxer init`).
|
|
11
|
+
2. **Register:** Verify the target account is registered (`mboxer register`).
|
|
12
|
+
3. **Ingest:** Execute `mboxer ingest` with the `--resume` flag. *Crucial Skill:* Always advise the user to run a test ingest on a small, segmented `.mbox` file before ingesting a large historical archive.
|
|
13
|
+
4. **Classify:** Execute `mboxer classify`.
|
|
14
|
+
5. **Review:** Instruct the user to interactively run `mboxer review-categories` to approve or reject taxonomy proposals.
|
|
15
|
+
6. **Scan:** Execute `mboxer scan` to run configured redaction and security rules.
|
|
16
|
+
7. **Dry-Run:** ALWAYS execute a dry-run (`mboxer export notebooklm --dry-run`) before a real export to validate output shapes and profile limits.
|
|
17
|
+
8. **Export:** Execute the final real export.
|
|
18
|
+
|
|
19
|
+
## 3. Configuration & Profile Management
|
|
20
|
+
The agent must be adept at modifying the `mboxer.yaml` configuration to select the appropriate NotebookLM limit profile:
|
|
21
|
+
* `standard`: 40 target sources, 300,000 words/source
|
|
22
|
+
* `plus`: 80 target sources, 300,000 words/source
|
|
23
|
+
* `pro`: 250 target sources, 300,000 words/source
|
|
24
|
+
* `ultra`: 525 target sources, 300,000 words/source
|
|
25
|
+
* `ultra_safe`: 450 target sources, 225,000 words/source
|
|
26
|
+
|
|
27
|
+
*Strategy Rule:* The agent should default to recommending `ultra_safe` for large NotebookLM-oriented workflows to preserve headroom for manual sources, attachments, and later additions.
|
|
28
|
+
|
|
29
|
+
## 4. Classification Context Preservation
|
|
30
|
+
* **Thread Context:** Recognize that classification runs at both the message and thread level. At the thread level, a matching rule is applied to the whole thread and then inherited down to all messages in the thread.
|
|
31
|
+
* **Confidence Levels:** Understand that rules support two assignment modes: `assign` for confident matches (confidence 1.0) and `assign_hint` for soft matches (confidence 0.75).
|
|
32
|
+
* **Multi-Account:** Maintain strict separation. `mboxer` utilizes per-account keyed storage to keep multi-account exports completely separated.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# `.claude/` helper files
|
|
2
|
+
|
|
3
|
+
This directory contains optional Claude Code command prompts for repeatable mboxer work.
|
|
4
|
+
|
|
5
|
+
Root `CLAUDE.md` imports `AGENTS.md`, so durable project instructions should live in `AGENTS.md` unless they are Claude-only.
|
|
6
|
+
|
|
7
|
+
This directory intentionally does not include an active project-level `settings.json`. Avoid committing Claude settings that silently change permissions, hooks, or tool behavior for every collaborator unless there is a deliberate review.
|
|
8
|
+
|
|
9
|
+
After changing `.claude/`, `CLAUDE.md`, `AGENTS.md`, hooks, skills, commands, or related files, start a fresh Claude Code session before relying on the new instructions.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Perform a final no-code readiness audit for mboxer before any future external API/import handoff work.
|
|
2
|
+
|
|
3
|
+
Requirements:
|
|
4
|
+
|
|
5
|
+
- Do not modify files.
|
|
6
|
+
- Do not invent an external intake API.
|
|
7
|
+
- Separate proven behavior from recommendations.
|
|
8
|
+
- Run `git status --short` first.
|
|
9
|
+
- Inspect current repo state.
|
|
10
|
+
- Confirm whether mboxer is ready to produce safe projections for a configured external destination once an API/import boundary exists.
|
|
11
|
+
- Identify blockers versus nice-to-have improvements.
|
|
12
|
+
- Draft a concrete future external API/import adapter plan that keeps mboxer independent.
|
|
13
|
+
- Identify the safest first integration PR once an external intake endpoint is available.
|
|
14
|
+
- Watch for instruction-surface changes and report them.
|
|
15
|
+
|
|
16
|
+
End with:
|
|
17
|
+
|
|
18
|
+
- Readiness verdict.
|
|
19
|
+
- Evidence supporting the verdict.
|
|
20
|
+
- Blockers.
|
|
21
|
+
- Nice-to-have improvements.
|
|
22
|
+
- Future external adapter plan.
|
|
23
|
+
- First handoff PR recommendation.
|
|
24
|
+
- Tests/checks run.
|
|
25
|
+
- Instruction-surface findings.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Harden mboxer export manifest and lineage behavior without adding external custody/API integration.
|
|
2
|
+
|
|
3
|
+
Requirements:
|
|
4
|
+
|
|
5
|
+
- Keep changes PR-sized.
|
|
6
|
+
- Do not add network calls or external service dependencies.
|
|
7
|
+
- Prefer additive manifest fields and tests.
|
|
8
|
+
- Do not include raw sensitive body content in manifests.
|
|
9
|
+
- Inspect `src/mboxer/exporters/manifest.py`, NotebookLM exporter, JSONL exporter, export schema usage, config export profile behavior, and export/manifest tests.
|
|
10
|
+
- Preserve current CLI behavior unless a test proves a bug.
|
|
11
|
+
- Watch for instruction-surface changes and report them.
|
|
12
|
+
|
|
13
|
+
Desired outcome:
|
|
14
|
+
|
|
15
|
+
Manifests should better record export type, account, source DB/config context where available, export profile, scrub/security posture, category path, generated files, item counts, splitting/limits behavior, and timestamps.
|
|
16
|
+
|
|
17
|
+
Run targeted tests and broader tests if feasible.
|
|
18
|
+
|
|
19
|
+
End with the standard task report from `AGENTS.md`.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Add the smallest useful neutral producer-event seam for mboxer, without connecting to an external custody system.
|
|
2
|
+
|
|
3
|
+
Requirements:
|
|
4
|
+
|
|
5
|
+
- Do not add network calls.
|
|
6
|
+
- Do not invent a final external API/import boundary.
|
|
7
|
+
- Keep mboxer independent.
|
|
8
|
+
- Prefer a neutral module name like `events`, `activity`, `audit`, or `producer`.
|
|
9
|
+
- Events should be append-oriented descriptions of local operations, not mutable state.
|
|
10
|
+
- Use JSON-serializable payloads.
|
|
11
|
+
- Do not include sensitive raw body content by default.
|
|
12
|
+
- Add tests.
|
|
13
|
+
- Watch for instruction-surface changes and report them.
|
|
14
|
+
|
|
15
|
+
Inspect operational evidence around ingest runs, classifications, category review, security findings, exports, and manifests before implementing.
|
|
16
|
+
|
|
17
|
+
End with:
|
|
18
|
+
|
|
19
|
+
- What seam you added and why.
|
|
20
|
+
- Files changed.
|
|
21
|
+
- Tests run.
|
|
22
|
+
- Why this does not couple mboxer to any external system.
|
|
23
|
+
- Later abstraction opportunities.
|
|
24
|
+
- Instruction-surface changes.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Perform a no-code readiness audit for `mboxer` as a future safe-projection producer for configured external destinations.
|
|
2
|
+
|
|
3
|
+
Requirements:
|
|
4
|
+
|
|
5
|
+
- Do not modify files.
|
|
6
|
+
- Run `git status --short` first.
|
|
7
|
+
- Inspect CLI, schema, migrations, ingest, normalize, classify, taxonomy, security, exporters, manifests, config, and tests.
|
|
8
|
+
- Identify existing evidence/producer seams.
|
|
9
|
+
- Identify missing readiness pieces for append-only event emission.
|
|
10
|
+
- Identify fragile coupling risks.
|
|
11
|
+
- Identify test gaps.
|
|
12
|
+
- Watch for instruction-surface files: `AGENTS.md`, `CLAUDE.md`, `.codex/`, `.claude/`, `.agents/`, hooks, rules, skills, prompts, MCP config, and related files.
|
|
13
|
+
|
|
14
|
+
End with:
|
|
15
|
+
|
|
16
|
+
- Current readiness assessment.
|
|
17
|
+
- Top gaps.
|
|
18
|
+
- PR-sized tasks in recommended order.
|
|
19
|
+
- Files likely involved.
|
|
20
|
+
- Risks / cautions.
|
|
21
|
+
- Instruction-surface findings.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Audit mboxer security and export boundary behavior.
|
|
2
|
+
|
|
3
|
+
Requirements:
|
|
4
|
+
|
|
5
|
+
- Keep changes PR-sized.
|
|
6
|
+
- Do not inspect real user archives.
|
|
7
|
+
- Do not add external services.
|
|
8
|
+
- Do not claim full DLP or semantic PII detection.
|
|
9
|
+
- Do not change export profile semantics without tests and explicit rationale.
|
|
10
|
+
- Watch for instruction-surface changes and report them.
|
|
11
|
+
|
|
12
|
+
Inspect security scan, scrub, policy, NotebookLM export, JSONL export, manifest behavior, tests, and config defaults.
|
|
13
|
+
|
|
14
|
+
Focus on whether `raw`, `reviewed`, `scrubbed`, `metadata-only`, and `exclude` are clear and tested; whether exports can leak raw body text under safer profiles; whether manifests avoid sensitive raw body content; and whether attachments are handled safely.
|
|
15
|
+
|
|
16
|
+
Implement only focused fixes/tests where there is clear evidence of a gap.
|
|
17
|
+
|
|
18
|
+
End with the standard task report from `AGENTS.md`.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# `.codex/` helper files
|
|
2
|
+
|
|
3
|
+
Codex reads repository instructions from root `AGENTS.md`. These `.codex/` files are optional helper prompts/templates for repeatable mboxer work.
|
|
4
|
+
|
|
5
|
+
Use them by pasting the prompt into Codex from the repo root, or by invoking Codex with the file content in your own shell workflow.
|
|
6
|
+
|
|
7
|
+
Important:
|
|
8
|
+
|
|
9
|
+
- Do not assume edits to this directory affect an already-running Codex session.
|
|
10
|
+
- After changing `.codex/`, `AGENTS.md`, or other instruction-surface files, start a fresh Codex session before relying on the new instructions.
|
|
11
|
+
- These prompts intentionally avoid inventing a downstream API. They prepare mboxer for possible configured external intake/export paths without coupling it to any service that does not exist here.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# mboxer readiness audit
|
|
2
|
+
|
|
3
|
+
You are working in the `mboxer` repo.
|
|
4
|
+
|
|
5
|
+
Goal: inspect the current repo and produce a readiness map for making `mboxer` a strong future safe-projection producer for configured external destinations, without implementing external custody/API integration yet.
|
|
6
|
+
|
|
7
|
+
Constraints:
|
|
8
|
+
|
|
9
|
+
- Do not modify files.
|
|
10
|
+
- Do not redesign the app.
|
|
11
|
+
- Do not add external service dependencies.
|
|
12
|
+
- Do not invent an external intake API.
|
|
13
|
+
- Do not make broad speculative recommendations without tying them to current code.
|
|
14
|
+
- Treat `mboxer` as a local-first email archive processor whose job is to ingest Gmail/Takeout MBOX archives, normalize emails, store durable SQLite evidence, classify, scan/scrub, and export NotebookLM/JSONL packs.
|
|
15
|
+
- Watch for instruction-surface files: `AGENTS.md`, `CLAUDE.md`, `.codex/`, `.claude/`, `.agents/`, hooks, rules, skills, prompts, MCP config, and related files. Report anything discovered.
|
|
16
|
+
|
|
17
|
+
Tasks:
|
|
18
|
+
|
|
19
|
+
1. Run `git status --short`.
|
|
20
|
+
2. Inspect CLI, schema, migrations, ingest, normalize, classify, taxonomy, security, exporters, manifests, config, and tests.
|
|
21
|
+
3. Identify the strongest existing producer/evidence seams.
|
|
22
|
+
4. Identify missing readiness pieces for future append-only event emission.
|
|
23
|
+
5. Identify fragile areas where future external API/import integration could cause drift or coupling.
|
|
24
|
+
6. Identify test gaps that should be closed before adding any external integration.
|
|
25
|
+
7. Recommend a PR-sized implementation sequence, ordered by safety and value.
|
|
26
|
+
|
|
27
|
+
Output only:
|
|
28
|
+
|
|
29
|
+
- Summary of what you inspected.
|
|
30
|
+
- Current readiness assessment.
|
|
31
|
+
- Top gaps.
|
|
32
|
+
- Recommended PR-sized tasks.
|
|
33
|
+
- Files likely involved.
|
|
34
|
+
- Risks / cautions.
|
|
35
|
+
- Any instruction-surface files discovered or changed.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# mboxer manifest and lineage hardening
|
|
2
|
+
|
|
3
|
+
You are working in the `mboxer` repo.
|
|
4
|
+
|
|
5
|
+
Goal: harden existing export manifest and lineage behavior so future external systems can understand what was produced, from what inputs, under what config/policy, without adding external custody/API integration yet.
|
|
6
|
+
|
|
7
|
+
Constraints:
|
|
8
|
+
|
|
9
|
+
- Keep changes PR-sized.
|
|
10
|
+
- Do not add external service dependencies.
|
|
11
|
+
- Do not invent a network client.
|
|
12
|
+
- Preserve current CLI behavior unless a test proves a bug.
|
|
13
|
+
- Do not mutate existing historical data unexpectedly.
|
|
14
|
+
- Prefer additive manifest fields and tests.
|
|
15
|
+
- Do not include raw sensitive body content in manifests.
|
|
16
|
+
- Watch for instruction-surface changes and report them explicitly.
|
|
17
|
+
|
|
18
|
+
Inspect first:
|
|
19
|
+
|
|
20
|
+
- `src/mboxer/exporters/manifest.py`
|
|
21
|
+
- NotebookLM exporter
|
|
22
|
+
- JSONL exporter
|
|
23
|
+
- `exports` / `export_items` schema usage
|
|
24
|
+
- config paths and export profile behavior
|
|
25
|
+
- tests around manifests and exports
|
|
26
|
+
|
|
27
|
+
Desired direction:
|
|
28
|
+
|
|
29
|
+
- Manifest should clearly record export type, account, source DB/config context where available, export profile, scrub/security posture, category path, generated files, item counts, limits/splitting behavior, and timestamps.
|
|
30
|
+
- Keep enough information for a future external API/import adapter to emit an append-only handoff record about the export.
|
|
31
|
+
- Add or strengthen tests.
|
|
32
|
+
|
|
33
|
+
Before final report, run targeted tests and then broader tests if feasible.
|
|
34
|
+
|
|
35
|
+
Output:
|
|
36
|
+
|
|
37
|
+
- Summary of inspection.
|
|
38
|
+
- Files changed.
|
|
39
|
+
- Tests added/updated.
|
|
40
|
+
- Exact behavior changes.
|
|
41
|
+
- Limitations left intentionally unresolved.
|
|
42
|
+
- Abstraction seams noticed for later integration.
|
|
43
|
+
- Instruction-surface changes.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# mboxer neutral producer event seam
|
|
2
|
+
|
|
3
|
+
You are working in the `mboxer` repo.
|
|
4
|
+
|
|
5
|
+
Goal: add a small internal abstraction seam for future producer events, without connecting to an external custody system and without changing core behavior unnecessarily.
|
|
6
|
+
|
|
7
|
+
Important framing:
|
|
8
|
+
|
|
9
|
+
`mboxer` is the local email/MBOX specialist. A future external evidence store may receive safe projections through configured intake routes, but no external system exists in this repo. This task should make future integration easier while keeping `mboxer` independent.
|
|
10
|
+
|
|
11
|
+
Constraints:
|
|
12
|
+
|
|
13
|
+
- Do not add network calls.
|
|
14
|
+
- Do not add external service package names.
|
|
15
|
+
- Do not invent a final event schema that pretends to be an external API.
|
|
16
|
+
- Keep it local, testable, and boring.
|
|
17
|
+
- Prefer a neutral module name like `events`, `activity`, `audit`, or `producer`.
|
|
18
|
+
- Events should be append-oriented descriptions of local operations, not mutable state.
|
|
19
|
+
- Do not include sensitive raw body content by default.
|
|
20
|
+
- Watch for instruction-surface changes and report them explicitly.
|
|
21
|
+
|
|
22
|
+
Tasks:
|
|
23
|
+
|
|
24
|
+
1. Inspect existing places where operational evidence already exists:
|
|
25
|
+
- ingest runs
|
|
26
|
+
- classifications
|
|
27
|
+
- category review/approval/rejection
|
|
28
|
+
- security findings
|
|
29
|
+
- exports/manifests
|
|
30
|
+
2. Propose the smallest neutral internal event abstraction.
|
|
31
|
+
3. Implement only if the seam is clearly useful and does not require broad rewrites.
|
|
32
|
+
4. Add tests.
|
|
33
|
+
5. Document how a future external API/import adapter could consume these events.
|
|
34
|
+
|
|
35
|
+
Preferred shape:
|
|
36
|
+
|
|
37
|
+
- A simple dataclass or typed structure for local producer events.
|
|
38
|
+
- Stable event names for operations like ingest completed, classification completed, security scan completed, export completed, category reviewed.
|
|
39
|
+
- JSON-serializable payloads.
|
|
40
|
+
- No external delivery.
|
|
41
|
+
- Clear boundaries around local evidence, safe projections, and exported content.
|
|
42
|
+
|
|
43
|
+
Output:
|
|
44
|
+
|
|
45
|
+
- What you inspected.
|
|
46
|
+
- What seam you added and why.
|
|
47
|
+
- Files changed.
|
|
48
|
+
- Tests run.
|
|
49
|
+
- Why this does not couple `mboxer` to any external system.
|
|
50
|
+
- Later abstraction/refactor opportunities discovered, without implementing them.
|
|
51
|
+
- Instruction-surface changes.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# mboxer security and export boundary audit
|
|
2
|
+
|
|
3
|
+
You are working in the `mboxer` repo.
|
|
4
|
+
|
|
5
|
+
Goal: audit and, if safe, tighten tests around scan/scrub/export boundary behavior before mboxer becomes a future safe-projection producer for configured external custody systems.
|
|
6
|
+
|
|
7
|
+
Constraints:
|
|
8
|
+
|
|
9
|
+
- Keep changes PR-sized.
|
|
10
|
+
- Do not claim full DLP or semantic PII detection.
|
|
11
|
+
- Do not inspect real user archives.
|
|
12
|
+
- Do not add external services.
|
|
13
|
+
- Do not change export profile semantics without tests and explicit rationale.
|
|
14
|
+
- Watch for instruction-surface changes and report them explicitly.
|
|
15
|
+
|
|
16
|
+
Inspect:
|
|
17
|
+
|
|
18
|
+
- `src/mboxer/security/scan.py`
|
|
19
|
+
- `src/mboxer/security/scrub.py`
|
|
20
|
+
- `src/mboxer/security/policy.py`
|
|
21
|
+
- NotebookLM exporter
|
|
22
|
+
- JSONL exporter
|
|
23
|
+
- manifest behavior
|
|
24
|
+
- tests for scan/scrub/export profiles
|
|
25
|
+
- config defaults around cloud-style exports
|
|
26
|
+
|
|
27
|
+
Focus questions:
|
|
28
|
+
|
|
29
|
+
1. Is the behavior of `raw`, `reviewed`, `scrubbed`, `metadata-only`, and `exclude` clear and tested?
|
|
30
|
+
2. Can exported files accidentally include raw body text when a safer profile is expected?
|
|
31
|
+
3. Do manifests avoid sensitive raw body content?
|
|
32
|
+
4. Are security findings represented clearly without overstating scanner capability?
|
|
33
|
+
5. Are attachments excluded, referenced, or represented safely in exports?
|
|
34
|
+
|
|
35
|
+
Implement only focused fixes/tests where there is clear evidence of a gap.
|
|
36
|
+
|
|
37
|
+
Output:
|
|
38
|
+
|
|
39
|
+
- What you inspected.
|
|
40
|
+
- Findings.
|
|
41
|
+
- Files changed.
|
|
42
|
+
- Tests run.
|
|
43
|
+
- Remaining risks.
|
|
44
|
+
- Recommended next step.
|
|
45
|
+
- Instruction-surface changes.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# mboxer final external intake readiness audit
|
|
2
|
+
|
|
3
|
+
You are working in the `mboxer` repo.
|
|
4
|
+
|
|
5
|
+
Goal: perform a final no-code readiness audit after the manifest, event seam, and security/export boundary work is complete.
|
|
6
|
+
|
|
7
|
+
Constraints:
|
|
8
|
+
|
|
9
|
+
- Do not modify files.
|
|
10
|
+
- Do not invent an external intake API.
|
|
11
|
+
- Do not recommend broad rewrites unless there is a concrete blocker.
|
|
12
|
+
- Separate proven code behavior from recommendations.
|
|
13
|
+
- Watch for instruction-surface files and report them explicitly.
|
|
14
|
+
|
|
15
|
+
Tasks:
|
|
16
|
+
|
|
17
|
+
1. Run `git status --short`.
|
|
18
|
+
2. Inspect the current repo state.
|
|
19
|
+
3. Confirm whether `mboxer` is ready to produce safe projections for a configured external destination once an API/import boundary exists.
|
|
20
|
+
4. Identify remaining blockers vs nice-to-have improvements.
|
|
21
|
+
5. Draft a concrete future external API/import adapter plan that keeps `mboxer` independent.
|
|
22
|
+
6. Identify the safest first integration PR once an external intake endpoint is available.
|
|
23
|
+
|
|
24
|
+
Output:
|
|
25
|
+
|
|
26
|
+
- Readiness verdict.
|
|
27
|
+
- Evidence supporting the verdict.
|
|
28
|
+
- Blockers, if any.
|
|
29
|
+
- Nice-to-have improvements.
|
|
30
|
+
- Future adapter plan.
|
|
31
|
+
- First integration PR recommendation.
|
|
32
|
+
- Tests or checks run.
|
|
33
|
+
- Instruction-surface changes.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
|
|
3
|
+
updates:
|
|
4
|
+
- package-ecosystem: "pip"
|
|
5
|
+
directory: "/"
|
|
6
|
+
schedule:
|
|
7
|
+
interval: "weekly"
|
|
8
|
+
day: "monday"
|
|
9
|
+
time: "09:00"
|
|
10
|
+
timezone: "America/New_York"
|
|
11
|
+
|
|
12
|
+
open-pull-requests-limit: 5
|
|
13
|
+
|
|
14
|
+
labels:
|
|
15
|
+
- "dependencies"
|
|
16
|
+
- "python"
|
|
17
|
+
|
|
18
|
+
commit-message:
|
|
19
|
+
prefix: "deps"
|
|
20
|
+
include: "scope"
|
|
21
|
+
|
|
22
|
+
groups:
|
|
23
|
+
python-runtime-dependencies:
|
|
24
|
+
dependency-type: "production"
|
|
25
|
+
update-types:
|
|
26
|
+
- "minor"
|
|
27
|
+
- "patch"
|
|
28
|
+
|
|
29
|
+
python-development-dependencies:
|
|
30
|
+
dependency-type: "development"
|
|
31
|
+
update-types:
|
|
32
|
+
- "minor"
|
|
33
|
+
- "patch"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
|
2
|
+
# They are provided by a third party and are governed by
|
|
3
|
+
# separate terms of service, privacy policy, and support
|
|
4
|
+
# documentation.
|
|
5
|
+
|
|
6
|
+
# Bandit is a security linter designed to find common security issues in Python code.
|
|
7
|
+
# This action will run Bandit on your codebase.
|
|
8
|
+
# The results of the scan will be found under the Security tab of your repository.
|
|
9
|
+
|
|
10
|
+
# https://github.com/marketplace/actions/bandit-scan is ISC licensed, by abirismyname
|
|
11
|
+
# https://pypi.org/project/bandit/ is Apache v2.0 licensed, by PyCQA
|
|
12
|
+
|
|
13
|
+
name: Bandit
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
branches: [ "master" ]
|
|
17
|
+
pull_request:
|
|
18
|
+
# The branches below must be a subset of the branches above
|
|
19
|
+
branches: [ "master" ]
|
|
20
|
+
schedule:
|
|
21
|
+
- cron: '16 13 * * 0'
|
|
22
|
+
|
|
23
|
+
jobs:
|
|
24
|
+
bandit:
|
|
25
|
+
permissions:
|
|
26
|
+
contents: read # for actions/checkout to fetch code
|
|
27
|
+
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
|
|
28
|
+
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
|
|
29
|
+
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v4
|
|
33
|
+
- name: Bandit Scan
|
|
34
|
+
uses: shundor/python-bandit-scan@ab1d87dfccc5a0ffab88be3aaac6ffe35c10d6cd
|
|
35
|
+
with: # optional arguments
|
|
36
|
+
# exit with 0, even with results found
|
|
37
|
+
exit_zero: true # optional, default is DEFAULT
|
|
38
|
+
# GitHub token of the repository (automatically created by GitHub)
|
|
39
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information.
|
|
40
|
+
# File or directory to run bandit on
|
|
41
|
+
# path: # optional, default is .
|
|
42
|
+
# Report only issues of a given severity level or higher. Can be LOW, MEDIUM or HIGH. Default is UNDEFINED (everything)
|
|
43
|
+
# level: # optional, default is UNDEFINED
|
|
44
|
+
# Report only issues of a given confidence level or higher. Can be LOW, MEDIUM or HIGH. Default is UNDEFINED (everything)
|
|
45
|
+
# confidence: # optional, default is UNDEFINED
|
|
46
|
+
# comma-separated list of paths (glob patterns supported) to exclude from scan (note that these are in addition to the excluded paths provided in the config file) (default: .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg)
|
|
47
|
+
# excluded_paths: # optional, default is DEFAULT
|
|
48
|
+
# comma-separated list of test IDs to skip
|
|
49
|
+
# skips: # optional, default is DEFAULT
|
|
50
|
+
# path to a .bandit file that supplies command line arguments
|
|
51
|
+
# ini_path: # optional, default is DEFAULT
|
|
52
|
+
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- master
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- master
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
lint:
|
|
13
|
+
name: Lint (ruff)
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
permissions:
|
|
16
|
+
contents: read
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.11"
|
|
22
|
+
cache: pip
|
|
23
|
+
- name: Install dependencies
|
|
24
|
+
run: pip install -e ".[dev]"
|
|
25
|
+
- name: Run ruff
|
|
26
|
+
run: ruff check src/
|
|
27
|
+
|
|
28
|
+
typecheck:
|
|
29
|
+
name: Type Check (mypy)
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
permissions:
|
|
32
|
+
contents: read
|
|
33
|
+
steps:
|
|
34
|
+
- uses: actions/checkout@v4
|
|
35
|
+
- uses: actions/setup-python@v5
|
|
36
|
+
with:
|
|
37
|
+
python-version: "3.11"
|
|
38
|
+
cache: pip
|
|
39
|
+
- name: Install dependencies
|
|
40
|
+
run: pip install -e ".[dev]"
|
|
41
|
+
- name: Run mypy
|
|
42
|
+
run: mypy src/
|
|
43
|
+
|
|
44
|
+
test:
|
|
45
|
+
name: Test (pytest) / Python ${{ matrix.python-version }}
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
permissions:
|
|
48
|
+
contents: read
|
|
49
|
+
strategy:
|
|
50
|
+
fail-fast: false
|
|
51
|
+
matrix:
|
|
52
|
+
python-version:
|
|
53
|
+
- "3.11"
|
|
54
|
+
- "3.12"
|
|
55
|
+
steps:
|
|
56
|
+
- uses: actions/checkout@v4
|
|
57
|
+
- uses: actions/setup-python@v5
|
|
58
|
+
with:
|
|
59
|
+
python-version: ${{ matrix.python-version }}
|
|
60
|
+
cache: pip
|
|
61
|
+
- name: Install dependencies
|
|
62
|
+
run: pip install -e ".[dev]"
|
|
63
|
+
- name: Generate synthetic test fixtures
|
|
64
|
+
run: python tests/fixtures/make_synthetic.py
|
|
65
|
+
- name: Run pytest
|
|
66
|
+
run: pytest --tb=short -q
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
name: Dependency Review
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
|
|
6
|
+
jobs:
|
|
7
|
+
dependency-review:
|
|
8
|
+
name: Dependency Review
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
pull-requests: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/dependency-review-action@v4
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
name: Build distribution
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- name: Check out repository
|
|
17
|
+
uses: actions/checkout@v4
|
|
18
|
+
with:
|
|
19
|
+
fetch-depth: 0
|
|
20
|
+
persist-credentials: false
|
|
21
|
+
|
|
22
|
+
- name: Set up Python
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: "3.11"
|
|
26
|
+
cache: pip
|
|
27
|
+
|
|
28
|
+
- name: Install build tooling
|
|
29
|
+
run: python -m pip install --upgrade build
|
|
30
|
+
|
|
31
|
+
- name: Build package
|
|
32
|
+
run: python -m build
|
|
33
|
+
|
|
34
|
+
- name: Store distributions
|
|
35
|
+
uses: actions/upload-artifact@v4
|
|
36
|
+
with:
|
|
37
|
+
name: python-package-distributions
|
|
38
|
+
path: dist/
|
|
39
|
+
|
|
40
|
+
publish:
|
|
41
|
+
name: Publish distribution to PyPI
|
|
42
|
+
needs: build
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
|
|
45
|
+
environment:
|
|
46
|
+
name: pypi
|
|
47
|
+
url: https://pypi.org/p/uscient-mboxer
|
|
48
|
+
|
|
49
|
+
permissions:
|
|
50
|
+
id-token: write
|
|
51
|
+
|
|
52
|
+
steps:
|
|
53
|
+
- name: Download distributions
|
|
54
|
+
uses: actions/download-artifact@v4
|
|
55
|
+
with:
|
|
56
|
+
name: python-package-distributions
|
|
57
|
+
path: dist/
|
|
58
|
+
|
|
59
|
+
- name: Publish to PyPI
|
|
60
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
name: Schema
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
paths:
|
|
6
|
+
- "src/mboxer/db/schema.sql"
|
|
7
|
+
- "src/mboxer/db/migrations/**"
|
|
8
|
+
- "schema_ingest_tracking.sql"
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
validate:
|
|
12
|
+
name: Validate SQLite schema
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
permissions:
|
|
15
|
+
contents: read
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- name: Apply schema to a fresh database
|
|
19
|
+
run: |
|
|
20
|
+
sqlite3 /tmp/test.sqlite < src/mboxer/db/schema.sql
|
|
21
|
+
echo "Schema applied cleanly"
|