tkm-agentsim 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tkm_agentsim-0.1.0/.github/FUNDING.yml +1 -0
- tkm_agentsim-0.1.0/.github/workflows/ci.yml +45 -0
- tkm_agentsim-0.1.0/.pre-commit-config.yaml +15 -0
- tkm_agentsim-0.1.0/AGENTS.md +118 -0
- tkm_agentsim-0.1.0/CHANGELOG.md +84 -0
- tkm_agentsim-0.1.0/CONTRIBUTING.md +13 -0
- tkm_agentsim-0.1.0/EVOLUTION.md +159 -0
- tkm_agentsim-0.1.0/LICENSE +21 -0
- tkm_agentsim-0.1.0/Makefile +40 -0
- tkm_agentsim-0.1.0/PKG-INFO +155 -0
- tkm_agentsim-0.1.0/README.md +127 -0
- tkm_agentsim-0.1.0/agentsim/__init__.py +38 -0
- tkm_agentsim-0.1.0/agentsim/agents/__init__.py +13 -0
- tkm_agentsim-0.1.0/agentsim/agents/base.py +86 -0
- tkm_agentsim-0.1.0/agentsim/agents/deliberative.py +73 -0
- tkm_agentsim-0.1.0/agentsim/agents/learning.py +101 -0
- tkm_agentsim-0.1.0/agentsim/agents/reactive.py +40 -0
- tkm_agentsim-0.1.0/agentsim/analysis.py +164 -0
- tkm_agentsim-0.1.0/agentsim/environment/__init__.py +4 -0
- tkm_agentsim-0.1.0/agentsim/environment/base.py +34 -0
- tkm_agentsim-0.1.0/agentsim/environment/grid.py +261 -0
- tkm_agentsim-0.1.0/agentsim/scenarios/__init__.py +12 -0
- tkm_agentsim-0.1.0/agentsim/scenarios/foraging.py +126 -0
- tkm_agentsim-0.1.0/agentsim/scenarios/pursuit.py +113 -0
- tkm_agentsim-0.1.0/agentsim/simulation.py +138 -0
- tkm_agentsim-0.1.0/agentsim/viz.py +132 -0
- tkm_agentsim-0.1.0/examples/example_foraging.py +57 -0
- tkm_agentsim-0.1.0/examples/example_learning_agent.py +76 -0
- tkm_agentsim-0.1.0/examples/example_pursuit.py +46 -0
- tkm_agentsim-0.1.0/pyproject.toml +56 -0
- tkm_agentsim-0.1.0/tests/__init__.py +0 -0
- tkm_agentsim-0.1.0/tests/conftest.py +98 -0
- tkm_agentsim-0.1.0/tests/test_agents.py +75 -0
- tkm_agentsim-0.1.0/tests/test_analysis.py +68 -0
- tkm_agentsim-0.1.0/tests/test_coverage_extra.py +355 -0
- tkm_agentsim-0.1.0/tests/test_environment.py +68 -0
- tkm_agentsim-0.1.0/tests/test_error_hardening.py +183 -0
- tkm_agentsim-0.1.0/tests/test_performance.py +162 -0
- tkm_agentsim-0.1.0/tests/test_property_based.py +243 -0
- tkm_agentsim-0.1.0/tests/test_scenarios.py +25 -0
- tkm_agentsim-0.1.0/tests/test_security.py +155 -0
- tkm_agentsim-0.1.0/tests/test_simulation.py +31 -0
- tkm_agentsim-0.1.0/uv.lock +890 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
github: TECHKNOWMAD-LABS
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["main"]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: ["main"]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
name: Test & Lint (Python ${{ matrix.python-version }})
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
22
|
+
uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: ${{ matrix.python-version }}
|
|
25
|
+
cache: pip
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: |
|
|
29
|
+
python -m pip install --upgrade pip
|
|
30
|
+
pip install numpy matplotlib pytest pytest-cov hypothesis ruff
|
|
31
|
+
|
|
32
|
+
- name: Lint with ruff
|
|
33
|
+
run: |
|
|
34
|
+
ruff check agentsim/ tests/
|
|
35
|
+
|
|
36
|
+
- name: Run tests with coverage
|
|
37
|
+
run: |
|
|
38
|
+
pytest -v --tb=short --cov=agentsim --cov-report=term-missing --cov-fail-under=95
|
|
39
|
+
|
|
40
|
+
- name: Upload coverage report
|
|
41
|
+
if: matrix.python-version == '3.12'
|
|
42
|
+
uses: actions/upload-artifact@v4
|
|
43
|
+
with:
|
|
44
|
+
name: coverage-report
|
|
45
|
+
path: .coverage
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.4.4
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
10
|
+
rev: v1.10.0
|
|
11
|
+
hooks:
|
|
12
|
+
- id: mypy
|
|
13
|
+
additional_dependencies: [numpy]
|
|
14
|
+
args: [--ignore-missing-imports, --strict-optional]
|
|
15
|
+
files: ^agentsim/
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# AGENTS.md — Edgecraft Autonomous Development Protocol
|
|
2
|
+
|
|
3
|
+
This repository was developed autonomously using the **Edgecraft Protocol v1.0**,
|
|
4
|
+
a structured 8-cycle autonomous software engineering methodology developed by
|
|
5
|
+
[TechKnowMad Labs](https://techknowmad.ai).
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What is Edgecraft?
|
|
10
|
+
|
|
11
|
+
Edgecraft is a self-directed development loop in which an AI agent iterates
|
|
12
|
+
through well-defined engineering cycles — test coverage, error hardening,
|
|
13
|
+
performance, security, CI/CD, property testing, documentation, and release —
|
|
14
|
+
without human intervention between cycles.
|
|
15
|
+
|
|
16
|
+
Each cycle follows the **L-notation protocol**:
|
|
17
|
+
|
|
18
|
+
| Level | Label | Meaning |
|
|
19
|
+
|-------|-------------|--------------------------------------------------|
|
|
20
|
+
| L1 | detection | Identify a gap or deficiency |
|
|
21
|
+
| L2 | noise | Filter false positives / irrelevant signals |
|
|
22
|
+
| L3 | sub-noise | Surface subtle edge cases |
|
|
23
|
+
| L4 | conjecture | Form a hypothesis about a fix or improvement |
|
|
24
|
+
| L5 | action | Implement the change |
|
|
25
|
+
| L6 | grounding | Verify with tests or measurements |
|
|
26
|
+
| L7 | flywheel | Generalise the lesson to other modules/repos |
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## The 8 Edgecraft Cycles
|
|
31
|
+
|
|
32
|
+
### Cycle 1 — Test Coverage
|
|
33
|
+
- Run baseline coverage analysis
|
|
34
|
+
- Identify modules at 0% or low coverage
|
|
35
|
+
- Write tests for every missed branch
|
|
36
|
+
- Commit with coverage delta
|
|
37
|
+
|
|
38
|
+
### Cycle 2 — Error Hardening
|
|
39
|
+
- Audit all public APIs for missing validation
|
|
40
|
+
- Handle None, NaN, Inf, empty, and out-of-bounds inputs
|
|
41
|
+
- Add graceful error messages (no bare `except`)
|
|
42
|
+
- Write tests confirming each guard
|
|
43
|
+
|
|
44
|
+
### Cycle 3 — Performance
|
|
45
|
+
- Identify sequential operations suitable for parallelism
|
|
46
|
+
- Add `ThreadPoolExecutor` or `asyncio.gather` + semaphore patterns
|
|
47
|
+
- Add `functools.lru_cache` for pure, expensive computations
|
|
48
|
+
- Measure and document speedups in commit messages
|
|
49
|
+
|
|
50
|
+
### Cycle 4 — Security
|
|
51
|
+
- AST scan: eval, exec, pickle, subprocess, os.system
|
|
52
|
+
- Pattern scan: hardcoded secrets, API keys, tokens
|
|
53
|
+
- Document findings and false positives
|
|
54
|
+
- Embed scan as a live CI test (regression guard)
|
|
55
|
+
|
|
56
|
+
### Cycle 5 — CI/CD
|
|
57
|
+
- Create `.github/workflows/ci.yml` (multi-Python, lint + test + coverage gate)
|
|
58
|
+
- Create `.pre-commit-config.yaml` (ruff + mypy)
|
|
59
|
+
- Fix all lint issues before committing
|
|
60
|
+
- Ensure every push triggers automated checks
|
|
61
|
+
|
|
62
|
+
### Cycle 6 — Property-Based Testing
|
|
63
|
+
- Write Hypothesis strategies for core data types
|
|
64
|
+
- Test invariants: range bounds, round-trips, no-crash guarantees
|
|
65
|
+
- Run with ≥50 examples per property
|
|
66
|
+
- Fix any Hypothesis-discovered edge cases before committing
|
|
67
|
+
|
|
68
|
+
### Cycle 7 — Examples + Docs
|
|
69
|
+
- Write 2-3 runnable scripts in `examples/`
|
|
70
|
+
- Verify each example runs without errors
|
|
71
|
+
- Add Google-style docstrings to every public function
|
|
72
|
+
- Ensure examples import cleanly via `PYTHONPATH=.`
|
|
73
|
+
|
|
74
|
+
### Cycle 8 — Release Engineering
|
|
75
|
+
- Validate `pyproject.toml` has author, license, classifiers
|
|
76
|
+
- Write `CHANGELOG.md` following Keep a Changelog format
|
|
77
|
+
- Create `Makefile` with `test`, `lint`, `format`, `security`, `clean`
|
|
78
|
+
- Tag `v0.1.0` and push with tags
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Commit Convention
|
|
83
|
+
|
|
84
|
+
All Edgecraft commits are prefixed with L-notation to make the development
|
|
85
|
+
rationale machine-readable and auditable:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
L5/action: add input validation and error handling
|
|
89
|
+
L6/grounding: 30 tests passing, coverage 99%
|
|
90
|
+
L3/sub-noise: hypothesis found edge case — NaN epsilon
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Running the Protocol Yourself
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Install dev dependencies
|
|
99
|
+
pip install -e ".[dev]"
|
|
100
|
+
|
|
101
|
+
# Run all tests with coverage
|
|
102
|
+
make coverage
|
|
103
|
+
|
|
104
|
+
# Lint
|
|
105
|
+
make lint
|
|
106
|
+
|
|
107
|
+
# Security scan
|
|
108
|
+
make security
|
|
109
|
+
|
|
110
|
+
# Run examples
|
|
111
|
+
PYTHONPATH=. python3 examples/example_foraging.py
|
|
112
|
+
PYTHONPATH=. python3 examples/example_pursuit.py
|
|
113
|
+
PYTHONPATH=. python3 examples/example_learning_agent.py
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
*Edgecraft Protocol v1.0 — TechKnowMad Labs, 2026*
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to **agentsim** are documented here.
|
|
4
|
+
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## [0.1.0] — 2026-03-23
|
|
9
|
+
|
|
10
|
+
Initial release produced by **8 autonomous Edgecraft development cycles**.
|
|
11
|
+
|
|
12
|
+
### Added — Cycle 1: Test Coverage
|
|
13
|
+
- `tests/conftest.py`: shared fixtures (`basic_reactive_agent`, `forager_agent`,
|
|
14
|
+
`small_grid`, `seeded_grid`, `simple_sim`) and `make_obs()` helper.
|
|
15
|
+
- `tests/test_coverage_extra.py`: 31 tests covering previously-missed branches in
|
|
16
|
+
`deliberative.py`, `learning.py`, `reactive.py`, `analysis.py`, `grid.py`,
|
|
17
|
+
`simulation.py`, `viz.py`, `pursuit.py`.
|
|
18
|
+
- Coverage raised from **94% → 99%** (3 unreachable lines remain).
|
|
19
|
+
|
|
20
|
+
### Added — Cycle 2: Error Hardening
|
|
21
|
+
- `agentsim/agents/base.py`: `ValueError` for empty/invalid `agent_id` and
|
|
22
|
+
`position`; non-finite rewards silently treated as 0; `None` observation
|
|
23
|
+
handled gracefully.
|
|
24
|
+
- `agentsim/agents/learning.py`: validation for `state_size`, `learning_rate`,
|
|
25
|
+
`discount`, `epsilon`, `factor`, `minimum`; `None` observation safe in
|
|
26
|
+
`perceive()`; non-finite reward in `update()` treated as 0.
|
|
27
|
+
- `agentsim/environment/grid.py`: validation for `rows`, `cols`, `food_count`,
|
|
28
|
+
`wall_density`; `add_agent()` validates id and bounds; `step()` and
|
|
29
|
+
`get_observation()` raise `KeyError` for unknown agents; non-string action
|
|
30
|
+
treated as `stay`; exhaustive fallback in `_random_empty()`.
|
|
31
|
+
- `agentsim/simulation.py`: `SimulationConfig.__post_init__` validates
|
|
32
|
+
`max_steps`, `n_episodes`, `render_every`; `Simulation.__init__` rejects
|
|
33
|
+
`None` env and empty agent list.
|
|
34
|
+
- `tests/test_error_hardening.py`: 30 validation tests.
|
|
35
|
+
|
|
36
|
+
### Added — Cycle 3: Performance
|
|
37
|
+
- `agentsim/analysis.py`: `run_episodes_parallel()` using `ThreadPoolExecutor`
|
|
38
|
+
for parallel episode execution; `lru_cache(maxsize=256)` on trajectory stats
|
|
39
|
+
computation kernel `_trajectory_stats_cached()`.
|
|
40
|
+
- `tests/test_performance.py`: 9 tests covering parallelism correctness, cache
|
|
41
|
+
hits/misses, and timing benchmarks (foraging < 2s, metrics < 0.5s).
|
|
42
|
+
|
|
43
|
+
### Added — Cycle 4: Security
|
|
44
|
+
- `tests/test_security.py`: 4 live AST-based security tests that fail CI if
|
|
45
|
+
`eval`, `exec`, `pickle`, `subprocess`, or hardcoded secrets are introduced.
|
|
46
|
+
- Scan result: **0 findings**, 2 false positives filtered (stdlib `random` for
|
|
47
|
+
agent exploration — not cryptographic use).
|
|
48
|
+
|
|
49
|
+
### Added — Cycle 5: CI/CD
|
|
50
|
+
- `.github/workflows/ci.yml`: GitHub Actions pipeline running ruff + pytest
|
|
51
|
+
with 95% coverage gate on Python 3.10, 3.11, and 3.12.
|
|
52
|
+
- `.pre-commit-config.yaml`: ruff (lint + format) and mypy hooks.
|
|
53
|
+
- Fixed 12 ruff lint issues across 5 test files (import ordering, unused
|
|
54
|
+
imports, f-string without placeholders).
|
|
55
|
+
|
|
56
|
+
### Added — Cycle 6: Property-Based Testing
|
|
57
|
+
- `tests/test_property_based.py`: 9 Hypothesis property tests with strategies
|
|
58
|
+
covering 50–200 examples each:
|
|
59
|
+
- ReactiveAgent always returns a valid action
|
|
60
|
+
- Q-table never produces NaN/Inf
|
|
61
|
+
- Step reward always finite
|
|
62
|
+
- Agent position always in bounds
|
|
63
|
+
- Epsilon decay bounded in `[minimum, 1.0]`
|
|
64
|
+
- Trajectory stats consistent with history length
|
|
65
|
+
- `AgentState.copy()` deep-equal and independent
|
|
66
|
+
- DeliberativeAgent never crashes
|
|
67
|
+
|
|
68
|
+
### Added — Cycle 7: Examples + Docs
|
|
69
|
+
- `examples/example_foraging.py`: 3-agent foraging with full metrics report.
|
|
70
|
+
- `examples/example_pursuit.py`: predator-prey across 5 seeds + 20×20 grid.
|
|
71
|
+
- `examples/example_learning_agent.py`: 30-episode Q-learning training with
|
|
72
|
+
sparkline trend and visitation heatmap.
|
|
73
|
+
- Extended Google-style docstrings on `viz.py` and `scenarios/foraging.py`.
|
|
74
|
+
|
|
75
|
+
### Changed — Cycle 8: Release Engineering
|
|
76
|
+
- `pyproject.toml`: added `authors`, `readme`, `keywords`, `classifiers`, and
|
|
77
|
+
full dev dependency group (`pytest-cov`, `hypothesis`, `mypy`, `pre-commit`).
|
|
78
|
+
- `Makefile`: `test`, `lint`, `format`, `security`, `coverage`, `clean` targets.
|
|
79
|
+
- `AGENTS.md`: Edgecraft autonomous development protocol documentation.
|
|
80
|
+
- `EVOLUTION.md`: per-cycle timestamps and findings log.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
[0.1.0]: https://github.com/TECHKNOWMAD-LABS/agent-sim/releases/tag/v0.1.0
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Contributing to this project
|
|
2
|
+
|
|
3
|
+
1. Fork this repository
|
|
4
|
+
2. Create a feature branch (`git checkout -b feat/your-feature`)
|
|
5
|
+
3. Write tests for your changes
|
|
6
|
+
4. Ensure all tests pass (`pytest -v`)
|
|
7
|
+
5. Ensure linter passes (`ruff check .` for Python)
|
|
8
|
+
6. Commit with a descriptive message
|
|
9
|
+
7. Open a Pull Request
|
|
10
|
+
|
|
11
|
+
By contributing, you agree that your contributions will be licensed under the MIT License.
|
|
12
|
+
|
|
13
|
+
Built by [TechKnowMad Labs](https://techknowmad.ai)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# EVOLUTION.md — Edgecraft Development Log
|
|
2
|
+
|
|
3
|
+
Autonomous development run by **Edgecraft Protocol v1.0**
|
|
4
|
+
Repository: `TECHKNOWMAD-LABS/agent-sim`
|
|
5
|
+
Started: 2026-03-23
|
|
6
|
+
Completed: 2026-03-23
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Cycle 1 — Test Coverage
|
|
11
|
+
|
|
12
|
+
**Timestamp:** 2026-03-23T00:00
|
|
13
|
+
**Finding:** Baseline coverage 94% (36 lines missed across 7 modules)
|
|
14
|
+
**Key misses:** deliberative.py (78%), grid.py (90%), viz.py (93%)
|
|
15
|
+
**Action:** Created `tests/conftest.py` with shared fixtures; wrote 31 new
|
|
16
|
+
tests in `tests/test_coverage_extra.py` covering every missed branch.
|
|
17
|
+
**Result:** Coverage improved to **99%** (3 genuinely unreachable lines).
|
|
18
|
+
**Tests added:** 31 | **Total tests:** 46
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Cycle 2 — Error Hardening
|
|
23
|
+
|
|
24
|
+
**Timestamp:** 2026-03-23T00:10
|
|
25
|
+
**Findings:**
|
|
26
|
+
- `BaseAgent.__init__`: no validation on `agent_id` (empty string accepted) or `position` (wrong type accepted)
|
|
27
|
+
- `GridEnvironment.__init__`: no bounds check on `rows`, `cols`, `food_count`, `wall_density`
|
|
28
|
+
- `LearningAgent.__init__`: `learning_rate=0` accepted (division risk), `epsilon<0` accepted
|
|
29
|
+
- `SimulationConfig`: `max_steps=0` and `n_episodes=-1` accepted silently
|
|
30
|
+
- `receive_reward(float('nan'))` caused silent state corruption
|
|
31
|
+
- `env.step('ghost_id', 'up')` raised unguided `KeyError`
|
|
32
|
+
|
|
33
|
+
**Action:** Added `ValueError`/`KeyError` guards to all public constructors and
|
|
34
|
+
methods; NaN/Inf rewards treated as 0; `_random_empty()` gained exhaustive
|
|
35
|
+
fallback scan.
|
|
36
|
+
**Tests added:** 30 | **Total tests:** 76
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Cycle 3 — Performance
|
|
41
|
+
|
|
42
|
+
**Timestamp:** 2026-03-23T00:20
|
|
43
|
+
**Finding:** `compute_trajectory_stats()` recomputes full numpy stats on every
|
|
44
|
+
call; episodes run strictly sequentially even when independent.
|
|
45
|
+
**Action:**
|
|
46
|
+
- Added `run_episodes_parallel()` with `ThreadPoolExecutor(max_workers=4)`
|
|
47
|
+
- Added `lru_cache(maxsize=256)` on `_trajectory_stats_cached()`
|
|
48
|
+
(keyed on immutable tuple snapshot of history)
|
|
49
|
+
|
|
50
|
+
**Measurements:**
|
|
51
|
+
- Foraging scenario (200 steps, 4 agents): `<2s`
|
|
52
|
+
- `compute_metrics` on 50 episodes: `<0.5s`
|
|
53
|
+
- Parallel 8-episode run ≤ 3.5× sequential wall time (IO-light workload)
|
|
54
|
+
|
|
55
|
+
**Tests added:** 9 | **Total tests:** 85
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Cycle 4 — Security
|
|
60
|
+
|
|
61
|
+
**Timestamp:** 2026-03-23T00:30
|
|
62
|
+
**Scan scope:** All `.py` files in `agentsim/`
|
|
63
|
+
**Technique:** AST walk + regex pattern matching
|
|
64
|
+
|
|
65
|
+
| Check | Findings | False Positives |
|
|
66
|
+
|-------|----------|----------------|
|
|
67
|
+
| `eval` / `exec` | 0 | 0 |
|
|
68
|
+
| `pickle` imports | 0 | 0 |
|
|
69
|
+
| `subprocess` / `os.system` | 0 | 0 |
|
|
70
|
+
| Hardcoded secrets | 0 | 2 (stdlib `random` for exploration) |
|
|
71
|
+
| Path traversal | 0 | 0 |
|
|
72
|
+
|
|
73
|
+
**Action:** Embedded all 4 checks as live CI tests in `tests/test_security.py`
|
|
74
|
+
so any future regression fails the pipeline immediately.
|
|
75
|
+
**Tests added:** 4 | **Total tests:** 89
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Cycle 5 — CI/CD
|
|
80
|
+
|
|
81
|
+
**Timestamp:** 2026-03-23T00:40
|
|
82
|
+
**Action:**
|
|
83
|
+
- Created `.github/workflows/ci.yml` with matrix (Python 3.10/3.11/3.12),
|
|
84
|
+
ruff lint check, pytest with `--cov-fail-under=95`, coverage artifact upload.
|
|
85
|
+
- Created `.pre-commit-config.yaml` with ruff (lint + format) and mypy hooks.
|
|
86
|
+
- Fixed 12 ruff lint issues across 5 test files.
|
|
87
|
+
|
|
88
|
+
**Outcome:** Pushes and pull requests targeting `main` now gate on lint + 95% coverage.
|
|
89
|
+
**Tests:** 89 (unchanged)
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Cycle 6 — Property-Based Testing
|
|
94
|
+
|
|
95
|
+
**Timestamp:** 2026-03-23T00:50
|
|
96
|
+
**Strategy library:** Hypothesis
|
|
97
|
+
**Properties verified:**
|
|
98
|
+
1. `ReactiveAgent.step()` always in `ACTIONS` — 200 examples
|
|
99
|
+
2. Q-table never NaN/Inf after any update — 200 examples
|
|
100
|
+
3. `env.step()` always returns finite reward — 50 examples
|
|
101
|
+
4. Agent position always in bounds after any action sequence — 50 examples
|
|
102
|
+
5. Epsilon decay always in `[minimum, 1.0]` — 200 examples
|
|
103
|
+
6. Trajectory stats `n_steps` matches history — 100 examples
|
|
104
|
+
7. `AgentState.copy()` deep-equal and independent — 200 examples
|
|
105
|
+
8. `DeliberativeAgent.step()` never crashes — 100 examples
|
|
106
|
+
|
|
107
|
+
**Hypothesis findings:** 0 bugs found (codebase already hardened in Cycle 2)
|
|
108
|
+
**Tests added:** 9 | **Total tests:** 98
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Cycle 7 — Examples + Docs
|
|
113
|
+
|
|
114
|
+
**Timestamp:** 2026-03-23T01:00
|
|
115
|
+
**Action:**
|
|
116
|
+
- Created 3 runnable example scripts in `examples/`:
|
|
117
|
+
- `example_foraging.py` — multi-agent foraging with full report
|
|
118
|
+
- `example_pursuit.py` — predator-prey across 5 seeds
|
|
119
|
+
- `example_learning_agent.py` — Q-learning training with sparkline + heatmap
|
|
120
|
+
- All 3 verified runnable with `PYTHONPATH=. python3 examples/<name>.py`
|
|
121
|
+
- Extended Google-style docstrings on `viz.py` and `scenarios/foraging.py`
|
|
122
|
+
|
|
123
|
+
**Tests:** 98 (unchanged)
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Cycle 8 — Release Engineering
|
|
128
|
+
|
|
129
|
+
**Timestamp:** 2026-03-23T01:10
|
|
130
|
+
**Action:**
|
|
131
|
+
- `pyproject.toml`: added `authors`, `readme`, `keywords`, `classifiers`,
|
|
132
|
+
full dev dependency group
|
|
133
|
+
- `CHANGELOG.md`: Keep-a-Changelog format covering all 8 cycles
|
|
134
|
+
- `Makefile`: `test`, `coverage`, `lint`, `format`, `security`, `clean` targets
|
|
135
|
+
- `AGENTS.md`: Edgecraft protocol documentation
|
|
136
|
+
- `EVOLUTION.md`: this file
|
|
137
|
+
- Tagged `v0.1.0`
|
|
138
|
+
|
|
139
|
+
**Final test count:** 98
|
|
140
|
+
**Final coverage:** 99%
|
|
141
|
+
**Ruff:** clean
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Summary Table
|
|
146
|
+
|
|
147
|
+
| Cycle | Focus | Tests Added | Total Tests | Coverage |
|
|
148
|
+
|-------|-------------|-------------|-------------|---------|
|
|
149
|
+
| 0 | baseline | 15 | 15 | 94% |
|
|
150
|
+
| 1 | test cov | 31 | 46 | 99% |
|
|
151
|
+
| 2 | hardening | 30 | 76 | 99% |
|
|
152
|
+
| 3 | performance | 9 | 85 | 99% |
|
|
153
|
+
| 4 | security | 4 | 89 | 99% |
|
|
154
|
+
| 5 | CI/CD | 0 | 89 | 99% |
|
|
155
|
+
| 6 | property | 9 | 98 | 99% |
|
|
156
|
+
| 7 | examples | 0 | 98 | 99% |
|
|
157
|
+
| 8 | release | 0 | 98 | 99% |
|
|
158
|
+
|
|
159
|
+
*Total commits across all cycles: 9*
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TechKnowMad Labs Private Limited
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
.PHONY: test lint format security coverage clean help
|
|
2
|
+
|
|
3
|
+
PYTHON ?= python3
|
|
4
|
+
SRC = agentsim
|
|
5
|
+
TESTS = tests
|
|
6
|
+
|
|
7
|
+
help:
|
|
8
|
+
@echo "Available targets:"
|
|
9
|
+
@echo " test — run pytest (fast, no coverage)"
|
|
10
|
+
@echo " coverage — run pytest with coverage report"
|
|
11
|
+
@echo " lint — check code with ruff"
|
|
12
|
+
@echo " format — auto-format code with ruff"
|
|
13
|
+
@echo " security — run security scan tests only"
|
|
14
|
+
@echo " clean — remove build/cache artefacts"
|
|
15
|
+
|
|
16
|
+
test:
|
|
17
|
+
$(PYTHON) -m pytest -v --tb=short $(TESTS)
|
|
18
|
+
|
|
19
|
+
coverage:
|
|
20
|
+
$(PYTHON) -m pytest -v --tb=short \
|
|
21
|
+
--cov=$(SRC) --cov-report=term-missing --cov-fail-under=95 \
|
|
22
|
+
$(TESTS)
|
|
23
|
+
|
|
24
|
+
lint:
|
|
25
|
+
$(PYTHON) -m ruff check $(SRC) $(TESTS)
|
|
26
|
+
|
|
27
|
+
format:
|
|
28
|
+
$(PYTHON) -m ruff check $(SRC) $(TESTS) --fix
|
|
29
|
+
$(PYTHON) -m ruff format $(SRC) $(TESTS)
|
|
30
|
+
|
|
31
|
+
security:
|
|
32
|
+
$(PYTHON) -m pytest -v --tb=short -k "security" $(TESTS)
|
|
33
|
+
|
|
34
|
+
clean:
|
|
35
|
+
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
|
36
|
+
find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true
|
|
37
|
+
find . -type d -name ".hypothesis" -exec rm -rf {} + 2>/dev/null || true
|
|
38
|
+
find . -name "*.pyc" -delete 2>/dev/null || true
|
|
39
|
+
find . -name ".coverage" -delete 2>/dev/null || true
|
|
40
|
+
@echo "Clean complete."
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tkm-agentsim
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Multi-agent simulation environment with scenarios, analysis, and visualization
|
|
5
|
+
Author-email: TechKnowMad Labs <admin@techknowmad.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: grid-world,multi-agent,reinforcement-learning,simulation
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Requires-Dist: matplotlib>=3.7
|
|
19
|
+
Requires-Dist: numpy>=1.24
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: hypothesis>=6.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=7.4; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# AgentSim
|
|
30
|
+
|
|
31
|
+
[License: MIT](LICENSE)
|
|
32
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
33
|
+
[Tests](tests/)
|
|
34
|
+
|
|
35
|
+
Multi-agent simulation framework for studying agent behaviors in grid-based environments. Supports reactive, deliberative (BDI), and reinforcement-learning agents with built-in scenarios, metrics, and ASCII visualization.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Features
|
|
40
|
+
|
|
41
|
+
- **Three agent architectures** — Reactive (condition-action rules), Deliberative (BDI goals/beliefs/plans), and Q-learning agents in a single unified interface.
|
|
42
|
+
- **Grid environment** — 2D grid with configurable walls, food resources, and multi-agent support; extend via `BaseEnvironment`.
|
|
43
|
+
- **Turnkey scenarios** — `ForagingScenario` (multi-agent food collection) and `PursuitScenario` (predator-prey) runnable in one call.
|
|
44
|
+
- **Episode orchestration** — `Simulation` drives agent-environment loops across multiple episodes, tracking per-agent rewards and step counts.
|
|
45
|
+
- **Metrics and analysis** — `compute_metrics` and `compute_trajectory_stats` aggregate episode results into structured summaries.
|
|
46
|
+
- **ASCII visualization** — `render_grid_ascii` and `simulation_report` produce plain-text grid snapshots and run reports with no GUI dependency.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Quick Start
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install tkm-agentsim
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from agentsim import (
|
|
58
|
+
GridEnvironment,
|
|
59
|
+
LearningAgent,
|
|
60
|
+
Simulation,
|
|
61
|
+
SimulationConfig,
|
|
62
|
+
compute_metrics,
|
|
63
|
+
simulation_report,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# Build environment and agent
|
|
67
|
+
env = GridEnvironment(width=10, height=10, n_food=15, n_walls=8)
|
|
68
|
+
agent = LearningAgent("learner", position=(0, 0))
|
|
69
|
+
env.add_agent(agent)
|
|
70
|
+
|
|
71
|
+
# Run 20 episodes
|
|
72
|
+
cfg = SimulationConfig(max_steps=200, n_episodes=20)
|
|
73
|
+
sim = Simulation(env, [agent], config=cfg)
|
|
74
|
+
results = sim.run()
|
|
75
|
+
|
|
76
|
+
# Analyse and display
|
|
77
|
+
metrics = compute_metrics(results, [agent])
|
|
78
|
+
print(simulation_report(results, [agent]))
|
|
79
|
+
print(sim.summary())
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Use a built-in scenario instead:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from agentsim import ForagingScenario, make_forager
|
|
86
|
+
|
|
87
|
+
scenario = ForagingScenario(grid_size=12, n_food=20)
|
|
88
|
+
agents = [make_forager(f"agent_{i}", position=(i, 0)) for i in range(3)]
|
|
89
|
+
result = scenario.run(agents)
|
|
90
|
+
print(f"Collected {result.total_collected} food in {result.steps} steps "
|
|
91
|
+
f"(efficiency {result.efficiency:.2f})")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Architecture
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
agentsim/
|
|
100
|
+
├── agents/
|
|
101
|
+
│ ├── base.py # BaseAgent, AgentState — abstract interface
|
|
102
|
+
│ ├── reactive.py # ReactiveAgent — condition-action rules
|
|
103
|
+
│ ├── deliberative.py # DeliberativeAgent — BDI (goals, beliefs, plans)
|
|
104
|
+
│ └── learning.py # LearningAgent — Q-learning, epsilon-greedy
|
|
105
|
+
├── environment/
|
|
106
|
+
│ ├── base.py # BaseEnvironment — reset/step/render contract
|
|
107
|
+
│ └── grid.py # GridEnvironment — 2D grid, walls, food
|
|
108
|
+
├── scenarios/
|
|
109
|
+
│ ├── foraging.py # ForagingScenario, make_forager()
|
|
110
|
+
│ └── pursuit.py # PursuitScenario, make_predator(), make_prey()
|
|
111
|
+
├── simulation.py # Simulation, SimulationConfig, EpisodeResult
|
|
112
|
+
├── analysis.py # compute_metrics, compute_trajectory_stats
|
|
113
|
+
└── viz.py # render_grid_ascii, simulation_report
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Data flow per episode:**
|
|
117
|
+
|
|
118
|
+
1. `Simulation.run_episode()` calls `env.reset()` → returns initial observations per agent.
|
|
119
|
+
2. Each step: `agent.step(obs)` → action → `env.step(agent_id, action)` → `(new_obs, reward, done)`.
|
|
120
|
+
3. `agent.receive_reward(reward)` updates internal state; loop continues until `done` or `max_steps`.
|
|
121
|
+
4. `EpisodeResult` collected; `compute_metrics()` aggregates across episodes.
|
|
122
|
+
|
|
123
|
+
**Extension points:**
|
|
124
|
+
|
|
125
|
+
- New agent type: subclass `BaseAgent`, implement `perceive()` and `decide()`.
|
|
126
|
+
- New environment: subclass `BaseEnvironment`, implement `reset()`, `step()`, `render()`.
|
|
127
|
+
- New scenario: compose agents + environment setup and delegate to `Simulation`.
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Development
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
git clone https://github.com/TECHKNOWMAD-LABS/agent-sim.git
|
|
135
|
+
cd agent-sim
|
|
136
|
+
pip install -e ".[dev]"
|
|
137
|
+
pytest -v
|
|
138
|
+
ruff check .
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Contributing
|
|
144
|
+
|
|
145
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for branch, test, and PR conventions.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
MIT — see [LICENSE](LICENSE).
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
Built by [TechKnowMad Labs](https://techknowmad.ai)
|