tensor-optix 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. tensor_optix-0.1.0/.gitignore +53 -0
  2. tensor_optix-0.1.0/LICENSE +21 -0
  3. tensor_optix-0.1.0/PKG-INFO +299 -0
  4. tensor_optix-0.1.0/PLAN.md +268 -0
  5. tensor_optix-0.1.0/README.md +259 -0
  6. tensor_optix-0.1.0/pyproject.toml +42 -0
  7. tensor_optix-0.1.0/tensor_optix/__init__.py +29 -0
  8. tensor_optix-0.1.0/tensor_optix/adapters/__init__.py +0 -0
  9. tensor_optix-0.1.0/tensor_optix/adapters/tensorflow/__init__.py +4 -0
  10. tensor_optix-0.1.0/tensor_optix/adapters/tensorflow/tf_agent.py +130 -0
  11. tensor_optix-0.1.0/tensor_optix/adapters/tensorflow/tf_evaluator.py +86 -0
  12. tensor_optix-0.1.0/tensor_optix/core/__init__.py +0 -0
  13. tensor_optix-0.1.0/tensor_optix/core/backoff_scheduler.py +114 -0
  14. tensor_optix-0.1.0/tensor_optix/core/base_agent.py +57 -0
  15. tensor_optix-0.1.0/tensor_optix/core/base_evaluator.py +40 -0
  16. tensor_optix-0.1.0/tensor_optix/core/base_optimizer.py +45 -0
  17. tensor_optix-0.1.0/tensor_optix/core/base_pipeline.py +42 -0
  18. tensor_optix-0.1.0/tensor_optix/core/checkpoint_registry.py +159 -0
  19. tensor_optix-0.1.0/tensor_optix/core/loop_controller.py +238 -0
  20. tensor_optix-0.1.0/tensor_optix/core/types.py +95 -0
  21. tensor_optix-0.1.0/tensor_optix/optimizer.py +105 -0
  22. tensor_optix-0.1.0/tensor_optix/optimizers/__init__.py +4 -0
  23. tensor_optix-0.1.0/tensor_optix/optimizers/backoff_optimizer.py +140 -0
  24. tensor_optix-0.1.0/tensor_optix/optimizers/pbt_optimizer.py +117 -0
  25. tensor_optix-0.1.0/tensor_optix/pipeline/__init__.py +4 -0
  26. tensor_optix-0.1.0/tensor_optix/pipeline/batch_pipeline.py +93 -0
  27. tensor_optix-0.1.0/tensor_optix/pipeline/live_pipeline.py +161 -0
  28. tensor_optix-0.1.0/tests/conftest.py +135 -0
  29. tensor_optix-0.1.0/tests/test_adapters/__init__.py +0 -0
  30. tensor_optix-0.1.0/tests/test_adapters/test_tf_agent.py +114 -0
  31. tensor_optix-0.1.0/tests/test_adapters/test_tf_evaluator.py +81 -0
  32. tensor_optix-0.1.0/tests/test_core/__init__.py +0 -0
  33. tensor_optix-0.1.0/tests/test_core/test_backoff_scheduler.py +111 -0
  34. tensor_optix-0.1.0/tests/test_core/test_checkpoint_registry.py +78 -0
  35. tensor_optix-0.1.0/tests/test_core/test_loop_controller.py +107 -0
  36. tensor_optix-0.1.0/tests/test_core/test_types.py +54 -0
  37. tensor_optix-0.1.0/tests/test_integration/__init__.py +0 -0
  38. tensor_optix-0.1.0/tests/test_integration/test_end_to_end.py +164 -0
  39. tensor_optix-0.1.0/tests/test_optimizers/__init__.py +0 -0
  40. tensor_optix-0.1.0/tests/test_optimizers/test_backoff_optimizer.py +124 -0
  41. tensor_optix-0.1.0/tests/test_optimizers/test_pbt_optimizer.py +87 -0
  42. tensor_optix-0.1.0/tests/test_pipeline/__init__.py +0 -0
  43. tensor_optix-0.1.0/tests/test_pipeline/test_batch_pipeline.py +81 -0
  44. tensor_optix-0.1.0/tests/test_pipeline/test_live_pipeline.py +93 -0
@@ -0,0 +1,53 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ eggs/
11
+ parts/
12
+ var/
13
+ sdist/
14
+ develop-eggs/
15
+ .installed.cfg
16
+ lib/
17
+ lib64/
18
+
19
+ # Virtual environments
20
+ .venv/
21
+ venv/
22
+ ENV/
23
+ env/
24
+
25
+ # uv lock (libraries don't pin lock files)
26
+ uv.lock
27
+
28
+ # Testing
29
+ .pytest_cache/
30
+ .coverage
31
+ coverage.xml
32
+ htmlcov/
33
+ .tox/
34
+
35
+ # Type checking
36
+ .mypy_cache/
37
+ .dmypy.json
38
+
39
+ # IDEs
40
+ .vscode/
41
+ .idea/
42
+ *.swp
43
+ *.swo
44
+ *~
45
+
46
+ # OS
47
+ .DS_Store
48
+ Thumbs.db
49
+
50
+ # Project-specific
51
+ tensor_optix_checkpoints/
52
+ *.keras
53
+ *.h5
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 sup3rus3r
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,299 @@
1
+ Metadata-Version: 2.4
2
+ Name: tensor-optix
3
+ Version: 0.1.0
4
+ Summary: Autonomous continuous learning loop for TensorFlow RL agents
5
+ Author: sup3rus3r
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 sup3rus3r
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ License-File: LICENSE
28
+ Requires-Python: >=3.11
29
+ Requires-Dist: gymnasium>=1.0.0
30
+ Requires-Dist: numpy>=1.24.0
31
+ Requires-Dist: tensorflow>=2.18.0
32
+ Provides-Extra: dev
33
+ Requires-Dist: black; extra == 'dev'
34
+ Requires-Dist: mypy; extra == 'dev'
35
+ Requires-Dist: pytest-cov; extra == 'dev'
36
+ Requires-Dist: pytest>=7.0; extra == 'dev'
37
+ Requires-Dist: ruff; extra == 'dev'
38
+ Requires-Dist: twine>=5.0; extra == 'dev'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # tensor-optix
42
+
43
+ Autonomous continuous learning loop for TensorFlow RL agents.
44
+
45
+ > **We own the loop. You own the model.**
46
+
47
+ tensor-optix wraps your TensorFlow model and Gymnasium environment and takes full ownership of the training loop — stepping continuously, evaluating performance windows, tuning hyperparameters, checkpointing, and adapting over time without manual intervention.
48
+
49
+ **No fixed episodes.** Training runs as a continuous stream of steps. The loop determines when training ends — not the environment's `done` flag.
50
+
51
+ ---
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install tensor-optix
57
+ ```
58
+
59
+ **Requirements:** Python >= 3.11, TensorFlow >= 2.18, Gymnasium >= 1.0
60
+
61
+ ---
62
+
63
+ ## Quick Start
64
+
65
+ ```python
66
+ import tensorflow as tf
67
+ import gymnasium as gym
68
+ from tensor_optix import RLOptimizer, TFAgent, BatchPipeline, HyperparamSet
69
+
70
+ # Build your model normally
71
+ model = tf.keras.Sequential([
72
+ tf.keras.layers.Input(shape=(4,)),
73
+ tf.keras.layers.Dense(64, activation="relu"),
74
+ tf.keras.layers.Dense(64, activation="relu"),
75
+ tf.keras.layers.Dense(2),
76
+ ])
77
+ optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)
78
+
79
+ agent = TFAgent(
80
+ model=model,
81
+ optimizer=optimizer,
82
+ hyperparams=HyperparamSet(
83
+ params={"learning_rate": 3e-4, "gamma": 0.99},
84
+ episode_id=0,
85
+ ),
86
+ )
87
+
88
+ # Continuous stepping — windows of 200 steps, no forced resets
89
+ env = gym.make("CartPole-v1")
90
+ pipeline = BatchPipeline(env=env, agent=agent, window_size=200)
91
+
92
+ opt = RLOptimizer(agent=agent, pipeline=pipeline)
93
+ opt.run() # runs until DORMANT (plateau) or max_episodes
94
+ ```
95
+
96
+ ---
97
+
98
+ ## How It Works
99
+
100
+ tensor-optix runs an autonomous improvement loop with four states:
101
+
102
+ ```
103
+ ACTIVE → aggressive tuning, evaluates every window
104
+ COOLING → recent improvement, exponential backoff on eval frequency
105
+ DORMANT → plateau reached — model is trained, minimal intervention
106
+ WATCHDOG → monitoring for degradation
107
+ ```
108
+
109
+ **DORMANT = trained.** The backoff scheduler — not a fixed episode count — determines when the model can no longer improve.
110
+
111
+ The loop:
112
+ 1. Steps continuously through the environment in fixed-size windows
113
+ 2. Evaluates each window via `primary_score`
114
+ 3. If improved: saves checkpoint, resets backoff
115
+ 4. If plateau: backs off evaluation, eventually reaches DORMANT
116
+ 5. If degraded: optionally rolls back to best checkpoint, re-activates
117
+ 6. Tunes hyperparameters using two-phase finite difference
118
+
119
+ ---
120
+
121
+ ## Optimizer — Two-Phase Finite Difference
122
+
123
+ `BackoffOptimizer` uses staggered two-phase finite difference per param:
124
+
125
+ ```
126
+ Phase 1 (probe): apply θᵢ + δᵢ, run one window
127
+ Phase 2 (commit): gradient = (score_after - score_before) / δᵢ
128
+ if gradient > 0: keep θᵢ + δᵢ
129
+ if gradient < 0: apply θᵢ - δᵢ (reverse)
130
+ ```
131
+
132
+ Params are cycled round-robin. Each param is probed and committed independently. Step size adapts on improvement and plateau.
133
+
134
+ ```python
135
+ from tensor_optix import BackoffOptimizer
136
+
137
+ opt = RLOptimizer(
138
+ agent=agent,
139
+ pipeline=pipeline,
140
+ optimizer=BackoffOptimizer(
141
+ param_bounds={
142
+ "learning_rate": (1e-5, 1e-2),
143
+ "gamma": (0.9, 0.999),
144
+ },
145
+ perturbation_scale=0.05,
146
+ ),
147
+ )
148
+ ```
149
+
150
+ ### PBTOptimizer
151
+
152
+ Pseudo population-based training. Maintains a history of `(hyperparams, score)` pairs. Exploits top performers when in the bottom 20%, explores otherwise.
153
+
154
+ ```python
155
+ from tensor_optix import PBTOptimizer
156
+
157
+ opt = RLOptimizer(
158
+ agent=agent,
159
+ pipeline=pipeline,
160
+ optimizer=PBTOptimizer(
161
+ param_bounds={"learning_rate": (1e-5, 1e-2)},
162
+ history_size=50,
163
+ ),
164
+ )
165
+ ```
166
+
167
+ ---
168
+
169
+ ## Custom Evaluator
170
+
171
+ ```python
172
+ from tensor_optix import BaseEvaluator, EpisodeData, EvalMetrics
173
+
174
+ class TotalRewardEvaluator(BaseEvaluator):
175
+ def score(self, episode_data: EpisodeData, train_diagnostics: dict) -> EvalMetrics:
176
+ total = sum(episode_data.rewards)
177
+ return EvalMetrics(
178
+ primary_score=total,
179
+ metrics={"total_reward": total},
180
+ episode_id=episode_data.episode_id,
181
+ )
182
+
183
+ opt = RLOptimizer(agent=agent, pipeline=pipeline, evaluator=TotalRewardEvaluator())
184
+ ```
185
+
186
+ ---
187
+
188
+ ## Custom Agent (Algorithm-Specific Learning)
189
+
190
+ `TFAgent` provides a REINFORCE baseline. Subclass and override `learn()` for PPO, SAC, DQN, etc.:
191
+
192
+ ```python
193
+ from tensor_optix import TFAgent
194
+ from tensor_optix.core.types import EpisodeData
195
+ import tensorflow as tf
196
+
197
+ class PPOAgent(TFAgent):
198
+ def learn(self, episode_data: EpisodeData) -> dict:
199
+ clip_ratio = self._hyperparams.params.get("clip_ratio", 0.2)
200
+ # ... PPO update logic ...
201
+ return {"loss": loss_value, "entropy": entropy_value}
202
+ ```
203
+
204
+ ---
205
+
206
+ ## Live Pipeline
207
+
208
+ For real-time data sources (trading, robotics, online environments):
209
+
210
+ ```python
211
+ from tensor_optix import LivePipeline
212
+
213
+ class MyFeed:
214
+ def stream(self):
215
+ while True:
216
+ yield obs, reward, terminated, truncated, info
217
+
218
+ pipeline = LivePipeline(
219
+ data_source=MyFeed(),
220
+ agent=agent,
221
+ episode_boundary_fn=LivePipeline.every_n_seconds(300),
222
+ )
223
+ ```
224
+
225
+ ---
226
+
227
+ ## Callbacks
228
+
229
+ ```python
230
+ from tensor_optix import LoopCallback
231
+
232
+ class MyLogger(LoopCallback):
233
+ def on_improvement(self, snapshot):
234
+ print(f"New best: {snapshot.eval_metrics.primary_score:.4f}")
235
+
236
+ def on_dormant(self, window_id):
237
+ print(f"Training complete at window {window_id}")
238
+
239
+ opt = RLOptimizer(agent=agent, pipeline=pipeline, callbacks=[MyLogger()])
240
+ ```
241
+
242
+ Available hooks: `on_loop_start`, `on_loop_stop`, `on_episode_end`, `on_improvement`, `on_plateau`, `on_dormant`, `on_degradation`, `on_hyperparam_update`.
243
+
244
+ ---
245
+
246
+ ## Full Configuration
247
+
248
+ ```python
249
+ opt = RLOptimizer(
250
+ agent=agent,
251
+ pipeline=pipeline,
252
+ evaluator=None, # default: TFEvaluator
253
+ optimizer=None, # default: BackoffOptimizer
254
+ checkpoint_dir="./checkpoints",
255
+ max_snapshots=10,
256
+ rollback_on_degradation=False,
257
+ improvement_margin=0.0,
258
+ max_episodes=None, # None = run until DORMANT
259
+ base_interval=1,
260
+ backoff_factor=2.0,
261
+ max_interval_episodes=100,
262
+ plateau_threshold=5,
263
+ dormant_threshold=20,
264
+ degradation_threshold=0.95,
265
+ callbacks=[],
266
+ )
267
+ ```
268
+
269
+ ---
270
+
271
+ ## Architecture
272
+
273
+ ```
274
+ tensor_optix/
275
+ ├── core/
276
+ │ ├── types.py # EpisodeData, EvalMetrics, HyperparamSet, LoopState
277
+ │ ├── base_agent.py # BaseAgent — 6-method contract
278
+ │ ├── base_evaluator.py
279
+ │ ├── base_optimizer.py
280
+ │ ├── base_pipeline.py
281
+ │ ├── loop_controller.py # State machine + main loop
282
+ │ ├── checkpoint_registry.py
283
+ │ └── backoff_scheduler.py
284
+ ├── adapters/tensorflow/
285
+ │ ├── tf_agent.py # TFAgent — Keras model wrapper
286
+ │ └── tf_evaluator.py # TFEvaluator — default scorer
287
+ ├── pipeline/
288
+ │ ├── batch_pipeline.py # Continuous stepping, fixed windows
289
+ │ └── live_pipeline.py # Real-time streaming
290
+ └── optimizers/
291
+ ├── backoff_optimizer.py # Two-phase finite difference
292
+ └── pbt_optimizer.py # Pseudo population-based training
293
+ ```
294
+
295
+ ---
296
+
297
+ ## License
298
+
299
+ MIT — Copyright (c) 2026 sup3rus3r
@@ -0,0 +1,268 @@
1
+ # tensor-optix — Living Implementation Plan
2
+
3
+ > This document is the single source of truth for building tensor-optix.
4
+ > Update it as decisions are made, issues are found, and tasks complete.
5
+
6
+ ---
7
+
8
+ ## Project Identity
9
+
10
+ - **Package name:** `tensor-optix`
11
+ - **Import name:** `tensor_optix`
12
+ - **Root directory:** `d:\development\AugData\tensor-optix\`
13
+ - **Python:** `>=3.11`
14
+ - **Framework:** TensorFlow `>=2.18.0` (TF only, no framework abstraction)
15
+ - **Environment API:** Gymnasium `>=1.0.0` (modern API: `terminated | truncated`, not `done`)
16
+
17
+ ---
18
+
19
+ ## What This Is
20
+
21
+ A PyPI-distributable Python library that replaces the conventional RL training loop with an autonomous, continuously-learning optimization system. The user builds their TF model and Gymnasium environment. The library owns the training loop, evaluation, hyperparameter tuning, checkpointing, and adaptation lifecycle.
22
+
23
+ **Core philosophy:** We own the loop. The user owns the model.
24
+
25
+ ---
26
+
27
+ ## Architecture Summary
28
+
29
+ ```
30
+ RLOptimizer (main entry point)
31
+ └── LoopController (state machine + loop orchestration)
32
+ ├── BaseAgent ← user implements this
33
+ ├── BaseEvaluator ← user implements or use TFEvaluator default
34
+ ├── BaseOptimizer ← BackoffOptimizer or PBTOptimizer
35
+ ├── BasePipeline ← BatchPipeline or LivePipeline
36
+ ├── CheckpointRegistry ← snapshot storage
37
+ └── BackoffScheduler ← interval + state management
38
+ ```
39
+
40
+ ### Loop States
41
+ | State | Behavior |
42
+ |-------|----------|
43
+ | ACTIVE | Aggressive tuning, eval every episode |
44
+ | COOLING | Recent improvement, exponential backoff |
45
+ | DORMANT | Plateau, minimal intervention |
46
+ | WATCHDOG | Monitoring for degradation |
47
+
48
+ ---
49
+
50
+ ## Repository Structure
51
+
52
+ ```
53
+ tensor-optix/
54
+ ├── PLAN.md ← this file
55
+ ├── pyproject.toml
56
+ ├── README.md
57
+ ├── LICENSE
58
+
59
+ ├── tensor_optix/
60
+ │ ├── __init__.py # Public API surface
61
+ │ │
62
+ │ ├── core/
63
+ │ │ ├── __init__.py
64
+ │ │ ├── types.py # EpisodeData, EvalMetrics, HyperparamSet, PolicySnapshot, LoopState
65
+ │ │ ├── base_agent.py # Abstract BaseAgent
66
+ │ │ ├── base_evaluator.py # Abstract BaseEvaluator
67
+ │ │ ├── base_optimizer.py # Abstract BaseOptimizer
68
+ │ │ ├── base_pipeline.py # Abstract BasePipeline + EpisodeBoundaryFn
69
+ │ │ ├── loop_controller.py # LoopController + LoopCallback
70
+ │ │ ├── checkpoint_registry.py # CheckpointRegistry
71
+ │ │ └── backoff_scheduler.py # BackoffScheduler
72
+ │ │
73
+ │ ├── adapters/
74
+ │ │ ├── __init__.py
75
+ │ │ └── tensorflow/
76
+ │ │ ├── __init__.py
77
+ │ │ ├── tf_agent.py # TFAgent(BaseAgent)
78
+ │ │ └── tf_evaluator.py # TFEvaluator(BaseEvaluator)
79
+ │ │
80
+ │ ├── pipeline/
81
+ │ │ ├── __init__.py
82
+ │ │ ├── batch_pipeline.py # BatchPipeline — Gymnasium env, static/episodic
83
+ │ │ └── live_pipeline.py # LivePipeline — real-time streaming source
84
+ │ │
85
+ │ └── optimizers/
86
+ │ ├── __init__.py
87
+ │ ├── backoff_optimizer.py # BackoffOptimizer (default, perturbation-based)
88
+ │ └── pbt_optimizer.py # PBTOptimizer (pseudo population-based training)
89
+
90
+ └── tests/
91
+ ├── conftest.py
92
+ ├── test_core/
93
+ │ ├── test_types.py
94
+ │ ├── test_backoff_scheduler.py
95
+ │ ├── test_checkpoint_registry.py
96
+ │ └── test_loop_controller.py
97
+ ├── test_adapters/
98
+ │ ├── test_tf_agent.py
99
+ │ └── test_tf_evaluator.py
100
+ ├── test_pipeline/
101
+ │ ├── test_batch_pipeline.py
102
+ │ └── test_live_pipeline.py
103
+ ├── test_optimizers/
104
+ │ ├── test_backoff_optimizer.py
105
+ │ └── test_pbt_optimizer.py
106
+ └── test_integration/
107
+ └── test_end_to_end.py
108
+ ```
109
+
110
+ ---
111
+
112
+ ## Critical Rules (never violate)
113
+
114
+ 1. **Gymnasium API only.** `env.reset()` → `(obs, info)`. `env.step()` → `(obs, reward, terminated, truncated, info)`. Never use legacy `done` flag internally — merge `terminated | truncated` at the pipeline boundary.
115
+ 2. **`BaseAgent` is the only contract.** `LoopController` calls only: `act()`, `learn()`, `get_hyperparams()`, `set_hyperparams()`, `save_weights()`, `load_weights()`.
116
+ 3. **`HyperparamSet.params` is an open dict.** Core never reads specific key names. Opaque blob passed between optimizer and agent.
117
+ 4. **`EpisodeData` carries raw interaction data only.** No algorithm-specific fields.
118
+ 5. **No algorithm-specific code in `core/` or `loop_controller.py`.** PPO, DQN, SAC, etc. are never referenced there.
119
+ 6. **`LoopController` is algorithm-blind.** Its cycle is: run episode → get score → compare → tune → repeat.
120
+
121
+ ---
122
+
123
+ ## Implementation Tasks
124
+
125
+ ### Phase 1 — Core Foundation
126
+ - [ ] `pyproject.toml`
127
+ - [ ] `tensor_optix/core/types.py`
128
+ - [ ] `tensor_optix/core/base_agent.py`
129
+ - [ ] `tensor_optix/core/base_evaluator.py`
130
+ - [ ] `tensor_optix/core/base_optimizer.py`
131
+ - [ ] `tensor_optix/core/base_pipeline.py`
132
+ - [ ] `tensor_optix/core/backoff_scheduler.py`
133
+ - [ ] `tensor_optix/core/checkpoint_registry.py`
134
+ - [ ] `tensor_optix/core/loop_controller.py`
135
+
136
+ ### Phase 2 — TensorFlow Adapter
137
+ - [ ] `tensor_optix/adapters/tensorflow/tf_agent.py`
138
+ - [ ] `tensor_optix/adapters/tensorflow/tf_evaluator.py`
139
+
140
+ ### Phase 3 — Pipelines
141
+ - [ ] `tensor_optix/pipeline/batch_pipeline.py`
142
+ - [ ] `tensor_optix/pipeline/live_pipeline.py`
143
+
144
+ ### Phase 4 — Optimizers
145
+ - [ ] `tensor_optix/optimizers/backoff_optimizer.py`
146
+ - [ ] `tensor_optix/optimizers/pbt_optimizer.py`
147
+
148
+ ### Phase 5 — Wiring
149
+ - [ ] `tensor_optix/optimizer.py` (RLOptimizer entry point)
150
+ - [ ] `tensor_optix/__init__.py` (public API surface)
151
+ - [ ] All `core/__init__.py`, `adapters/__init__.py`, `pipeline/__init__.py`, `optimizers/__init__.py`
152
+
153
+ ### Phase 6 — Tests
154
+ - [ ] `tests/conftest.py`
155
+ - [ ] `tests/test_core/test_types.py`
156
+ - [ ] `tests/test_core/test_backoff_scheduler.py`
157
+ - [ ] `tests/test_core/test_checkpoint_registry.py`
158
+ - [ ] `tests/test_core/test_loop_controller.py`
159
+ - [ ] `tests/test_adapters/test_tf_agent.py`
160
+ - [ ] `tests/test_adapters/test_tf_evaluator.py`
161
+ - [ ] `tests/test_pipeline/test_batch_pipeline.py`
162
+ - [ ] `tests/test_pipeline/test_live_pipeline.py`
163
+ - [ ] `tests/test_optimizers/test_backoff_optimizer.py`
164
+ - [ ] `tests/test_optimizers/test_pbt_optimizer.py`
165
+ - [ ] `tests/test_integration/test_end_to_end.py`
166
+
167
+ ---
168
+
169
+ ## Known Issues / Decisions Log
170
+
171
+ | Date | Issue | Decision |
172
+ |------|-------|----------|
173
+ | 2026-03-27 | Blueprint said "framework-agnostic" | Corrected: TensorFlow only |
174
+ | 2026-03-27 | Blueprint used legacy gym API | Corrected: Gymnasium >=1.0.0 |
175
+ | 2026-03-27 | Blueprint hardcoded TF as required dep in a "framework-agnostic" core | N/A — TF-only removes the contradiction |
176
+ | 2026-03-27 | Degradation check `score < best * threshold` breaks for negative scores | Fixed: use `score < best - abs(best) * (1 - threshold)` |
177
+
178
+ ---
179
+
180
+ ## Notes
181
+
182
+ - `BatchPipeline` wraps a Gymnasium-compatible env for episodic/batch training, not a static dataset loader.
183
+ - `LivePipeline` wraps a streaming data source (e.g. websocket feed). User provides a `stream()` generator.
184
+ - `TFAgent.learn()` provides a generic gradient update baseline. Users subclass and override for specific algorithms (PPO clipping, SAC entropy tuning, etc.).
185
+ - `PBTOptimizer` approximates population-based training for single-agent use via a virtual population from history.
186
+
187
+ ---
188
+
189
+ ## Optimizer Math — BackoffOptimizer (Running Finite Difference)
190
+
191
+ ### Core Idea
192
+ Estimate the gradient of `primary_score` w.r.t. each hyperparam using finite differences accumulated across episodes. Step in the direction that increases score.
193
+
194
+ ### Per-param gradient estimate
195
+ ```
196
+ ∂score/∂θᵢ ≈ (score_avg_after - score_avg_before) / Δθᵢ
197
+ ```
198
+ Where `score_avg` is a rolling mean over the last N episodes (noise reduction).
199
+
200
+ ### Update rule
201
+ ```
202
+ θᵢ_new = clip(θᵢ + α * ∂score/∂θᵢ, low_bound, high_bound)
203
+ ```
204
+
205
+ ### Step size α (adaptive)
206
+ ```
207
+ α = base_lr / (1 + β * score_variance)
208
+ ```
209
+ High variance in recent scores → smaller steps. Low variance → larger steps.
210
+
211
+ ### Perturbation size δ (per param)
212
+ - Multiplicative: `δᵢ = perturbation_scale * |θᵢ|` (scale-invariant)
213
+ - Clamped: `δᵢ = max(δᵢ, min_delta)` to avoid zero delta on small params
214
+
215
+ ### Directional memory
216
+ - Track last direction moved per param (`+1` or `-1`)
217
+ - Track whether that move improved score
218
+ - If improvement: continue in same direction (momentum)
219
+ - If no improvement: reverse direction, halve step size
220
+
221
+ ### Score buffer
222
+ - Rolling window of last `score_window` (default: 5) primary scores
223
+ - Use mean of buffer as the stable score signal for gradient estimation
224
+ - Do not update params until buffer has at least `min_samples` entries
225
+
226
+ ### Bounds enforcement
227
+ - User provides `param_bounds: dict[str, tuple[float, float]]`
228
+ - Params not in bounds are left unchanged
229
+ - All updates clipped to `[low, high]` after step
230
+
231
+ ### Variance-gated updates
232
+ - If `score_variance > high_variance_threshold`: skip update this cycle (too noisy to trust)
233
+ - Log skipped updates for observability
234
+
235
+ ---
236
+
237
+ ## Optimizer Math — PBTOptimizer (Pseudo Population-Based Training)
238
+
239
+ ### Core Idea
240
+ Maintain a history of `(HyperparamSet, primary_score)` pairs as a virtual population. Use exploit/explore logic from PBT without parallel workers.
241
+
242
+ ### Exploit condition
243
+ ```
244
+ if current_score < percentile(history_scores, 20):
245
+ # bottom 20% — exploit top 20%
246
+ best_params = params from top 20% of history (by score)
247
+ new_params = perturb(best_params, scale=small)
248
+ ```
249
+
250
+ ### Explore condition
251
+ ```
252
+ else:
253
+ # not bottom 20% — explore
254
+ new_params = perturb(current_params, scale=medium)
255
+ ```
256
+
257
+ ### Perturbation function (shared with BackoffOptimizer)
258
+ ```
259
+ perturb(θ, scale) → for each param:
260
+ δ = scale * (high - low) # fraction of param range
261
+ new_val = θ + uniform(-δ, +δ)
262
+ new_val = clip(new_val, low, high)
263
+ ```
264
+
265
+ ### History management
266
+ - Keep last `history_size` (default: 50) `(params, score)` pairs
267
+ - FIFO eviction
268
+ - Percentile computed over this window only