weiss-sim 0.1.3__cp312-cp312-win_amd64.whl → 0.2.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
weiss_sim/__init__.py CHANGED
@@ -1,18 +1,238 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json
3
4
  import numpy as np
4
5
 
5
6
  from .weiss_sim import (
6
7
  ACTION_SPACE_SIZE,
8
+ ACTOR_NONE,
9
+ DECISION_KIND_NONE,
10
+ POLICY_VERSION,
7
11
  OBS_LEN,
8
12
  PASS_ACTION_ID,
9
13
  SPEC_HASH,
14
+ BatchOutMinimalI16,
15
+ BatchOutMinimalI16LegalIds,
10
16
  BatchOutDebug,
11
17
  BatchOutMinimal,
18
+ BatchOutMinimalNoMask,
19
+ BatchOutTrajectory,
20
+ BatchOutTrajectoryI16,
21
+ BatchOutTrajectoryI16LegalIds,
22
+ BatchOutTrajectoryNoMask,
12
23
  EnvPool,
24
+ action_spec_json,
25
+ build_info,
26
+ decode_action_id,
27
+ observation_spec_json,
13
28
  __version__,
14
29
  )
15
- from .rl import RlStep, pass_action_id_for_decision_kind, reset_rl, step_rl
30
+ from .rl import (
31
+ RlStepI16LegalIds,
32
+ RlStep,
33
+ RlStepNoMask,
34
+ pass_action_id_for_decision_kind,
35
+ reset_rl,
36
+ reset_rl_into,
37
+ reset_rl_nomask,
38
+ reset_rl_nomask_into,
39
+ reset_rl_i16_legal_ids,
40
+ reset_rl_i16_legal_ids_into,
41
+ step_rl,
42
+ step_rl_into,
43
+ step_rl_nomask,
44
+ step_rl_nomask_into,
45
+ step_rl_i16_legal_ids,
46
+ step_rl_i16_legal_ids_into,
47
+ step_rl_select_from_logits_i16_legal_ids,
48
+ step_rl_select_from_logits_i16_legal_ids_into,
49
+ step_rl_sample_from_logits_i16_legal_ids,
50
+ step_rl_sample_from_logits_i16_legal_ids_into,
51
+ )
52
+
53
# Profile names accepted by make_train_pool / make_eval_pool.
_PROFILE_FAST = "fast"
_PROFILE_BALANCED = "balanced"
_PROFILE_EVAL = "eval"
_PROFILE_DEBUG = "debug"


def _resolve_profile(profile: str):
    """Map a profile name to its default output flags.

    The name is matched case-insensitively, ignoring surrounding whitespace.

    Returns:
        ``(normalized_name, output_masks, use_i16, legal_ids, unsafe_i16)``.

    Raises:
        ValueError: if the name is not one of fast, balanced, eval, debug.
    """
    profile_norm = profile.lower().strip()
    if profile_norm == _PROFILE_FAST:
        return profile_norm, False, True, True, True
    if profile_norm in (_PROFILE_BALANCED, _PROFILE_EVAL, _PROFILE_DEBUG):
        return profile_norm, True, False, False, False
    raise ValueError(f"unknown profile '{profile}' (expected fast, balanced, eval, debug)")


def _resolve_output_options(profile, output_masks, use_i16, legal_ids, unsafe_i16):
    """Combine explicit caller flags with profile defaults and validate them.

    Returns ``(output_masks, use_i16, legal_ids, unsafe_i16)`` with no ``None`` left.
    """
    _, profile_masks, profile_i16, profile_legal_ids, profile_unsafe_i16 = _resolve_profile(
        profile
    )
    if legal_ids is None:
        legal_ids = profile_legal_ids
    if legal_ids:
        # Only values the caller passed explicitly can conflict with legal_ids;
        # profile defaults must not, otherwise legal_ids=True could never be
        # combined with a masks-on profile without restating the other flags.
        if output_masks:
            raise ValueError("legal_ids requires output_masks=False")
        if use_i16 is False:
            raise ValueError("legal_ids currently requires use_i16=True")
        output_masks = False
        use_i16 = True
    else:
        if output_masks is None:
            output_masks = profile_masks
        if use_i16 is None:
            use_i16 = profile_i16
    if unsafe_i16 is None:
        unsafe_i16 = profile_unsafe_i16
    if unsafe_i16 and not use_i16:
        raise ValueError("unsafe_i16 requires use_i16=True")
    return output_masks, use_i16, legal_ids, unsafe_i16


def _allocate_buffers(pool, output_masks, use_i16, legal_ids, rollout_steps):
    """Pick the buffer wrapper matching the resolved flags (legal ids > i16 > masks)."""
    if rollout_steps is not None:
        if legal_ids:
            return EnvPoolTrajectoryBuffersI16LegalIds(pool, rollout_steps)
        if use_i16:
            return EnvPoolTrajectoryBuffersI16(pool, rollout_steps)
        if output_masks:
            return EnvPoolTrajectoryBuffers(pool, rollout_steps)
        return EnvPoolTrajectoryBuffersNoMask(pool, rollout_steps)
    if legal_ids:
        return EnvPoolBuffersI16LegalIds(pool)
    if use_i16:
        return EnvPoolBuffersI16(pool)
    if output_masks:
        return EnvPoolBuffers(pool)
    return EnvPoolBuffersNoMask(pool)


def _make_pool(
    factory,
    num_envs,
    db_path,
    deck_lists,
    pool_kwargs,
    *,
    profile,
    output_masks,
    use_i16,
    legal_ids,
    unsafe_i16,
    rollout_steps,
):
    """Shared body of make_train_pool / make_eval_pool; ``factory`` builds the pool."""
    output_masks, use_i16, legal_ids, unsafe_i16 = _resolve_output_options(
        profile, output_masks, use_i16, legal_ids, unsafe_i16
    )
    pool = factory(num_envs, db_path, deck_lists, output_masks=output_masks, **pool_kwargs)
    if legal_ids:
        pool.set_output_mask_bits_enabled(False)
    if unsafe_i16:
        pool.set_i16_clamp_enabled(False)
    return pool, _allocate_buffers(pool, output_masks, use_i16, legal_ids, rollout_steps)


def make_train_pool(
    num_envs: int,
    db_path: str,
    deck_lists,
    deck_ids=None,
    max_decisions: int = 2000,
    max_ticks: int = 100_000,
    seed: int = 0,
    curriculum_json: str | None = None,
    reward_json: str | None = None,
    error_policy: str | None = None,
    num_threads: int | None = None,
    debug_fingerprint_every_n: int = 0,
    debug_event_ring_capacity: int = 0,
    *,
    profile: str = _PROFILE_FAST,
    output_masks: bool | None = None,
    use_i16: bool | None = None,
    legal_ids: bool | None = None,
    unsafe_i16: bool | None = None,
    rollout_steps: int | None = None,
):
    """Create an RL training pool plus preallocated buffers with sensible defaults.

    Profiles:
    - fast: masks off + i16 obs + legal ids (highest throughput)
    - balanced/eval/debug: masks on + i32 obs (easier debugging)

    Flags left as ``None`` take the profile's value. When ``legal_ids`` ends up
    enabled it implies ``output_masks=False`` and ``use_i16=True``; explicitly
    passing ``output_masks=True`` or ``use_i16=False`` alongside it is an error.
    When ``rollout_steps`` is given, trajectory buffers of that length are
    returned instead of single-step buffers.

    Returns: (pool, buffers)

    Raises:
        ValueError: unknown profile, or an inconsistent flag combination.
    """
    return _make_pool(
        EnvPool.new_rl_train,
        num_envs,
        db_path,
        deck_lists,
        dict(
            deck_ids=deck_ids,
            max_decisions=max_decisions,
            max_ticks=max_ticks,
            seed=seed,
            curriculum_json=curriculum_json,
            reward_json=reward_json,
            error_policy=error_policy,
            num_threads=num_threads,
            debug_fingerprint_every_n=debug_fingerprint_every_n,
            debug_event_ring_capacity=debug_event_ring_capacity,
        ),
        profile=profile,
        output_masks=output_masks,
        use_i16=use_i16,
        legal_ids=legal_ids,
        unsafe_i16=unsafe_i16,
        rollout_steps=rollout_steps,
    )


def make_eval_pool(
    num_envs: int,
    db_path: str,
    deck_lists,
    deck_ids=None,
    max_decisions: int = 2000,
    max_ticks: int = 100_000,
    seed: int = 0,
    curriculum_json: str | None = None,
    reward_json: str | None = None,
    error_policy: str | None = None,
    num_threads: int | None = None,
    debug_fingerprint_every_n: int = 0,
    debug_event_ring_capacity: int = 0,
    *,
    profile: str = _PROFILE_BALANCED,
    output_masks: bool | None = None,
    use_i16: bool | None = None,
    legal_ids: bool | None = None,
    unsafe_i16: bool | None = None,
    rollout_steps: int | None = None,
):
    """Create an RL eval/debug pool plus preallocated buffers with sensible defaults.

    Profiles:
    - balanced/eval/debug: masks on + i32 obs
    - fast: masks off + i16 obs + legal ids (opt-in)

    Flags left as ``None`` take the profile's value. When ``legal_ids`` ends up
    enabled it implies ``output_masks=False`` and ``use_i16=True``; explicitly
    passing ``output_masks=True`` or ``use_i16=False`` alongside it is an error.
    When ``rollout_steps`` is given, trajectory buffers of that length are
    returned instead of single-step buffers.

    Returns: (pool, buffers)

    Raises:
        ValueError: unknown profile, or an inconsistent flag combination.
    """
    return _make_pool(
        EnvPool.new_rl_eval,
        num_envs,
        db_path,
        deck_lists,
        dict(
            deck_ids=deck_ids,
            max_decisions=max_decisions,
            max_ticks=max_ticks,
            seed=seed,
            curriculum_json=curriculum_json,
            reward_json=reward_json,
            error_policy=error_policy,
            num_threads=num_threads,
            debug_fingerprint_every_n=debug_fingerprint_every_n,
            debug_event_ring_capacity=debug_event_ring_capacity,
        ),
        profile=profile,
        output_masks=output_masks,
        use_i16=use_i16,
        legal_ids=legal_ids,
        unsafe_i16=unsafe_i16,
        rollout_steps=rollout_steps,
    )
16
236
 
17
237
 
18
238
  class EnvPoolBuffers:
@@ -28,11 +248,13 @@ class EnvPoolBuffers:
28
248
  self.terminated = self.out.terminated
29
249
  self.truncated = self.out.truncated
30
250
  self.actor = self.out.actor
251
+ self.decision_kind = self.out.decision_kind
31
252
  self.decision_id = self.out.decision_id
32
253
  self.engine_status = self.out.engine_status
33
254
  self.spec_hash = self.out.spec_hash
34
255
  self.legal_ids = np.empty(num_envs * pool.action_space, dtype=np.uint16)
35
256
  self.legal_offsets = np.zeros(num_envs + 1, dtype=np.uint32)
257
+ self.actions = np.empty(num_envs, dtype=np.uint32)
36
258
 
37
259
  def reset(self):
38
260
  self.pool.reset_into(self.out)
@@ -46,27 +268,520 @@ class EnvPoolBuffers:
46
268
  self.pool.reset_done_into(done_mask, self.out)
47
269
  return self.out
48
270
 
271
+ def reset_indices_with_episode_seeds(self, indices, episode_seeds):
272
+ self.pool.reset_indices_with_episode_seeds_into(
273
+ list(indices), list(episode_seeds), self.out
274
+ )
275
+ return self.out
276
+
49
277
  def step(self, actions):
50
278
  self.pool.step_into(actions, self.out)
51
279
  return self.out
52
280
 
281
+ def step_first_legal(self):
282
+ self.pool.step_first_legal_into(self.actions, self.out)
283
+ return self.out, self.actions
284
+
285
+ def step_random_legal(self, seeds):
286
+ self.pool.step_sample_legal_action_ids_uniform_into(seeds, self.actions, self.out)
287
+ return self.out, self.actions
288
+
289
+ def step_select_from_logits(self, logits):
290
+ logits = np.ascontiguousarray(logits, dtype=np.float32)
291
+ self.pool.step_select_from_logits_into(logits, self.actions, self.out)
292
+ return self.out, self.actions
293
+
294
+ def step_sample_from_logits(self, logits, seeds):
295
+ logits = np.ascontiguousarray(logits, dtype=np.float32)
296
+ seeds = np.asarray(seeds, dtype=np.uint64).ravel()
297
+ self.pool.step_sample_from_logits_into(logits, seeds, self.actions, self.out)
298
+ return self.out, self.actions
299
+
300
+ def select_actions_from_logits(self, logits):
301
+ logits = np.ascontiguousarray(logits, dtype=np.float32)
302
+ self.pool.select_actions_from_logits_into(logits, self.actions)
303
+ return self.actions
304
+
305
+ def sample_actions_from_logits(self, logits, seeds):
306
+ logits = np.ascontiguousarray(logits, dtype=np.float32)
307
+ seeds = np.asarray(seeds, dtype=np.uint64).ravel()
308
+ self.pool.sample_actions_from_logits_into(logits, seeds, self.actions)
309
+ return self.actions
310
+
311
+ def set_output_mask_enabled(self, enabled: bool):
312
+ self.pool.set_output_mask_enabled(enabled)
313
+ if not enabled:
314
+ self.out.masks.fill(0)
315
+
316
+ def set_output_mask_bits_enabled(self, enabled: bool):
317
+ self.pool.set_output_mask_bits_enabled(enabled)
318
+
319
+ def set_i16_overflow_counter_enabled(self, enabled: bool):
320
+ self.pool.set_i16_overflow_counter_enabled(enabled)
321
+
322
+ def i16_overflow_count(self) -> int:
323
+ return int(self.pool.i16_overflow_count())
324
+
325
+ def reset_i16_overflow_count(self) -> None:
326
+ self.pool.reset_i16_overflow_count()
327
+
53
328
  def legal_action_ids(self):
54
329
  count = self.pool.legal_action_ids_into(self.legal_ids, self.legal_offsets)
55
330
  return self.legal_ids[:count], self.legal_offsets
56
331
 
332
+ def legal_action_ids_and_sample_uniform(self, seeds):
333
+ count = self.pool.legal_action_ids_and_sample_uniform_into(
334
+ self.legal_ids, self.legal_offsets, seeds, self.actions
335
+ )
336
+ return self.legal_ids[:count], self.legal_offsets, self.actions
337
+
338
+
339
class EnvPoolBuffersNoMask:
    """Preallocated numpy buffers for stepping without dense masks.

    Holds an ``EnvPool`` plus one ``BatchOutMinimalNoMask``; every reset/step
    method hands that same batch to the pool's ``*_nomask`` call and returns it.
    """

    def __init__(self, pool: EnvPool) -> None:
        env_count = pool.envs_len
        batch = BatchOutMinimalNoMask(env_count)
        self.pool = pool
        self.out = batch
        # Re-export the batch's fields as attributes of this wrapper.
        for field in (
            "obs",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
        ):
            setattr(self, field, getattr(batch, field))
        # Scratch arrays for legal-id queries and for actions chosen by the pool.
        self.legal_ids = np.empty(env_count * pool.action_space, dtype=np.uint16)
        self.legal_offsets = np.zeros(env_count + 1, dtype=np.uint32)
        self.actions = np.empty(env_count, dtype=np.uint32)

    def reset(self):
        """Call ``pool.reset_into_nomask`` on the shared batch and return it."""
        batch = self.out
        self.pool.reset_into_nomask(batch)
        return batch

    def reset_indices(self, indices):
        """Reset the given env indices (passed to the pool as a list)."""
        batch = self.out
        self.pool.reset_indices_into_nomask(list(indices), batch)
        return batch

    def reset_done(self, done_mask):
        """Forward ``done_mask`` to ``pool.reset_done_into_nomask``."""
        batch = self.out
        self.pool.reset_done_into_nomask(done_mask, batch)
        return batch

    def reset_indices_with_episode_seeds(self, indices, episode_seeds):
        """Reset the given env indices with matching episode seeds (both as lists)."""
        index_list = list(indices)
        seed_list = list(episode_seeds)
        self.pool.reset_indices_with_episode_seeds_into_nomask(index_list, seed_list, self.out)
        return self.out

    def step(self, actions):
        """Step with caller-provided actions; returns the shared batch."""
        batch = self.out
        self.pool.step_into_nomask(actions, batch)
        return batch

    def step_first_legal(self):
        """Call ``pool.step_first_legal_into_nomask``; returns ``(out, actions)``."""
        chosen = self.actions
        self.pool.step_first_legal_into_nomask(chosen, self.out)
        return self.out, chosen

    def step_random_legal(self, seeds):
        """Step via the pool's uniform legal-action sampler; returns ``(out, actions)``."""
        chosen = self.actions
        self.pool.step_sample_legal_action_ids_uniform_into_nomask(seeds, chosen, self.out)
        return self.out, chosen

    def step_select_from_logits(self, logits):
        """Step with actions the pool selects from float32 logits."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        self.pool.step_select_from_logits_into_nomask(dense, self.actions, self.out)
        return self.out, self.actions

    def step_sample_from_logits(self, logits, seeds):
        """Step with actions the pool samples from float32 logits and uint64 seeds."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        self.pool.step_sample_from_logits_into_nomask(dense, flat_seeds, self.actions, self.out)
        return self.out, self.actions

    def select_actions_from_logits(self, logits):
        """Fill ``self.actions`` from logits without stepping; returns ``self.actions``."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        self.pool.select_actions_from_logits_into(dense, self.actions)
        return self.actions

    def sample_actions_from_logits(self, logits, seeds):
        """Fill ``self.actions`` by sampling from logits without stepping."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        self.pool.sample_actions_from_logits_into(dense, flat_seeds, self.actions)
        return self.actions

    def set_output_mask_bits_enabled(self, enabled: bool):
        """Forward the mask-bits toggle to the pool."""
        self.pool.set_output_mask_bits_enabled(enabled)

    def set_i16_overflow_counter_enabled(self, enabled: bool):
        """Forward the i16 overflow-counter toggle to the pool."""
        self.pool.set_i16_overflow_counter_enabled(enabled)

    def i16_overflow_count(self) -> int:
        """Return the pool's i16 overflow count as a Python int."""
        raw = self.pool.i16_overflow_count()
        return int(raw)

    def reset_i16_overflow_count(self) -> None:
        """Ask the pool to reset its i16 overflow count."""
        self.pool.reset_i16_overflow_count()

    def legal_action_ids(self):
        """Query legal ids; returns ``(legal_ids[:count], legal_offsets)``."""
        used = self.pool.legal_action_ids_into(self.legal_ids, self.legal_offsets)
        return self.legal_ids[:used], self.legal_offsets

    def legal_action_ids_and_sample_uniform(self, seeds):
        """Query legal ids and sample actions; returns ``(ids, offsets, actions)``."""
        used = self.pool.legal_action_ids_and_sample_uniform_into(
            self.legal_ids, self.legal_offsets, seeds, self.actions
        )
        return self.legal_ids[:used], self.legal_offsets, self.actions
432
+
433
+
434
class EnvPoolBuffersI16:
    """Preallocated numpy buffers for high-throughput stepping with i16 obs.

    Holds an ``EnvPool`` plus one ``BatchOutMinimalI16``; every reset/step
    method hands that same batch to the pool's ``*_i16`` call and returns it.
    """

    def __init__(self, pool: EnvPool) -> None:
        env_count = pool.envs_len
        batch = BatchOutMinimalI16(env_count)
        self.pool = pool
        self.out = batch
        # Re-export the batch's fields as attributes of this wrapper.
        for field in (
            "obs",
            "masks",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
        ):
            setattr(self, field, getattr(batch, field))
        # Scratch arrays for legal-id queries and for actions chosen by the pool.
        self.legal_ids = np.empty(env_count * pool.action_space, dtype=np.uint16)
        self.legal_offsets = np.zeros(env_count + 1, dtype=np.uint32)
        self.actions = np.empty(env_count, dtype=np.uint32)

    def reset(self):
        """Call ``pool.reset_into_i16`` on the shared batch and return it."""
        batch = self.out
        self.pool.reset_into_i16(batch)
        return batch

    def reset_indices(self, indices):
        """Reset the given env indices (passed to the pool as a list)."""
        batch = self.out
        self.pool.reset_indices_into_i16(list(indices), batch)
        return batch

    def reset_done(self, done_mask):
        """Forward ``done_mask`` to ``pool.reset_done_into_i16``."""
        batch = self.out
        self.pool.reset_done_into_i16(done_mask, batch)
        return batch

    def reset_indices_with_episode_seeds(self, indices, episode_seeds):
        """Reset the given env indices with matching episode seeds (both as lists)."""
        index_list = list(indices)
        seed_list = list(episode_seeds)
        self.pool.reset_indices_with_episode_seeds_into_i16(index_list, seed_list, self.out)
        return self.out

    def step(self, actions):
        """Step with caller-provided actions; returns the shared batch."""
        batch = self.out
        self.pool.step_into_i16(actions, batch)
        return batch

    def step_first_legal(self):
        """Call ``pool.step_first_legal_into_i16``; returns ``(out, actions)``."""
        chosen = self.actions
        self.pool.step_first_legal_into_i16(chosen, self.out)
        return self.out, chosen

    def step_random_legal(self, seeds):
        """Step via the pool's uniform legal-action sampler; returns ``(out, actions)``."""
        chosen = self.actions
        self.pool.step_sample_legal_action_ids_uniform_into_i16(seeds, chosen, self.out)
        return self.out, chosen

    def step_select_from_logits(self, logits):
        """Step with actions the pool selects from float32 logits."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        self.pool.step_select_from_logits_into_i16(dense, self.actions, self.out)
        return self.out, self.actions

    def step_sample_from_logits(self, logits, seeds):
        """Step with actions the pool samples from float32 logits and uint64 seeds."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        self.pool.step_sample_from_logits_into_i16(dense, flat_seeds, self.actions, self.out)
        return self.out, self.actions

    def select_actions_from_logits(self, logits):
        """Fill ``self.actions`` from logits without stepping; returns ``self.actions``."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        self.pool.select_actions_from_logits_into(dense, self.actions)
        return self.actions

    def sample_actions_from_logits(self, logits, seeds):
        """Fill ``self.actions`` by sampling from logits without stepping."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        self.pool.sample_actions_from_logits_into(dense, flat_seeds, self.actions)
        return self.actions

    def set_output_mask_bits_enabled(self, enabled: bool):
        """Forward the mask-bits toggle to the pool."""
        self.pool.set_output_mask_bits_enabled(enabled)

    def set_i16_clamp_enabled(self, enabled: bool):
        """Forward the i16 clamp toggle to the pool."""
        self.pool.set_i16_clamp_enabled(enabled)

    def set_i16_overflow_counter_enabled(self, enabled: bool):
        """Forward the i16 overflow-counter toggle to the pool."""
        self.pool.set_i16_overflow_counter_enabled(enabled)

    def i16_overflow_count(self) -> int:
        """Return the pool's i16 overflow count as a Python int."""
        raw = self.pool.i16_overflow_count()
        return int(raw)

    def reset_i16_overflow_count(self) -> None:
        """Ask the pool to reset its i16 overflow count."""
        self.pool.reset_i16_overflow_count()

    def legal_action_ids(self):
        """Query legal ids; returns ``(legal_ids[:count], legal_offsets)``."""
        used = self.pool.legal_action_ids_into(self.legal_ids, self.legal_offsets)
        return self.legal_ids[:used], self.legal_offsets

    def legal_action_ids_and_sample_uniform(self, seeds):
        """Query legal ids and sample actions; returns ``(ids, offsets, actions)``."""
        used = self.pool.legal_action_ids_and_sample_uniform_into(
            self.legal_ids, self.legal_offsets, seeds, self.actions
        )
        return self.legal_ids[:used], self.legal_offsets, self.actions
531
+
532
+
533
class EnvPoolBuffersI16LegalIds:
    """Preallocated numpy buffers for stepping with i16 obs + legal ids.

    Holds an ``EnvPool`` plus one ``BatchOutMinimalI16LegalIds``; every
    reset/step method hands that same batch to the pool's ``*_i16_legal_ids``
    call and returns it. Here ``legal_ids`` / ``legal_offsets`` come from the
    batch itself rather than from separately allocated scratch arrays.
    """

    def __init__(self, pool: EnvPool) -> None:
        env_count = pool.envs_len
        batch = BatchOutMinimalI16LegalIds(env_count)
        self.pool = pool
        self.out = batch
        # Re-export the batch's fields as attributes of this wrapper.
        for field in (
            "obs",
            "legal_ids",
            "legal_offsets",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
        ):
            setattr(self, field, getattr(batch, field))
        self.actions = np.empty(env_count, dtype=np.uint32)

    def reset(self):
        """Call ``pool.reset_into_i16_legal_ids`` on the shared batch and return it."""
        batch = self.out
        self.pool.reset_into_i16_legal_ids(batch)
        return batch

    def reset_indices(self, indices):
        """Reset the given env indices (passed to the pool as a list)."""
        batch = self.out
        self.pool.reset_indices_into_i16_legal_ids(list(indices), batch)
        return batch

    def reset_done(self, done_mask):
        """Forward ``done_mask`` to ``pool.reset_done_into_i16_legal_ids``."""
        batch = self.out
        self.pool.reset_done_into_i16_legal_ids(done_mask, batch)
        return batch

    def reset_indices_with_episode_seeds(self, indices, episode_seeds):
        """Reset the given env indices with matching episode seeds (both as lists)."""
        index_list = list(indices)
        seed_list = list(episode_seeds)
        self.pool.reset_indices_with_episode_seeds_into_i16_legal_ids(
            index_list, seed_list, self.out
        )
        return self.out

    def step(self, actions):
        """Step with caller-provided actions; returns the shared batch."""
        batch = self.out
        self.pool.step_into_i16_legal_ids(actions, batch)
        return batch

    def step_first_legal(self):
        """Call ``pool.step_first_legal_into_i16_legal_ids``; returns ``(out, actions)``."""
        chosen = self.actions
        self.pool.step_first_legal_into_i16_legal_ids(chosen, self.out)
        return self.out, chosen

    def step_random_legal(self, seeds):
        """Step via the pool's uniform legal-action sampler; returns ``(out, actions)``."""
        chosen = self.actions
        self.pool.step_sample_legal_action_ids_uniform_into_i16_legal_ids(
            seeds, chosen, self.out
        )
        return self.out, chosen

    def step_select_from_logits(self, logits):
        """Step with actions the pool selects from float32 logits."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        self.pool.step_select_from_logits_into_i16_legal_ids(dense, self.actions, self.out)
        return self.out, self.actions

    def step_sample_from_logits(self, logits, seeds):
        """Step with actions the pool samples from float32 logits and uint64 seeds."""
        dense = np.ascontiguousarray(logits, dtype=np.float32)
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        self.pool.step_sample_from_logits_into_i16_legal_ids(
            dense, flat_seeds, self.actions, self.out
        )
        return self.out, self.actions

    def set_i16_overflow_counter_enabled(self, enabled: bool):
        """Forward the i16 overflow-counter toggle to the pool."""
        self.pool.set_i16_overflow_counter_enabled(enabled)

    def i16_overflow_count(self) -> int:
        """Return the pool's i16 overflow count as a Python int."""
        raw = self.pool.i16_overflow_count()
        return int(raw)

    def reset_i16_overflow_count(self) -> None:
        """Ask the pool to reset its i16 overflow count."""
        self.pool.reset_i16_overflow_count()
604
+
605
+
606
class EnvPoolTrajectoryBuffers:
    """Preallocated numpy buffers for multi-step rollouts with masks.

    Holds an ``EnvPool`` and one ``BatchOutTrajectory`` built for ``steps``
    steps and ``pool.envs_len`` envs; rollout methods write into that batch.
    """

    def __init__(self, pool: EnvPool, steps: int) -> None:
        trajectory = BatchOutTrajectory(steps, pool.envs_len)
        self.pool = pool
        self.out = trajectory
        self.steps = steps
        # Re-export the trajectory's fields as attributes of this wrapper.
        for field in (
            "obs",
            "masks",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
            "actions",
        ):
            setattr(self, field, getattr(trajectory, field))

    def rollout_first_legal(self):
        """Run ``pool.rollout_first_legal_into`` for ``self.steps`` steps."""
        trajectory = self.out
        self.pool.rollout_first_legal_into(self.steps, trajectory)
        return trajectory

    def rollout_random_legal(self, seeds):
        """Run the pool's uniform-sampling rollout with seeds flattened to uint64."""
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        trajectory = self.out
        self.pool.rollout_sample_legal_action_ids_uniform_into(self.steps, flat_seeds, trajectory)
        return trajectory
633
+
634
+
635
class EnvPoolTrajectoryBuffersI16:
    """Preallocated numpy buffers for multi-step rollouts with i16 obs.

    Holds an ``EnvPool`` and one ``BatchOutTrajectoryI16`` built for ``steps``
    steps and ``pool.envs_len`` envs; rollout methods write into that batch.
    """

    def __init__(self, pool: EnvPool, steps: int) -> None:
        trajectory = BatchOutTrajectoryI16(steps, pool.envs_len)
        self.pool = pool
        self.out = trajectory
        self.steps = steps
        # Re-export the trajectory's fields as attributes of this wrapper.
        for field in (
            "obs",
            "masks",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
            "actions",
        ):
            setattr(self, field, getattr(trajectory, field))

    def rollout_first_legal(self):
        """Run ``pool.rollout_first_legal_into_i16`` for ``self.steps`` steps."""
        trajectory = self.out
        self.pool.rollout_first_legal_into_i16(self.steps, trajectory)
        return trajectory

    def rollout_random_legal(self, seeds):
        """Run the pool's uniform-sampling rollout with seeds flattened to uint64."""
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        trajectory = self.out
        self.pool.rollout_sample_legal_action_ids_uniform_into_i16(
            self.steps, flat_seeds, trajectory
        )
        return trajectory
662
+
663
+
664
class EnvPoolTrajectoryBuffersI16LegalIds:
    """Preallocated numpy buffers for multi-step rollouts with i16 obs + legal ids.

    Holds an ``EnvPool`` and one ``BatchOutTrajectoryI16LegalIds`` built for
    ``steps`` steps and ``pool.envs_len`` envs; rollout methods write into it.
    """

    def __init__(self, pool: EnvPool, steps: int) -> None:
        trajectory = BatchOutTrajectoryI16LegalIds(steps, pool.envs_len)
        self.pool = pool
        self.out = trajectory
        self.steps = steps
        # Re-export the trajectory's fields as attributes of this wrapper.
        for field in (
            "obs",
            "legal_ids",
            "legal_offsets",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
            "actions",
        ):
            setattr(self, field, getattr(trajectory, field))

    def rollout_first_legal(self):
        """Run ``pool.rollout_first_legal_into_i16_legal_ids`` for ``self.steps`` steps."""
        trajectory = self.out
        self.pool.rollout_first_legal_into_i16_legal_ids(self.steps, trajectory)
        return trajectory

    def rollout_random_legal(self, seeds):
        """Run the pool's uniform-sampling rollout with seeds flattened to uint64."""
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        trajectory = self.out
        self.pool.rollout_sample_legal_action_ids_uniform_into_i16_legal_ids(
            self.steps, flat_seeds, trajectory
        )
        return trajectory
694
+
695
+
696
class EnvPoolTrajectoryBuffersNoMask:
    """Preallocated numpy buffers for multi-step rollouts without masks.

    Holds an ``EnvPool`` and one ``BatchOutTrajectoryNoMask`` built for
    ``steps`` steps and ``pool.envs_len`` envs; rollout methods write into it.
    """

    def __init__(self, pool: EnvPool, steps: int) -> None:
        trajectory = BatchOutTrajectoryNoMask(steps, pool.envs_len)
        self.pool = pool
        self.out = trajectory
        self.steps = steps
        # Re-export the trajectory's fields as attributes of this wrapper.
        for field in (
            "obs",
            "rewards",
            "terminated",
            "truncated",
            "actor",
            "decision_kind",
            "decision_id",
            "engine_status",
            "spec_hash",
            "actions",
        ):
            setattr(self, field, getattr(trajectory, field))

    def rollout_first_legal(self):
        """Run ``pool.rollout_first_legal_into_nomask`` for ``self.steps`` steps."""
        trajectory = self.out
        self.pool.rollout_first_legal_into_nomask(self.steps, trajectory)
        return trajectory

    def rollout_random_legal(self, seeds):
        """Run the pool's uniform-sampling rollout with seeds flattened to uint64."""
        flat_seeds = np.asarray(seeds, dtype=np.uint64).ravel()
        trajectory = self.out
        self.pool.rollout_sample_legal_action_ids_uniform_into_nomask(
            self.steps, flat_seeds, trajectory
        )
        return trajectory
722
+
723
+
724
def spec_bundle():
    """Return the policy version, spec hash and both parsed specs in one dict.

    The ``"observation"`` and ``"action"`` entries are the results of
    ``json.loads`` on ``observation_spec_json()`` and ``action_spec_json()``.
    """
    observation = json.loads(observation_spec_json())
    action = json.loads(action_spec_json())
    bundle = {"policy_version": POLICY_VERSION, "spec_hash": SPEC_HASH}
    bundle["observation"] = observation
    bundle["action"] = action
    return bundle
731
+
57
732
 
58
733
  __all__ = [
59
734
  "EnvPool",
60
735
  "EnvPoolBuffers",
736
+ "EnvPoolBuffersNoMask",
737
+ "EnvPoolBuffersI16",
738
+ "EnvPoolBuffersI16LegalIds",
739
+ "EnvPoolTrajectoryBuffers",
740
+ "EnvPoolTrajectoryBuffersI16",
741
+ "EnvPoolTrajectoryBuffersI16LegalIds",
742
+ "EnvPoolTrajectoryBuffersNoMask",
61
743
  "BatchOutMinimal",
744
+ "BatchOutMinimalI16",
745
+ "BatchOutMinimalI16LegalIds",
746
+ "BatchOutMinimalNoMask",
747
+ "BatchOutTrajectory",
748
+ "BatchOutTrajectoryI16",
749
+ "BatchOutTrajectoryI16LegalIds",
750
+ "BatchOutTrajectoryNoMask",
62
751
  "BatchOutDebug",
63
752
  "ACTION_SPACE_SIZE",
753
+ "ACTOR_NONE",
754
+ "DECISION_KIND_NONE",
755
+ "POLICY_VERSION",
64
756
  "OBS_LEN",
65
757
  "SPEC_HASH",
66
758
  "RlStep",
759
+ "RlStepNoMask",
760
+ "RlStepI16LegalIds",
67
761
  "reset_rl",
762
+ "reset_rl_into",
763
+ "reset_rl_nomask",
764
+ "reset_rl_nomask_into",
765
+ "reset_rl_i16_legal_ids",
766
+ "reset_rl_i16_legal_ids_into",
68
767
  "step_rl",
768
+ "step_rl_into",
769
+ "step_rl_nomask",
770
+ "step_rl_nomask_into",
771
+ "step_rl_i16_legal_ids",
772
+ "step_rl_i16_legal_ids_into",
773
+ "step_rl_select_from_logits_i16_legal_ids",
774
+ "step_rl_select_from_logits_i16_legal_ids_into",
775
+ "step_rl_sample_from_logits_i16_legal_ids",
776
+ "step_rl_sample_from_logits_i16_legal_ids_into",
69
777
  "pass_action_id_for_decision_kind",
70
778
  "PASS_ACTION_ID",
779
+ "observation_spec_json",
780
+ "action_spec_json",
781
+ "decode_action_id",
782
+ "build_info",
783
+ "make_train_pool",
784
+ "make_eval_pool",
785
+ "spec_bundle",
71
786
  "__version__",
72
787
  ]
weiss_sim/rl.py CHANGED
@@ -1,10 +1,31 @@
1
+ """Lightweight RL helpers for the Weiss simulator.
2
+
3
+ Spec exports:
4
+ - `weiss_sim.observation_spec_json()` and `weiss_sim.action_spec_json()` for
5
+ the stable layout and versioned encoding contract.
6
+ - `weiss_sim.spec_bundle()` for a combined, self-describing spec payload.
7
+
8
+ Episode metadata:
9
+ - `EnvPool.episode_seed_batch()`, `episode_index_batch()`, `env_index_batch()`,
10
+ `starting_player_batch()` for deterministic bookkeeping.
11
+
12
+ Replay controls:
13
+ - `EnvPool.enable_replay_sampling(..., visibility_mode="public"|"full")`
14
+ """
15
+
1
16
  from __future__ import annotations
2
17
 
3
18
  from dataclasses import dataclass
4
19
 
5
20
  import numpy as np
6
21
 
7
- from .weiss_sim import PASS_ACTION_ID, BatchOutMinimal, EnvPool
22
+ from .weiss_sim import (
23
+ PASS_ACTION_ID,
24
+ BatchOutMinimal,
25
+ BatchOutMinimalI16LegalIds,
26
+ BatchOutMinimalNoMask,
27
+ EnvPool,
28
+ )
8
29
 
9
30
 
10
31
  @dataclass(frozen=True)
@@ -15,6 +36,35 @@ class RlStep:
15
36
  terminated: np.ndarray
16
37
  truncated: np.ndarray
17
38
  actor: np.ndarray
39
+ decision_kind: np.ndarray
40
+ decision_id: np.ndarray
41
+ engine_status: np.ndarray
42
+ spec_hash: np.ndarray
43
+
44
+
45
@dataclass(frozen=True)
class RlStepNoMask:
    """Immutable bundle of the arrays produced by one mask-free reset/step.

    Same fields as ``RlStep`` minus the dense action mask.
    """

    # Per-step outputs.
    obs: np.ndarray
    rewards: np.ndarray
    terminated: np.ndarray
    truncated: np.ndarray
    # Decision bookkeeping.
    actor: np.ndarray
    decision_kind: np.ndarray
    decision_id: np.ndarray
    # Engine / spec metadata.
    engine_status: np.ndarray
    spec_hash: np.ndarray
56
+
57
+
58
@dataclass(frozen=True)
class RlStepI16LegalIds:
    """Immutable bundle of the arrays produced by one i16 + legal-ids reset/step.

    Carries ``legal_ids`` / ``legal_offsets`` where ``RlStep`` carries a mask.
    """

    # Per-step outputs.
    obs: np.ndarray
    legal_ids: np.ndarray
    legal_offsets: np.ndarray
    rewards: np.ndarray
    terminated: np.ndarray
    truncated: np.ndarray
    # Decision bookkeeping.
    actor: np.ndarray
    decision_kind: np.ndarray
    decision_id: np.ndarray
    # Engine / spec metadata.
    engine_status: np.ndarray
    spec_hash: np.ndarray
@@ -25,7 +75,19 @@ def pass_action_id_for_decision_kind(decision_kind):
25
75
 
26
76
 
27
77
def reset_rl(pool: EnvPool) -> RlStep:
    """Reset the pool and return a minimal step bundle.

    Allocates a new ``BatchOutMinimal`` sized by ``pool.envs_len`` on every
    call and delegates to ``reset_rl_into``.
    """
    return reset_rl_into(pool, BatchOutMinimal(pool.envs_len))
81
+
82
+
83
def step_rl(pool: EnvPool, actions) -> RlStep:
    """Step the pool once with one action per env and return a minimal bundle.

    Allocates a new ``BatchOutMinimal`` sized by ``pool.envs_len`` on every
    call and delegates to ``step_rl_into``.
    """
    return step_rl_into(pool, actions, BatchOutMinimal(pool.envs_len))
87
+
88
+
89
+ def reset_rl_into(pool: EnvPool, out: BatchOutMinimal) -> RlStep:
90
+ """Reset the pool into a preallocated BatchOutMinimal."""
29
91
  pool.reset_into(out)
30
92
  return RlStep(
31
93
  obs=out.obs,
@@ -34,14 +96,15 @@ def reset_rl(pool: EnvPool) -> RlStep:
34
96
  terminated=out.terminated,
35
97
  truncated=out.truncated,
36
98
  actor=out.actor,
99
+ decision_kind=out.decision_kind,
37
100
  decision_id=out.decision_id,
38
101
  engine_status=out.engine_status,
39
102
  spec_hash=out.spec_hash,
40
103
  )
41
104
 
42
105
 
43
- def step_rl(pool: EnvPool, actions) -> RlStep:
44
- out = BatchOutMinimal(pool.envs_len)
106
+ def step_rl_into(pool: EnvPool, actions, out: BatchOutMinimal) -> RlStep:
107
+ """Step the pool into a preallocated BatchOutMinimal."""
45
108
  pool.step_into(actions, out)
46
109
  return RlStep(
47
110
  obs=out.obs,
@@ -50,7 +113,167 @@ def step_rl(pool: EnvPool, actions) -> RlStep:
50
113
  terminated=out.terminated,
51
114
  truncated=out.truncated,
52
115
  actor=out.actor,
116
+ decision_kind=out.decision_kind,
117
+ decision_id=out.decision_id,
118
+ engine_status=out.engine_status,
119
+ spec_hash=out.spec_hash,
120
+ )
121
+
122
+
123
def reset_rl_nomask(pool: EnvPool) -> RlStepNoMask:
    """Reset the pool and return an ``RlStepNoMask`` (no dense masks).

    Allocates a new ``BatchOutMinimalNoMask`` sized by ``pool.envs_len`` and
    delegates to ``reset_rl_nomask_into``.
    """
    return reset_rl_nomask_into(pool, BatchOutMinimalNoMask(pool.envs_len))
127
+
128
+
129
def step_rl_nomask(pool: EnvPool, actions) -> RlStepNoMask:
    """Step the pool once with ``actions`` and return an ``RlStepNoMask``.

    Allocates a new ``BatchOutMinimalNoMask`` sized by ``pool.envs_len`` and
    delegates to ``step_rl_nomask_into``.
    """
    return step_rl_nomask_into(pool, actions, BatchOutMinimalNoMask(pool.envs_len))
133
+
134
+
135
def reset_rl_nomask_into(pool: EnvPool, out: BatchOutMinimalNoMask) -> RlStepNoMask:
    """Reset the pool into ``out`` and wrap its fields in an ``RlStepNoMask``.

    Args:
        pool: Environment pool to reset.
        out: Preallocated buffer handed to ``pool.reset_into_nomask``.

    Returns:
        An ``RlStepNoMask`` whose fields are read from ``out`` after the reset.
    """
    pool.reset_into_nomask(out)
    bundle = RlStepNoMask(
        obs=out.obs,
        rewards=out.rewards,
        terminated=out.terminated,
        truncated=out.truncated,
        actor=out.actor,
        decision_kind=out.decision_kind,
        decision_id=out.decision_id,
        engine_status=out.engine_status,
        spec_hash=out.spec_hash,
    )
    return bundle
149
+
150
+
151
def step_rl_nomask_into(pool: EnvPool, actions, out: BatchOutMinimalNoMask) -> RlStepNoMask:
    """Step the pool into ``out`` and wrap its fields in an ``RlStepNoMask``.

    Args:
        pool: Environment pool to step.
        actions: Actions forwarded unchanged to ``pool.step_into_nomask``.
        out: Preallocated buffer handed to ``pool.step_into_nomask``.

    Returns:
        An ``RlStepNoMask`` whose fields are read from ``out`` after the step.
    """
    pool.step_into_nomask(actions, out)
    bundle = RlStepNoMask(
        obs=out.obs,
        rewards=out.rewards,
        terminated=out.terminated,
        truncated=out.truncated,
        actor=out.actor,
        decision_kind=out.decision_kind,
        decision_id=out.decision_id,
        engine_status=out.engine_status,
        spec_hash=out.spec_hash,
    )
    return bundle
165
+
166
+
167
def reset_rl_i16_legal_ids(pool: EnvPool) -> RlStepI16LegalIds:
    """Reset the pool and return an ``RlStepI16LegalIds`` bundle.

    Allocates a new ``BatchOutMinimalI16LegalIds`` sized by ``pool.envs_len``
    and delegates to ``reset_rl_i16_legal_ids_into``.
    """
    return reset_rl_i16_legal_ids_into(pool, BatchOutMinimalI16LegalIds(pool.envs_len))
171
+
172
+
173
def step_rl_i16_legal_ids(pool: EnvPool, actions) -> RlStepI16LegalIds:
    """Step the pool once with ``actions`` and return an ``RlStepI16LegalIds``.

    Allocates a new ``BatchOutMinimalI16LegalIds`` sized by ``pool.envs_len``
    and delegates to ``step_rl_i16_legal_ids_into``.
    """
    return step_rl_i16_legal_ids_into(
        pool, actions, BatchOutMinimalI16LegalIds(pool.envs_len)
    )
177
+
178
+
179
def reset_rl_i16_legal_ids_into(
    pool: EnvPool, out: BatchOutMinimalI16LegalIds
) -> RlStepI16LegalIds:
    """Reset the pool into ``out`` and wrap its fields in an ``RlStepI16LegalIds``.

    Args:
        pool: Environment pool to reset.
        out: Preallocated buffer handed to ``pool.reset_into_i16_legal_ids``.

    Returns:
        An ``RlStepI16LegalIds`` whose fields are read from ``out`` after the
        reset.
    """
    pool.reset_into_i16_legal_ids(out)
    bundle = RlStepI16LegalIds(
        obs=out.obs,
        legal_ids=out.legal_ids,
        legal_offsets=out.legal_offsets,
        rewards=out.rewards,
        terminated=out.terminated,
        truncated=out.truncated,
        actor=out.actor,
        decision_kind=out.decision_kind,
        decision_id=out.decision_id,
        engine_status=out.engine_status,
        spec_hash=out.spec_hash,
    )
    return bundle
197
+
198
+
199
def step_rl_i16_legal_ids_into(
    pool: EnvPool, actions, out: BatchOutMinimalI16LegalIds
) -> RlStepI16LegalIds:
    """Step the pool into ``out`` and wrap its fields in an ``RlStepI16LegalIds``.

    Args:
        pool: Environment pool to step.
        actions: Actions forwarded unchanged to ``pool.step_into_i16_legal_ids``.
        out: Preallocated buffer handed to ``pool.step_into_i16_legal_ids``.

    Returns:
        An ``RlStepI16LegalIds`` whose fields are read from ``out`` after the
        step.
    """
    pool.step_into_i16_legal_ids(actions, out)
    bundle = RlStepI16LegalIds(
        obs=out.obs,
        legal_ids=out.legal_ids,
        legal_offsets=out.legal_offsets,
        rewards=out.rewards,
        terminated=out.terminated,
        truncated=out.truncated,
        actor=out.actor,
        decision_kind=out.decision_kind,
        decision_id=out.decision_id,
        engine_status=out.engine_status,
        spec_hash=out.spec_hash,
    )
    return bundle
217
+
218
+
219
def step_rl_select_from_logits_i16_legal_ids(
    pool: EnvPool, logits
) -> tuple[RlStepI16LegalIds, np.ndarray]:
    """Select actions from ``logits`` in the native pool, then step it.

    Allocates a new ``BatchOutMinimalI16LegalIds`` and a ``uint32`` action
    buffer, both sized by ``pool.envs_len``, and delegates to
    ``step_rl_select_from_logits_i16_legal_ids_into``.

    Args:
        pool: Environment pool to step.
        logits: Logits forwarded to the ``*_into`` helper.

    Returns:
        ``(step, actions)``: the step bundle and the freshly allocated
        ``uint32`` action buffer that was handed to the pool.
    """
    out = BatchOutMinimalI16LegalIds(pool.envs_len)
    # np.empty leaves the buffer uninitialised; it is passed to the pool as
    # the destination for the selected actions.
    actions = np.empty(pool.envs_len, dtype=np.uint32)
    return step_rl_select_from_logits_i16_legal_ids_into(pool, logits, actions, out)
224
+
225
+
226
def step_rl_sample_from_logits_i16_legal_ids(
    pool: EnvPool, logits, seeds
) -> tuple[RlStepI16LegalIds, np.ndarray]:
    """Sample actions from ``logits`` in the native pool, then step it.

    Allocates a new ``BatchOutMinimalI16LegalIds`` and a ``uint32`` action
    buffer, both sized by ``pool.envs_len``, and delegates to
    ``step_rl_sample_from_logits_i16_legal_ids_into``.

    Args:
        pool: Environment pool to step.
        logits: Logits forwarded to the ``*_into`` helper.
        seeds: Sampling seeds forwarded to the ``*_into`` helper.

    Returns:
        ``(step, actions)``: the step bundle and the freshly allocated
        ``uint32`` action buffer that was handed to the pool.
    """
    out = BatchOutMinimalI16LegalIds(pool.envs_len)
    # np.empty leaves the buffer uninitialised; it is passed to the pool as
    # the destination for the sampled actions.
    actions = np.empty(pool.envs_len, dtype=np.uint32)
    return step_rl_sample_from_logits_i16_legal_ids_into(pool, logits, seeds, actions, out)
231
+
232
+
233
def step_rl_select_from_logits_i16_legal_ids_into(
    pool: EnvPool, logits, actions, out: BatchOutMinimalI16LegalIds
) -> tuple[RlStepI16LegalIds, np.ndarray]:
    """Select actions from ``logits`` and step the pool into preallocated buffers.

    Args:
        pool: Environment pool to step.
        logits: Array-like of logits; converted to a C-contiguous ``float32``
            array before being passed to the pool.
        actions: Caller-owned action buffer passed to the pool unchanged (the
            README documents it as ``uint32`` with shape ``(num_envs,)``).
        out: Preallocated output buffer passed to the pool.

    Returns:
        ``(step, actions)`` where ``step`` is built from the fields of ``out``
        after the call and ``actions`` is the same object that was passed in.
    """
    # Convert up front so the pool always receives contiguous float32 logits;
    # this copies only when the input is not already in that form.
    logits = np.ascontiguousarray(logits, dtype=np.float32)
    pool.step_select_from_logits_into_i16_legal_ids(logits, actions, out)
    step = RlStepI16LegalIds(
        obs=out.obs,
        legal_ids=out.legal_ids,
        legal_offsets=out.legal_offsets,
        rewards=out.rewards,
        terminated=out.terminated,
        truncated=out.truncated,
        actor=out.actor,
        decision_kind=out.decision_kind,
        decision_id=out.decision_id,
        engine_status=out.engine_status,
        spec_hash=out.spec_hash,
    )
    return step, actions
255
+
256
+
257
def step_rl_sample_from_logits_i16_legal_ids_into(
    pool: EnvPool, logits, seeds, actions, out: BatchOutMinimalI16LegalIds
) -> tuple[RlStepI16LegalIds, np.ndarray]:
    """Sample actions from ``logits`` and step the pool into preallocated buffers.

    Args:
        pool: Environment pool to step.
        logits: Array-like of logits; converted to a C-contiguous ``float32``
            array before being passed to the pool.
        seeds: Array-like of seeds; converted to ``uint64`` and flattened to
            one dimension before being passed to the pool.
        actions: Caller-owned action buffer passed to the pool unchanged.
        out: Preallocated output buffer passed to the pool.

    Returns:
        ``(step, actions)`` where ``step`` is built from the fields of ``out``
        after the call and ``actions`` is the same object that was passed in.
    """
    # Convert both inputs up front so the pool receives a fixed dtype/layout.
    logits = np.ascontiguousarray(logits, dtype=np.float32)
    seeds = np.asarray(seeds, dtype=np.uint64).ravel()
    pool.step_sample_from_logits_into_i16_legal_ids(logits, seeds, actions, out)
    step = RlStepI16LegalIds(
        obs=out.obs,
        legal_ids=out.legal_ids,
        legal_offsets=out.legal_offsets,
        rewards=out.rewards,
        terminated=out.terminated,
        truncated=out.truncated,
        actor=out.actor,
        decision_kind=out.decision_kind,
        decision_id=out.decision_id,
        engine_status=out.engine_status,
        spec_hash=out.spec_hash,
    )
    return step, actions
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: weiss-sim
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -12,17 +12,19 @@ Classifier: Programming Language :: Rust
12
12
  Classifier: Topic :: Games/Entertainment
13
13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Dist: numpy>=1.23
15
+ License-File: LICENSE-APACHE
16
+ License-File: LICENSE-MIT
15
17
  Summary: Deterministic Weiss Schwarz simulator with a Rust core and Python bindings.
16
18
  Keywords: weiss-schwarz,reinforcement-learning,simulation,pyo3,rl
17
19
  Author: Lallan
18
20
  License: MIT OR Apache-2.0
19
21
  Requires-Python: >=3.10
20
22
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
21
- Project-URL: Homepage, https://github.com/victorwp288/weiss-schwarz-simulator
22
- Project-URL: Repository, https://github.com/victorwp288/weiss-schwarz-simulator
23
+ Project-URL: Changelog, https://github.com/victorwp288/weiss-schwarz-simulator/blob/main/CHANGELOG.md
23
24
  Project-URL: Documentation, https://victorwp288.github.io/weiss-schwarz-simulator/rustdoc/
25
+ Project-URL: Homepage, https://github.com/victorwp288/weiss-schwarz-simulator
24
26
  Project-URL: Issues, https://github.com/victorwp288/weiss-schwarz-simulator/issues
25
- Project-URL: Changelog, https://github.com/victorwp288/weiss-schwarz-simulator/blob/main/CHANGELOG.md
27
+ Project-URL: Repository, https://github.com/victorwp288/weiss-schwarz-simulator
26
28
 
27
29
  # Weiss Schwarz Simulator (Rust core + Python bindings)
28
30
 
@@ -91,22 +93,22 @@ in GitHub Actions secrets; otherwise you can manually run the `Wheels` workflow
91
93
  ### Benchmark Snapshot (main, top 12)
92
94
 
93
95
  <!-- BENCHMARKS:START -->
94
- _Last updated: 2026-01-05 00:44 UTC_
96
+ _Last updated: 2026-02-04 08:56 UTC_
95
97
 
96
98
  | Benchmark | Time |
97
99
  | --- | --- |
98
- | rust/advance_until_decision | 63280 ns/iter |
99
- | rust/step_batch_64 | 26268 ns/iter |
100
- | rust/step_batch_fast_256_priority_off | 111592 ns/iter |
101
- | rust/step_batch_fast_256_priority_on | 109646 ns/iter |
102
- | rust/legal_actions | 44 ns/iter |
103
- | rust/legal_actions_forced | 43 ns/iter |
104
- | rust/on_reverse_decision_frequency_on | 1534 ns/iter |
105
- | rust/on_reverse_decision_frequency_off | 1540 ns/iter |
106
- | rust/observation_encode | 228 ns/iter |
107
- | rust/observation_encode_forced | 233 ns/iter |
108
- | rust/mask_construction | 455 ns/iter |
109
- | rust/mask_construction_forced | 412 ns/iter |
100
+ | rust/advance_until_decision | 31245 ns/iter |
101
+ | rust/step_batch_64 | 15036 ns/iter |
102
+ | rust/step_batch_fast_256_priority_off | 74182 ns/iter |
103
+ | rust/step_batch_fast_256_priority_on | 67407 ns/iter |
104
+ | rust/legal_actions | 12 ns/iter |
105
+ | rust/legal_actions_forced | 12 ns/iter |
106
+ | rust/on_reverse_decision_frequency_on | 1098 ns/iter |
107
+ | rust/on_reverse_decision_frequency_off | 1094 ns/iter |
108
+ | rust/observation_encode | 172 ns/iter |
109
+ | rust/observation_encode_forced | 171 ns/iter |
110
+ | rust/mask_construction | 407 ns/iter |
111
+ | rust/mask_construction_forced | 408 ns/iter |
110
112
  <!-- BENCHMARKS:END -->
111
113
 
112
114
 
@@ -272,6 +274,10 @@ Convenience properties:
272
274
  Python helper:
273
275
  - `EnvPoolBuffers(pool)` allocates persistent numpy buffers and exposes `reset()`, `step()`, and `legal_action_ids()`.
274
276
  - `reset_rl(pool)` / `step_rl(pool, actions)` return a `RlStep` dataclass with named fields.
277
+ - `RlStepI16LegalIds` is like `RlStep` but uses `int16` observations and `uint16` legal action IDs to save memory. The `I16` name refers to the observation dtype; legal IDs are still unsigned. Use `reset_rl_i16_legal_ids` / `step_rl_i16_legal_ids` when you want compact 16-bit legal IDs, and use `reset_rl` / `step_rl` for the standard `RlStep` representation.
278
+ - `*_i16_legal_ids` APIs require `ACTION_SPACE_SIZE <= 65535` (fits in `uint16`) and raise `ValueError` if the action space exceeds that limit.
279
+ - `reset_rl_i16_legal_ids(pool)` / `step_rl_i16_legal_ids(pool, actions)` return `RlStepI16LegalIds`.
280
+ - `step_rl_select_from_logits_i16_legal_ids(pool, logits)` selects actions in Rust and returns `(RlStepI16LegalIds, actions)`. `logits` must be a `np.ndarray` of `float32` with shape `(num_envs, action_space)`, and `actions` is a `np.ndarray` of `uint32` with shape `(num_envs,)`. Type hint example (requires `from numpy.typing import NDArray` and `import numpy as np`): `step_rl_select_from_logits_i16_legal_ids(pool: EnvPool, logits: NDArray[np.float32]) -> tuple[RlStepI16LegalIds, NDArray[np.uint32]]`
275
281
  - `pass_action_id_for_decision_kind(decision_kind)` returns `PASS_ACTION_ID` for convenience.
276
282
 
277
283
  ---
@@ -0,0 +1,8 @@
1
+ weiss_sim\__init__.py,sha256=4h_nXIvfgsOCzHPn3htbOj1YlsbIMJkqIcR2dCu2ENw,28782
2
+ weiss_sim\rl.py,sha256=Xi9VjI5_khkiBEoKcKu5hQrQIPihY0ceFMNB91_Phh0,9078
3
+ weiss_sim\weiss_sim.cp312-win_amd64.pyd,sha256=UnTcX-yFrLv3jP_jF1HLzDfHxk8qCPHkIQhgOieVUfI,2033664
4
+ weiss_sim-0.2.0.dist-info\METADATA,sha256=Y0A9RFwrVZTDDgx604VDCa3fICFWvdK2v5f_PGkYb3k,16725
5
+ weiss_sim-0.2.0.dist-info\WHEEL,sha256=xGRyMfC2nQGcuDMHm_QfSkv2HVaAAAo1DvjuUmalQqw,97
6
+ weiss_sim-0.2.0.dist-info\licenses\LICENSE-APACHE,sha256=-ZG9DmEHrv435__Y2U--eTa6XO4tkE36MBOrSbq30os,10975
7
+ weiss_sim-0.2.0.dist-info\licenses\LICENSE-MIT,sha256=q1kMWmXeoM3k6opAnUoxADX62Z_qbvQCYKrGoRF8BVI,1114
8
+ weiss_sim-0.2.0.dist-info\RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.10.2)
2
+ Generator: maturin (1.11.5)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp312-cp312-win_amd64
@@ -0,0 +1,190 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ Copyright (c) 2025 Weiss Schwarz Simulator contributors
179
+
180
+ Licensed under the Apache License, Version 2.0 (the "License");
181
+ you may not use this file except in compliance with the License.
182
+ You may obtain a copy of the License at
183
+
184
+ http://www.apache.org/licenses/LICENSE-2.0
185
+
186
+ Unless required by applicable law or agreed to in writing, software
187
+ distributed under the License is distributed on an "AS IS" BASIS,
188
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
+ See the License for the specific language governing permissions and
190
+ limitations under the License.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Weiss Schwarz Simulator contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,6 +0,0 @@
1
- weiss_sim-0.1.3.dist-info/METADATA,sha256=EVgOL0kkb-25UMNjZT69AsvDN3ZA2GvLM2ONM5jih6U,15555
2
- weiss_sim-0.1.3.dist-info/WHEEL,sha256=MicGETHZ2NSvTN4l4NAAzDXbOHnrlOKS0CnBsnFTiLs,97
3
- weiss_sim/__init__.py,sha256=1pcn5K4wCGxSycm-JmR0B5x3n2EZdXAOClvDwTafXDM,2025
4
- weiss_sim/rl.py,sha256=Ylh0NTOgf2XEubEw-LZNawNTzHZv5SRKMpXBISqTQAI,1395
5
- weiss_sim/weiss_sim.cp312-win_amd64.pyd,sha256=mXCWFlFnec9-IVI5MF5yY63R0mIfNq0hfTMJE_3E6oE,1466368
6
- weiss_sim-0.1.3.dist-info/RECORD,,