spells-mtg: 0.4.0 (py3-none-any wheel) → 0.5.1 (py3-none-any wheel)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

spells/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
- from spells import columns
2
- from spells import enums
1
+ from spells.columns import ColSpec
2
+ from spells.enums import ColType, ColName
3
3
  from spells.draft_data import summon
4
4
 
5
- __all__ = ["summon", "enums", "columns"]
5
+ __all__ = ["summon", "ColSpec", "ColType", "ColName"]
spells/columns.py CHANGED
@@ -7,8 +7,7 @@ from spells.enums import View, ColName, ColType
7
7
 
8
8
 
9
9
  @dataclass(frozen=True)
10
- class ColumnSpec:
11
- name: str
10
+ class ColSpec:
12
11
  col_type: ColType
13
12
  expr: pl.Expr | Callable[..., pl.Expr] | None = None
14
13
  views: list[View] | None = None
@@ -16,7 +15,7 @@ class ColumnSpec:
16
15
 
17
16
 
18
17
  @dataclass(frozen=True)
19
- class ColumnDefinition:
18
+ class ColDef:
20
19
  name: str
21
20
  col_type: ColType
22
21
  expr: pl.Expr | tuple[pl.Expr, ...]
@@ -41,69 +40,56 @@ default_columns = [
41
40
  ColName.GIH_WR,
42
41
  ]
43
42
 
44
- _column_specs = [
45
- ColumnSpec(
46
- name=ColName.NAME,
43
+ specs: dict[str, ColSpec] = {
44
+ ColName.NAME: ColSpec(
47
45
  col_type=ColType.GROUP_BY,
48
46
  views=[View.CARD],
49
47
  ),
50
- ColumnSpec(
51
- name=ColName.EXPANSION,
48
+ ColName.EXPANSION: ColSpec(
52
49
  col_type=ColType.GROUP_BY,
53
50
  views=[View.GAME, View.DRAFT],
54
51
  ),
55
- ColumnSpec(
56
- name=ColName.EVENT_TYPE,
52
+ ColName.EVENT_TYPE: ColSpec(
57
53
  col_type=ColType.GROUP_BY,
58
54
  views=[View.GAME, View.DRAFT],
59
55
  ),
60
- ColumnSpec(
61
- name=ColName.DRAFT_ID,
56
+ ColName.DRAFT_ID: ColSpec(
62
57
  views=[View.GAME, View.DRAFT],
63
58
  col_type=ColType.FILTER_ONLY,
64
59
  ),
65
- ColumnSpec(
66
- name=ColName.DRAFT_TIME,
60
+ ColName.DRAFT_TIME: ColSpec(
67
61
  col_type=ColType.FILTER_ONLY,
68
62
  views=[View.GAME, View.DRAFT],
69
63
  ),
70
- ColumnSpec(
71
- name=ColName.DRAFT_DATE,
64
+ ColName.DRAFT_DATE: ColSpec(
72
65
  col_type=ColType.GROUP_BY,
73
66
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
74
67
  ),
75
- ColumnSpec(
76
- name=ColName.DRAFT_DAY_OF_WEEK,
68
+ ColName.DRAFT_DAY_OF_WEEK: ColSpec(
77
69
  col_type=ColType.GROUP_BY,
78
70
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
79
71
  ),
80
- ColumnSpec(
81
- name=ColName.DRAFT_HOUR,
72
+ ColName.DRAFT_HOUR: ColSpec(
82
73
  col_type=ColType.GROUP_BY,
83
74
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.hour(),
84
75
  ),
85
- ColumnSpec(
86
- name=ColName.DRAFT_WEEK,
76
+ ColName.DRAFT_WEEK: ColSpec(
87
77
  col_type=ColType.GROUP_BY,
88
78
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
89
79
  ),
90
- ColumnSpec(
91
- name=ColName.RANK,
80
+ ColName.RANK: ColSpec(
92
81
  col_type=ColType.GROUP_BY,
93
82
  views=[View.GAME, View.DRAFT],
94
83
  ),
95
- ColumnSpec(
96
- name=ColName.USER_N_GAMES_BUCKET,
84
+ ColName.USER_N_GAMES_BUCKET: ColSpec(
97
85
  col_type=ColType.GROUP_BY,
98
86
  views=[View.DRAFT, View.GAME],
99
87
  ),
100
- ColumnSpec(
101
- name=ColName.USER_GAME_WIN_RATE_BUCKET,
88
+ ColName.USER_GAME_WIN_RATE_BUCKET: ColSpec(
102
89
  col_type=ColType.GROUP_BY,
103
90
  views=[View.DRAFT, View.GAME],
104
91
  ),
105
- ColumnSpec(
106
- name=ColName.PLAYER_COHORT,
92
+ ColName.PLAYER_COHORT: ColSpec(
107
93
  col_type=ColType.GROUP_BY,
108
94
  expr=pl.when(pl.col(ColName.USER_N_GAMES_BUCKET) < 100)
109
95
  .then(pl.lit("Other"))
@@ -117,304 +103,249 @@ _column_specs = [
117
103
  )
118
104
  ),
119
105
  ),
120
- ColumnSpec(
121
- name=ColName.EVENT_MATCH_WINS,
106
+ ColName.EVENT_MATCH_WINS: ColSpec(
122
107
  col_type=ColType.GROUP_BY,
123
108
  views=[View.DRAFT],
124
109
  ),
125
- ColumnSpec(
126
- name=ColName.EVENT_MATCH_WINS_SUM,
110
+ ColName.EVENT_MATCH_WINS_SUM: ColSpec(
127
111
  col_type=ColType.PICK_SUM,
128
112
  views=[View.DRAFT],
129
113
  expr=pl.col(ColName.EVENT_MATCH_WINS),
130
114
  ),
131
- ColumnSpec(
132
- name=ColName.EVENT_MATCH_LOSSES,
115
+ ColName.EVENT_MATCH_LOSSES: ColSpec(
133
116
  col_type=ColType.GROUP_BY,
134
117
  views=[View.DRAFT],
135
118
  ),
136
- ColumnSpec(
137
- name=ColName.EVENT_MATCH_LOSSES_SUM,
119
+ ColName.EVENT_MATCH_LOSSES_SUM: ColSpec(
138
120
  col_type=ColType.PICK_SUM,
139
121
  expr=pl.col(ColName.EVENT_MATCH_LOSSES),
140
122
  ),
141
- ColumnSpec(
142
- name=ColName.EVENT_MATCHES,
123
+ ColName.EVENT_MATCHES: ColSpec(
143
124
  col_type=ColType.GROUP_BY,
144
125
  expr=pl.col(ColName.EVENT_MATCH_WINS) + pl.col(ColName.EVENT_MATCH_LOSSES),
145
126
  ),
146
- ColumnSpec(
147
- name=ColName.EVENT_MATCHES_SUM,
127
+ ColName.EVENT_MATCHES_SUM: ColSpec(
148
128
  col_type=ColType.PICK_SUM,
149
129
  expr=pl.col(ColName.EVENT_MATCHES),
150
130
  ),
151
- ColumnSpec(
152
- name=ColName.IS_TROPHY,
131
+ ColName.IS_TROPHY: ColSpec(
153
132
  col_type=ColType.GROUP_BY,
154
133
  expr=pl.when(pl.col(ColName.EVENT_TYPE) == "Traditional")
155
134
  .then(pl.col(ColName.EVENT_MATCH_WINS) == 3)
156
135
  .otherwise(pl.col(ColName.EVENT_MATCH_WINS) == 7),
157
136
  ),
158
- ColumnSpec(
159
- name=ColName.IS_TROPHY_SUM,
137
+ ColName.IS_TROPHY_SUM: ColSpec(
160
138
  col_type=ColType.PICK_SUM,
161
139
  expr=pl.col(ColName.IS_TROPHY),
162
140
  ),
163
- ColumnSpec(
164
- name=ColName.PACK_NUMBER,
141
+ ColName.PACK_NUMBER: ColSpec(
165
142
  col_type=ColType.FILTER_ONLY, # use pack_num
166
143
  views=[View.DRAFT],
167
144
  ),
168
- ColumnSpec(
169
- name=ColName.PACK_NUM,
145
+ ColName.PACK_NUM: ColSpec(
170
146
  col_type=ColType.GROUP_BY,
171
147
  expr=pl.col(ColName.PACK_NUMBER) + 1,
172
148
  ),
173
- ColumnSpec(
174
- name=ColName.PICK_NUMBER,
149
+ ColName.PICK_NUMBER: ColSpec(
175
150
  col_type=ColType.FILTER_ONLY, # use pick_num
176
151
  views=[View.DRAFT],
177
152
  ),
178
- ColumnSpec(
179
- name=ColName.PICK_NUM,
153
+ ColName.PICK_NUM: ColSpec(
180
154
  col_type=ColType.GROUP_BY,
181
155
  expr=pl.col(ColName.PICK_NUMBER) + 1,
182
156
  ),
183
- ColumnSpec(
184
- name=ColName.TAKEN_AT,
157
+ ColName.TAKEN_AT: ColSpec(
185
158
  col_type=ColType.PICK_SUM,
186
159
  expr=pl.col(ColName.PICK_NUM),
187
160
  ),
188
- ColumnSpec(
189
- name=ColName.NUM_TAKEN,
161
+ ColName.NUM_TAKEN: ColSpec(
190
162
  col_type=ColType.PICK_SUM,
191
163
  expr=pl.when(pl.col(ColName.PICK).is_not_null())
192
164
  .then(1)
193
165
  .otherwise(0),
194
166
  ),
195
- ColumnSpec(
196
- name=ColName.PICK,
167
+ ColName.NUM_DRAFTS: ColSpec(
168
+ col_type=ColType.PICK_SUM,
169
+ expr=pl.when((pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)).then(1).otherwise(0),
170
+ ),
171
+ ColName.PICK: ColSpec(
197
172
  col_type=ColType.FILTER_ONLY,
198
173
  views=[View.DRAFT],
199
174
  ),
200
- ColumnSpec(
201
- name=ColName.PICK_MAINDECK_RATE,
175
+ ColName.PICK_MAINDECK_RATE: ColSpec(
202
176
  col_type=ColType.PICK_SUM,
203
177
  views=[View.DRAFT],
204
178
  ),
205
- ColumnSpec(
206
- name=ColName.PICK_SIDEBOARD_IN_RATE,
179
+ ColName.PICK_SIDEBOARD_IN_RATE: ColSpec(
207
180
  col_type=ColType.PICK_SUM,
208
181
  views=[View.DRAFT],
209
182
  ),
210
- ColumnSpec(
211
- name=ColName.PACK_CARD,
183
+ ColName.PACK_CARD: ColSpec(
212
184
  col_type=ColType.NAME_SUM,
213
185
  views=[View.DRAFT],
214
186
  ),
215
- ColumnSpec(
216
- name=ColName.LAST_SEEN,
187
+ ColName.LAST_SEEN: ColSpec(
217
188
  col_type=ColType.NAME_SUM,
218
189
  expr=lambda name: pl.col(f"pack_card_{name}")
219
190
  * pl.min_horizontal(ColName.PICK_NUM, 8),
220
191
  ),
221
- ColumnSpec(
222
- name=ColName.NUM_SEEN,
192
+ ColName.NUM_SEEN: ColSpec(
223
193
  col_type=ColType.NAME_SUM,
224
194
  expr=lambda name: pl.col(f"pack_card_{name}") * (pl.col(ColName.PICK_NUM) <= 8),
225
195
  ),
226
- ColumnSpec(
227
- name=ColName.POOL,
196
+ ColName.POOL: ColSpec(
228
197
  col_type=ColType.NAME_SUM,
229
198
  views=[View.DRAFT],
230
199
  ),
231
- ColumnSpec(
232
- name=ColName.GAME_TIME,
200
+ ColName.GAME_TIME: ColSpec(
233
201
  col_type=ColType.FILTER_ONLY,
234
202
  views=[View.GAME],
235
203
  ),
236
- ColumnSpec(
237
- name=ColName.GAME_DATE,
204
+ ColName.GAME_DATE: ColSpec(
238
205
  col_type=ColType.GROUP_BY,
239
206
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.date(),
240
207
  ),
241
- ColumnSpec(
242
- name=ColName.GAME_DAY_OF_WEEK,
208
+ ColName.GAME_DAY_OF_WEEK: ColSpec(
243
209
  col_type=ColType.GROUP_BY,
244
210
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
245
211
  ),
246
- ColumnSpec(
247
- name=ColName.GAME_HOUR,
212
+ ColName.GAME_HOUR: ColSpec(
248
213
  col_type=ColType.GROUP_BY,
249
214
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.hour(),
250
215
  ),
251
- ColumnSpec(
252
- name=ColName.GAME_WEEK,
216
+ ColName.GAME_WEEK: ColSpec(
253
217
  col_type=ColType.GROUP_BY,
254
218
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.week(),
255
219
  ),
256
- ColumnSpec(
257
- name=ColName.BUILD_INDEX,
220
+ ColName.BUILD_INDEX: ColSpec(
258
221
  col_type=ColType.GROUP_BY,
259
222
  views=[View.GAME],
260
223
  ),
261
- ColumnSpec(
262
- name=ColName.MATCH_NUMBER,
224
+ ColName.MATCH_NUMBER: ColSpec(
263
225
  col_type=ColType.GROUP_BY,
264
226
  views=[View.GAME],
265
227
  ),
266
- ColumnSpec(
267
- name=ColName.GAME_NUMBER,
228
+ ColName.GAME_NUMBER: ColSpec(
268
229
  col_type=ColType.GROUP_BY,
269
230
  views=[View.GAME],
270
231
  ),
271
- ColumnSpec(
272
- name=ColName.NUM_GAMES,
232
+ ColName.NUM_GAMES: ColSpec(
273
233
  col_type=ColType.GAME_SUM,
274
234
  expr=pl.col(ColName.GAME_NUMBER).is_not_null(),
275
235
  ),
276
- ColumnSpec(
277
- name=ColName.NUM_MATCHES,
236
+ ColName.NUM_MATCHES: ColSpec(
278
237
  col_type=ColType.GAME_SUM,
279
238
  expr=pl.col(ColName.GAME_NUMBER) == 1,
280
239
  ),
281
- ColumnSpec(
282
- name=ColName.NUM_EVENTS,
240
+ ColName.NUM_EVENTS: ColSpec(
283
241
  col_type=ColType.GAME_SUM,
284
242
  expr=(pl.col(ColName.GAME_NUMBER) == 1) & (pl.col(ColName.MATCH_NUMBER) == 1),
285
243
  ),
286
- ColumnSpec(
287
- name=ColName.OPP_RANK,
244
+ ColName.OPP_RANK: ColSpec(
288
245
  col_type=ColType.GROUP_BY,
289
246
  views=[View.GAME],
290
247
  ),
291
- ColumnSpec(
292
- name=ColName.MAIN_COLORS,
248
+ ColName.MAIN_COLORS: ColSpec(
293
249
  col_type=ColType.GROUP_BY,
294
250
  views=[View.GAME],
295
251
  ),
296
- ColumnSpec(
297
- name=ColName.NUM_COLORS,
252
+ ColName.NUM_COLORS: ColSpec(
298
253
  col_type=ColType.GROUP_BY,
299
254
  expr=pl.col(ColName.MAIN_COLORS).str.len_chars(),
300
255
  ),
301
- ColumnSpec(
302
- name=ColName.SPLASH_COLORS,
256
+ ColName.SPLASH_COLORS: ColSpec(
303
257
  col_type=ColType.GROUP_BY,
304
258
  views=[View.GAME],
305
259
  ),
306
- ColumnSpec(
307
- name=ColName.HAS_SPLASH,
260
+ ColName.HAS_SPLASH: ColSpec(
308
261
  col_type=ColType.GROUP_BY,
309
262
  expr=pl.col(ColName.SPLASH_COLORS).str.len_chars() > 0,
310
263
  ),
311
- ColumnSpec(
312
- name=ColName.ON_PLAY,
264
+ ColName.ON_PLAY: ColSpec(
313
265
  col_type=ColType.GROUP_BY,
314
266
  views=[View.GAME],
315
267
  ),
316
- ColumnSpec(
317
- name=ColName.NUM_ON_PLAY,
268
+ ColName.NUM_ON_PLAY: ColSpec(
318
269
  col_type=ColType.GAME_SUM,
319
270
  expr=pl.col(ColName.ON_PLAY),
320
271
  ),
321
- ColumnSpec(
322
- name=ColName.NUM_MULLIGANS,
272
+ ColName.NUM_MULLIGANS: ColSpec(
323
273
  col_type=ColType.GROUP_BY,
324
274
  views=[View.GAME],
325
275
  ),
326
- ColumnSpec(
327
- name=ColName.NUM_MULLIGANS_SUM,
276
+ ColName.NUM_MULLIGANS_SUM: ColSpec(
328
277
  col_type=ColType.GAME_SUM,
329
278
  expr=pl.col(ColName.NUM_MULLIGANS),
330
279
  ),
331
- ColumnSpec(
332
- name=ColName.OPP_NUM_MULLIGANS,
280
+ ColName.OPP_NUM_MULLIGANS: ColSpec(
333
281
  col_type=ColType.GAME_SUM,
334
282
  views=[View.GAME],
335
283
  ),
336
- ColumnSpec(
337
- name=ColName.OPP_NUM_MULLIGANS_SUM,
284
+ ColName.OPP_NUM_MULLIGANS_SUM: ColSpec(
338
285
  col_type=ColType.GAME_SUM,
339
286
  expr=pl.col(ColName.OPP_NUM_MULLIGANS),
340
287
  ),
341
- ColumnSpec(
342
- name=ColName.OPP_COLORS,
288
+ ColName.OPP_COLORS: ColSpec(
343
289
  col_type=ColType.GROUP_BY,
344
290
  views=[View.GAME],
345
291
  ),
346
- ColumnSpec(
347
- name=ColName.NUM_TURNS,
292
+ ColName.NUM_TURNS: ColSpec(
348
293
  col_type=ColType.GROUP_BY,
349
294
  views=[View.GAME],
350
295
  ),
351
- ColumnSpec(
352
- name=ColName.NUM_TURNS_SUM,
296
+ ColName.NUM_TURNS_SUM: ColSpec(
353
297
  col_type=ColType.GAME_SUM,
354
298
  expr=pl.col(ColName.NUM_TURNS),
355
299
  ),
356
- ColumnSpec(
357
- name=ColName.WON,
300
+ ColName.WON: ColSpec(
358
301
  col_type=ColType.GROUP_BY,
359
302
  views=[View.GAME],
360
303
  ),
361
- ColumnSpec(
362
- name=ColName.NUM_WON,
304
+ ColName.NUM_WON: ColSpec(
363
305
  col_type=ColType.GAME_SUM,
364
306
  expr=pl.col(ColName.WON),
365
307
  ),
366
- ColumnSpec(
367
- name=ColName.OPENING_HAND,
308
+ ColName.OPENING_HAND: ColSpec(
368
309
  col_type=ColType.NAME_SUM,
369
310
  views=[View.GAME],
370
311
  ),
371
- ColumnSpec(
372
- name=ColName.WON_OPENING_HAND,
312
+ ColName.WON_OPENING_HAND: ColSpec(
373
313
  col_type=ColType.NAME_SUM,
374
314
  expr=lambda name: pl.col(f"opening_hand_{name}") * pl.col(ColName.WON),
375
315
  ),
376
- ColumnSpec(
377
- name=ColName.DRAWN,
316
+ ColName.DRAWN: ColSpec(
378
317
  col_type=ColType.NAME_SUM,
379
318
  views=[View.GAME],
380
319
  ),
381
- ColumnSpec(
382
- name=ColName.WON_DRAWN,
320
+ ColName.WON_DRAWN: ColSpec(
383
321
  col_type=ColType.NAME_SUM,
384
322
  expr=lambda name: pl.col(f"drawn_{name}") * pl.col(ColName.WON),
385
323
  ),
386
- ColumnSpec(
387
- name=ColName.TUTORED,
324
+ ColName.TUTORED: ColSpec(
388
325
  col_type=ColType.NAME_SUM,
389
326
  views=[View.GAME],
390
327
  ),
391
- ColumnSpec(
392
- name=ColName.WON_TUTORED,
328
+ ColName.WON_TUTORED: ColSpec(
393
329
  col_type=ColType.NAME_SUM,
394
330
  expr=lambda name: pl.col(f"tutored_{name}") * pl.col(ColName.WON),
395
331
  ),
396
- ColumnSpec(
397
- name=ColName.DECK,
332
+ ColName.DECK: ColSpec(
398
333
  col_type=ColType.NAME_SUM,
399
334
  views=[View.GAME],
400
335
  ),
401
- ColumnSpec(
402
- name=ColName.WON_DECK,
336
+ ColName.WON_DECK: ColSpec(
403
337
  col_type=ColType.NAME_SUM,
404
338
  expr=lambda name: pl.col(f"deck_{name}") * pl.col(ColName.WON),
405
339
  ),
406
- ColumnSpec(
407
- name=ColName.SIDEBOARD,
340
+ ColName.SIDEBOARD: ColSpec(
408
341
  col_type=ColType.NAME_SUM,
409
342
  views=[View.GAME],
410
343
  ),
411
- ColumnSpec(
412
- name=ColName.WON_SIDEBOARD,
344
+ ColName.WON_SIDEBOARD: ColSpec(
413
345
  col_type=ColType.NAME_SUM,
414
346
  expr=lambda name: pl.col(f"sideboard_{name}") * pl.col(ColName.WON),
415
347
  ),
416
- ColumnSpec(
417
- name=ColName.NUM_GNS,
348
+ ColName.NUM_GNS: ColSpec(
418
349
  col_type=ColType.NAME_SUM,
419
350
  expr=lambda name: pl.max_horizontal(
420
351
  0,
@@ -424,258 +355,204 @@ _column_specs = [
424
355
  - pl.col(f"opening_hand_{name}"),
425
356
  ),
426
357
  ),
427
- ColumnSpec(
428
- name=ColName.WON_NUM_GNS,
358
+ ColName.WON_NUM_GNS: ColSpec(
429
359
  col_type=ColType.NAME_SUM,
430
360
  expr=lambda name: pl.col(ColName.WON) * pl.col(f"num_gns_{name}"),
431
361
  ),
432
- ColumnSpec(
433
- name=ColName.SET_CODE,
362
+ ColName.SET_CODE: ColSpec(
434
363
  col_type=ColType.CARD_ATTR,
435
364
  ),
436
- ColumnSpec(
437
- name=ColName.COLOR,
365
+ ColName.COLOR: ColSpec(
438
366
  col_type=ColType.CARD_ATTR,
439
367
  ),
440
- ColumnSpec(
441
- name=ColName.RARITY,
368
+ ColName.RARITY: ColSpec(
442
369
  col_type=ColType.CARD_ATTR,
443
370
  ),
444
- ColumnSpec(
445
- name=ColName.COLOR_IDENTITY,
371
+ ColName.COLOR_IDENTITY: ColSpec(
446
372
  col_type=ColType.CARD_ATTR,
447
373
  ),
448
- ColumnSpec(
449
- name=ColName.CARD_TYPE,
374
+ ColName.CARD_TYPE: ColSpec(
450
375
  col_type=ColType.CARD_ATTR,
451
376
  ),
452
- ColumnSpec(
453
- name=ColName.SUBTYPE,
377
+ ColName.SUBTYPE: ColSpec(
454
378
  col_type=ColType.CARD_ATTR,
455
379
  ),
456
- ColumnSpec(
457
- name=ColName.MANA_VALUE,
380
+ ColName.MANA_VALUE: ColSpec(
458
381
  col_type=ColType.CARD_ATTR,
459
382
  ),
460
- ColumnSpec(
461
- name=ColName.DECK_MANA_VALUE,
383
+ ColName.DECK_MANA_VALUE: ColSpec(
462
384
  col_type=ColType.NAME_SUM,
463
385
  expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE] * pl.col(f"deck_{name}"),
464
386
  ),
465
- ColumnSpec(
466
- name=ColName.DECK_LANDS,
387
+ ColName.DECK_LANDS: ColSpec(
467
388
  col_type=ColType.NAME_SUM,
468
389
  expr=lambda name, card_context: pl.col(f"deck_{name}") * ( 1 if 'Land' in card_context[name][ColName.CARD_TYPE] else 0 )
469
390
  ),
470
- ColumnSpec(
471
- name=ColName.DECK_SPELLS,
391
+ ColName.DECK_SPELLS: ColSpec(
472
392
  col_type=ColType.NAME_SUM,
473
393
  expr=lambda name: pl.col(f"deck_{name}") - pl.col(f"deck_lands_{name}"),
474
394
  ),
475
- ColumnSpec(
476
- name=ColName.MANA_COST,
395
+ ColName.MANA_COST: ColSpec(
477
396
  col_type=ColType.CARD_ATTR,
478
397
  ),
479
- ColumnSpec(
480
- name=ColName.POWER,
398
+ ColName.POWER: ColSpec(
481
399
  col_type=ColType.CARD_ATTR,
482
400
  ),
483
- ColumnSpec(
484
- name=ColName.TOUGHNESS,
401
+ ColName.TOUGHNESS: ColSpec(
485
402
  col_type=ColType.CARD_ATTR,
486
403
  ),
487
- ColumnSpec(
488
- name=ColName.IS_BONUS_SHEET,
404
+ ColName.IS_BONUS_SHEET: ColSpec(
489
405
  col_type=ColType.CARD_ATTR,
490
406
  ),
491
- ColumnSpec(
492
- name=ColName.IS_DFC,
407
+ ColName.IS_DFC: ColSpec(
493
408
  col_type=ColType.CARD_ATTR,
494
409
  ),
495
- ColumnSpec(
496
- name=ColName.ORACLE_TEXT,
410
+ ColName.ORACLE_TEXT: ColSpec(
497
411
  col_type=ColType.CARD_ATTR,
498
412
  ),
499
- ColumnSpec(
500
- name=ColName.CARD_JSON,
413
+ ColName.CARD_JSON: ColSpec(
501
414
  col_type=ColType.CARD_ATTR,
502
415
  ),
503
- ColumnSpec(
504
- name=ColName.PICKED_MATCH_WR,
416
+ ColName.PICKED_MATCH_WR: ColSpec(
505
417
  col_type=ColType.AGG,
506
- expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES),
418
+ expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES_SUM),
507
419
  ),
508
- ColumnSpec(
509
- name=ColName.TROPHY_RATE,
420
+ ColName.TROPHY_RATE: ColSpec(
510
421
  col_type=ColType.AGG,
511
422
  expr=pl.col(ColName.IS_TROPHY_SUM) / pl.col(ColName.NUM_TAKEN),
512
423
  ),
513
- ColumnSpec(
514
- name=ColName.GAME_WR,
424
+ ColName.GAME_WR: ColSpec(
515
425
  col_type=ColType.AGG,
516
426
  expr=pl.col(ColName.NUM_WON) / pl.col(ColName.NUM_GAMES),
517
427
  ),
518
- ColumnSpec(
519
- name=ColName.ALSA,
428
+ ColName.ALSA: ColSpec(
520
429
  col_type=ColType.AGG,
521
430
  expr=pl.col(ColName.LAST_SEEN) / pl.col(ColName.NUM_SEEN),
522
431
  ),
523
- ColumnSpec(
524
- name=ColName.ATA,
432
+ ColName.ATA: ColSpec(
525
433
  col_type=ColType.AGG,
526
434
  expr=pl.col(ColName.TAKEN_AT) / pl.col(ColName.NUM_TAKEN),
527
435
  ),
528
- ColumnSpec(
529
- name=ColName.NUM_GP,
436
+ ColName.NUM_GP: ColSpec(
530
437
  col_type=ColType.AGG,
531
438
  expr=pl.col(ColName.DECK),
532
439
  ),
533
- ColumnSpec(
534
- name=ColName.PCT_GP,
440
+ ColName.PCT_GP: ColSpec(
535
441
  col_type=ColType.AGG,
536
442
  expr=pl.col(ColName.DECK) / (pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD)),
537
443
  ),
538
- ColumnSpec(
539
- name=ColName.GP_WR,
444
+ ColName.GP_WR: ColSpec(
540
445
  col_type=ColType.AGG,
541
446
  expr=pl.col(ColName.WON_DECK) / pl.col(ColName.DECK),
542
447
  ),
543
- ColumnSpec(
544
- name=ColName.NUM_OH,
448
+ ColName.NUM_OH: ColSpec(
545
449
  col_type=ColType.AGG,
546
450
  expr=pl.col(ColName.OPENING_HAND),
547
451
  ),
548
- ColumnSpec(
549
- name=ColName.OH_WR,
452
+ ColName.OH_WR: ColSpec(
550
453
  col_type=ColType.AGG,
551
454
  expr=pl.col(ColName.WON_OPENING_HAND) / pl.col(ColName.OPENING_HAND),
552
455
  ),
553
- ColumnSpec(
554
- name=ColName.NUM_GIH,
456
+ ColName.NUM_GIH: ColSpec(
555
457
  col_type=ColType.AGG,
556
458
  expr=pl.col(ColName.OPENING_HAND) + pl.col(ColName.DRAWN),
557
459
  ),
558
- ColumnSpec(
559
- name=ColName.NUM_GIH_WON,
460
+ ColName.NUM_GIH_WON: ColSpec(
560
461
  col_type=ColType.AGG,
561
462
  expr=pl.col(ColName.WON_OPENING_HAND) + pl.col(ColName.WON_DRAWN),
562
463
  ),
563
- ColumnSpec(
564
- name=ColName.GIH_WR,
464
+ ColName.GIH_WR: ColSpec(
565
465
  col_type=ColType.AGG,
566
466
  expr=pl.col(ColName.NUM_GIH_WON) / pl.col(ColName.NUM_GIH),
567
467
  ),
568
- ColumnSpec(
569
- name=ColName.GNS_WR,
468
+ ColName.GNS_WR: ColSpec(
570
469
  col_type=ColType.AGG,
571
470
  expr=pl.col(ColName.WON_NUM_GNS) / pl.col(ColName.NUM_GNS),
572
471
  ),
573
- ColumnSpec(
574
- name=ColName.IWD,
472
+ ColName.IWD: ColSpec(
575
473
  col_type=ColType.AGG,
576
474
  expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GNS_WR),
577
475
  ),
578
- ColumnSpec(
579
- name=ColName.NUM_IN_POOL,
476
+ ColName.NUM_IN_POOL: ColSpec(
580
477
  col_type=ColType.AGG,
581
478
  expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
582
479
  ),
583
- ColumnSpec(
584
- name=ColName.IN_POOL_WR,
480
+ ColName.IN_POOL_WR: ColSpec(
585
481
  col_type=ColType.AGG,
586
482
  expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
587
483
  / pl.col(ColName.NUM_IN_POOL),
588
484
  ),
589
- ColumnSpec(
590
- name=ColName.DECK_TOTAL,
485
+ ColName.DECK_TOTAL: ColSpec(
591
486
  col_type=ColType.AGG,
592
487
  expr=pl.col(ColName.DECK).sum(),
593
488
  ),
594
- ColumnSpec(
595
- name=ColName.WON_DECK_TOTAL,
489
+ ColName.WON_DECK_TOTAL: ColSpec(
596
490
  col_type=ColType.AGG,
597
491
  expr=pl.col(ColName.WON_DECK).sum(),
598
492
  ),
599
- ColumnSpec(
600
- name=ColName.GP_WR_MEAN,
493
+ ColName.GP_WR_MEAN: ColSpec(
601
494
  col_type=ColType.AGG,
602
495
  expr=pl.col(ColName.WON_DECK_TOTAL) / pl.col(ColName.DECK_TOTAL),
603
496
  ),
604
- ColumnSpec(
605
- name=ColName.GP_WR_EXCESS,
497
+ ColName.GP_WR_EXCESS: ColSpec(
606
498
  col_type=ColType.AGG,
607
499
  expr=pl.col(ColName.GP_WR) - pl.col(ColName.GP_WR_MEAN),
608
500
  ),
609
- ColumnSpec(
610
- name=ColName.GP_WR_VAR,
501
+ ColName.GP_WR_VAR: ColSpec(
611
502
  col_type=ColType.AGG,
612
503
  expr=(pl.col(ColName.GP_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GP)).sum()
613
504
  / pl.col(ColName.DECK_TOTAL),
614
505
  ),
615
- ColumnSpec(
616
- name=ColName.GP_WR_STDEV,
506
+ ColName.GP_WR_STDEV: ColSpec(
617
507
  col_type=ColType.AGG,
618
508
  expr=pl.col(ColName.GP_WR_VAR).sqrt(),
619
509
  ),
620
- ColumnSpec(
621
- name=ColName.GP_WR_Z,
510
+ ColName.GP_WR_Z: ColSpec(
622
511
  col_type=ColType.AGG,
623
512
  expr=pl.col(ColName.GP_WR_EXCESS) / pl.col(ColName.GP_WR_STDEV),
624
513
  ),
625
- ColumnSpec(
626
- name=ColName.GIH_TOTAL,
514
+ ColName.GIH_TOTAL: ColSpec(
627
515
  col_type=ColType.AGG,
628
516
  expr=pl.col(ColName.NUM_GIH).sum(),
629
517
  ),
630
- ColumnSpec(
631
- name=ColName.WON_GIH_TOTAL,
518
+ ColName.WON_GIH_TOTAL: ColSpec(
632
519
  col_type=ColType.AGG,
633
520
  expr=pl.col(ColName.NUM_GIH_WON).sum(),
634
521
  ),
635
- ColumnSpec(
636
- name=ColName.GIH_WR_MEAN,
522
+ ColName.GIH_WR_MEAN: ColSpec(
637
523
  col_type=ColType.AGG,
638
524
  expr=pl.col(ColName.WON_GIH_TOTAL) / pl.col(ColName.GIH_TOTAL),
639
525
  ),
640
- ColumnSpec(
641
- name=ColName.GIH_WR_EXCESS,
526
+ ColName.GIH_WR_EXCESS: ColSpec(
642
527
  col_type=ColType.AGG,
643
528
  expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GIH_WR_MEAN),
644
529
  ),
645
- ColumnSpec(
646
- name=ColName.GIH_WR_VAR,
530
+ ColName.GIH_WR_VAR: ColSpec(
647
531
  col_type=ColType.AGG,
648
532
  expr=(pl.col(ColName.GIH_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GIH)).sum()
649
533
  / pl.col(ColName.GIH_TOTAL),
650
534
  ),
651
- ColumnSpec(
652
- name=ColName.GIH_WR_STDEV,
535
+ ColName.GIH_WR_STDEV: ColSpec(
653
536
  col_type=ColType.AGG,
654
537
  expr=pl.col(ColName.GIH_WR_VAR).sqrt(),
655
538
  ),
656
- ColumnSpec(
657
- name=ColName.GIH_WR_Z,
539
+ ColName.GIH_WR_Z: ColSpec(
658
540
  col_type=ColType.AGG,
659
541
  expr=pl.col(ColName.GIH_WR_EXCESS) / pl.col(ColName.GIH_WR_STDEV),
660
542
  ),
661
- ColumnSpec(
662
- name=ColName.DECK_MANA_VALUE_AVG,
543
+ ColName.DECK_MANA_VALUE_AVG: ColSpec(
663
544
  col_type=ColType.AGG,
664
545
  expr=pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK_SPELLS),
665
546
  ),
666
- ColumnSpec(
667
- name=ColName.DECK_LANDS_AVG,
547
+ ColName.DECK_LANDS_AVG: ColSpec(
668
548
  col_type=ColType.AGG,
669
549
  expr=pl.col(ColName.DECK_LANDS) / pl.col(ColName.NUM_GAMES),
670
550
  ),
671
- ColumnSpec(
672
- name=ColName.DECK_SPELLS_AVG,
551
+ ColName.DECK_SPELLS_AVG: ColSpec(
673
552
  col_type=ColType.AGG,
674
553
  expr=pl.col(ColName.DECK_SPELLS) / pl.col(ColName.NUM_GAMES),
675
554
  ),
676
- ]
677
-
678
- col_spec_map = {col.name: col for col in _column_specs}
555
+ }
679
556
 
680
557
  for item in ColName:
681
- assert item in col_spec_map, f"column {item} enumerated but not specified"
558
+ assert item in specs, f"column {item} enumerated but not specified"
spells/draft_data.py CHANGED
@@ -14,12 +14,13 @@ from inspect import signature
14
14
  from typing import Callable, TypeVar, Any
15
15
 
16
16
  import polars as pl
17
+ from polars.exceptions import ColumnNotFoundError
17
18
 
18
19
  from spells.external import data_file_path
19
20
  import spells.cache
20
21
  import spells.filter
21
22
  import spells.manifest
22
- from spells.columns import ColumnDefinition, ColumnSpec
23
+ from spells.columns import ColDef, ColSpec
23
24
  from spells.enums import View, ColName, ColType
24
25
 
25
26
 
@@ -53,8 +54,8 @@ def _get_names(set_code: str) -> list[str]:
53
54
  return names
54
55
 
55
56
 
56
- def _get_card_context(set_code: str, col_spec_map: dict[str, ColumnSpec]) -> dict[str, dict[str, Any]]:
57
- card_attr_specs = {col:spec for col, spec in col_spec_map.items() if spec.col_type == ColType.CARD_ATTR or spec.name == ColName.NAME}
57
+ def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict[str, Any]] | None) -> dict[str, dict[str, Any]]:
58
+ card_attr_specs = {col:spec for col, spec in specs.items() if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME}
58
59
  col_def_map = _hydrate_col_defs(set_code, card_attr_specs, card_only=True)
59
60
 
60
61
  columns = list(col_def_map.keys())
@@ -65,12 +66,25 @@ def _get_card_context(set_code: str, col_spec_map: dict[str, ColumnSpec]) -> dic
65
66
  card_df, frozenset(columns), col_def_map, is_agg_view=False
66
67
  ).to_dicts()
67
68
 
68
- card_context = {row[ColName.NAME]: row for row in select_rows}
69
+ loaded_context = {row[ColName.NAME]: row for row in select_rows}
69
70
 
70
- return card_context
71
+ if card_context is not None:
72
+ if isinstance(card_context, pl.DataFrame):
73
+ try:
74
+ card_context = {row[ColName.NAME]: row for row in card_context.to_dicts()}
75
+ except ColumnNotFoundError:
76
+ raise ValueError("card_context DataFrame must have column 'name'")
77
+
78
+ names = list(loaded_context.keys())
79
+ for name in names:
80
+ assert name in card_context, f"card_context must include a row for each card name. {name} missing."
81
+ for col, value in card_context[name].items():
82
+ loaded_context[name][col] = value
83
+
84
+ return loaded_context
71
85
 
72
86
 
73
- def _determine_expression(spec: ColumnSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
87
+ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
74
88
  def seed_params(expr):
75
89
  params = {}
76
90
 
@@ -83,18 +97,18 @@ def _determine_expression(spec: ColumnSpec, names: list[str], card_context: dict
83
97
 
84
98
  if spec.col_type == ColType.NAME_SUM:
85
99
  if spec.expr is not None:
86
- assert isinstance(spec.expr, Callable), f"NAME_SUM column {spec.name} must have a callable `expr` accepting a `name` argument"
100
+ assert isinstance(spec.expr, Callable), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
87
101
  unnamed_exprs = [spec.expr(**{'name': name, **seed_params(spec.expr)}) for name in names]
88
102
 
89
103
  expr = tuple(
90
104
  map(
91
- lambda ex, name: ex.alias(f"{spec.name}_{name}"),
105
+ lambda ex, name: ex.alias(f"{col}_{name}"),
92
106
  unnamed_exprs,
93
107
  names,
94
108
  )
95
109
  )
96
110
  else:
97
- expr = tuple(map(lambda name: pl.col(f"{spec.name}_{name}"), names))
111
+ expr = tuple(map(lambda name: pl.col(f"{col}_{name}"), names))
98
112
 
99
113
  elif spec.expr is not None:
100
114
  if isinstance(spec.expr, Callable):
@@ -104,25 +118,27 @@ def _determine_expression(spec: ColumnSpec, names: list[str], card_context: dict
104
118
  for name in names:
105
119
  name_params = {'name': name, **params}
106
120
  expr = pl.when(pl.col(ColName.PICK) == name).then(spec.expr(**name_params)).otherwise(expr)
121
+ elif spec.col_type == ColType.CARD_ATTR and 'name' in signature(spec.expr).parameters:
122
+ expr = spec.expr(**{'name': pl.col('name'), **params})
107
123
  else:
108
124
  expr = spec.expr(**params)
109
125
  else:
110
126
  expr = spec.expr
111
- expr = expr.alias(spec.name)
127
+ expr = expr.alias(col)
112
128
  else:
113
- expr = pl.col(spec.name)
129
+ expr = pl.col(col)
114
130
 
115
131
  return expr
116
132
 
117
133
 
118
- def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_map: dict[str, ColumnSpec], names: list[str]) -> set[str]:
134
+ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: dict[str, ColSpec], names: list[str]) -> set[str]:
119
135
  dependencies = set()
120
136
  tricky_ones = set()
121
137
 
122
138
  if isinstance(expr, pl.Expr):
123
139
  dep_cols = [c for c in expr.meta.root_names() if c != name]
124
140
  for dep_col in dep_cols:
125
- if dep_col in col_spec_map.keys():
141
+ if dep_col in specs.keys():
126
142
  dependencies.add(dep_col)
127
143
  else:
128
144
  tricky_ones.add(dep_col)
@@ -131,9 +147,9 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
131
147
  pattern = f"_{names[idx]}$"
132
148
  dep_cols = [c for c in exp.meta.root_names() if c != name]
133
149
  for dep_col in dep_cols:
134
- if dep_col in col_spec_map.keys():
150
+ if dep_col in specs.keys():
135
151
  dependencies.add(dep_col)
136
- elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in col_spec_map:
152
+ elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs:
137
153
  dependencies.add(split[0])
138
154
  else:
139
155
  tricky_ones.add(dep_col)
@@ -142,7 +158,7 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
142
158
  found = False
143
159
  for n in names:
144
160
  pattern = f"_{n}$"
145
- if not found and len(split := re.split(pattern, item)) == 2 and split[0] in col_spec_map:
161
+ if not found and len(split := re.split(pattern, item)) == 2 and split[0] in specs:
146
162
  dependencies.add(split[0])
147
163
  found = True
148
164
  assert found, f"Could not locate column spec for root col {item}"
@@ -150,56 +166,57 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
150
166
  return dependencies
151
167
 
152
168
 
153
- def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec], card_only=False):
169
+ def _hydrate_col_defs(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict] | None = None, card_only: bool =False):
154
170
  names = _get_names(set_code)
155
171
 
156
172
  if card_only:
157
173
  card_context = {}
158
174
  else:
159
- card_context = _get_card_context(set_code, col_spec_map)
175
+ card_context = _get_card_context(set_code, specs, card_context)
160
176
 
161
177
  assert len(names) > 0, "there should be names"
162
178
  hydrated = {}
163
- for key, spec in col_spec_map.items():
164
- expr = _determine_expression(spec, names, card_context)
165
- dependencies = _infer_dependencies(key, expr, col_spec_map, names)
179
+ for col, spec in specs.items():
180
+ expr = _determine_expression(col, spec, names, card_context)
181
+ dependencies = _infer_dependencies(col, expr, specs, names)
166
182
 
183
+ sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
167
184
  try:
168
- sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
169
185
  expr_sig = sig_expr.meta.serialize(
170
186
  format="json"
171
- ) # not compatible with renaming
187
+ )
172
188
  except pl.exceptions.ComputeError:
173
189
  if spec.version is not None:
174
- expr_sig = spec.name + spec.version
190
+ expr_sig = col + spec.version
175
191
  else:
176
- expr_sig = str(datetime.datetime.now)
192
+ print(f"Using session-only signature for non-serializable column {col}, please provide a version value")
193
+ expr_sig = str(sig_expr)
177
194
 
178
195
  signature = str(
179
196
  (
180
- spec.name,
197
+ col,
181
198
  spec.col_type.value,
182
199
  expr_sig,
183
- dependencies,
200
+ sorted(dependencies),
184
201
  )
185
202
  )
186
203
 
187
- cdef = ColumnDefinition(
188
- name=spec.name,
204
+ cdef = ColDef(
205
+ name=col,
189
206
  col_type=spec.col_type,
190
207
  views=set(spec.views or set()),
191
208
  expr=expr,
192
209
  dependencies=dependencies,
193
210
  signature=signature,
194
211
  )
195
- hydrated[key] = cdef
212
+ hydrated[col] = cdef
196
213
  return hydrated
197
214
 
198
215
 
199
216
  def _view_select(
200
217
  df: DF,
201
218
  view_cols: frozenset[str],
202
- col_def_map: dict[str, ColumnDefinition],
219
+ col_def_map: dict[str, ColDef],
203
220
  is_agg_view: bool,
204
221
  ) -> DF:
205
222
  base_cols = frozenset()
@@ -339,17 +356,18 @@ def summon(
339
356
  columns: list[str] | None = None,
340
357
  group_by: list[str] | None = None,
341
358
  filter_spec: dict | None = None,
342
- extensions: list[ColumnSpec] | None = None,
359
+ extensions: dict[str, ColSpec] | None = None,
343
360
  use_streaming: bool = False,
344
361
  read_cache: bool = True,
345
362
  write_cache: bool = True,
363
+ card_context: pl.DataFrame | dict[str, dict] | None = None
346
364
  ) -> pl.DataFrame:
347
- col_spec_map = dict(spells.columns.col_spec_map)
365
+ specs = dict(spells.columns.specs)
366
+
348
367
  if extensions is not None:
349
- for spec in extensions:
350
- col_spec_map[spec.name] = spec
368
+ specs.update(extensions)
351
369
 
352
- col_def_map = _hydrate_col_defs(set_code, col_spec_map)
370
+ col_def_map = _hydrate_col_defs(set_code, specs, card_context)
353
371
  m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
354
372
 
355
373
  calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
spells/enums.py CHANGED
@@ -60,6 +60,7 @@ class ColName(StrEnum):
60
60
  PICK_NUM = "pick_num" # pick_number plus 1
61
61
  TAKEN_AT = "taken_at"
62
62
  NUM_TAKEN = "num_taken"
63
+ NUM_DRAFTS = "num_drafts"
63
64
  PICK = "pick"
64
65
  PICK_MAINDECK_RATE = "pick_maindeck_rate"
65
66
  PICK_SIDEBOARD_IN_RATE = "pick_sideboard_in_rate"
spells/extension.py ADDED
@@ -0,0 +1,40 @@
1
+ import polars as pl
2
+
3
+ from spells.enums import ColType
4
+ from spells.columns import ColSpec
5
+
6
+ def attr_metrics(attr):
7
+ return {
8
+ f"seen_{attr}": ColSpec(
9
+ col_type=ColType.NAME_SUM,
10
+ expr=(lambda name, card_context: pl.when(pl.col(f"pack_card_{name}") > 0)
11
+ .then(card_context[name][attr])
12
+ .otherwise(None)),
13
+ ),
14
+ f"pick_{attr}": ColSpec(
15
+ col_type=ColType.PICK_SUM,
16
+ expr=lambda name, card_context: card_context[name][attr]
17
+ ),
18
+ f"least_{attr}_taken": ColSpec(
19
+ col_type=ColType.PICK_SUM,
20
+ expr=(lambda names: pl.col(f'pick_{attr}')
21
+ <= pl.min_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
22
+ ),
23
+ f"least_{attr}_taken_rate": ColSpec(
24
+ col_type=ColType.AGG,
25
+ expr=pl.col(f"least_{attr}_taken") / pl.col("num_taken"),
26
+ ),
27
+ f"greatest_{attr}_taken": ColSpec(
28
+ col_type=ColType.PICK_SUM,
29
+ expr=(lambda names: pl.col(f'pick_{attr}')
30
+ >= pl.max_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
31
+ ),
32
+ f"greatest_{attr}_taken_rate": ColSpec(
33
+ col_type=ColType.AGG,
34
+ expr=pl.col(f"greatest_{attr}_taken") / pl.col("num_taken"),
35
+ ),
36
+ f"pick_{attr}_mean": ColSpec(
37
+ col_type=ColType.AGG,
38
+ expr=pl.col(f"pick_{attr}") / pl.col("num_taken")
39
+ )
40
+ }
spells/manifest.py CHANGED
@@ -3,13 +3,13 @@ from dataclasses import dataclass
3
3
  import spells.columns
4
4
  import spells.filter
5
5
  from spells.enums import View, ColName, ColType
6
- from spells.columns import ColumnDefinition
6
+ from spells.columns import ColDef
7
7
 
8
8
 
9
9
  @dataclass(frozen=True)
10
10
  class Manifest:
11
11
  columns: tuple[str, ...]
12
- col_def_map: dict[str, ColumnDefinition]
12
+ col_def_map: dict[str, ColDef]
13
13
  base_view_group_by: frozenset[str]
14
14
  view_cols: dict[View, frozenset[str]]
15
15
  group_by: tuple[str, ...]
@@ -85,7 +85,7 @@ class Manifest:
85
85
 
86
86
  def _resolve_view_cols(
87
87
  col_set: frozenset[str],
88
- col_def_map: dict[str, ColumnDefinition],
88
+ col_def_map: dict[str, ColDef],
89
89
  ) -> dict[View, frozenset[str]]:
90
90
  """
91
91
  For each view ('game', 'draft', and 'card'), return the columns
@@ -153,7 +153,7 @@ def _resolve_view_cols(
153
153
 
154
154
 
155
155
  def create(
156
- col_def_map: dict[str, ColumnDefinition],
156
+ col_def_map: dict[str, ColDef],
157
157
  columns: list[str] | None = None,
158
158
  group_by: list[str] | None = None,
159
159
  filter_spec: dict | None = None,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.4.0
3
+ Version: 0.5.1
4
4
  Summary: analysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -74,8 +74,9 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
74
74
  - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
75
75
  - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
76
76
  - Manages grouping and filtering by built-in and custom columns at the row level
77
- - Provides 121 explicitly specified, enumerated, documented column definitions
77
+ - Provides 122 explicitly specified, enumerated, documented column definitions
78
78
  - Supports "Deck Color Data" aggregations with built-in column definitions.
79
+ - Lets you feed card metrics back into column definitions to support scientific workflows like MLE
79
80
  - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
80
81
  - Downloads and manages public datasets from 17Lands
81
82
  - Retrieves and models booster configuration and card data from [MTGJSON](https://mtgjson.com/)
@@ -87,7 +88,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
87
88
 
88
89
  ## summon
89
90
 
90
- `summon` takes four optional arguments, allowing a fully declarative specification of your desired analysis. Basic functionality not provided by this api can often be managed by simple chained calls using the polars API, e.g. sorting and post-agg filtering.
91
+ `summon` takes five optional arguments, allowing a fully declarative specification of your desired analysis. Basic functionality not provided by this api can often be managed by simple chained calls using the polars API, e.g. sorting and post-agg filtering.
91
92
  - `columns` specifies the desired output columns
92
93
  ```python
93
94
  >>> spells.summon('DSK', columns=["num_gp", "pct_gp", "gp_wr", "gp_wr_z"])
@@ -133,7 +134,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
133
134
  ```
134
135
  - `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
135
136
  ```python
136
- >>> from spells.enums import ColName
137
+ >>> from spells import ColName
137
138
  >>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
138
139
  shape: (4, 2)
139
140
  ┌───────────────┬──────────┐
@@ -150,14 +151,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
150
151
  - `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
151
152
  ```python
152
153
  >>> import polars as pl
153
- >>> from spells.columns import ColumnSpec
154
- >>> from spells.enums import ColType, View, ColName
155
- >>> ext = ColumnSpec(
156
- ... name='deq_base',
157
- ... col_type=ColType.AGG,
158
- ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
159
- ... )
160
- >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
154
+ >>> from spells import ColSpec, ColType
155
+ >>> ext = {
156
+ ... 'deq_base': ColSpec(
157
+ ... col_type=ColType.AGG,
158
+ ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
159
+ ... ),
160
+ ... }
161
+ >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
161
162
  ... .filter(pl.col('deq_base').is_finite())
162
163
  ... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
163
164
  ... .sort('deq_base', descending=True)
@@ -180,8 +181,32 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
180
181
  │ Oblivious Bookworm ┆ 0.028605 ┆ uncommon ┆ GU │
181
182
  └──────────────────────────┴──────────┴──────────┴───────┘
182
183
  ```
183
-
184
- Note the use of chained calls to the Polars DataFrame api to perform manipulations on the result of `summon`.
184
+ - `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
185
+ ```python
186
+ >>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
187
+ >>> ext = {
188
+ ... 'picked_deq_base': ColSpec(
189
+ ... col_type=ColType.PICK_SUM,
190
+ ... expr=lambda name, card_context: card_context[name]['deq_base']
191
+ ... ),
192
+ ... 'picked_deq_base_avg': ColSpec(
193
+ ... col_type=ColType.AGG,
194
+ ... expr=pl.col('picked_deq_base') / pl.col('num_taken')
195
+ ... ),
196
+ ... }
197
+ >>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
198
+ shape: (4, 2)
199
+ ┌───────────────┬─────────────────────┐
200
+ │ player_cohort ┆ picked_deq_base_avg │
201
+ │ --- ┆ --- │
202
+ │ str ┆ f64 │
203
+ ╞═══════════════╪═════════════════════╡
204
+ │ Bottom ┆ 0.004826 │
205
+ │ Middle ┆ 0.00532 │
206
+ │ Other ┆ 0.004895 │
207
+ │ Top ┆ 0.005659 │
208
+ └───────────────┴─────────────────────┘
209
+ ```
185
210
 
186
211
  ## Installation
187
212
 
@@ -223,7 +248,7 @@ Spells caches the results of expensive aggregations in the local file system as
223
248
 
224
249
  ### Memory Usage
225
250
 
226
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor, including poor garbage collection. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than memory computations is on my roadmap, so check back periodically to see if I've made any progress.
251
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
227
252
 
228
253
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
229
254
 
@@ -289,25 +314,26 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
289
314
  - `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
290
315
  - `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
291
316
 
292
- - extensions: a list of `spells.columns.ColumnSpec` objects, which are appended to the definitions built-in columns described below. A name not in the enum `ColName` can be used in this way if it is the name of a provided extension. Existing names can also be redefined using extensions.
317
+ - extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the built-in column definitions described below.
318
+
319
+ - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
293
320
 
294
321
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
295
322
 
296
323
  ### Enums
297
324
 
298
325
  ```python
299
- from spells.enums import ColName, ColType
326
+ from spells import ColName, ColType
300
327
  ```
301
328
 
302
329
  Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
303
330
 
304
- ### ColumnSpec
331
+ ### ColSpec
305
332
 
306
333
  ```python
307
- from spells.columns import ColumnSpec
334
+ from spells import ColSpec
308
335
 
309
- ColumnSpec(
310
- name: str,
336
+ ColSpec(
311
337
  col_type: ColType,
312
338
  expr: pl.Expr | Callable[..., pl.Expr] | None = None,
313
339
  version: str | None = None
@@ -318,8 +344,6 @@ Used to define extensions in `summon`
318
344
 
319
345
  #### parameters
320
346
 
321
- - `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
322
-
323
347
  - `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
324
348
  and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
325
349
  summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
@@ -328,7 +352,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
328
352
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
329
353
  - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
330
354
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
331
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions. See example notebooks for more details.
355
+ - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
332
356
 
333
357
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
334
358
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -366,6 +390,7 @@ A table of all included columns. Columns can be referenced by enum or by string
366
390
  | `PICK_NUMBER` | `"pick_number"` | `DRAFT` | `FILTER_ONLY` | Dataset Column | Int |
367
391
  | `PICK_NUM` | `"pick_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
368
392
  | `TAKEN_AT` | `"taken_at"` | `DRAFT` | `PICK_SUM` | Summable alias of `PICK_NUM` | Int |
393
+ | `NUM_DRAFTS` | `"num_drafts"` | `DRAFT` | `PICK_SUM` | | Int |
369
394
  | `NUM_TAKEN` | `"num_taken"` | `DRAFT` | `PICK_SUM` | Sum 1 over rows | Int |
370
395
  | `PICK` | `"pick"` | `DRAFT` | `FILTER_ONLY` | Dataset Column, joined as "name" | String |
371
396
  | `PICK_MAINDECK_RATE` | `"pick_maindeck_rate"` | `DRAFT` | `PICK_SUM` | Dataset Column | Float |
@@ -0,0 +1,16 @@
1
+ spells/__init__.py,sha256=iyX74sLFJIQ_5ShZ7KDOeBlbYLm21ibWxwyzCGnKdg8,169
2
+ spells/cache.py,sha256=4v7h8D3TtaT0R_EdiRNhdcQrXzdH_CukezO6oAXvNEY,2956
3
+ spells/cards.py,sha256=EOXAB_F2yedjf6KquCERCIHl0TSIJIoOe1jv8g4JzOc,3601
4
+ spells/columns.py,sha256=D4dcXVfXlXIbOE2CDgXhGCCYLhUK_XLu4lzm677tj2c,17264
5
+ spells/draft_data.py,sha256=viulqB_kaWHYgEtGrzQ5ZVf2jBau9kfnWwJAcP678Jw,13471
6
+ spells/enums.py,sha256=2IZrty0OtcbXq8oiuDWF75_AacMBBY_JLaisc_bNSdk,4765
7
+ spells/extension.py,sha256=B9U1HvPAVqE0g44sihS6EGlkEtTx2VOrcWtg5fPh-mw,1482
8
+ spells/external.py,sha256=qe6wOBDhPN4CZNQvYRq6G-OpIZcWTZzJjySgnf2Gu1o,10258
9
+ spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
10
+ spells/manifest.py,sha256=UNCZT3StN_1DfYfWNuEQrQx7ZI8Z5Y_l6j2oaUaWdYA,8142
11
+ spells/schema.py,sha256=z8Qn2SiHG4T6YfPsz8xHLGMjU_Ofm76-Vrquh3b9B64,6422
12
+ spells_mtg-0.5.1.dist-info/METADATA,sha256=tr4oUQAo5bJlL1IIKJHDj9EM_bH1PEiHSbKj8NLWelU,45452
13
+ spells_mtg-0.5.1.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
14
+ spells_mtg-0.5.1.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
15
+ spells_mtg-0.5.1.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
16
+ spells_mtg-0.5.1.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- spells/__init__.py,sha256=EcI7ijXYvPA8jj7wUZqs6CSWr__MD8AOXhkex-Hj37E,131
2
- spells/cache.py,sha256=4v7h8D3TtaT0R_EdiRNhdcQrXzdH_CukezO6oAXvNEY,2956
3
- spells/cards.py,sha256=EOXAB_F2yedjf6KquCERCIHl0TSIJIoOe1jv8g4JzOc,3601
4
- spells/columns.py,sha256=gpgx8zZ4VmmcJXxMdyEKIOgJaBESKkbtpOm6bVNNopM,19132
5
- spells/draft_data.py,sha256=bJw2ueJ1dNrunVY2GH6pgJCMPFCkydI37JExgpMS6vE,12559
6
- spells/enums.py,sha256=4G-gi75v9bne4cMRgRwwmpWy0kW8NoTjkmGTxEwFDeo,4735
7
- spells/external.py,sha256=qe6wOBDhPN4CZNQvYRq6G-OpIZcWTZzJjySgnf2Gu1o,10258
8
- spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
9
- spells/manifest.py,sha256=Dy0zskkSqPWb3WQqLgNk5pbaCGlEAn7JG9wRFOUA3Jc,8182
10
- spells/schema.py,sha256=z8Qn2SiHG4T6YfPsz8xHLGMjU_Ofm76-Vrquh3b9B64,6422
11
- spells_mtg-0.4.0.dist-info/METADATA,sha256=pEKOk-A27Tq8Fpbs9yPb11tT9av4f7-iOQSIGfWv9Mg,43908
12
- spells_mtg-0.4.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
13
- spells_mtg-0.4.0.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
14
- spells_mtg-0.4.0.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
15
- spells_mtg-0.4.0.dist-info/RECORD,,