spells-mtg 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. More details were linked from the original page (the link is not preserved in this text).

spells/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from spells import columns
2
2
  from spells import enums
3
- from spells.draft_data import summon, card_df
3
+ from spells.draft_data import summon
4
4
 
5
- __all__ = ["summon", "card_df", "enums", "columns"]
5
+ __all__ = ["summon", "enums", "columns"]
spells/columns.py CHANGED
@@ -10,13 +10,9 @@ from spells.enums import View, ColName, ColType
10
10
  class ColumnSpec:
11
11
  name: str
12
12
  col_type: ColType
13
- expr: pl.Expr | None = None
14
- exprMap: Callable[[str], pl.Expr] | None = None
13
+ expr: pl.Expr | Callable[..., pl.Expr] | None = None
15
14
  views: list[View] | None = None
16
- dependencies: list[str] | None = None
17
- version: str | None = (
18
- None # only needed for user-defined functions with python functions in expr
19
- )
15
+ version: str | None = None
20
16
 
21
17
 
22
18
  @dataclass(frozen=True)
@@ -25,7 +21,7 @@ class ColumnDefinition:
25
21
  col_type: ColType
26
22
  expr: pl.Expr | tuple[pl.Expr, ...]
27
23
  views: set[View]
28
- dependencies: tuple[str, ...]
24
+ dependencies: set[str]
29
25
  signature: str
30
26
 
31
27
 
@@ -49,7 +45,7 @@ _column_specs = [
49
45
  ColumnSpec(
50
46
  name=ColName.NAME,
51
47
  col_type=ColType.GROUP_BY,
52
- # handled by internals, derived from both 'pick' and "name mapped" columns
48
+ views=[View.CARD],
53
49
  ),
54
50
  ColumnSpec(
55
51
  name=ColName.EXPANSION,
@@ -74,26 +70,22 @@ _column_specs = [
74
70
  ColumnSpec(
75
71
  name=ColName.DRAFT_DATE,
76
72
  col_type=ColType.GROUP_BY,
77
- expr=pl.col("draft_time").str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
78
- dependencies=[ColName.DRAFT_TIME],
73
+ expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
79
74
  ),
80
75
  ColumnSpec(
81
76
  name=ColName.DRAFT_DAY_OF_WEEK,
82
77
  col_type=ColType.GROUP_BY,
83
- expr=pl.col("draft_time").str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
84
- dependencies=[ColName.DRAFT_TIME],
78
+ expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
85
79
  ),
86
80
  ColumnSpec(
87
81
  name=ColName.DRAFT_HOUR,
88
82
  col_type=ColType.GROUP_BY,
89
- expr=pl.col("draft_time").str.to_datetime("%Y-%m-%d %H:%M:%S").dt.hour(),
90
- dependencies=[ColName.DRAFT_TIME],
83
+ expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.hour(),
91
84
  ),
92
85
  ColumnSpec(
93
86
  name=ColName.DRAFT_WEEK,
94
87
  col_type=ColType.GROUP_BY,
95
- expr=pl.col("draft_time").str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
96
- dependencies=[ColName.DRAFT_TIME],
88
+ expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
97
89
  ),
98
90
  ColumnSpec(
99
91
  name=ColName.RANK,
@@ -113,18 +105,17 @@ _column_specs = [
113
105
  ColumnSpec(
114
106
  name=ColName.PLAYER_COHORT,
115
107
  col_type=ColType.GROUP_BY,
116
- expr=pl.when(pl.col("user_n_games_bucket") < 100)
108
+ expr=pl.when(pl.col(ColName.USER_N_GAMES_BUCKET) < 100)
117
109
  .then(pl.lit("Other"))
118
110
  .otherwise(
119
- pl.when(pl.col("user_game_win_rate_bucket") > 0.57)
111
+ pl.when(pl.col(ColName.USER_GAME_WIN_RATE_BUCKET) > 0.57)
120
112
  .then(pl.lit("Top"))
121
113
  .otherwise(
122
- pl.when(pl.col("user_game_win_rate_bucket") < 0.49)
114
+ pl.when(pl.col(ColName.USER_GAME_WIN_RATE_BUCKET) < 0.49)
123
115
  .then(pl.lit("Bottom"))
124
116
  .otherwise(pl.lit("Middle"))
125
117
  )
126
118
  ),
127
- dependencies=[ColName.USER_N_GAMES_BUCKET, ColName.USER_GAME_WIN_RATE_BUCKET],
128
119
  ),
129
120
  ColumnSpec(
130
121
  name=ColName.EVENT_MATCH_WINS,
@@ -136,7 +127,6 @@ _column_specs = [
136
127
  col_type=ColType.PICK_SUM,
137
128
  views=[View.DRAFT],
138
129
  expr=pl.col(ColName.EVENT_MATCH_WINS),
139
- dependencies=[ColName.EVENT_MATCH_WINS],
140
130
  ),
141
131
  ColumnSpec(
142
132
  name=ColName.EVENT_MATCH_LOSSES,
@@ -147,33 +137,28 @@ _column_specs = [
147
137
  name=ColName.EVENT_MATCH_LOSSES_SUM,
148
138
  col_type=ColType.PICK_SUM,
149
139
  expr=pl.col(ColName.EVENT_MATCH_LOSSES),
150
- dependencies=[ColName.EVENT_MATCH_LOSSES],
151
140
  ),
152
141
  ColumnSpec(
153
142
  name=ColName.EVENT_MATCHES,
154
143
  col_type=ColType.GROUP_BY,
155
- expr=pl.col("event_match_wins") + pl.col("event_match_losses"),
156
- dependencies=[ColName.EVENT_MATCH_WINS, ColName.EVENT_MATCH_LOSSES],
144
+ expr=pl.col(ColName.EVENT_MATCH_WINS) + pl.col(ColName.EVENT_MATCH_LOSSES),
157
145
  ),
158
146
  ColumnSpec(
159
147
  name=ColName.EVENT_MATCHES_SUM,
160
148
  col_type=ColType.PICK_SUM,
161
149
  expr=pl.col(ColName.EVENT_MATCHES),
162
- dependencies=[ColName.EVENT_MATCHES],
163
150
  ),
164
151
  ColumnSpec(
165
152
  name=ColName.IS_TROPHY,
166
153
  col_type=ColType.GROUP_BY,
167
- expr=pl.when(pl.col("event_type") == "Traditional")
168
- .then(pl.col("event_match_wins") == 3)
169
- .otherwise(pl.col("event_match_wins") == 7),
170
- dependencies=[ColName.EVENT_TYPE, ColName.EVENT_MATCH_WINS],
154
+ expr=pl.when(pl.col(ColName.EVENT_TYPE) == "Traditional")
155
+ .then(pl.col(ColName.EVENT_MATCH_WINS) == 3)
156
+ .otherwise(pl.col(ColName.EVENT_MATCH_WINS) == 7),
171
157
  ),
172
158
  ColumnSpec(
173
159
  name=ColName.IS_TROPHY_SUM,
174
160
  col_type=ColType.PICK_SUM,
175
161
  expr=pl.col(ColName.IS_TROPHY),
176
- dependencies=[ColName.IS_TROPHY],
177
162
  ),
178
163
  ColumnSpec(
179
164
  name=ColName.PACK_NUMBER,
@@ -183,8 +168,7 @@ _column_specs = [
183
168
  ColumnSpec(
184
169
  name=ColName.PACK_NUM,
185
170
  col_type=ColType.GROUP_BY,
186
- expr=pl.col("pack_number") + 1,
187
- dependencies=[ColName.PACK_NUMBER],
171
+ expr=pl.col(ColName.PACK_NUMBER) + 1,
188
172
  ),
189
173
  ColumnSpec(
190
174
  name=ColName.PICK_NUMBER,
@@ -194,26 +178,23 @@ _column_specs = [
194
178
  ColumnSpec(
195
179
  name=ColName.PICK_NUM,
196
180
  col_type=ColType.GROUP_BY,
197
- expr=pl.col("pick_number") + 1,
198
- dependencies=[ColName.PICK_NUMBER],
181
+ expr=pl.col(ColName.PICK_NUMBER) + 1,
199
182
  ),
200
183
  ColumnSpec(
201
184
  name=ColName.TAKEN_AT,
202
185
  col_type=ColType.PICK_SUM,
203
186
  expr=pl.col(ColName.PICK_NUM),
204
- dependencies=[ColName.PICK_NUM],
205
187
  ),
206
188
  ColumnSpec(
207
189
  name=ColName.NUM_TAKEN,
208
190
  col_type=ColType.PICK_SUM,
209
191
  expr=pl.when(pl.col(ColName.PICK).is_not_null())
210
192
  .then(1)
211
- .otherwise(0), # a literal returns one row under select alone
212
- dependencies=[ColName.PICK],
193
+ .otherwise(0),
213
194
  ),
214
195
  ColumnSpec(
215
196
  name=ColName.PICK,
216
- col_type=ColType.FILTER_ONLY, # aggregated as "name"
197
+ col_type=ColType.FILTER_ONLY,
217
198
  views=[View.DRAFT],
218
199
  ),
219
200
  ColumnSpec(
@@ -234,15 +215,13 @@ _column_specs = [
234
215
  ColumnSpec(
235
216
  name=ColName.LAST_SEEN,
236
217
  col_type=ColType.NAME_SUM,
237
- exprMap=lambda name: pl.col(f"pack_card_{name}")
238
- * pl.min_horizontal("pick_num", 8),
239
- dependencies=[ColName.PACK_CARD, ColName.PICK_NUM],
218
+ expr=lambda name: pl.col(f"pack_card_{name}")
219
+ * pl.min_horizontal(ColName.PICK_NUM, 8),
240
220
  ),
241
221
  ColumnSpec(
242
222
  name=ColName.NUM_SEEN,
243
223
  col_type=ColType.NAME_SUM,
244
- exprMap=lambda name: pl.col(f"pack_card_{name}") * (pl.col("pick_num") <= 8),
245
- dependencies=[ColName.PACK_CARD, ColName.PICK_NUM],
224
+ expr=lambda name: pl.col(f"pack_card_{name}") * (pl.col(ColName.PICK_NUM) <= 8),
246
225
  ),
247
226
  ColumnSpec(
248
227
  name=ColName.POOL,
@@ -257,26 +236,22 @@ _column_specs = [
257
236
  ColumnSpec(
258
237
  name=ColName.GAME_DATE,
259
238
  col_type=ColType.GROUP_BY,
260
- expr=pl.col("game_time").str.to_datetime("%Y-%m-%d %H-%M-%S").dt.date(),
261
- dependencies=[ColName.GAME_TIME],
239
+ expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.date(),
262
240
  ),
263
241
  ColumnSpec(
264
242
  name=ColName.GAME_DAY_OF_WEEK,
265
243
  col_type=ColType.GROUP_BY,
266
- expr=pl.col("game_time").str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
267
- dependencies=[ColName.GAME_TIME],
244
+ expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
268
245
  ),
269
246
  ColumnSpec(
270
247
  name=ColName.GAME_HOUR,
271
248
  col_type=ColType.GROUP_BY,
272
- expr=pl.col("game_time").str.to_datetime("%Y-%m-%d %H-%M-%S").dt.hour(),
273
- dependencies=[ColName.GAME_TIME],
249
+ expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.hour(),
274
250
  ),
275
251
  ColumnSpec(
276
252
  name=ColName.GAME_WEEK,
277
253
  col_type=ColType.GROUP_BY,
278
- expr=pl.col("game_time").str.to_datetime("%Y-%m-%d %H-%M-%S").dt.week(),
279
- dependencies=[ColName.GAME_TIME],
254
+ expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.week(),
280
255
  ),
281
256
  ColumnSpec(
282
257
  name=ColName.BUILD_INDEX,
@@ -297,19 +272,16 @@ _column_specs = [
297
272
  name=ColName.NUM_GAMES,
298
273
  col_type=ColType.GAME_SUM,
299
274
  expr=pl.col(ColName.GAME_NUMBER).is_not_null(),
300
- dependencies=[ColName.GAME_NUMBER],
301
275
  ),
302
276
  ColumnSpec(
303
277
  name=ColName.NUM_MATCHES,
304
278
  col_type=ColType.GAME_SUM,
305
279
  expr=pl.col(ColName.GAME_NUMBER) == 1,
306
- dependencies=[ColName.GAME_NUMBER],
307
280
  ),
308
281
  ColumnSpec(
309
282
  name=ColName.NUM_EVENTS,
310
283
  col_type=ColType.GAME_SUM,
311
284
  expr=(pl.col(ColName.GAME_NUMBER) == 1) & (pl.col(ColName.MATCH_NUMBER) == 1),
312
- dependencies=[ColName.GAME_NUMBER, ColName.MATCH_NUMBER],
313
285
  ),
314
286
  ColumnSpec(
315
287
  name=ColName.OPP_RANK,
@@ -325,7 +297,6 @@ _column_specs = [
325
297
  name=ColName.NUM_COLORS,
326
298
  col_type=ColType.GROUP_BY,
327
299
  expr=pl.col(ColName.MAIN_COLORS).str.len_chars(),
328
- dependencies=[ColName.MAIN_COLORS],
329
300
  ),
330
301
  ColumnSpec(
331
302
  name=ColName.SPLASH_COLORS,
@@ -336,7 +307,6 @@ _column_specs = [
336
307
  name=ColName.HAS_SPLASH,
337
308
  col_type=ColType.GROUP_BY,
338
309
  expr=pl.col(ColName.SPLASH_COLORS).str.len_chars() > 0,
339
- dependencies=[ColName.SPLASH_COLORS],
340
310
  ),
341
311
  ColumnSpec(
342
312
  name=ColName.ON_PLAY,
@@ -347,7 +317,6 @@ _column_specs = [
347
317
  name=ColName.NUM_ON_PLAY,
348
318
  col_type=ColType.GAME_SUM,
349
319
  expr=pl.col(ColName.ON_PLAY),
350
- dependencies=[ColName.ON_PLAY],
351
320
  ),
352
321
  ColumnSpec(
353
322
  name=ColName.NUM_MULLIGANS,
@@ -358,7 +327,6 @@ _column_specs = [
358
327
  name=ColName.NUM_MULLIGANS_SUM,
359
328
  col_type=ColType.GAME_SUM,
360
329
  expr=pl.col(ColName.NUM_MULLIGANS),
361
- dependencies=[ColName.NUM_MULLIGANS],
362
330
  ),
363
331
  ColumnSpec(
364
332
  name=ColName.OPP_NUM_MULLIGANS,
@@ -369,7 +337,6 @@ _column_specs = [
369
337
  name=ColName.OPP_NUM_MULLIGANS_SUM,
370
338
  col_type=ColType.GAME_SUM,
371
339
  expr=pl.col(ColName.OPP_NUM_MULLIGANS),
372
- dependencies=[ColName.OPP_NUM_MULLIGANS],
373
340
  ),
374
341
  ColumnSpec(
375
342
  name=ColName.OPP_COLORS,
@@ -385,7 +352,6 @@ _column_specs = [
385
352
  name=ColName.NUM_TURNS_SUM,
386
353
  col_type=ColType.GAME_SUM,
387
354
  expr=pl.col(ColName.NUM_TURNS),
388
- dependencies=[ColName.NUM_TURNS],
389
355
  ),
390
356
  ColumnSpec(
391
357
  name=ColName.WON,
@@ -396,7 +362,6 @@ _column_specs = [
396
362
  name=ColName.NUM_WON,
397
363
  col_type=ColType.GAME_SUM,
398
364
  expr=pl.col(ColName.WON),
399
- dependencies=[ColName.WON],
400
365
  ),
401
366
  ColumnSpec(
402
367
  name=ColName.OPENING_HAND,
@@ -406,8 +371,7 @@ _column_specs = [
406
371
  ColumnSpec(
407
372
  name=ColName.WON_OPENING_HAND,
408
373
  col_type=ColType.NAME_SUM,
409
- exprMap=lambda name: pl.col(f"opening_hand_{name}") * pl.col(ColName.WON),
410
- dependencies=[ColName.OPENING_HAND, ColName.WON],
374
+ expr=lambda name: pl.col(f"opening_hand_{name}") * pl.col(ColName.WON),
411
375
  ),
412
376
  ColumnSpec(
413
377
  name=ColName.DRAWN,
@@ -417,8 +381,7 @@ _column_specs = [
417
381
  ColumnSpec(
418
382
  name=ColName.WON_DRAWN,
419
383
  col_type=ColType.NAME_SUM,
420
- exprMap=lambda name: pl.col(f"drawn_{name}") * pl.col(ColName.WON),
421
- dependencies=[ColName.DRAWN, ColName.WON],
384
+ expr=lambda name: pl.col(f"drawn_{name}") * pl.col(ColName.WON),
422
385
  ),
423
386
  ColumnSpec(
424
387
  name=ColName.TUTORED,
@@ -428,8 +391,7 @@ _column_specs = [
428
391
  ColumnSpec(
429
392
  name=ColName.WON_TUTORED,
430
393
  col_type=ColType.NAME_SUM,
431
- exprMap=lambda name: pl.col(f"tutored_{name}") * pl.col(ColName.WON),
432
- dependencies=[ColName.TUTORED, ColName.WON],
394
+ expr=lambda name: pl.col(f"tutored_{name}") * pl.col(ColName.WON),
433
395
  ),
434
396
  ColumnSpec(
435
397
  name=ColName.DECK,
@@ -439,8 +401,7 @@ _column_specs = [
439
401
  ColumnSpec(
440
402
  name=ColName.WON_DECK,
441
403
  col_type=ColType.NAME_SUM,
442
- exprMap=lambda name: pl.col(f"deck_{name}") * pl.col(ColName.WON),
443
- dependencies=[ColName.DECK, ColName.WON],
404
+ expr=lambda name: pl.col(f"deck_{name}") * pl.col(ColName.WON),
444
405
  ),
445
406
  ColumnSpec(
446
407
  name=ColName.SIDEBOARD,
@@ -450,31 +411,23 @@ _column_specs = [
450
411
  ColumnSpec(
451
412
  name=ColName.WON_SIDEBOARD,
452
413
  col_type=ColType.NAME_SUM,
453
- exprMap=lambda name: pl.col(f"sideboard_{name}") * pl.col(ColName.WON),
454
- dependencies=[ColName.SIDEBOARD, ColName.WON],
414
+ expr=lambda name: pl.col(f"sideboard_{name}") * pl.col(ColName.WON),
455
415
  ),
456
416
  ColumnSpec(
457
417
  name=ColName.NUM_GNS,
458
418
  col_type=ColType.NAME_SUM,
459
- exprMap=lambda name: pl.max_horizontal(
419
+ expr=lambda name: pl.max_horizontal(
460
420
  0,
461
421
  pl.col(f"deck_{name}")
462
422
  - pl.col(f"drawn_{name}")
463
423
  - pl.col(f"tutored_{name}")
464
424
  - pl.col(f"opening_hand_{name}"),
465
425
  ),
466
- dependencies=[
467
- ColName.DECK,
468
- ColName.DRAWN,
469
- ColName.TUTORED,
470
- ColName.OPENING_HAND,
471
- ],
472
426
  ),
473
427
  ColumnSpec(
474
428
  name=ColName.WON_NUM_GNS,
475
429
  col_type=ColType.NAME_SUM,
476
- exprMap=lambda name: pl.col(ColName.WON) * pl.col(f"num_gns_{name}"),
477
- dependencies=[ColName.NUM_GNS, ColName.WON],
430
+ expr=lambda name: pl.col(ColName.WON) * pl.col(f"num_gns_{name}"),
478
431
  ),
479
432
  ColumnSpec(
480
433
  name=ColName.SET_CODE,
@@ -506,23 +459,18 @@ _column_specs = [
506
459
  ),
507
460
  ColumnSpec(
508
461
  name=ColName.DECK_MANA_VALUE,
509
- col_type=ColType.CARD_SUM,
510
- expr=pl.col(ColName.MANA_VALUE) * pl.col(ColName.DECK),
511
- dependencies=[ColName.MANA_VALUE, ColName.DECK],
462
+ col_type=ColType.NAME_SUM,
463
+ expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE] * pl.col(f"deck_{name}"),
512
464
  ),
513
465
  ColumnSpec(
514
466
  name=ColName.DECK_LANDS,
515
- col_type=ColType.CARD_SUM,
516
- expr=pl.when(pl.col(ColName.CARD_TYPE).str.contains("Land"))
517
- .then(pl.col(ColName.DECK))
518
- .otherwise(0),
519
- dependencies=[ColName.CARD_TYPE, ColName.DECK],
467
+ col_type=ColType.NAME_SUM,
468
+ expr=lambda name, card_context: pl.col(f"deck_{name}") * ( 1 if 'Land' in card_context[name][ColName.CARD_TYPE] else 0 )
520
469
  ),
521
470
  ColumnSpec(
522
471
  name=ColName.DECK_SPELLS,
523
- col_type=ColType.CARD_SUM,
524
- expr=pl.col(ColName.DECK) - pl.col(ColName.DECK_SPELLS),
525
- dependencies=[ColName.DECK, ColName.DECK_SPELLS],
472
+ col_type=ColType.NAME_SUM,
473
+ expr=lambda name: pl.col(f"deck_{name}") - pl.col(f"deck_lands_{name}"),
526
474
  ),
527
475
  ColumnSpec(
528
476
  name=ColName.MANA_COST,
@@ -556,208 +504,174 @@ _column_specs = [
556
504
  name=ColName.PICKED_MATCH_WR,
557
505
  col_type=ColType.AGG,
558
506
  expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES),
559
- dependencies=[ColName.EVENT_MATCH_WINS_SUM, ColName.EVENT_MATCHES],
560
507
  ),
561
508
  ColumnSpec(
562
509
  name=ColName.TROPHY_RATE,
563
510
  col_type=ColType.AGG,
564
511
  expr=pl.col(ColName.IS_TROPHY_SUM) / pl.col(ColName.NUM_TAKEN),
565
- dependencies=[ColName.IS_TROPHY_SUM, ColName.NUM_TAKEN],
566
512
  ),
567
513
  ColumnSpec(
568
514
  name=ColName.GAME_WR,
569
515
  col_type=ColType.AGG,
570
516
  expr=pl.col(ColName.NUM_WON) / pl.col(ColName.NUM_GAMES),
571
- dependencies=[ColName.NUM_WON, ColName.NUM_GAMES],
572
517
  ),
573
518
  ColumnSpec(
574
519
  name=ColName.ALSA,
575
520
  col_type=ColType.AGG,
576
521
  expr=pl.col(ColName.LAST_SEEN) / pl.col(ColName.NUM_SEEN),
577
- dependencies=[ColName.LAST_SEEN, ColName.NUM_SEEN],
578
522
  ),
579
523
  ColumnSpec(
580
524
  name=ColName.ATA,
581
525
  col_type=ColType.AGG,
582
526
  expr=pl.col(ColName.TAKEN_AT) / pl.col(ColName.NUM_TAKEN),
583
- dependencies=[ColName.TAKEN_AT, ColName.NUM_TAKEN],
584
527
  ),
585
528
  ColumnSpec(
586
529
  name=ColName.NUM_GP,
587
530
  col_type=ColType.AGG,
588
531
  expr=pl.col(ColName.DECK),
589
- dependencies=[ColName.DECK],
590
532
  ),
591
533
  ColumnSpec(
592
534
  name=ColName.PCT_GP,
593
535
  col_type=ColType.AGG,
594
536
  expr=pl.col(ColName.DECK) / (pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD)),
595
- dependencies=[ColName.DECK, ColName.SIDEBOARD],
596
537
  ),
597
538
  ColumnSpec(
598
539
  name=ColName.GP_WR,
599
540
  col_type=ColType.AGG,
600
541
  expr=pl.col(ColName.WON_DECK) / pl.col(ColName.DECK),
601
- dependencies=[ColName.WON_DECK, ColName.DECK],
602
542
  ),
603
543
  ColumnSpec(
604
544
  name=ColName.NUM_OH,
605
545
  col_type=ColType.AGG,
606
546
  expr=pl.col(ColName.OPENING_HAND),
607
- dependencies=[ColName.OPENING_HAND],
608
547
  ),
609
548
  ColumnSpec(
610
549
  name=ColName.OH_WR,
611
550
  col_type=ColType.AGG,
612
551
  expr=pl.col(ColName.WON_OPENING_HAND) / pl.col(ColName.OPENING_HAND),
613
- dependencies=[ColName.WON_OPENING_HAND, ColName.OPENING_HAND],
614
552
  ),
615
553
  ColumnSpec(
616
554
  name=ColName.NUM_GIH,
617
555
  col_type=ColType.AGG,
618
556
  expr=pl.col(ColName.OPENING_HAND) + pl.col(ColName.DRAWN),
619
- dependencies=[ColName.OPENING_HAND, ColName.DRAWN],
620
557
  ),
621
558
  ColumnSpec(
622
559
  name=ColName.NUM_GIH_WON,
623
560
  col_type=ColType.AGG,
624
561
  expr=pl.col(ColName.WON_OPENING_HAND) + pl.col(ColName.WON_DRAWN),
625
- dependencies=[ColName.WON_OPENING_HAND, ColName.WON_DRAWN],
626
562
  ),
627
563
  ColumnSpec(
628
564
  name=ColName.GIH_WR,
629
565
  col_type=ColType.AGG,
630
566
  expr=pl.col(ColName.NUM_GIH_WON) / pl.col(ColName.NUM_GIH),
631
- dependencies=[ColName.NUM_GIH_WON, ColName.NUM_GIH],
632
567
  ),
633
568
  ColumnSpec(
634
569
  name=ColName.GNS_WR,
635
570
  col_type=ColType.AGG,
636
571
  expr=pl.col(ColName.WON_NUM_GNS) / pl.col(ColName.NUM_GNS),
637
- dependencies=[ColName.WON_NUM_GNS, ColName.NUM_GNS],
638
572
  ),
639
573
  ColumnSpec(
640
574
  name=ColName.IWD,
641
575
  col_type=ColType.AGG,
642
576
  expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GNS_WR),
643
- dependencies=[ColName.GIH_WR, ColName.GNS_WR],
644
577
  ),
645
578
  ColumnSpec(
646
579
  name=ColName.NUM_IN_POOL,
647
580
  col_type=ColType.AGG,
648
581
  expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
649
- dependencies=[ColName.DECK, ColName.SIDEBOARD],
650
582
  ),
651
583
  ColumnSpec(
652
584
  name=ColName.IN_POOL_WR,
653
585
  col_type=ColType.AGG,
654
586
  expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
655
587
  / pl.col(ColName.NUM_IN_POOL),
656
- dependencies=[ColName.WON_DECK, ColName.WON_SIDEBOARD, ColName.NUM_IN_POOL],
657
588
  ),
658
589
  ColumnSpec(
659
590
  name=ColName.DECK_TOTAL,
660
591
  col_type=ColType.AGG,
661
592
  expr=pl.col(ColName.DECK).sum(),
662
- dependencies=[ColName.DECK],
663
593
  ),
664
594
  ColumnSpec(
665
595
  name=ColName.WON_DECK_TOTAL,
666
596
  col_type=ColType.AGG,
667
597
  expr=pl.col(ColName.WON_DECK).sum(),
668
- dependencies=[ColName.WON_DECK],
669
598
  ),
670
599
  ColumnSpec(
671
600
  name=ColName.GP_WR_MEAN,
672
601
  col_type=ColType.AGG,
673
602
  expr=pl.col(ColName.WON_DECK_TOTAL) / pl.col(ColName.DECK_TOTAL),
674
- dependencies=[ColName.WON_DECK_TOTAL, ColName.DECK_TOTAL],
675
603
  ),
676
604
  ColumnSpec(
677
605
  name=ColName.GP_WR_EXCESS,
678
606
  col_type=ColType.AGG,
679
607
  expr=pl.col(ColName.GP_WR) - pl.col(ColName.GP_WR_MEAN),
680
- dependencies=[ColName.GP_WR, ColName.GP_WR_MEAN],
681
608
  ),
682
609
  ColumnSpec(
683
610
  name=ColName.GP_WR_VAR,
684
611
  col_type=ColType.AGG,
685
612
  expr=(pl.col(ColName.GP_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GP)).sum()
686
613
  / pl.col(ColName.DECK_TOTAL),
687
- dependencies=[ColName.GP_WR_EXCESS, ColName.NUM_GP, ColName.DECK_TOTAL],
688
614
  ),
689
615
  ColumnSpec(
690
616
  name=ColName.GP_WR_STDEV,
691
617
  col_type=ColType.AGG,
692
618
  expr=pl.col(ColName.GP_WR_VAR).sqrt(),
693
- dependencies=[ColName.GP_WR_VAR],
694
619
  ),
695
620
  ColumnSpec(
696
621
  name=ColName.GP_WR_Z,
697
622
  col_type=ColType.AGG,
698
623
  expr=pl.col(ColName.GP_WR_EXCESS) / pl.col(ColName.GP_WR_STDEV),
699
- dependencies=[ColName.GP_WR_EXCESS, ColName.GP_WR_STDEV],
700
624
  ),
701
625
  ColumnSpec(
702
626
  name=ColName.GIH_TOTAL,
703
627
  col_type=ColType.AGG,
704
628
  expr=pl.col(ColName.NUM_GIH).sum(),
705
- dependencies=[ColName.NUM_GIH],
706
629
  ),
707
630
  ColumnSpec(
708
631
  name=ColName.WON_GIH_TOTAL,
709
632
  col_type=ColType.AGG,
710
633
  expr=pl.col(ColName.NUM_GIH_WON).sum(),
711
- dependencies=[ColName.NUM_GIH_WON],
712
634
  ),
713
635
  ColumnSpec(
714
636
  name=ColName.GIH_WR_MEAN,
715
637
  col_type=ColType.AGG,
716
638
  expr=pl.col(ColName.WON_GIH_TOTAL) / pl.col(ColName.GIH_TOTAL),
717
- dependencies=[ColName.WON_GIH_TOTAL, ColName.GIH_TOTAL],
718
639
  ),
719
640
  ColumnSpec(
720
641
  name=ColName.GIH_WR_EXCESS,
721
642
  col_type=ColType.AGG,
722
643
  expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GIH_WR_MEAN),
723
- dependencies=[ColName.GIH_WR, ColName.GIH_WR_MEAN],
724
644
  ),
725
645
  ColumnSpec(
726
646
  name=ColName.GIH_WR_VAR,
727
647
  col_type=ColType.AGG,
728
648
  expr=(pl.col(ColName.GIH_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GIH)).sum()
729
649
  / pl.col(ColName.GIH_TOTAL),
730
- dependencies=[ColName.GIH_WR_EXCESS, ColName.NUM_GIH, ColName.GIH_TOTAL],
731
650
  ),
732
651
  ColumnSpec(
733
652
  name=ColName.GIH_WR_STDEV,
734
653
  col_type=ColType.AGG,
735
654
  expr=pl.col(ColName.GIH_WR_VAR).sqrt(),
736
- dependencies=[ColName.GIH_WR_VAR],
737
655
  ),
738
656
  ColumnSpec(
739
657
  name=ColName.GIH_WR_Z,
740
658
  col_type=ColType.AGG,
741
659
  expr=pl.col(ColName.GIH_WR_EXCESS) / pl.col(ColName.GIH_WR_STDEV),
742
- dependencies=[ColName.GIH_WR_EXCESS, ColName.GIH_WR_STDEV],
743
660
  ),
744
661
  ColumnSpec(
745
662
  name=ColName.DECK_MANA_VALUE_AVG,
746
663
  col_type=ColType.AGG,
747
- expr=pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK),
748
- dependencies=[ColName.DECK_MANA_VALUE, ColName.DECK],
664
+ expr=pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK_SPELLS),
749
665
  ),
750
666
  ColumnSpec(
751
667
  name=ColName.DECK_LANDS_AVG,
752
668
  col_type=ColType.AGG,
753
669
  expr=pl.col(ColName.DECK_LANDS) / pl.col(ColName.NUM_GAMES),
754
- dependencies=[ColName.DECK_LANDS, ColName.NUM_GAMES],
755
670
  ),
756
671
  ColumnSpec(
757
672
  name=ColName.DECK_SPELLS_AVG,
758
673
  col_type=ColType.AGG,
759
674
  expr=pl.col(ColName.DECK_SPELLS) / pl.col(ColName.NUM_GAMES),
760
- dependencies=[ColName.DECK_SPELLS, ColName.NUM_GAMES],
761
675
  ),
762
676
  ]
763
677
 
spells/draft_data.py CHANGED
@@ -10,7 +10,8 @@ import datetime
10
10
  import functools
11
11
  import hashlib
12
12
  import re
13
- from typing import Callable, TypeVar
13
+ from inspect import signature
14
+ from typing import Callable, TypeVar, Any
14
15
 
15
16
  import polars as pl
16
17
 
@@ -33,7 +34,7 @@ def _cache_key(args) -> str:
33
34
 
34
35
 
35
36
  @functools.lru_cache(maxsize=None)
36
- def _get_names(set_code: str) -> tuple[str, ...]:
37
+ def _get_names(set_code: str) -> list[str]:
37
38
  card_fp = data_file_path(set_code, View.CARD)
38
39
  card_view = pl.read_parquet(card_fp)
39
40
  card_names_set = frozenset(card_view.get_column("name").to_list())
@@ -43,7 +44,7 @@ def _get_names(set_code: str) -> tuple[str, ...]:
43
44
  cols = draft_view.collect_schema().names()
44
45
 
45
46
  prefix = "pack_card_"
46
- names = tuple(col[len(prefix) :] for col in cols if col.startswith(prefix))
47
+ names = [col[len(prefix) :] for col in cols if col.startswith(prefix)]
47
48
  draft_names_set = frozenset(names)
48
49
 
49
50
  assert (
@@ -52,34 +53,39 @@ def _get_names(set_code: str) -> tuple[str, ...]:
52
53
  return names
53
54
 
54
55
 
55
- def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec]):
56
- def get_views(spec: ColumnSpec) -> set[View]:
57
- if spec.name == ColName.NAME or spec.col_type in (
58
- ColType.AGG,
59
- ColType.CARD_SUM,
60
- ):
61
- return set()
62
- if spec.col_type == ColType.CARD_ATTR:
63
- return {View.CARD}
64
- if spec.views is not None:
65
- return set(spec.views)
66
- assert (
67
- spec.dependencies is not None
68
- ), f"Col {spec.name} should have dependencies"
69
-
70
- views = functools.reduce(
71
- lambda prev, curr: prev.intersection(curr),
72
- [get_views(col_spec_map[dep]) for dep in spec.dependencies],
73
- )
56
+ def _get_card_context(set_code: str, col_spec_map: dict[str, ColumnSpec]) -> dict[str, dict[str, Any]]:
57
+ card_attr_specs = {col:spec for col, spec in col_spec_map.items() if spec.col_type == ColType.CARD_ATTR or spec.name == ColName.NAME}
58
+ col_def_map = _hydrate_col_defs(set_code, card_attr_specs, card_only=True)
74
59
 
75
- return views
60
+ columns = list(col_def_map.keys())
61
+
62
+ fp = data_file_path(set_code, View.CARD)
63
+ card_df = pl.read_parquet(fp)
64
+ select_rows = _view_select(
65
+ card_df, frozenset(columns), col_def_map, is_agg_view=False
66
+ ).to_dicts()
67
+
68
+ card_context = {row[ColName.NAME]: row for row in select_rows}
69
+
70
+ return card_context
71
+
72
+
73
+ def _determine_expression(spec: ColumnSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
74
+ def seed_params(expr):
75
+ params = {}
76
+
77
+ sig_params = signature(expr).parameters
78
+ if 'names' in sig_params:
79
+ params['names'] = names
80
+ if 'card_context' in sig_params:
81
+ params['card_context'] = card_context
82
+ return params
83
+
84
+ if spec.col_type == ColType.NAME_SUM:
85
+ if spec.expr is not None:
86
+ assert isinstance(spec.expr, Callable), f"NAME_SUM column {spec.name} must have a callable `expr` accepting a `name` argument"
87
+ unnamed_exprs = [spec.expr(**{'name': name, **seed_params(spec.expr)}) for name in names]
76
88
 
77
- names = _get_names(set_code)
78
- assert len(names) > 0, "there should be names"
79
- hydrated = {}
80
- for key, spec in col_spec_map.items():
81
- if spec.col_type == ColType.NAME_SUM and spec.exprMap is not None:
82
- unnamed_exprs = map(spec.exprMap, names)
83
89
  expr = tuple(
84
90
  map(
85
91
  lambda ex, name: ex.alias(f"{spec.name}_{name}"),
@@ -87,14 +93,76 @@ def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec]):
87
93
  names,
88
94
  )
89
95
  )
90
- elif spec.expr is not None:
91
- expr = spec.expr.alias(spec.name)
92
-
93
96
  else:
94
- if spec.col_type == ColType.NAME_SUM:
95
- expr = tuple(map(lambda name: pl.col(f"{spec.name}_{name}"), names))
97
+ expr = tuple(map(lambda name: pl.col(f"{spec.name}_{name}"), names))
98
+
99
+ elif spec.expr is not None:
100
+ if isinstance(spec.expr, Callable):
101
+ params = seed_params(spec.expr)
102
+ if spec.col_type == ColType.PICK_SUM and 'name' in signature(spec.expr).parameters:
103
+ expr = pl.lit(None)
104
+ for name in names:
105
+ name_params = {'name': name, **params}
106
+ expr = pl.when(pl.col(ColName.PICK) == name).then(spec.expr(**name_params)).otherwise(expr)
96
107
  else:
97
- expr = pl.col(spec.name)
108
+ expr = spec.expr(**params)
109
+ else:
110
+ expr = spec.expr
111
+ expr = expr.alias(spec.name)
112
+ else:
113
+ expr = pl.col(spec.name)
114
+
115
+ return expr
116
+
117
+
118
+ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_map: dict[str, ColumnSpec], names: list[str]) -> set[str]:
119
+ dependencies = set()
120
+ tricky_ones = set()
121
+
122
+ if isinstance(expr, pl.Expr):
123
+ dep_cols = [c for c in expr.meta.root_names() if c != name]
124
+ for dep_col in dep_cols:
125
+ if dep_col in col_spec_map.keys():
126
+ dependencies.add(dep_col)
127
+ else:
128
+ tricky_ones.add(dep_col)
129
+ else:
130
+ for idx, exp in enumerate(expr):
131
+ pattern = f"_{names[idx]}$"
132
+ dep_cols = [c for c in exp.meta.root_names() if c != name]
133
+ for dep_col in dep_cols:
134
+ if dep_col in col_spec_map.keys():
135
+ dependencies.add(dep_col)
136
+ elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in col_spec_map:
137
+ dependencies.add(split[0])
138
+ else:
139
+ tricky_ones.add(dep_col)
140
+
141
+ for item in tricky_ones:
142
+ found = False
143
+ for n in names:
144
+ pattern = f"_{n}$"
145
+ if not found and len(split := re.split(pattern, item)) == 2 and split[0] in col_spec_map:
146
+ dependencies.add(split[0])
147
+ found = True
148
+ assert found, f"Could not locate column spec for root col {item}"
149
+
150
+ return dependencies
151
+
152
+
153
+ def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec], card_only=False):
154
+ names = _get_names(set_code)
155
+
156
+ if card_only:
157
+ card_context = {}
158
+ else:
159
+ card_context = _get_card_context(set_code, col_spec_map)
160
+
161
+ assert len(names) > 0, "there should be names"
162
+ hydrated = {}
163
+ for key, spec in col_spec_map.items():
164
+ expr = _determine_expression(spec, names, card_context)
165
+ dependencies = _infer_dependencies(key, expr, col_spec_map, names)
98
166
 
99
167
  try:
100
168
  sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
@@ -107,14 +175,11 @@ def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec]):
107
175
  else:
108
176
  expr_sig = str(datetime.datetime.now)
109
177
 
110
- dependencies = tuple(spec.dependencies or ())
111
- views = get_views(spec)
112
178
  signature = str(
113
179
  (
114
180
  spec.name,
115
181
  spec.col_type.value,
116
182
  expr_sig,
117
- tuple(view.value for view in views),
118
183
  dependencies,
119
184
  )
120
185
  )
@@ -122,7 +187,7 @@ def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec]):
122
187
  cdef = ColumnDefinition(
123
188
  name=spec.name,
124
189
  col_type=spec.col_type,
125
- views=views,
190
+ views=set(spec.views or set()),
126
191
  expr=expr,
127
192
  dependencies=dependencies,
128
193
  signature=signature,
@@ -136,18 +201,13 @@ def _view_select(
136
201
  view_cols: frozenset[str],
137
202
  col_def_map: dict[str, ColumnDefinition],
138
203
  is_agg_view: bool,
139
- is_card_sum: bool = False,
140
204
  ) -> DF:
141
205
  base_cols = frozenset()
142
206
  cdefs = [col_def_map[c] for c in view_cols]
143
207
  select = []
144
208
  for cdef in cdefs:
145
209
  if is_agg_view:
146
- if (
147
- cdef.col_type == ColType.AGG
148
- or cdef.col_type == ColType.CARD_SUM
149
- and is_card_sum
150
- ):
210
+ if cdef.col_type == ColType.AGG:
151
211
  base_cols = base_cols.union(cdef.dependencies)
152
212
  select.append(cdef.expr)
153
213
  else:
@@ -164,7 +224,7 @@ def _view_select(
164
224
  select.append(cdef.expr)
165
225
 
166
226
  if base_cols != view_cols:
167
- df = _view_select(df, base_cols, col_def_map, is_agg_view, is_card_sum)
227
+ df = _view_select(df, base_cols, col_def_map, is_agg_view)
168
228
 
169
229
  return df.select(select)
170
230
 
@@ -274,28 +334,6 @@ def _base_agg_df(
274
334
  )
275
335
 
276
336
 
277
- def card_df(
278
- set_code: str,
279
- extensions: list[ColumnSpec] | None = None,
280
- ):
281
- col_spec_map = dict(spells.columns.col_spec_map)
282
- if extensions is not None:
283
- for spec in extensions:
284
- col_spec_map[spec.name] = spec
285
-
286
- col_def_map = _hydrate_col_defs(set_code, col_spec_map)
287
-
288
- columns = [ColName.NAME] + [
289
- c for c, cdef in col_def_map.items() if cdef.col_type == ColType.CARD_ATTR
290
- ]
291
- fp = data_file_path(set_code, View.CARD)
292
- card_df = pl.read_parquet(fp)
293
- select_df = _view_select(
294
- card_df, frozenset(columns), col_def_map, is_agg_view=False
295
- )
296
- return select_df.select(columns)
297
-
298
-
299
337
  def summon(
300
338
  set_code: str,
301
339
  columns: list[str] | None = None,
@@ -337,12 +375,6 @@ def summon(
337
375
  select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
338
376
  agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
339
377
 
340
- if m.card_sum:
341
- card_sum_df = _view_select(
342
- agg_df, m.card_sum, m.col_def_map, is_agg_view=True, is_card_sum=True
343
- )
344
- agg_df = pl.concat([agg_df, card_sum_df], how="horizontal")
345
-
346
378
  if ColName.NAME not in m.group_by:
347
379
  agg_df = agg_df.group_by(m.group_by).sum()
348
380
 
spells/enums.py CHANGED
@@ -19,7 +19,6 @@ class ColType(StrEnum):
19
19
  NAME_SUM = "name_sum"
20
20
  AGG = "agg"
21
21
  CARD_ATTR = "card_attr"
22
- CARD_SUM = "card_sum"
23
22
 
24
23
 
25
24
  class ColName(StrEnum):
spells/manifest.py CHANGED
@@ -14,7 +14,6 @@ class Manifest:
14
14
  view_cols: dict[View, frozenset[str]]
15
15
  group_by: tuple[str, ...]
16
16
  filter: spells.filter.Filter | None
17
- card_sum: frozenset[str]
18
17
 
19
18
  def __post_init__(self):
20
19
  # No name filter check
@@ -40,21 +39,13 @@ class Manifest:
40
39
  ), f"Invalid groupby {col}!"
41
40
 
42
41
  for view, cols_for_view in self.view_cols.items():
43
- # cols_for_view are actually in view check
44
42
  for col in cols_for_view:
45
- assert (
46
- view in self.col_def_map[col].views
47
- ), f"View cols generated incorrectly, {col} not in view {view}"
48
43
  # game sum cols on in game, and no NAME groupby
49
44
  assert self.col_def_map[col].col_type != ColType.GAME_SUM or (
50
45
  view == View.GAME and ColName.NAME not in self.base_view_group_by
51
46
  ), f"Invalid manifest for GAME_SUM column {col}"
52
47
  if view != View.CARD:
53
48
  for col in self.base_view_group_by:
54
- # base_view_groupbys in view check
55
- assert (
56
- col == ColName.NAME or view in self.col_def_map[col].views
57
- ), f"Groupby {col} not in view {view}!"
58
49
  # base_view_groupbys in view_cols for view
59
50
  assert (
60
51
  col == ColName.NAME or col in cols_for_view
@@ -95,7 +86,7 @@ class Manifest:
95
86
  def _resolve_view_cols(
96
87
  col_set: frozenset[str],
97
88
  col_def_map: dict[str, ColumnDefinition],
98
- ) -> tuple[dict[View, frozenset[str]], frozenset[str]]:
89
+ ) -> dict[View, frozenset[str]]:
99
90
  """
100
91
  For each view ('game', 'draft', and 'card'), return the columns
101
92
  that must be present at the aggregation step. 'name' need not be
@@ -104,7 +95,6 @@ def _resolve_view_cols(
104
95
  MAX_DEPTH = 1000
105
96
  unresolved_cols = col_set
106
97
  view_resolution = {}
107
- card_sum = frozenset()
108
98
 
109
99
  iter_num = 0
110
100
  while unresolved_cols and iter_num < MAX_DEPTH:
@@ -116,9 +106,9 @@ def _resolve_view_cols(
116
106
  view_resolution[View.DRAFT] = view_resolution.get(
117
107
  View.DRAFT, frozenset()
118
108
  ).union({ColName.PICK})
119
- if cdef.col_type == ColType.CARD_SUM:
120
- card_sum = card_sum.union({col})
121
- if cdef.views:
109
+ if cdef.col_type == ColType.CARD_ATTR:
110
+ view_resolution[View.CARD] = view_resolution.get(View.CARD, frozenset()).union({col})
111
+ elif cdef.views:
122
112
  for view in cdef.views:
123
113
  view_resolution[view] = view_resolution.get(
124
114
  view, frozenset()
@@ -128,14 +118,38 @@ def _resolve_view_cols(
128
118
  raise ValueError(
129
119
  f"Invalid column def: {col} has neither views nor dependencies!"
130
120
  )
131
- for dep in cdef.dependencies:
132
- next_cols = next_cols.union({dep})
121
+ if cdef.col_type != ColType.AGG:
122
+ fully_resolved = True
123
+ col_views = frozenset({View.GAME, View.DRAFT, View.CARD})
124
+ for dep in cdef.dependencies:
125
+ dep_views = frozenset()
126
+ for view, view_cols in view_resolution.items():
127
+ if dep in view_cols:
128
+ dep_views = dep_views.union({view})
129
+ if not dep_views:
130
+ fully_resolved = False
131
+ next_cols = next_cols.union({dep})
132
+ else:
133
+ col_views = col_views.intersection(dep_views)
134
+ if fully_resolved:
135
+ assert len(col_views), f"Column {col} can't be defined in any views!"
136
+ for view in col_views:
137
+ if view not in view_resolution:
138
+ print(cdef)
139
+ assert False, f"Something went wrong with col {col}"
140
+
141
+ view_resolution[view] = view_resolution[view].union({col})
142
+ else:
143
+ next_cols = next_cols.union({col})
144
+ else:
145
+ for dep in cdef.dependencies:
146
+ next_cols = next_cols.union({dep})
133
147
  unresolved_cols = next_cols
134
148
 
135
149
  if iter_num >= MAX_DEPTH:
136
150
  raise ValueError("broken dependency chain in column spec, loop probable")
137
151
 
138
- return view_resolution, card_sum
152
+ return view_resolution
139
153
 
140
154
 
141
155
  def create(
@@ -148,7 +162,7 @@ def create(
148
162
  if columns is None:
149
163
  cols = tuple(spells.columns.default_columns)
150
164
  if ColName.NAME not in gbs:
151
- cols = tuple(c for c in cols if c not in [ColName.COLOR, ColName.RARITY])
165
+ cols = tuple(c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR)
152
166
  else:
153
167
  cols = tuple(columns)
154
168
 
@@ -159,12 +173,8 @@ def create(
159
173
  if m_filter is not None:
160
174
  col_set = col_set.union(m_filter.lhs)
161
175
 
162
- view_cols, card_sum = _resolve_view_cols(col_set, col_def_map)
163
176
  base_view_group_by = frozenset()
164
177
 
165
- if card_sum:
166
- base_view_group_by = base_view_group_by.union({ColName.NAME})
167
-
168
178
  for col in gbs:
169
179
  cdef = col_def_map[col]
170
180
  if cdef.col_type == ColType.GROUP_BY:
@@ -172,14 +182,23 @@ def create(
172
182
  elif cdef.col_type == ColType.CARD_ATTR:
173
183
  base_view_group_by = base_view_group_by.union({ColName.NAME})
174
184
 
185
+ view_cols = _resolve_view_cols(col_set, col_def_map)
186
+
175
187
  needed_views = frozenset()
176
- for view, cols_for_view in view_cols.items():
177
- for col in cols_for_view:
178
- if col_def_map[col].views == {view}: # only found in this view
179
- needed_views = needed_views.union({view})
188
+ if View.CARD in view_cols:
189
+ needed_views = needed_views.union({View.CARD})
180
190
 
181
- if not needed_views:
182
- needed_views = {View.DRAFT}
191
+ draft_view_cols = view_cols.get(View.DRAFT, frozenset())
192
+ game_view_cols = view_cols.get(View.GAME, frozenset())
193
+
194
+ base_cols = draft_view_cols.union(game_view_cols)
195
+
196
+ if base_cols == draft_view_cols:
197
+ needed_views = needed_views.union({View.DRAFT})
198
+ elif base_cols == game_view_cols:
199
+ needed_views = needed_views.union({View.GAME})
200
+ else:
201
+ needed_views = needed_views.union({View.GAME, View.DRAFT})
183
202
 
184
203
  view_cols = {v: view_cols[v] for v in needed_views}
185
204
 
@@ -190,5 +209,4 @@ def create(
190
209
  view_cols=view_cols,
191
210
  group_by=gbs,
192
211
  filter=m_filter,
193
- card_sum=card_sum,
194
212
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: analysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -74,7 +74,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
74
74
  - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
75
75
  - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
76
76
  - Manages grouping and filtering by built-in and custom columns at the row level
77
- - Provides 118 explicitly specified, enumerated, documented column definitions
77
+ - Provides 121 explicitly specified, enumerated, documented column definitions
78
78
  - Supports "Deck Color Data" aggregations with built-in column definitions.
79
79
  - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
80
80
  - Downloads and manages public datasets from 17Lands
@@ -112,24 +112,24 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
112
112
  ```
113
113
  - `group_by` specifies the grouping by one or more columns. By default, group by card names, but optionally group by any of a large set of fundamental and derived columns, including card attributes and your own custom extension.
114
114
  ```python
115
- >>> spells.summon('BLB', columns=["num_won", "num_games", "game_wr"], group_by=["main_colors"], filter_spec={"num_colors": 2})
116
- shape: (10, 4)
117
- ┌─────────────┬─────────┬───────────┬──────────┐
118
- │ main_colors ┆ num_won ┆ num_games ┆ game_wr │
119
- │ --- ┆ --- ┆ --- ┆ --- │
120
- │ str ┆ u32 ┆ u32 ┆ f64 │
121
- ╞═════════════╪═════════╪═══════════╪══════════╡
122
- │ BG ┆ 85022 ┆ 152863 ┆ 0.556197 │
123
- │ BR ┆ 45900 ┆ 81966 ┆ 0.559988 │
124
- │ RG ┆ 34641 ┆ 64428 ┆ 0.53767 │
125
- │ UB ┆ 30922 ┆ 57698 ┆ 0.535928 │
126
- │ UG ┆ 59879 ┆ 109145 ┆ 0.548619 │
127
- │ UR ┆ 19638 ┆ 38679 ┆ 0.507717 │
128
- │ WB ┆ 59480 ┆ 107443 ┆ 0.553596 │
129
- │ WG ┆ 76134 ┆ 136832 ┆ 0.556405 │
130
- │ WR ┆ 49712 ┆ 91224 ┆ 0.544944 │
131
- │ WU ┆ 16483 ┆ 31450 ┆ 0.524102 │
132
- └─────────────┴─────────┴───────────┴──────────┘
115
+ >>> summon('BLB', columns=["num_won", "num_games", "game_wr", "deck_mana_value_avg"], group_by=["main_colors"], filter_spec={"num_colors": 2})
116
+ shape: (10, 5)
117
+ ┌─────────────┬─────────┬───────────┬──────────┬─────────────────────┐
118
+ │ main_colors ┆ num_won ┆ num_games ┆ game_wr ┆ deck_mana_value_avg
119
+ │ --- ┆ --- ┆ --- ┆ --- ┆ ---
120
+ │ str ┆ u32 ┆ u32 ┆ f64 ┆ f64
121
+ ╞═════════════╪═════════╪═══════════╪══════════╪═════════════════════╡
122
+ │ BG ┆ 85022 ┆ 152863 ┆ 0.556197 ┆ 2.862305
123
+ │ BR ┆ 45900 ┆ 81966 ┆ 0.559988 ┆ 2.76198
124
+ │ RG ┆ 34641 ┆ 64428 ┆ 0.53767 ┆ 2.852182
125
+ │ UB ┆ 30922 ┆ 57698 ┆ 0.535928 ┆ 3.10409
126
+ │ UG ┆ 59879 ┆ 109145 ┆ 0.548619 ┆ 2.861026
127
+ │ UR ┆ 19638 ┆ 38679 ┆ 0.507717 ┆ 2.908215
128
+ │ WB ┆ 59480 ┆ 107443 ┆ 0.553596 ┆ 2.9217
129
+ │ WG ┆ 76134 ┆ 136832 ┆ 0.556405 ┆ 2.721064
130
+ │ WR ┆ 49712 ┆ 91224 ┆ 0.544944 ┆ 2.5222
131
+ │ WU ┆ 16483 ┆ 31450 ┆ 0.524102 ┆ 2.930967
132
+ └─────────────┴─────────┴───────────┴──────────┴─────────────────────┘
133
133
  ```
134
134
  - `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
135
135
  ```python
@@ -156,8 +156,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
156
156
  ... name='deq_base',
157
157
  ... col_type=ColType.AGG,
158
158
  ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
159
- ... dependencies=['gp_wr_excess', 'ata', 'pct_gp']
160
- ...)
159
+ ... )
161
160
  >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
162
161
  ... .filter(pl.col('deq_base').is_finite())
163
162
  ... .filter(pl.col('rarity').is_in(['common', 'uncommon'])
@@ -278,13 +277,13 @@ summon(
278
277
 
279
278
  #### parameters
280
279
 
281
- - columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, `CARD_SUM` and `AGG`. Min/Max/Unique
280
+ - columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. Min/Max/Unique
282
281
  aggregations of non-numeric (or numeric) data types are not supported. If `None`, use a set of columns modeled on the commonly used values on 17Lands.com/card_data.
283
282
 
284
283
  - group_by: a list of string or `ColName` values to display as grouped columns. Valid `ColTypes` are `GROUP_BY` and `CARD_ATTR`. By default, group by "name" (card name).
285
284
 
286
285
  - filter_spec: a dictionary specifying a filter, using a small number of paradigms. Columns used must be in each base view ("draft" and "game") that the `columns` and `group_by` columns depend on, so
287
- `AGG`, `CARD_SUM` and `CARD_ATTR` columns are not valid. `NAME_SUM` columns are also not supported. Derived columns are supported. No filter is applied by default. Yes, I should rewrite it to use the mongo query language. The specification is best understood with examples:
286
+ `AGG` and `CARD_ATTR` columns are not valid. Functions of card attributes in the base views can be filtered on, see the documentation for `expr` for details. `NAME_SUM` columns are also not supported. Derived columns are supported. No filter is applied by default. Yes, I should rewrite it to use the mongo query language. The specification is best understood with examples:
288
287
 
289
288
  - `{'player_cohort': 'Top'}` "player_cohort" value equals "Top".
290
289
  - `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
@@ -297,10 +296,10 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
297
296
  ### Enums
298
297
 
299
298
  ```python
300
- from spells.enums import ColName, ColType, View
299
+ from spells.enums import ColName, ColType
301
300
  ```
302
301
 
303
- Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions. You shouldn't need `VIEW`.
302
+ Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
304
303
 
305
304
  ### ColumnSpec
306
305
 
@@ -310,11 +309,8 @@ from spells.columns import ColumnSpec
310
309
  ColumnSpec(
311
310
  name: str,
312
311
  col_type: ColType,
313
- expr: pl.Expr | None = None,
314
- exprMap: Callable[[str], pl.Expr] | None = None
315
- dependencies: list[str] | None = None
312
+ expr: pl.Expr | Callable[..., pl.Expr] | None = None,
316
313
  version: str | None = None
317
- views: list[View] | None = None,
318
314
  )
319
315
  ```
320
316
 
@@ -324,19 +320,19 @@ Used to define extensions in `summon`
324
320
 
325
321
  - `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
326
322
 
327
- - `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, `CARD_SUM`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`, `CARD_SUM` and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after summing over groups, and can include polars Expression aggregations. `CARD_SUM` columns are expressed similarly to `AGG`, but they are calculated before grouping by card name and are summed before the `AGG` selection stage (for example, to calculate average mana value. See example notebook "Card Attributes"). Arbitrarily long chains of aggregate dependencies are supported.
328
-
329
- - `expr`: A polars expression giving the derivation of the column value at the first level where it is defined. For `NAME_SUM` columns the `exprMap` attribute must be used instead. `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occured prior to selection.
323
+ - `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
324
+ and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
325
+ summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
330
326
 
331
- - `exprMap`: A function of card name that returns the expression for a `NAME_SUM` column.
332
-
333
- - `dependencies`: A list of column names. All dependencies must be declared by name.
327
+ - `expr`: A polars expression or function returning a polars expression giving the derivation of the column value at the first level where it is defined.
328
+ - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
329
+ - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
330
+ - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
331
+ - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions. See example notebooks for more details.
334
332
 
335
333
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
336
334
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
337
335
 
338
- - `views`: Not needed for custom columns.
339
-
340
336
  ### Columns
341
337
 
342
338
  A table of all included columns. Columns can be referenced by enum or by string value in arguments and filter specs. The string value is always the lowercase version of the enum attribute.
@@ -424,7 +420,9 @@ A table of all included columns. Columns can be referenced by enum or by string
424
420
  | `CARD_TYPE` | `"card_type"` | `CARD` | `CARD_ATTR` | | String |
425
421
  | `SUBTYPE` | `"subtype"` | `CARD` | `CARD_ATTR` | | String |
426
422
  | `MANA_VALUE` | `"mana_value"` | `CARD` | `CARD_ATTR` | | Float |
427
- | `DECK_MANA_VALUE` | `"deck_mana_value"` | | `CARD_SUM` | `DECK` * `MANA_VALUE` | Float |
423
+ | `DECK_MANA_VALUE` | `"deck_mana_value"` | | `NAME_SUM` | `DECK` * `MANA_VALUE` | Float |
424
+ | `DECK_LANDS` | `"deck_lands"` | | `NAME_SUM` | Number of lands in deck | Float |
425
+ | `DECK_SPELLS` | `"deck_spells"` | | `NAME_SUM` | Number of spells in deck | Float |
428
426
  | `MANA_COST` | `"mana_cost"` | `CARD` | `CARD_ATTR` | | String |
429
427
  | `POWER` | `"power"` | `CARD` | `CARD_ATTR` | | Float |
430
428
  | `TOUGHNESS` | `"toughness"` | `CARD` | `CARD_ATTR` | | Float |
@@ -463,7 +461,9 @@ A table of all included columns. Columns can be referenced by enum or by string
463
461
  | `GIH_WR_VAR` | `"gih_wr_var"` | | `AGG` | Game-weighted Variance | Float |
464
462
  | `GIH_WR_STDEV` | `"gih_wr_stdev"` | | `AGG` | Sqrt of `GIH_WR_VAR` | Float |
465
463
  | `GIH_WR_Z` | `"gih_wr_z"` | | `AGG` |`GIH_WR_EXCESS` / `GIH_WR_STDEV` | Float |
466
- | `DECK_MANA_VALUE_AVG` | `"deck_mana_value_avg"` | | `AGG` | `DECK_MANA_VALUE ` / `DECK` | Float |
464
+ | `DECK_MANA_VALUE_AVG` | `"deck_mana_value_avg"` | | `AGG` | `DECK_MANA_VALUE ` / `DECK_SPELLS` | Float |
465
+ | `DECK_LANDS_AVG` | `"deck_lands_avg"` | | `AGG` | `DECK_LANDS ` / `NUM_GAMES` | Float |
466
+ | `DECK_SPELLS_AVG` | `"deck_spells_avg"` | | `AGG` | `DECK_SPELLS ` / `NUM_GAMES` | Float |
467
467
 
468
468
  # Roadmap to 1.0
469
469
 
@@ -0,0 +1,15 @@
1
+ spells/__init__.py,sha256=EcI7ijXYvPA8jj7wUZqs6CSWr__MD8AOXhkex-Hj37E,131
2
+ spells/cache.py,sha256=4v7h8D3TtaT0R_EdiRNhdcQrXzdH_CukezO6oAXvNEY,2956
3
+ spells/cards.py,sha256=EOXAB_F2yedjf6KquCERCIHl0TSIJIoOe1jv8g4JzOc,3601
4
+ spells/columns.py,sha256=gpgx8zZ4VmmcJXxMdyEKIOgJaBESKkbtpOm6bVNNopM,19132
5
+ spells/draft_data.py,sha256=bJw2ueJ1dNrunVY2GH6pgJCMPFCkydI37JExgpMS6vE,12559
6
+ spells/enums.py,sha256=4G-gi75v9bne4cMRgRwwmpWy0kW8NoTjkmGTxEwFDeo,4735
7
+ spells/external.py,sha256=qe6wOBDhPN4CZNQvYRq6G-OpIZcWTZzJjySgnf2Gu1o,10258
8
+ spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
9
+ spells/manifest.py,sha256=Dy0zskkSqPWb3WQqLgNk5pbaCGlEAn7JG9wRFOUA3Jc,8182
10
+ spells/schema.py,sha256=z8Qn2SiHG4T6YfPsz8xHLGMjU_Ofm76-Vrquh3b9B64,6422
11
+ spells_mtg-0.4.0.dist-info/METADATA,sha256=pEKOk-A27Tq8Fpbs9yPb11tT9av4f7-iOQSIGfWv9Mg,43908
12
+ spells_mtg-0.4.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
13
+ spells_mtg-0.4.0.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
14
+ spells_mtg-0.4.0.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
15
+ spells_mtg-0.4.0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- spells/__init__.py,sha256=QCPWQySUK2SZtCU-mSZLsn7vrNLJMDsRwil8gmAzmdk,151
2
- spells/cache.py,sha256=4v7h8D3TtaT0R_EdiRNhdcQrXzdH_CukezO6oAXvNEY,2956
3
- spells/cards.py,sha256=EOXAB_F2yedjf6KquCERCIHl0TSIJIoOe1jv8g4JzOc,3601
4
- spells/columns.py,sha256=QIM-dV00Pe5En9EozPWpH_u5f8VSgxd0Pxd2QxYuOFk,23423
5
- spells/draft_data.py,sha256=ABIQoUD8MKnc2goTUGY33dOxCpH8ydXXuQnbHMNULZs,10999
6
- spells/enums.py,sha256=_yvb9AFPofRYwf0gndFteGQS9bSzHy2_HphYYhFr9Ro,4761
7
- spells/external.py,sha256=qe6wOBDhPN4CZNQvYRq6G-OpIZcWTZzJjySgnf2Gu1o,10258
8
- spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
9
- spells/manifest.py,sha256=oGkWuBYquJYmtaIp2O-x5nFKD5TkU5lzS-vera69jyU,7244
10
- spells/schema.py,sha256=z8Qn2SiHG4T6YfPsz8xHLGMjU_Ofm76-Vrquh3b9B64,6422
11
- spells_mtg-0.3.1.dist-info/METADATA,sha256=3K1GrMa-GXcb3J-0Xg71Vqi93LxoynWwLQ8UmcjhVxM,43073
12
- spells_mtg-0.3.1.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
13
- spells_mtg-0.3.1.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
14
- spells_mtg-0.3.1.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
15
- spells_mtg-0.3.1.dist-info/RECORD,,