spells-mtg 0.5.2__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/PKG-INFO +11 -5
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/README.md +10 -4
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/pyproject.toml +1 -1
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/columns.py +46 -10
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/draft_data.py +127 -37
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/enums.py +3 -0
- spells_mtg-0.6.0/spells/extension.py +213 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/external.py +34 -2
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/manifest.py +9 -3
- spells_mtg-0.5.2/spells/extension.py +0 -40
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/LICENSE +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/__init__.py +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/cache.py +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/cards.py +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/filter.py +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/schema.py +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/tests/__init__.py +0 -0
- {spells_mtg-0.5.2 → spells_mtg-0.6.0}/tests/filter_test.py +0 -0
{spells_mtg-0.5.2 → spells_mtg-0.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spells-mtg
-Version: 0.5.2
+Version: 0.6.0
 Summary: analaysis of 17Lands.com public datasets
 Author-Email: Joel Barnes <oelarnes@gmail.com>
 License: MIT
@@ -74,7 +74,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
-- Provides
+- Provides 124 explicitly specified, enumerated, documented column definitions
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -296,6 +296,7 @@ summon(
     filter_spec: dict | None = None,
     extensions: dict[str, ColSpec] | None = None,
     card_context: pl.DataFrame | dict[str, dict[str, Any] | None = None,
+    set_context: pl.DataFrame | dict[str, Any] | None = None,
     read_cache: bool = True,
     write_cache: bool = True,
 ) -> polars.DataFrame
@@ -319,6 +320,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
 
 - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting cards metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
 
+- set_context: Typically, a dict of abitrary values to use in column definitions, for example, you could provide the quick draft release date and have a column that depended on that.
+
 - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
 
 ### Enums
@@ -353,7 +356,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
 - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
 - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
 - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occured prior to selection.
-- The possible arguments to `expr`, in addition to `name` when appropriate,
+- The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+  - `names`: An array of all card names in the canonical order.
+  - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+  - `set_context`: A dictionary with arbitrary fields provided via the `set_context` argument. Has two built-in attributes, `picks_per_pack` (e.g. 13 or 14), and `release_time`, which is the minimum value of the `draft_time` field.
 
 - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
 for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -371,9 +377,11 @@ A table of all included columns. Columns can be referenced by enum or by string
 | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+| `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
 | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
 | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
 | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+| `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1 - 7, etc. | Int |
 | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
 | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
 | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
@@ -504,5 +512,3 @@ A table of all included columns. Columns can be referenced by enum or by string
 - [ ] Helper functions for common plotting paradigms
 - [ ] Example notebooks
 - [ ] Scientific workflows: regression, MLE, etc
-
-
{spells_mtg-0.5.2 → spells_mtg-0.6.0}/README.md

@@ -63,7 +63,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
-- Provides
+- Provides 124 explicitly specified, enumerated, documented column definitions
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -285,6 +285,7 @@ summon(
     filter_spec: dict | None = None,
     extensions: dict[str, ColSpec] | None = None,
     card_context: pl.DataFrame | dict[str, dict[str, Any] | None = None,
+    set_context: pl.DataFrame | dict[str, Any] | None = None,
     read_cache: bool = True,
     write_cache: bool = True,
 ) -> polars.DataFrame
@@ -308,6 +309,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
 
 - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting cards metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
 
+- set_context: Typically, a dict of abitrary values to use in column definitions, for example, you could provide the quick draft release date and have a column that depended on that.
+
 - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
 
 ### Enums
@@ -342,7 +345,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
 - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
 - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
 - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occured prior to selection.
-- The possible arguments to `expr`, in addition to `name` when appropriate,
+- The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+  - `names`: An array of all card names in the canonical order.
+  - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+  - `set_context`: A dictionary with arbitrary fields provided via the `set_context` argument. Has two built-in attributes, `picks_per_pack` (e.g. 13 or 14), and `release_time`, which is the minimum value of the `draft_time` field.
 
 - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
 for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -360,9 +366,11 @@ A table of all included columns. Columns can be referenced by enum or by string
 | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+| `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
 | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
 | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
 | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+| `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1 - 7, etc. | Int |
 | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
 | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
 | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
@@ -493,5 +501,3 @@ A table of all included columns. Columns can be referenced by enum or by string
 - [ ] Helper functions for common plotting paradigms
 - [ ] Example notebooks
 - [ ] Scientific workflows: regression, MLE, etc
-
-
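For orientation, a minimal sketch of how the new `set_context` argument combines with the existing `card_context` feedback loop documented above. The set code "BLB", the context key, and the `from spells import summon` import path are placeholders, not confirmed by the package:

```python
import polars as pl
from spells import summon  # assumes summon is exported at the package root

# First pass: compute a card metric to feed back in, per the card_context docs above.
card_df = summon("BLB", columns=["gp_wr"])  # "BLB" is a placeholder set code

# Second pass: reuse the metric via card_context, and pass arbitrary
# set-level values via the new set_context argument.
df = summon(
    "BLB",
    columns=["gih_wr"],
    card_context=card_df,  # DataFrame with a "name" column
    set_context={"quick_draft_release": "2024-08-20"},  # hypothetical key
)
```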
{spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/columns.py

@@ -40,7 +40,7 @@ default_columns = [
     ColName.GIH_WR,
 ]
 
-specs: dict[str, ColSpec] = {
+_specs: dict[str, ColSpec] = {
     ColName.NAME: ColSpec(
         col_type=ColType.GROUP_BY,
         views=[View.CARD],
@@ -65,9 +65,21 @@ specs: dict[str, ColSpec] = {
         col_type=ColType.GROUP_BY,
         expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
     ),
+    ColName.FORMAT_DAY: ColSpec(
+        col_type=ColType.GROUP_BY,
+        expr=lambda set_context: (
+            pl.col(ColName.DRAFT_DATE)
+            - pl.lit(set_context["release_time"])
+            .str.to_datetime("%Y-%m-%d %H:%M:%S")
+            .dt.date()
+        ).dt.total_days()
+        + 1,
+    ),
     ColName.DRAFT_DAY_OF_WEEK: ColSpec(
         col_type=ColType.GROUP_BY,
-        expr=pl.col(ColName.DRAFT_TIME)
+        expr=pl.col(ColName.DRAFT_TIME)
+        .str.to_datetime("%Y-%m-%d %H:%M:%S")
+        .dt.weekday(),
     ),
     ColName.DRAFT_HOUR: ColSpec(
         col_type=ColType.GROUP_BY,
@@ -77,6 +89,9 @@ specs: dict[str, ColSpec] = {
         col_type=ColType.GROUP_BY,
         expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
     ),
+    ColName.FORMAT_WEEK: ColSpec(
+        col_type=ColType.GROUP_BY, expr=(pl.col(ColName.FORMAT_DAY) - 1) // 7 + 1
+    ),
     ColName.RANK: ColSpec(
         col_type=ColType.GROUP_BY,
         views=[View.GAME, View.DRAFT],
@@ -160,13 +175,15 @@ specs: dict[str, ColSpec] = {
     ),
     ColName.NUM_TAKEN: ColSpec(
         col_type=ColType.PICK_SUM,
-        expr=pl.when(pl.col(ColName.PICK).is_not_null())
-        .then(1)
-        .otherwise(0),
+        expr=pl.when(pl.col(ColName.PICK).is_not_null()).then(1).otherwise(0),
     ),
     ColName.NUM_DRAFTS: ColSpec(
         col_type=ColType.PICK_SUM,
-        expr=pl.when(
+        expr=pl.when(
+            (pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)
+        )
+        .then(1)
+        .otherwise(0),
     ),
     ColName.PICK: ColSpec(
         col_type=ColType.FILTER_ONLY,
@@ -207,7 +224,9 @@ specs: dict[str, ColSpec] = {
     ),
     ColName.GAME_DAY_OF_WEEK: ColSpec(
         col_type=ColType.GROUP_BY,
-        expr=pl.col(ColName.GAME_TIME)
+        expr=pl.col(ColName.GAME_TIME)
+        .str.to_datetime("%Y-%m-%d %H-%M-%S")
+        .dt.weekday(),
     ),
     ColName.GAME_HOUR: ColSpec(
         col_type=ColType.GROUP_BY,
@@ -382,11 +401,13 @@ specs: dict[str, ColSpec] = {
     ),
     ColName.DECK_MANA_VALUE: ColSpec(
         col_type=ColType.NAME_SUM,
-        expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE]
+        expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE]
+        * pl.col(f"deck_{name}"),
     ),
     ColName.DECK_LANDS: ColSpec(
         col_type=ColType.NAME_SUM,
-        expr=lambda name, card_context: pl.col(f"deck_{name}")
+        expr=lambda name, card_context: pl.col(f"deck_{name}")
+        * (1 if "Land" in card_context[name][ColName.CARD_TYPE] else 0),
     ),
     ColName.DECK_SPELLS: ColSpec(
         col_type=ColType.NAME_SUM,
@@ -477,6 +498,10 @@ specs: dict[str, ColSpec] = {
         col_type=ColType.AGG,
         expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
     ),
+    ColName.NUM_IN_POOL_TOTAL: ColSpec(
+        col_type=ColType.AGG,
+        expr=pl.col(ColName.NUM_IN_POOL).sum(),
+    ),
     ColName.IN_POOL_WR: ColSpec(
         col_type=ColType.AGG,
         expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
@@ -555,4 +580,15 @@ specs: dict[str, ColSpec] = {
 }
 
 for item in ColName:
-    assert item in
+    assert item in _specs, f"column {item} enumerated but not specified"
+
+
+class GetSpecs:
+    def __init__(self, spec_dict: dict[str, ColSpec]):
+        self._specs = spec_dict
+
+    def __call__(self):
+        return dict(self._specs)
+
+
+get_specs = GetSpecs(_specs)
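The new `FORMAT_DAY` spec above shows the pattern for consuming `set_context` inside a column expression: `expr` is a lambda whose parameter names select which context objects get injected. A hedged sketch of a user-defined extension column in the same style; the column name, the three-day cutoff, and the `version` value are invented for illustration:

```python
import polars as pl
from spells.columns import ColSpec
from spells.enums import ColType, ColName

# Hypothetical extension: flag drafts from the first three days of the format,
# mirroring the FORMAT_DAY spec above. Pass to summon(..., extensions=...).
launch_window = {
    "launch_window": ColSpec(
        col_type=ColType.GROUP_BY,
        expr=lambda set_context: (
            pl.col(ColName.DRAFT_DATE)
            - pl.lit(set_context["release_time"])
            .str.to_datetime("%Y-%m-%d %H:%M:%S")
            .dt.date()
        ).dt.total_days()
        < 3,
        version="1",  # python-defined exprs need a version for cache signatures
    ),
}
```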
@@ -6,11 +6,11 @@ Aggregate dataframes containing raw counts are cached in the local file system
|
|
|
6
6
|
for performance.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import datetime
|
|
10
9
|
import functools
|
|
11
10
|
import hashlib
|
|
12
11
|
import re
|
|
13
12
|
from inspect import signature
|
|
13
|
+
import os
|
|
14
14
|
from typing import Callable, TypeVar, Any
|
|
15
15
|
|
|
16
16
|
import polars as pl
|
|
@@ -20,7 +20,7 @@ from spells.external import data_file_path
|
|
|
20
20
|
import spells.cache
|
|
21
21
|
import spells.filter
|
|
22
22
|
import spells.manifest
|
|
23
|
-
from spells.columns import ColDef, ColSpec
|
|
23
|
+
from spells.columns import ColDef, ColSpec, get_specs
|
|
24
24
|
from spells.enums import View, ColName, ColType
|
|
25
25
|
|
|
26
26
|
|
|
@@ -54,9 +54,20 @@ def _get_names(set_code: str) -> list[str]:
|
|
|
54
54
|
return names
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
def _get_card_context(
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
def _get_card_context(
|
|
58
|
+
set_code: str,
|
|
59
|
+
specs: dict[str, ColSpec],
|
|
60
|
+
card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
|
|
61
|
+
set_context: pl.DataFrame | dict[str, Any] | None,
|
|
62
|
+
) -> dict[str, dict[str, Any]]:
|
|
63
|
+
card_attr_specs = {
|
|
64
|
+
col: spec
|
|
65
|
+
for col, spec in specs.items()
|
|
66
|
+
if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
|
|
67
|
+
}
|
|
68
|
+
col_def_map = _hydrate_col_defs(
|
|
69
|
+
set_code, card_attr_specs, set_context=set_context, card_only=True
|
|
70
|
+
)
|
|
60
71
|
|
|
61
72
|
columns = list(col_def_map.keys())
|
|
62
73
|
|
|
@@ -71,34 +82,50 @@ def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl
|
|
|
71
82
|
if card_context is not None:
|
|
72
83
|
if isinstance(card_context, pl.DataFrame):
|
|
73
84
|
try:
|
|
74
|
-
card_context = {
|
|
85
|
+
card_context = {
|
|
86
|
+
row[ColName.NAME]: row for row in card_context.to_dicts()
|
|
87
|
+
}
|
|
75
88
|
except ColumnNotFoundError:
|
|
76
89
|
raise ValueError("card_context DataFrame must have column 'name'")
|
|
77
90
|
|
|
78
91
|
names = list(loaded_context.keys())
|
|
79
92
|
for name in names:
|
|
80
|
-
assert
|
|
93
|
+
assert (
|
|
94
|
+
name in card_context
|
|
95
|
+
), f"card_context must include a row for each card name. {name} missing."
|
|
81
96
|
for col, value in card_context[name].items():
|
|
82
97
|
loaded_context[name][col] = value
|
|
83
98
|
|
|
84
99
|
return loaded_context
|
|
85
|
-
|
|
86
100
|
|
|
87
|
-
|
|
101
|
+
|
|
102
|
+
def _determine_expression(
|
|
103
|
+
col: str,
|
|
104
|
+
spec: ColSpec,
|
|
105
|
+
names: list[str],
|
|
106
|
+
card_context: dict[str, dict],
|
|
107
|
+
set_context: dict[str, Any],
|
|
108
|
+
) -> pl.Expr | tuple[pl.Expr, ...]:
|
|
88
109
|
def seed_params(expr):
|
|
89
110
|
params = {}
|
|
90
111
|
|
|
91
112
|
sig_params = signature(expr).parameters
|
|
92
|
-
if
|
|
93
|
-
params[
|
|
94
|
-
if
|
|
95
|
-
params[
|
|
113
|
+
if "names" in sig_params:
|
|
114
|
+
params["names"] = names
|
|
115
|
+
if "card_context" in sig_params:
|
|
116
|
+
params["card_context"] = card_context
|
|
117
|
+
if "set_context" in sig_params:
|
|
118
|
+
params["set_context"] = set_context
|
|
96
119
|
return params
|
|
97
120
|
|
|
98
121
|
if spec.col_type == ColType.NAME_SUM:
|
|
99
122
|
if spec.expr is not None:
|
|
100
|
-
assert isinstance(
|
|
101
|
-
|
|
123
|
+
assert isinstance(
|
|
124
|
+
spec.expr, Callable
|
|
125
|
+
), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
|
|
126
|
+
unnamed_exprs = [
|
|
127
|
+
spec.expr(**{"name": name, **seed_params(spec.expr)}) for name in names
|
|
128
|
+
]
|
|
102
129
|
|
|
103
130
|
expr = tuple(
|
|
104
131
|
map(
|
|
@@ -113,13 +140,23 @@ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_contex
|
|
|
113
140
|
elif spec.expr is not None:
|
|
114
141
|
if isinstance(spec.expr, Callable):
|
|
115
142
|
params = seed_params(spec.expr)
|
|
116
|
-
if
|
|
143
|
+
if (
|
|
144
|
+
spec.col_type == ColType.PICK_SUM
|
|
145
|
+
and "name" in signature(spec.expr).parameters
|
|
146
|
+
):
|
|
117
147
|
expr = pl.lit(None)
|
|
118
148
|
for name in names:
|
|
119
|
-
name_params = {
|
|
120
|
-
expr =
|
|
121
|
-
|
|
122
|
-
|
|
149
|
+
name_params = {"name": name, **params}
|
|
150
|
+
expr = (
|
|
151
|
+
pl.when(pl.col(ColName.PICK) == name)
|
|
152
|
+
.then(spec.expr(**name_params))
|
|
153
|
+
.otherwise(expr)
|
|
154
|
+
)
|
|
155
|
+
elif (
|
|
156
|
+
spec.col_type == ColType.CARD_ATTR
|
|
157
|
+
and "name" in signature(spec.expr).parameters
|
|
158
|
+
):
|
|
159
|
+
expr = spec.expr(**{"name": pl.col("name"), **params})
|
|
123
160
|
else:
|
|
124
161
|
expr = spec.expr(**params)
|
|
125
162
|
else:
|
|
@@ -131,7 +168,12 @@ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_contex
|
|
|
131
168
|
return expr
|
|
132
169
|
|
|
133
170
|
|
|
134
|
-
def _infer_dependencies(
|
|
171
|
+
def _infer_dependencies(
|
|
172
|
+
name: str,
|
|
173
|
+
expr: pl.Expr | tuple[pl.Expr, ...],
|
|
174
|
+
specs: dict[str, ColSpec],
|
|
175
|
+
names: list[str],
|
|
176
|
+
) -> set[str]:
|
|
135
177
|
dependencies = set()
|
|
136
178
|
tricky_ones = set()
|
|
137
179
|
|
|
@@ -140,7 +182,7 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
|
|
|
140
182
|
for dep_col in dep_cols:
|
|
141
183
|
if dep_col in specs.keys():
|
|
142
184
|
dependencies.add(dep_col)
|
|
143
|
-
else:
|
|
185
|
+
else:
|
|
144
186
|
tricky_ones.add(dep_col)
|
|
145
187
|
else:
|
|
146
188
|
for idx, exp in enumerate(expr):
|
|
@@ -149,7 +191,9 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
|
|
|
149
191
|
for dep_col in dep_cols:
|
|
150
192
|
if dep_col in specs.keys():
|
|
151
193
|
dependencies.add(dep_col)
|
|
152
|
-
elif
|
|
194
|
+
elif (
|
|
195
|
+
len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs
|
|
196
|
+
):
|
|
153
197
|
dependencies.add(split[0])
|
|
154
198
|
else:
|
|
155
199
|
tricky_ones.add(dep_col)
|
|
@@ -158,38 +202,80 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
|
|
|
158
202
|
found = False
|
|
159
203
|
for n in names:
|
|
160
204
|
pattern = f"_{n}$"
|
|
161
|
-
if
|
|
205
|
+
if (
|
|
206
|
+
not found
|
|
207
|
+
and len(split := re.split(pattern, item)) == 2
|
|
208
|
+
and split[0] in specs
|
|
209
|
+
):
|
|
162
210
|
dependencies.add(split[0])
|
|
163
211
|
found = True
|
|
164
|
-
assert found, f"Could not locate column spec for root col {item}"
|
|
212
|
+
assert found, f"Could not locate column spec for root col {item}"
|
|
165
213
|
|
|
166
214
|
return dependencies
|
|
167
215
|
|
|
168
216
|
|
|
169
|
-
def
|
|
217
|
+
def _get_set_context(
|
|
218
|
+
set_code: str, set_context: pl.DataFrame | dict[str, Any] | None
|
|
219
|
+
) -> dict[str, Any]:
|
|
220
|
+
context_fp = data_file_path(set_code, "context")
|
|
221
|
+
|
|
222
|
+
report = functools.partial(
|
|
223
|
+
spells.cache.spells_print,
|
|
224
|
+
"report",
|
|
225
|
+
f"Set context for {set_code} invalid, please investigate!",
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
context = {}
|
|
229
|
+
if not os.path.isfile(context_fp):
|
|
230
|
+
report()
|
|
231
|
+
else:
|
|
232
|
+
context_df = pl.read_parquet(context_fp)
|
|
233
|
+
if len(context_df) == 1:
|
|
234
|
+
context.update(context_df.to_dicts()[0])
|
|
235
|
+
else:
|
|
236
|
+
report()
|
|
237
|
+
|
|
238
|
+
if isinstance(set_context, pl.DataFrame):
|
|
239
|
+
assert len(set_context != 1), "Invalid set context provided"
|
|
240
|
+
context.update(set_context.to_dicts()[0])
|
|
241
|
+
elif isinstance(set_context, dict):
|
|
242
|
+
context.update(set_context)
|
|
243
|
+
|
|
244
|
+
return context
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _hydrate_col_defs(
|
|
248
|
+
set_code: str,
|
|
249
|
+
specs: dict[str, ColSpec],
|
|
250
|
+
card_context: pl.DataFrame | dict[str, dict] | None = None,
|
|
251
|
+
set_context: pl.DataFrame | dict[str, Any] | None = None,
|
|
252
|
+
card_only: bool = False,
|
|
253
|
+
):
|
|
170
254
|
names = _get_names(set_code)
|
|
171
255
|
|
|
256
|
+
set_context = _get_set_context(set_code, set_context)
|
|
257
|
+
|
|
172
258
|
if card_only:
|
|
173
259
|
card_context = {}
|
|
174
260
|
else:
|
|
175
|
-
card_context = _get_card_context(set_code, specs, card_context)
|
|
261
|
+
card_context = _get_card_context(set_code, specs, card_context, set_context)
|
|
176
262
|
|
|
177
263
|
assert len(names) > 0, "there should be names"
|
|
178
264
|
hydrated = {}
|
|
179
265
|
for col, spec in specs.items():
|
|
180
|
-
expr = _determine_expression(col, spec, names, card_context)
|
|
266
|
+
expr = _determine_expression(col, spec, names, card_context, set_context)
|
|
181
267
|
dependencies = _infer_dependencies(col, expr, specs, names)
|
|
182
268
|
|
|
183
269
|
sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
|
|
184
270
|
try:
|
|
185
|
-
expr_sig = sig_expr.meta.serialize(
|
|
186
|
-
format="json"
|
|
187
|
-
)
|
|
271
|
+
expr_sig = sig_expr.meta.serialize(format="json")
|
|
188
272
|
except pl.exceptions.ComputeError:
|
|
189
273
|
if spec.version is not None:
|
|
190
274
|
expr_sig = col + spec.version
|
|
191
275
|
else:
|
|
192
|
-
print(
|
|
276
|
+
print(
|
|
277
|
+
f"Using session-only signature for non-serializable column {col}, please provide a version value"
|
|
278
|
+
)
|
|
193
279
|
expr_sig = str(sig_expr)
|
|
194
280
|
|
|
195
281
|
signature = str(
|
|
@@ -356,18 +442,22 @@ def summon(
|
|
|
356
442
|
columns: list[str] | None = None,
|
|
357
443
|
group_by: list[str] | None = None,
|
|
358
444
|
filter_spec: dict | None = None,
|
|
359
|
-
extensions: dict[str, ColSpec] | None = None,
|
|
445
|
+
extensions: dict[str, ColSpec] | list[dict[str, ColSpec]] | None = None,
|
|
360
446
|
use_streaming: bool = False,
|
|
361
447
|
read_cache: bool = True,
|
|
362
448
|
write_cache: bool = True,
|
|
363
|
-
card_context: pl.DataFrame | dict[str, dict] | None = None
|
|
449
|
+
card_context: pl.DataFrame | dict[str, dict] | None = None,
|
|
450
|
+
set_context: pl.DataFrame | dict[str, Any] | None = None,
|
|
364
451
|
) -> pl.DataFrame:
|
|
365
|
-
specs =
|
|
452
|
+
specs = get_specs()
|
|
366
453
|
|
|
367
454
|
if extensions is not None:
|
|
368
|
-
|
|
455
|
+
if not isinstance(extensions, list):
|
|
456
|
+
extensions = [extensions]
|
|
457
|
+
for ext in extensions:
|
|
458
|
+
specs.update(ext)
|
|
369
459
|
|
|
370
|
-
col_def_map = _hydrate_col_defs(set_code, specs, card_context)
|
|
460
|
+
col_def_map = _hydrate_col_defs(set_code, specs, card_context, set_context)
|
|
371
461
|
m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
|
|
372
462
|
|
|
373
463
|
calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
|
|
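The `seed_params` helper above drives the context-injection mechanism: it inspects a lambda's parameter names and passes only the matching objects, which is why a column spec can ask for any subset of `name`, `names`, `card_context`, and `set_context`. A standalone sketch of the same idea, independent of Spells' types:

```python
from inspect import signature


def seed_params(fn, available):
    """Pass each value from `available` whose key matches a parameter name of fn."""
    sig_params = signature(fn).parameters
    return {k: v for k, v in available.items() if k in sig_params}


# A column expr that only asks for set_context receives only set_context.
expr = lambda set_context: set_context["release_time"]
ctx = {
    "names": ["Card A"],
    "card_context": {},
    "set_context": {"release_time": "2024-08-20 15:00:00"},
}
print(expr(**seed_params(expr, ctx)))  # -> 2024-08-20 15:00:00
```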
{spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/enums.py

@@ -38,9 +38,11 @@ class ColName(StrEnum):
     DRAFT_ID = "draft_id"
     DRAFT_TIME = "draft_time"  # modified, cast to time
     DRAFT_DATE = "draft_date"
+    FORMAT_DAY = "format_day"
     DRAFT_DAY_OF_WEEK = "draft_day_of_week"
     DRAFT_HOUR = "draft_hour"
     DRAFT_WEEK = "draft_week"
+    FORMAT_WEEK = "format_week"
     RANK = "rank"
     USER_N_GAMES_BUCKET = "user_n_games_bucket"
     USER_GAME_WIN_RATE_BUCKET = "user_game_win_rate_bucket"
@@ -143,6 +145,7 @@ class ColName(StrEnum):
     GNS_WR = "gns_wr"
     IWD = "iwd"
     NUM_IN_POOL = "num_in_pool"
+    NUM_IN_POOL_TOTAL = "num_in_pool_total"
     IN_POOL_WR = "in_pool_wr"
     DECK_TOTAL = "deck_total"
     WON_DECK_TOTAL = "won_deck_total"
spells_mtg-0.6.0/spells/extension.py (new file)

@@ -0,0 +1,213 @@
+import math
+
+import polars as pl
+
+from spells.enums import ColType, ColName
+from spells.columns import ColSpec
+from spells.cache import spells_print
+
+
+def print_ext(ext: dict[str, ColSpec]) -> None:
+    spells_print("create", "Created extensions:")
+    for key in ext:
+        print("\t" + key)
+
+
+def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
+    ext = {
+        f"seen_{attr}": ColSpec(
+            col_type=ColType.NAME_SUM,
+            expr=(
+                lambda name, card_context: pl.lit(None)
+                if card_context[name][attr] is None
+                or math.isnan(card_context[name][attr])
+                else pl.when(pl.col(f"pack_card_{name}") > 0)
+                .then(card_context[name][attr])
+                .otherwise(None)
+            ),
+        ),
+        f"pick_{attr}": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=lambda name, card_context: pl.lit(None)
+            if card_context[name][attr] is None or math.isnan(card_context[name][attr])
+            else card_context[name][attr],
+        ),
+        f"seen_{attr}_greater": ColSpec(
+            col_type=ColType.NAME_SUM,
+            expr=lambda name: pl.col(f"seen_{attr}_{name}") > pl.col(f"pick_{attr}"),
+        ),
+        f"seen_{attr}_less": ColSpec(
+            col_type=ColType.NAME_SUM,
+            expr=lambda name: pl.col(f"seen_{attr}_{name}") < pl.col(f"pick_{attr}"),
+        ),
+        f"greatest_{attr}_seen": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=lambda names: pl.max_horizontal(
+                [pl.col(f"seen_{attr}_{name}") for name in names]
+            ),
+        ),
+        f"least_{attr}_seen": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=lambda names: pl.min_horizontal(
+                [pl.col(f"seen_{attr}_{name}") for name in names]
+            ),
+        ),
+        f"pick_{attr}_rank_greatest": ColSpec(
+            col_type=ColType.GROUP_BY,
+            expr=lambda names: pl.sum_horizontal(
+                [pl.col(f"seen_{attr}_greater_{name}") for name in names]
+            )
+            + 1,
+        ),
+        f"pick_{attr}_rank_least": ColSpec(
+            col_type=ColType.GROUP_BY,
+            expr=lambda names: pl.sum_horizontal(
+                [pl.col(f"seen_{attr}_less_{name}") for name in names]
+            )
+            + 1,
+        ),
+        f"pick_{attr}_rank_greatest_sum": ColSpec(
+            col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_greatest")
+        ),
+        f"pick_{attr}_rank_least_sum": ColSpec(
+            col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_least")
+        ),
+        f"pick_{attr}_vs_least": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=pl.col(f"pick_{attr}") - pl.col(f"least_{attr}_seen"),
+        ),
+        f"pick_{attr}_vs_greatest": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=pl.col(f"pick_{attr}") - pl.col(f"greatest_{attr}_seen"),
+        ),
+        f"pick_{attr}_vs_least_mean": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"pick_{attr}_vs_least") / pl.col(ColName.NUM_TAKEN),
+        ),
+        f"pick_{attr}_vs_greatest_mean": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"pick_{attr}_vs_greatest") / pl.col(ColName.NUM_TAKEN),
+        ),
+        f"least_{attr}_taken": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=pl.col(f"pick_{attr}") <= pl.col(f"least_{attr}_seen"),
+        ),
+        f"least_{attr}_taken_rate": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"least_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
+        ),
+        f"greatest_{attr}_taken": ColSpec(
+            col_type=ColType.PICK_SUM,
+            expr=pl.col(f"pick_{attr}") >= pl.col(f"greatest_{attr}_seen"),
+        ),
+        f"greatest_{attr}_taken_rate": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"greatest_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
+        ),
+        f"pick_{attr}_mean": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN),
+        ),
+        f"{attr}_deck_weight_group": ColSpec(
+            col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.DECK)
+        ),
+        f"{attr}_deck_weight_total": ColSpec(
+            col_type=ColType.AGG, expr=pl.col(f"{attr}_deck_weight_group").sum()
+        ),
+        f"{attr}_dw_mean": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_deck_weight_total") / pl.col(ColName.DECK_TOTAL),
+        ),
+        f"{attr}_dw_excess": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_dw_mean") - pl.col(f"{attr}"),
+        ),
+        f"{attr}_dw_var": ColSpec(
+            col_type=ColType.AGG,
+            expr=(pl.col(f"{attr}_dw_excess").pow(2) * pl.col(ColName.DECK))
+            / pl.col(ColName.DECK_TOTAL),
+        ),
+        f"{attr}_dw_stdev": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_dw_var").sqrt(),
+        ),
+        f"{attr}_dwz": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_dw_excess") / pl.col(f"{attr}_dw_stdev"),
+        ),
+        f"{attr}_pool_weight_group": ColSpec(
+            col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.NUM_IN_POOL)
+        ),
+        f"{attr}_pool_weight_total": ColSpec(
+            col_type=ColType.AGG, expr=pl.col(f"{attr}_pool_weight_group").sum()
+        ),
+        f"{attr}_pw_mean": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_pool_weight_total")
+            / pl.col(ColName.NUM_IN_POOL_TOTAL),
+        ),
+        f"{attr}_pw_excess": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_pw_mean") - pl.col(f"{attr}"),
+        ),
+        f"{attr}_pw_var": ColSpec(
+            col_type=ColType.AGG,
+            expr=(pl.col(f"{attr}_pw_excess").pow(2) * pl.col(ColName.NUM_IN_POOL))
+            / pl.col(ColName.NUM_IN_POOL_TOTAL),
+        ),
+        f"{attr}_pw_stdev": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_pw_var").sqrt(),
+        ),
+        f"{attr}_pwz": ColSpec(
+            col_type=ColType.AGG,
+            expr=pl.col(f"{attr}_pw_excess") / pl.col(f"{attr}_pw_stdev"),
+        ),
+    }
+
+    if not silent:
+        print_ext(ext)
+
+    return ext
+
+
+def more(silent=True):
+    wr_bucket = pl.col(ColName.USER_GAME_WIN_RATE_BUCKET)
+    gp_bucket = pl.col(ColName.USER_N_GAMES_BUCKET)
+    ext = {
+        "deq_base": ColSpec(
+            col_type=ColType.AGG,
+            expr=(pl.col("gp_wr_excess") + 0.03 * (1 - pl.col("ata") / 14).pow(2))
+            * pl.col("pct_gp"),
+        ),
+        "cohorts_plus": ColSpec(
+            col_type=ColType.GROUP_BY,
+            expr=pl.when((wr_bucket > 0.65) & (gp_bucket >= 500))
+            .then(pl.lit("1 Best"))
+            .otherwise(
+                pl.when(
+                    (wr_bucket > 0.61) & (gp_bucket >= 500)
+                    | (wr_bucket > 0.65) & (gp_bucket >= 100)
+                )
+                .then(pl.lit("2 Elite"))
+                .otherwise(
+                    pl.when(
+                        (wr_bucket > 0.57) & (gp_bucket >= 100) | (wr_bucket > 0.61)
+                    )
+                    .then(pl.lit("3 Competitive"))
+                    .otherwise(
+                        pl.when(
+                            (wr_bucket > 0.53) & (gp_bucket >= 100) | (wr_bucket > 0.57)
+                        )
+                        .then(pl.lit("4 Solid"))
+                        .otherwise(pl.lit("5 None"))
+                    )
+                )
+            ),
+        ),
+    }
+
+    if not silent:
+        print_ext(ext)
+
+    return ext
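A hedged usage sketch for the new module: `attr_cols` builds the family of seen/pick/rank columns for one card attribute, and the list form of `extensions` added to `summon` in this release lets several extension dicts be combined. The set code "BLB" and the `from spells import summon` import path are placeholders:

```python
import spells.extension as extension
from spells import summon  # assumes summon is exported at the package root

# Build the seen/pick/rank column family for one card attribute.
ext = extension.attr_cols("gih_wr")  # prints the created column names

# The attribute must be available via card_context; here it comes from a
# first summon pass, per the README's card_context feedback pattern.
df = summon(
    "BLB",                                # placeholder set code
    columns=["least_gih_wr_taken_rate"],  # one of the generated AGG columns
    extensions=[ext, extension.more()],   # new list form of `extensions`
    card_context=summon("BLB", columns=["gih_wr"]),
)
```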
{spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/external.py

@@ -102,6 +102,7 @@ def cli() -> int:
 def _add(set_code: str, force_download=False):
     download_data_set(set_code, View.DRAFT, force_download=force_download)
     write_card_file(set_code, force_download=force_download)
+    get_set_context(set_code)
     download_data_set(set_code, View.GAME, force_download=force_download)
     return 0
 
@@ -157,9 +158,9 @@ def _info():
             )
             print(f"    {item.name}    {sizeof_fmt(os.stat(item).st_size)}")
             file_count += 1
-            if file_count <
+            if file_count < 4:
                 suggest_add.add(entry.name)
-            if file_count >
+            if file_count > 4:
                 suggest_remove.add(entry.name)
         else:
             cache.spells_print(
@@ -209,6 +210,9 @@ def _external_set_path(set_code):
 
 
 def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
+    if dataset_type == "set_context":
+        return os.path.join(_external_set_path(set_code), f"{set_code}_context.parquet")
+
     if dataset_type == "card":
         return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
 
@@ -314,3 +318,31 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
 
     cache.spells_print(mode, f"Wrote file {card_filepath}")
     return 0
+
+
+def get_set_context(set_code: str, force_download=False) -> int:
+    mode = "refresh" if force_download else "add"
+
+    context_fp = data_file_path(set_code, "context")
+    cache.spells_print(mode, "Calculating set context")
+    if os.path.isfile(context_fp) and not force_download:
+        cache.spells_print(
+            mode,
+            f"File {context_fp} already exists, use `spells refresh {set_code}` to overwrite",
+        )
+        return 1
+
+    draft_fp = data_file_path(set_code, View.DRAFT)
+    draft_view = pl.scan_parquet(draft_fp)
+
+    context_df = draft_view.select(
+        [
+            pl.max("pick_number").alias("picks_per_pack") + 1,
+            pl.min("draft_time").alias("release_time"),
+        ]
+    ).collect()
+
+    context_df.write_parquet(context_fp)
+
+    cache.spells_print(mode, f"Wrote file {context_fp}")
+    return 0
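The new `get_set_context` above writes a one-row parquet with `picks_per_pack` and `release_time` during `spells add`, and `_get_set_context` in draft_data.py reads it back at summon time. A quick way to inspect that file, assuming the "set_context" branch of `data_file_path` shown above resolves to the written location (the set code is a placeholder):

```python
import polars as pl
from spells.external import data_file_path

# "set_context" resolves to {SET}_context.parquet per data_file_path above.
ctx = pl.read_parquet(data_file_path("BLB", "set_context"))  # placeholder set code
print(ctx.to_dicts()[0])  # expected keys: picks_per_pack, release_time
```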
{spells_mtg-0.5.2 → spells_mtg-0.6.0}/spells/manifest.py

@@ -107,7 +107,9 @@ def _resolve_view_cols(
         View.DRAFT, frozenset()
     ).union({ColName.PICK})
     if cdef.col_type == ColType.CARD_ATTR:
-        view_resolution[View.CARD] = view_resolution.get(
+        view_resolution[View.CARD] = view_resolution.get(
+            View.CARD, frozenset()
+        ).union({col})
     elif cdef.views:
         for view in cdef.views:
             view_resolution[view] = view_resolution.get(
@@ -132,7 +134,9 @@ def _resolve_view_cols(
     else:
         col_views = col_views.intersection(dep_views)
     if fully_resolved:
-        assert len(
+        assert len(
+            col_views
+        ), f"Column {col} can't be defined in any views!"
         for view in col_views:
             if view not in view_resolution:
                 print(cdef)
@@ -162,7 +166,9 @@ def create(
     if columns is None:
         cols = tuple(spells.columns.default_columns)
         if ColName.NAME not in gbs:
-            cols = tuple(
+            cols = tuple(
+                c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR
+            )
     else:
         cols = tuple(columns)
 
spells_mtg-0.5.2/spells/extension.py (deleted)

@@ -1,40 +0,0 @@
-import polars as pl
-
-from spells.enums import ColType
-from spells.columns import ColSpec
-
-def attr_metrics(attr):
-    return {
-        f"seen_{attr}": ColSpec(
-            col_type=ColType.NAME_SUM,
-            expr=(lambda name, card_context: pl.when(pl.col(f"pack_card_{name}") > 0)
-                  .then(card_context[name][attr])
-                  .otherwise(None)),
-        ),
-        f"pick_{attr}": ColSpec(
-            col_type=ColType.PICK_SUM,
-            expr=lambda name, card_context: card_context[name][attr]
-        ),
-        f"least_{attr}_taken": ColSpec(
-            col_type=ColType.PICK_SUM,
-            expr=(lambda names: pl.col(f'pick_{attr}')
-                  <= pl.min_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
-        ),
-        f"least_{attr}_taken_rate": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"least_{attr}_taken") / pl.col("num_taken"),
-        ),
-        f"greatest_{attr}_taken": ColSpec(
-            col_type=ColType.PICK_SUM,
-            expr=(lambda names: pl.col(f'pick_{attr}')
-                  >= pl.max_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
-        ),
-        f"greatest_{attr}_taken_rate": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"greatest_{attr}_taken") / pl.col("num_taken"),
-        ),
-        f"pick_{attr}_mean": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"pick_{attr}") / pl.col("num_taken")
-        )
-    }
The remaining files are unchanged between the two versions: LICENSE, spells/__init__.py, spells/cache.py, spells/cards.py, spells/filter.py, spells/schema.py, tests/__init__.py, and tests/filter_test.py.