spells-mtg 0.5.3__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of spells-mtg might be problematic.
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/PKG-INFO +11 -5
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/README.md +10 -4
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/pyproject.toml +1 -1
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/columns.py +33 -9
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/draft_data.py +120 -32
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/enums.py +2 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/extension.py +54 -55
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/external.py +34 -2
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/manifest.py +9 -3
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/LICENSE +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/__init__.py +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/cache.py +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/cards.py +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/filter.py +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/schema.py +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/tests/__init__.py +0 -0
- {spells_mtg-0.5.3 → spells_mtg-0.6.1}/tests/filter_test.py +0 -0
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spells-mtg
-Version: 0.5.3
+Version: 0.6.1
 Summary: analaysis of 17Lands.com public datasets
 Author-Email: Joel Barnes <oelarnes@gmail.com>
 License: MIT
@@ -74,7 +74,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
-- Provides
+- Provides 124 explicitly specified, enumerated, documented column definitions
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -296,6 +296,7 @@ summon(
     filter_spec: dict | None = None,
     extensions: dict[str, ColSpec] | None = None,
     card_context: pl.DataFrame | dict[str, dict[str, Any] | None = None,
+    set_context: pl.DataFrame | dict[str, Any] | None = None,
     read_cache: bool = True,
     write_cache: bool = True,
 ) -> polars.DataFrame
@@ -319,6 +320,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
 
 - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting cards metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
 
+- set_context: Typically, a dict of abitrary values to use in column definitions, for example, you could provide the quick draft release date and have a column that depended on that.
+
 - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
 
 ### Enums
@@ -353,7 +356,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
 - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
 - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
 - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occured prior to selection.
-- The possible arguments to `expr`, in addition to `name` when appropriate,
+- The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+  - `names`: An array of all card names in the canonical order.
+  - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+  - `set_context`: A dictionary with arbitrary fields provided via the `set_context` argument. Has two built-in attributes, `picks_per_pack` (e.g. 13 or 14), and `release_time`, which is the minimum value of the `draft_time` field.
 
 - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
 for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -371,9 +377,11 @@ A table of all included columns. Columns can be referenced by enum or by string
 | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+| `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
 | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
 | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
 | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+| `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1 - 7, etc. | Int |
 | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
 | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
 | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
@@ -504,5 +512,3 @@ A table of all included columns. Columns can be referenced by enum or by string
 - [ ] Helper functions for common plotting paradigms
 - [ ] Example notebooks
 - [ ] Scientific workflows: regression, MLE, etc
-
-
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/README.md

@@ -63,7 +63,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
-- Provides
+- Provides 124 explicitly specified, enumerated, documented column definitions
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -285,6 +285,7 @@ summon(
     filter_spec: dict | None = None,
     extensions: dict[str, ColSpec] | None = None,
     card_context: pl.DataFrame | dict[str, dict[str, Any] | None = None,
+    set_context: pl.DataFrame | dict[str, Any] | None = None,
     read_cache: bool = True,
     write_cache: bool = True,
 ) -> polars.DataFrame
@@ -308,6 +309,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
 
 - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting cards metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
 
+- set_context: Typically, a dict of abitrary values to use in column definitions, for example, you could provide the quick draft release date and have a column that depended on that.
+
 - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
 
 ### Enums
@@ -342,7 +345,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
 - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
 - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
 - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occured prior to selection.
-- The possible arguments to `expr`, in addition to `name` when appropriate,
+- The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+  - `names`: An array of all card names in the canonical order.
+  - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+  - `set_context`: A dictionary with arbitrary fields provided via the `set_context` argument. Has two built-in attributes, `picks_per_pack` (e.g. 13 or 14), and `release_time`, which is the minimum value of the `draft_time` field.
 
 - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
 for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -360,9 +366,11 @@ A table of all included columns. Columns can be referenced by enum or by string
 | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
 | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+| `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
 | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
 | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
 | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+| `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1 - 7, etc. | Int |
 | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
 | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
 | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
@@ -493,5 +501,3 @@ A table of all included columns. Columns can be referenced by enum or by string
 - [ ] Helper functions for common plotting paradigms
 - [ ] Example notebooks
 - [ ] Scientific workflows: regression, MLE, etc
-
-
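For orientation, a minimal sketch of the new set_context hook documented above (hedged: the import path for summon and its leading positional arguments are assumed from the README excerpt, and "DSK" is an illustrative set code):

```python
# Group by the new built-in format_week column; release_time normally comes
# from the downloaded context file, and set_context= can override or extend it.
from spells.draft_data import summon  # assumed import path

df = summon(
    "DSK",  # illustrative set code
    columns=["num_taken"],
    group_by=["format_week", "name"],
    set_context={"release_time": "2024-09-24 00:00:00"},  # optional override
)
```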
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/columns.py

@@ -5,6 +5,7 @@ import polars as pl
 
 from spells.enums import View, ColName, ColType
 
+
 @dataclass(frozen=True)
 class ColSpec:
     col_type: ColType
@@ -64,9 +65,21 @@ _specs: dict[str, ColSpec] = {
         col_type=ColType.GROUP_BY,
         expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
     ),
+    ColName.FORMAT_DAY: ColSpec(
+        col_type=ColType.GROUP_BY,
+        expr=lambda set_context: (
+            pl.col(ColName.DRAFT_DATE)
+            - pl.lit(set_context["release_time"])
+            .str.to_datetime("%Y-%m-%d %H:%M:%S")
+            .dt.date()
+        ).dt.total_days()
+        + 1,
+    ),
     ColName.DRAFT_DAY_OF_WEEK: ColSpec(
         col_type=ColType.GROUP_BY,
-        expr=pl.col(ColName.DRAFT_TIME)
+        expr=pl.col(ColName.DRAFT_TIME)
+        .str.to_datetime("%Y-%m-%d %H:%M:%S")
+        .dt.weekday(),
     ),
     ColName.DRAFT_HOUR: ColSpec(
         col_type=ColType.GROUP_BY,
@@ -76,6 +89,9 @@ _specs: dict[str, ColSpec] = {
         col_type=ColType.GROUP_BY,
         expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
     ),
+    ColName.FORMAT_WEEK: ColSpec(
+        col_type=ColType.GROUP_BY, expr=(pl.col(ColName.FORMAT_DAY) - 1) // 7 + 1
+    ),
     ColName.RANK: ColSpec(
         col_type=ColType.GROUP_BY,
         views=[View.GAME, View.DRAFT],
@@ -159,13 +175,15 @@ _specs: dict[str, ColSpec] = {
     ),
     ColName.NUM_TAKEN: ColSpec(
         col_type=ColType.PICK_SUM,
-        expr=pl.when(pl.col(ColName.PICK).is_not_null())
-        .then(1)
-        .otherwise(0),
+        expr=pl.when(pl.col(ColName.PICK).is_not_null()).then(1).otherwise(0),
     ),
     ColName.NUM_DRAFTS: ColSpec(
         col_type=ColType.PICK_SUM,
-        expr=pl.when(
+        expr=pl.when(
+            (pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)
+        )
+        .then(1)
+        .otherwise(0),
     ),
     ColName.PICK: ColSpec(
         col_type=ColType.FILTER_ONLY,
@@ -206,7 +224,9 @@ _specs: dict[str, ColSpec] = {
     ),
     ColName.GAME_DAY_OF_WEEK: ColSpec(
         col_type=ColType.GROUP_BY,
-        expr=pl.col(ColName.GAME_TIME)
+        expr=pl.col(ColName.GAME_TIME)
+        .str.to_datetime("%Y-%m-%d %H-%M-%S")
+        .dt.weekday(),
     ),
     ColName.GAME_HOUR: ColSpec(
         col_type=ColType.GROUP_BY,
@@ -381,11 +401,13 @@ _specs: dict[str, ColSpec] = {
     ),
     ColName.DECK_MANA_VALUE: ColSpec(
         col_type=ColType.NAME_SUM,
-        expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE]
+        expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE]
+        * pl.col(f"deck_{name}"),
     ),
     ColName.DECK_LANDS: ColSpec(
         col_type=ColType.NAME_SUM,
-        expr=lambda name, card_context: pl.col(f"deck_{name}")
+        expr=lambda name, card_context: pl.col(f"deck_{name}")
+        * (1 if "Land" in card_context[name][ColName.CARD_TYPE] else 0),
     ),
     ColName.DECK_SPELLS: ColSpec(
         col_type=ColType.NAME_SUM,
@@ -560,11 +582,13 @@ _specs: dict[str, ColSpec] = {
 for item in ColName:
     assert item in _specs, f"column {item} enumerated but not specified"
 
+
 class GetSpecs:
     def __init__(self, spec_dict: dict[str, ColSpec]):
         self._specs = spec_dict
+
     def __call__(self):
         return dict(self._specs)
 
-get_specs = GetSpecs(_specs)
 
+get_specs = GetSpecs(_specs)
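The new FORMAT_DAY/FORMAT_WEEK arithmetic can be checked in isolation; a standalone sketch with plain polars (the column names mirror the dataset, the dates are invented for illustration):

```python
import polars as pl

release_time = "2024-08-01 00:00:00"  # stand-in for set_context["release_time"]

df = pl.DataFrame({"draft_date": ["2024-08-01", "2024-08-09"]}).with_columns(
    pl.col("draft_date").str.to_date()
)

out = df.with_columns(
    format_day=(
        pl.col("draft_date")
        - pl.lit(release_time).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date()
    ).dt.total_days()
    + 1  # release day is day 1
).with_columns(format_week=(pl.col("format_day") - 1) // 7 + 1)
# format_day == [1, 9], format_week == [1, 2]
```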
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/draft_data.py

@@ -10,6 +10,7 @@ import functools
 import hashlib
 import re
 from inspect import signature
+import os
 from typing import Callable, TypeVar, Any
 
 import polars as pl
@@ -53,9 +54,20 @@ def _get_names(set_code: str) -> list[str]:
     return names
 
 
-def _get_card_context(
-
-
+def _get_card_context(
+    set_code: str,
+    specs: dict[str, ColSpec],
+    card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
+    set_context: pl.DataFrame | dict[str, Any] | None,
+) -> dict[str, dict[str, Any]]:
+    card_attr_specs = {
+        col: spec
+        for col, spec in specs.items()
+        if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
+    }
+    col_def_map = _hydrate_col_defs(
+        set_code, card_attr_specs, set_context=set_context, card_only=True
+    )
 
     columns = list(col_def_map.keys())
 
@@ -70,34 +82,50 @@ def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl
     if card_context is not None:
         if isinstance(card_context, pl.DataFrame):
             try:
-                card_context = {
+                card_context = {
+                    row[ColName.NAME]: row for row in card_context.to_dicts()
+                }
             except ColumnNotFoundError:
                 raise ValueError("card_context DataFrame must have column 'name'")
 
     names = list(loaded_context.keys())
     for name in names:
-        assert
+        assert (
+            name in card_context
+        ), f"card_context must include a row for each card name. {name} missing."
        for col, value in card_context[name].items():
             loaded_context[name][col] = value
 
     return loaded_context
-
 
-
+
+def _determine_expression(
+    col: str,
+    spec: ColSpec,
+    names: list[str],
+    card_context: dict[str, dict],
+    set_context: dict[str, Any],
+) -> pl.Expr | tuple[pl.Expr, ...]:
     def seed_params(expr):
         params = {}
 
         sig_params = signature(expr).parameters
-        if
-            params[
-        if
-            params[
+        if "names" in sig_params:
+            params["names"] = names
+        if "card_context" in sig_params:
+            params["card_context"] = card_context
+        if "set_context" in sig_params:
+            params["set_context"] = set_context
         return params
 
     if spec.col_type == ColType.NAME_SUM:
         if spec.expr is not None:
-            assert isinstance(
-
+            assert isinstance(
+                spec.expr, Callable
+            ), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
+            unnamed_exprs = [
+                spec.expr(**{"name": name, **seed_params(spec.expr)}) for name in names
+            ]
 
         expr = tuple(
             map(
@@ -112,13 +140,23 @@ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_contex
     elif spec.expr is not None:
         if isinstance(spec.expr, Callable):
             params = seed_params(spec.expr)
-            if
+            if (
+                spec.col_type == ColType.PICK_SUM
+                and "name" in signature(spec.expr).parameters
+            ):
                 expr = pl.lit(None)
                 for name in names:
-                    name_params = {
-                    expr =
-
-
+                    name_params = {"name": name, **params}
+                    expr = (
+                        pl.when(pl.col(ColName.PICK) == name)
+                        .then(spec.expr(**name_params))
+                        .otherwise(expr)
+                    )
+            elif (
+                spec.col_type == ColType.CARD_ATTR
+                and "name" in signature(spec.expr).parameters
+            ):
+                expr = spec.expr(**{"name": pl.col("name"), **params})
             else:
                 expr = spec.expr(**params)
         else:
@@ -130,7 +168,12 @@ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_contex
     return expr
 
 
-def _infer_dependencies(
+def _infer_dependencies(
+    name: str,
+    expr: pl.Expr | tuple[pl.Expr, ...],
+    specs: dict[str, ColSpec],
+    names: list[str],
+) -> set[str]:
     dependencies = set()
     tricky_ones = set()
 
@@ -139,7 +182,7 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
         for dep_col in dep_cols:
             if dep_col in specs.keys():
                 dependencies.add(dep_col)
-            else:
+            else:
                 tricky_ones.add(dep_col)
     else:
         for idx, exp in enumerate(expr):
@@ -148,7 +191,9 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
             for dep_col in dep_cols:
                 if dep_col in specs.keys():
                     dependencies.add(dep_col)
-                elif
+                elif (
+                    len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs
+                ):
                     dependencies.add(split[0])
                 else:
                     tricky_ones.add(dep_col)
@@ -157,38 +202,80 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
         found = False
         for n in names:
             pattern = f"_{n}$"
-            if
+            if (
+                not found
+                and len(split := re.split(pattern, item)) == 2
+                and split[0] in specs
+            ):
                 dependencies.add(split[0])
                 found = True
-        assert found, f"Could not locate column spec for root col {item}"
+        assert found, f"Could not locate column spec for root col {item}"
 
     return dependencies
 
 
-def
+def _get_set_context(
+    set_code: str, set_context: pl.DataFrame | dict[str, Any] | None
+) -> dict[str, Any]:
+    context_fp = data_file_path(set_code, "context")
+
+    report = functools.partial(
+        spells.cache.spells_print,
+        "report",
+        f"Set context for {set_code} invalid, please investigate!",
+    )
+
+    context = {}
+    if not os.path.isfile(context_fp):
+        report()
+    else:
+        context_df = pl.read_parquet(context_fp)
+        if len(context_df) == 1:
+            context.update(context_df.to_dicts()[0])
+        else:
+            report()
+
+    if isinstance(set_context, pl.DataFrame):
+        assert len(set_context != 1), "Invalid set context provided"
+        context.update(set_context.to_dicts()[0])
+    elif isinstance(set_context, dict):
+        context.update(set_context)
+
+    return context
+
+
+def _hydrate_col_defs(
+    set_code: str,
+    specs: dict[str, ColSpec],
+    card_context: pl.DataFrame | dict[str, dict] | None = None,
+    set_context: pl.DataFrame | dict[str, Any] | None = None,
+    card_only: bool = False,
+):
     names = _get_names(set_code)
 
+    set_context = _get_set_context(set_code, set_context)
+
     if card_only:
         card_context = {}
     else:
-        card_context = _get_card_context(set_code, specs, card_context)
+        card_context = _get_card_context(set_code, specs, card_context, set_context)
 
     assert len(names) > 0, "there should be names"
     hydrated = {}
     for col, spec in specs.items():
-        expr = _determine_expression(col, spec, names, card_context)
+        expr = _determine_expression(col, spec, names, card_context, set_context)
         dependencies = _infer_dependencies(col, expr, specs, names)
 
         sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
         try:
-            expr_sig = sig_expr.meta.serialize(
-                format="json"
-            )
+            expr_sig = sig_expr.meta.serialize(format="json")
         except pl.exceptions.ComputeError:
             if spec.version is not None:
                 expr_sig = col + spec.version
             else:
-                print(
+                print(
+                    f"Using session-only signature for non-serializable column {col}, please provide a version value"
+                )
                 expr_sig = str(sig_expr)
 
         signature = str(
@@ -359,7 +446,8 @@ def summon(
     use_streaming: bool = False,
     read_cache: bool = True,
     write_cache: bool = True,
-    card_context: pl.DataFrame | dict[str, dict] | None = None
+    card_context: pl.DataFrame | dict[str, dict] | None = None,
+    set_context: pl.DataFrame | dict[str, Any] | None = None,
 ) -> pl.DataFrame:
     specs = get_specs()
 
@@ -369,7 +457,7 @@ def summon(
     for ext in extensions:
         specs.update(ext)
 
-    col_def_map = _hydrate_col_defs(set_code, specs, card_context)
+    col_def_map = _hydrate_col_defs(set_code, specs, card_context, set_context)
     m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
 
     calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
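The reworked seed_params helper is a small dependency-injection pattern: inspect the column expression's signature and pass only the context objects it declares. A self-contained sketch of the same idea (the names and values here are illustrative, not the package's internals):

```python
from inspect import signature

def seed_params(expr, *, names, card_context, set_context):
    # Offer every context object, but pass only those the callable declares.
    available = {"names": names, "card_context": card_context, "set_context": set_context}
    return {k: v for k, v in available.items() if k in signature(expr).parameters}

col_expr = lambda name, set_context: f"{name}@{set_context['release_time']}"
params = seed_params(
    col_expr, names=["Opt"], card_context={}, set_context={"release_time": "2024-08-01"}
)
print(col_expr(**{"name": "Opt", **params}))  # Opt@2024-08-01
```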
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/enums.py

@@ -38,9 +38,11 @@ class ColName(StrEnum):
     DRAFT_ID = "draft_id"
     DRAFT_TIME = "draft_time"  # modified, cast to time
     DRAFT_DATE = "draft_date"
+    FORMAT_DAY = "format_day"
     DRAFT_DAY_OF_WEEK = "draft_day_of_week"
     DRAFT_HOUR = "draft_hour"
     DRAFT_WEEK = "draft_week"
+    FORMAT_WEEK = "format_week"
     RANK = "rank"
     USER_N_GAMES_BUCKET = "user_n_games_bucket"
     USER_GAME_WIN_RATE_BUCKET = "user_game_win_rate_bucket"
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/extension.py

@@ -1,4 +1,4 @@
-import math
+import math
 
 import polars as pl
 
@@ -6,23 +6,31 @@ from spells.enums import ColType, ColName
 from spells.columns import ColSpec
 from spells.cache import spells_print
 
+
 def print_ext(ext: dict[str, ColSpec]) -> None:
     spells_print("create", "Created extensions:")
     for key in ext:
         print("\t" + key)
 
 
-def
+def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
     ext = {
         f"seen_{attr}": ColSpec(
             col_type=ColType.NAME_SUM,
-            expr=(
+            expr=(
+                lambda name, card_context: pl.lit(None)
+                if card_context[name][attr] is None
+                or math.isnan(card_context[name][attr])
+                else pl.when(pl.col(f"pack_card_{name}") > 0)
                 .then(card_context[name][attr])
-                .otherwise(None)
+                .otherwise(None)
+            ),
         ),
         f"pick_{attr}": ColSpec(
             col_type=ColType.PICK_SUM,
-            expr=lambda name, card_context: pl.lit(None)
+            expr=lambda name, card_context: pl.lit(None)
+            if card_context[name][attr] is None or math.isnan(card_context[name][attr])
+            else card_context[name][attr],
         ),
         f"seen_{attr}_greater": ColSpec(
             col_type=ColType.NAME_SUM,
@@ -34,27 +42,35 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
         ),
         f"greatest_{attr}_seen": ColSpec(
             col_type=ColType.PICK_SUM,
-            expr=lambda names: pl.max_horizontal(
+            expr=lambda names: pl.max_horizontal(
+                [pl.col(f"seen_{attr}_{name}") for name in names]
+            ),
         ),
         f"least_{attr}_seen": ColSpec(
             col_type=ColType.PICK_SUM,
-            expr=lambda names: pl.min_horizontal(
+            expr=lambda names: pl.min_horizontal(
+                [pl.col(f"seen_{attr}_{name}") for name in names]
+            ),
         ),
         f"pick_{attr}_rank_greatest": ColSpec(
             col_type=ColType.GROUP_BY,
-            expr=lambda names: pl.sum_horizontal(
+            expr=lambda names: pl.sum_horizontal(
+                [pl.col(f"seen_{attr}_greater_{name}") for name in names]
+            )
+            + 1,
         ),
         f"pick_{attr}_rank_least": ColSpec(
             col_type=ColType.GROUP_BY,
-            expr=lambda names: pl.sum_horizontal(
+            expr=lambda names: pl.sum_horizontal(
+                [pl.col(f"seen_{attr}_less_{name}") for name in names]
+            )
+            + 1,
         ),
         f"pick_{attr}_rank_greatest_sum": ColSpec(
-            col_type=ColType.PICK_SUM,
-            expr=pl.col(f"pick_{attr}_rank_greatest")
+            col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_greatest")
         ),
         f"pick_{attr}_rank_least_sum": ColSpec(
-            col_type=ColType.PICK_SUM,
-            expr=pl.col(f"pick_{attr}_rank_least")
+            col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_least")
         ),
         f"pick_{attr}_vs_least": ColSpec(
             col_type=ColType.PICK_SUM,
@@ -74,31 +90,39 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
         ),
         f"least_{attr}_taken": ColSpec(
             col_type=ColType.PICK_SUM,
-            expr=pl.col(f
+            expr=pl.col(f"pick_{attr}") <= pl.col(f"least_{attr}_seen"),
         ),
         f"least_{attr}_taken_rate": ColSpec(
             col_type=ColType.AGG,
-            expr=pl.col(f"least_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
+            expr=pl.col(f"least_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
         ),
         f"greatest_{attr}_taken": ColSpec(
             col_type=ColType.PICK_SUM,
-            expr=pl.col(f
+            expr=pl.col(f"pick_{attr}") >= pl.col(f"greatest_{attr}_seen"),
         ),
         f"greatest_{attr}_taken_rate": ColSpec(
             col_type=ColType.AGG,
-            expr=pl.col(f"greatest_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
+            expr=pl.col(f"greatest_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
         ),
         f"pick_{attr}_mean": ColSpec(
             col_type=ColType.AGG,
-            expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN)
+            expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN),
         ),
+    }
+
+    if not silent:
+        print_ext(ext)
+
+    return ext
+
+
+def stat_cols(attr: str, silent: bool = False) -> dict[str, ColSpec]:
+    ext = {
         f"{attr}_deck_weight_group": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"{attr}") * pl.col(ColName.DECK)
+            col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.DECK)
         ),
         f"{attr}_deck_weight_total": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"{attr}_deck_weight_group").sum()
+            col_type=ColType.AGG, expr=pl.col(f"{attr}_deck_weight_group").sum()
         ),
         f"{attr}_dw_mean": ColSpec(
             col_type=ColType.AGG,
@@ -110,7 +134,8 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
         ),
         f"{attr}_dw_var": ColSpec(
             col_type=ColType.AGG,
-            expr=(pl.col(f"{attr}_dw_excess").pow(2) * pl.col(ColName.DECK))
+            expr=(pl.col(f"{attr}_dw_excess").pow(2) * pl.col(ColName.DECK))
+            / pl.col(ColName.DECK_TOTAL),
         ),
         f"{attr}_dw_stdev": ColSpec(
             col_type=ColType.AGG,
@@ -121,16 +146,15 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
             expr=pl.col(f"{attr}_dw_excess") / pl.col(f"{attr}_dw_stdev"),
         ),
         f"{attr}_pool_weight_group": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"{attr}") * pl.col(ColName.NUM_IN_POOL)
+            col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.NUM_IN_POOL)
         ),
         f"{attr}_pool_weight_total": ColSpec(
-            col_type=ColType.AGG,
-            expr=pl.col(f"{attr}_pool_weight_group").sum()
+            col_type=ColType.AGG, expr=pl.col(f"{attr}_pool_weight_group").sum()
        ),
         f"{attr}_pw_mean": ColSpec(
             col_type=ColType.AGG,
-            expr=pl.col(f"{attr}_pool_weight_total")
+            expr=pl.col(f"{attr}_pool_weight_total")
+            / pl.col(ColName.NUM_IN_POOL_TOTAL),
         ),
         f"{attr}_pw_excess": ColSpec(
             col_type=ColType.AGG,
@@ -138,7 +162,8 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
         ),
         f"{attr}_pw_var": ColSpec(
             col_type=ColType.AGG,
-            expr=(pl.col(f"{attr}_pw_excess").pow(2) * pl.col(ColName.NUM_IN_POOL))
+            expr=(pl.col(f"{attr}_pw_excess").pow(2) * pl.col(ColName.NUM_IN_POOL))
+            / pl.col(ColName.NUM_IN_POOL_TOTAL),
         ),
         f"{attr}_pw_stdev": ColSpec(
             col_type=ColType.AGG,
@@ -154,29 +179,3 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
     print_ext(ext)
 
     return ext
-
-def more(silent=True):
-    wr_bucket = pl.col(ColName.USER_GAME_WIN_RATE_BUCKET)
-    gp_bucket = pl.col(ColName.USER_N_GAMES_BUCKET)
-    ext = {
-        'deq_base': ColSpec(
-            col_type=ColType.AGG,
-            expr=(pl.col("gp_wr_excess") + 0.03 * (1 - pl.col("ata") / 14).pow(2)) * pl.col("pct_gp")
-        ),
-        'cohorts_plus': ColSpec(
-            col_type=ColType.GROUP_BY,
-            expr=pl.when((wr_bucket > 0.65) & (gp_bucket >= 500)).then(pl.lit('1 Best')).otherwise(
-                pl.when((wr_bucket > 0.61) & (gp_bucket >= 500) | (wr_bucket > 0.65) & (gp_bucket >= 100)).then(pl.lit('2 Elite')).otherwise(
-                    pl.when((wr_bucket > 0.57) & (gp_bucket >= 100) | (wr_bucket > 0.61)).then(pl.lit('3 Competitive')).otherwise(
-                        pl.when((wr_bucket > 0.53) & (gp_bucket >= 100) | (wr_bucket > 0.57)).then(pl.lit('4 Solid')).otherwise(pl.lit('5 None'))
-                    )
-                )
-            )
-        )
-    }
-
-    if not silent:
-        print_ext(ext)
-
-    return ext
-
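A hedged usage sketch for the renamed generator (context_cols per the diff; note the README types extensions as a dict while the diffed summon body iterates over extensions and update()s each entry, so a list of spec dicts is shown here — verify against the released API):

```python
from spells.extension import context_cols
from spells.draft_data import summon  # assumed import path

mv_cols = context_cols("mana_value", silent=True)  # seen_/pick_/rank columns per the diff

df = summon(
    "DSK",  # illustrative set code
    columns=["greatest_mana_value_taken_rate"],
    group_by=["name"],
    extensions=[mv_cols],
)
```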
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/external.py

@@ -102,6 +102,7 @@ def cli() -> int:
 def _add(set_code: str, force_download=False):
     download_data_set(set_code, View.DRAFT, force_download=force_download)
     write_card_file(set_code, force_download=force_download)
+    get_set_context(set_code)
     download_data_set(set_code, View.GAME, force_download=force_download)
     return 0
 
@@ -157,9 +158,9 @@ def _info():
            )
            print(f" {item.name} {sizeof_fmt(os.stat(item).st_size)}")
            file_count += 1
-        if file_count <
+        if file_count < 4:
            suggest_add.add(entry.name)
-        if file_count >
+        if file_count > 4:
            suggest_remove.add(entry.name)
     else:
        cache.spells_print(
@@ -209,6 +210,9 @@ def _external_set_path(set_code):
 
 
 def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
+    if dataset_type == "set_context":
+        return os.path.join(_external_set_path(set_code), f"{set_code}_context.parquet")
+
     if dataset_type == "card":
         return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
 
@@ -314,3 +318,31 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
 
     cache.spells_print(mode, f"Wrote file {card_filepath}")
     return 0
+
+
+def get_set_context(set_code: str, force_download=False) -> int:
+    mode = "refresh" if force_download else "add"
+
+    context_fp = data_file_path(set_code, "context")
+    cache.spells_print(mode, "Calculating set context")
+    if os.path.isfile(context_fp) and not force_download:
+        cache.spells_print(
+            mode,
+            f"File {context_fp} already exists, use `spells refresh {set_code}` to overwrite",
+        )
+        return 1
+
+    draft_fp = data_file_path(set_code, View.DRAFT)
+    draft_view = pl.scan_parquet(draft_fp)
+
+    context_df = draft_view.select(
+        [
+            pl.max("pick_number").alias("picks_per_pack") + 1,
+            pl.min("draft_time").alias("release_time"),
+        ]
+    ).collect()
+
+    context_df.write_parquet(context_fp)
+
+    cache.spells_print(mode, f"Wrote file {context_fp}")
+    return 0
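get_set_context derives both built-in context values with one lazy aggregation; the same select on a stand-in frame (data invented for illustration):

```python
import polars as pl

draft_view = pl.DataFrame(
    {
        "pick_number": [0, 1, 13, 5],  # 0-indexed, hence the + 1 below
        "draft_time": [
            "2024-08-02 10:00:00",
            "2024-08-01 09:30:00",
            "2024-08-03 22:15:00",
            "2024-08-01 12:00:00",
        ],
    }
).lazy()

context_df = draft_view.select(
    [
        pl.max("pick_number").alias("picks_per_pack") + 1,
        pl.min("draft_time").alias("release_time"),  # lexicographic min works for ISO timestamps
    ]
).collect()
# picks_per_pack == 14, release_time == "2024-08-01 09:30:00"
```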
{spells_mtg-0.5.3 → spells_mtg-0.6.1}/spells/manifest.py

@@ -107,7 +107,9 @@ def _resolve_view_cols(
                 View.DRAFT, frozenset()
             ).union({ColName.PICK})
         if cdef.col_type == ColType.CARD_ATTR:
-            view_resolution[View.CARD] = view_resolution.get(
+            view_resolution[View.CARD] = view_resolution.get(
+                View.CARD, frozenset()
+            ).union({col})
         elif cdef.views:
             for view in cdef.views:
                 view_resolution[view] = view_resolution.get(
@@ -132,7 +134,9 @@ def _resolve_view_cols(
         else:
             col_views = col_views.intersection(dep_views)
     if fully_resolved:
-        assert len(
+        assert len(
+            col_views
+        ), f"Column {col} can't be defined in any views!"
         for view in col_views:
             if view not in view_resolution:
                 print(cdef)
@@ -162,7 +166,9 @@ def create(
     if columns is None:
         cols = tuple(spells.columns.default_columns)
         if ColName.NAME not in gbs:
-            cols = tuple(
+            cols = tuple(
+                c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR
+            )
     else:
         cols = tuple(columns)
 