spells-mtg 0.5.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spells-mtg might be problematic. Click here for more details.
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/PKG-INFO +21 -24
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/README.md +20 -23
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/pyproject.toml +1 -1
- spells_mtg-0.5.1/spells/__init__.py +5 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/columns.py +128 -256
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/draft_data.py +34 -31
- spells_mtg-0.5.1/spells/extension.py +40 -0
- spells_mtg-0.5.0/spells/__init__.py +0 -5
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/LICENSE +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/cache.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/cards.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/enums.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/external.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/filter.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/manifest.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/spells/schema.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/tests/__init__.py +0 -0
- {spells_mtg-0.5.0 → spells_mtg-0.5.1}/tests/filter_test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spells-mtg
|
|
3
|
-
Version: 0.5.
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: analysis of 17Lands.com public datasets
|
|
5
5
|
Author-Email: Joel Barnes <oelarnes@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -134,7 +134,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
134
134
|
```
|
|
135
135
|
- `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
|
|
136
136
|
```python
|
|
137
|
-
>>> from spells
|
|
137
|
+
>>> from spells import ColName
|
|
138
138
|
>>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
|
|
139
139
|
shape: (4, 2)
|
|
140
140
|
┌───────────────┬──────────┐
|
|
@@ -151,14 +151,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
151
151
|
- `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
|
|
152
152
|
```python
|
|
153
153
|
>>> import polars as pl
|
|
154
|
-
>>> from spells
|
|
155
|
-
>>>
|
|
156
|
-
|
|
157
|
-
...
|
|
158
|
-
...
|
|
159
|
-
...
|
|
160
|
-
...
|
|
161
|
-
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=
|
|
154
|
+
>>> from spells import ColSpec, ColType
|
|
155
|
+
>>> ext = {
|
|
156
|
+
... 'deq_base': ColSpec(
|
|
157
|
+
... col_type=ColType.AGG,
|
|
158
|
+
... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
|
|
159
|
+
... )
|
|
160
|
+
... }
|
|
161
|
+
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
162
162
|
... .filter(pl.col('deq_base').is_finite())
|
|
163
163
|
... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
|
|
164
164
|
... .sort('deq_base', descending=True)
|
|
@@ -184,18 +184,16 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
184
184
|
- `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
|
|
185
185
|
```python
|
|
186
186
|
>>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
187
|
-
>>> ext =
|
|
188
|
-
... ColSpec(
|
|
189
|
-
... name='picked_deq_base',
|
|
187
|
+
>>> ext = {
|
|
188
|
+
... 'picked_deq_base': ColSpec(
|
|
190
189
|
... col_type=ColType.PICK_SUM,
|
|
191
190
|
... expr=lambda name, card_context: card_context[name]['deq_base']
|
|
192
191
|
... ),
|
|
193
|
-
... ColSpec(
|
|
194
|
-
... name='picked_deq_base_avg',
|
|
192
|
+
... 'picked_deq_base_avg': ColSpec(
|
|
195
193
|
... col_type=ColType.AGG,
|
|
196
194
|
... expr=pl.col('picked_deq_base') / pl.col('num_taken')
|
|
197
195
|
... ),
|
|
198
|
-
...
|
|
196
|
+
... }
|
|
199
197
|
>>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
|
|
200
198
|
shape: (4, 2)
|
|
201
199
|
┌───────────────┬─────────────────────┐
|
|
@@ -250,7 +248,7 @@ Spells caches the results of expensive aggregations in the local file system as
|
|
|
250
248
|
|
|
251
249
|
### Memory Usage
|
|
252
250
|
|
|
253
|
-
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor
|
|
251
|
+
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
|
|
254
252
|
|
|
255
253
|
When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
|
|
256
254
|
|
|
@@ -316,14 +314,16 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
|
|
|
316
314
|
- `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
|
|
317
315
|
- `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
|
|
318
316
|
|
|
319
|
-
- extensions: a
|
|
317
|
+
- extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the definitions of the built-in columns described below.
|
|
318
|
+
|
|
319
|
+
- card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
|
|
320
320
|
|
|
321
321
|
- read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
|
|
322
322
|
|
|
323
323
|
### Enums
|
|
324
324
|
|
|
325
325
|
```python
|
|
326
|
-
from spells
|
|
326
|
+
from spells import ColName, ColType
|
|
327
327
|
```
|
|
328
328
|
|
|
329
329
|
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
@@ -331,10 +331,9 @@ Recommended to import `ColName` for any usage of `summon`, and to import `ColTyp
|
|
|
331
331
|
### ColSpec
|
|
332
332
|
|
|
333
333
|
```python
|
|
334
|
-
from spells
|
|
334
|
+
from spells import ColSpec
|
|
335
335
|
|
|
336
336
|
ColSpec(
|
|
337
|
-
name: str,
|
|
338
337
|
col_type: ColType,
|
|
339
338
|
expr: pl.Expr | Callable[..., pl.Expr] | None = None,
|
|
340
339
|
version: str | None = None
|
|
@@ -345,8 +344,6 @@ Used to define extensions in `summon`
|
|
|
345
344
|
|
|
346
345
|
#### parameters
|
|
347
346
|
|
|
348
|
-
- `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
|
|
349
|
-
|
|
350
347
|
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
|
|
351
348
|
and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
|
|
352
349
|
summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
|
|
@@ -355,7 +352,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
|
|
|
355
352
|
- For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
|
|
356
353
|
- `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
|
|
357
354
|
- `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
|
|
358
|
-
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions
|
|
355
|
+
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
|
|
359
356
|
|
|
360
357
|
- `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
|
|
361
358
|
for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
|
|
@@ -123,7 +123,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
123
123
|
```
|
|
124
124
|
- `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
|
|
125
125
|
```python
|
|
126
|
-
>>> from spells
|
|
126
|
+
>>> from spells import ColName
|
|
127
127
|
>>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
|
|
128
128
|
shape: (4, 2)
|
|
129
129
|
┌───────────────┬──────────┐
|
|
@@ -140,14 +140,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
140
140
|
- `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
|
|
141
141
|
```python
|
|
142
142
|
>>> import polars as pl
|
|
143
|
-
>>> from spells
|
|
144
|
-
>>>
|
|
145
|
-
|
|
146
|
-
...
|
|
147
|
-
...
|
|
148
|
-
...
|
|
149
|
-
...
|
|
150
|
-
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=
|
|
143
|
+
>>> from spells import ColSpec, ColType
|
|
144
|
+
>>> ext = {
|
|
145
|
+
... 'deq_base': ColSpec(
|
|
146
|
+
... col_type=ColType.AGG,
|
|
147
|
+
... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
|
|
148
|
+
... )
|
|
149
|
+
... }
|
|
150
|
+
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
151
151
|
... .filter(pl.col('deq_base').is_finite())
|
|
152
152
|
... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
|
|
153
153
|
... .sort('deq_base', descending=True)
|
|
@@ -173,18 +173,16 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
173
173
|
- `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
|
|
174
174
|
```python
|
|
175
175
|
>>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
176
|
-
>>> ext =
|
|
177
|
-
... ColSpec(
|
|
178
|
-
... name='picked_deq_base',
|
|
176
|
+
>>> ext = {
|
|
177
|
+
... 'picked_deq_base': ColSpec(
|
|
179
178
|
... col_type=ColType.PICK_SUM,
|
|
180
179
|
... expr=lambda name, card_context: card_context[name]['deq_base']
|
|
181
180
|
... ),
|
|
182
|
-
... ColSpec(
|
|
183
|
-
... name='picked_deq_base_avg',
|
|
181
|
+
... 'picked_deq_base_avg': ColSpec(
|
|
184
182
|
... col_type=ColType.AGG,
|
|
185
183
|
... expr=pl.col('picked_deq_base') / pl.col('num_taken')
|
|
186
184
|
... ),
|
|
187
|
-
...
|
|
185
|
+
... }
|
|
188
186
|
>>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
|
|
189
187
|
shape: (4, 2)
|
|
190
188
|
┌───────────────┬─────────────────────┐
|
|
@@ -239,7 +237,7 @@ Spells caches the results of expensive aggregations in the local file system as
|
|
|
239
237
|
|
|
240
238
|
### Memory Usage
|
|
241
239
|
|
|
242
|
-
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor
|
|
240
|
+
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
|
|
243
241
|
|
|
244
242
|
When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
|
|
245
243
|
|
|
@@ -305,14 +303,16 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
|
|
|
305
303
|
- `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
|
|
306
304
|
- `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
|
|
307
305
|
|
|
308
|
-
- extensions: a
|
|
306
|
+
- extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the definitions of the built-in columns described below.
|
|
307
|
+
|
|
308
|
+
- card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
|
|
309
309
|
|
|
310
310
|
- read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
|
|
311
311
|
|
|
312
312
|
### Enums
|
|
313
313
|
|
|
314
314
|
```python
|
|
315
|
-
from spells
|
|
315
|
+
from spells import ColName, ColType
|
|
316
316
|
```
|
|
317
317
|
|
|
318
318
|
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
@@ -320,10 +320,9 @@ Recommended to import `ColName` for any usage of `summon`, and to import `ColTyp
|
|
|
320
320
|
### ColSpec
|
|
321
321
|
|
|
322
322
|
```python
|
|
323
|
-
from spells
|
|
323
|
+
from spells import ColSpec
|
|
324
324
|
|
|
325
325
|
ColSpec(
|
|
326
|
-
name: str,
|
|
327
326
|
col_type: ColType,
|
|
328
327
|
expr: pl.Expr | Callable[..., pl.Expr] | None = None,
|
|
329
328
|
version: str | None = None
|
|
@@ -334,8 +333,6 @@ Used to define extensions in `summon`
|
|
|
334
333
|
|
|
335
334
|
#### parameters
|
|
336
335
|
|
|
337
|
-
- `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
|
|
338
|
-
|
|
339
336
|
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
|
|
340
337
|
and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
|
|
341
338
|
summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
|
|
@@ -344,7 +341,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
|
|
|
344
341
|
- For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
|
|
345
342
|
- `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
|
|
346
343
|
- `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
|
|
347
|
-
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions
|
|
344
|
+
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
|
|
348
345
|
|
|
349
346
|
- `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
|
|
350
347
|
for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
|
|
@@ -8,7 +8,6 @@ from spells.enums import View, ColName, ColType
|
|
|
8
8
|
|
|
9
9
|
@dataclass(frozen=True)
|
|
10
10
|
class ColSpec:
|
|
11
|
-
name: str
|
|
12
11
|
col_type: ColType
|
|
13
12
|
expr: pl.Expr | Callable[..., pl.Expr] | None = None
|
|
14
13
|
views: list[View] | None = None
|
|
@@ -41,69 +40,56 @@ default_columns = [
|
|
|
41
40
|
ColName.GIH_WR,
|
|
42
41
|
]
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
ColSpec(
|
|
46
|
-
name=ColName.NAME,
|
|
43
|
+
specs: dict[str, ColSpec] = {
|
|
44
|
+
ColName.NAME: ColSpec(
|
|
47
45
|
col_type=ColType.GROUP_BY,
|
|
48
46
|
views=[View.CARD],
|
|
49
47
|
),
|
|
50
|
-
ColSpec(
|
|
51
|
-
name=ColName.EXPANSION,
|
|
48
|
+
ColName.EXPANSION: ColSpec(
|
|
52
49
|
col_type=ColType.GROUP_BY,
|
|
53
50
|
views=[View.GAME, View.DRAFT],
|
|
54
51
|
),
|
|
55
|
-
ColSpec(
|
|
56
|
-
name=ColName.EVENT_TYPE,
|
|
52
|
+
ColName.EVENT_TYPE: ColSpec(
|
|
57
53
|
col_type=ColType.GROUP_BY,
|
|
58
54
|
views=[View.GAME, View.DRAFT],
|
|
59
55
|
),
|
|
60
|
-
ColSpec(
|
|
61
|
-
name=ColName.DRAFT_ID,
|
|
56
|
+
ColName.DRAFT_ID: ColSpec(
|
|
62
57
|
views=[View.GAME, View.DRAFT],
|
|
63
58
|
col_type=ColType.FILTER_ONLY,
|
|
64
59
|
),
|
|
65
|
-
ColSpec(
|
|
66
|
-
name=ColName.DRAFT_TIME,
|
|
60
|
+
ColName.DRAFT_TIME: ColSpec(
|
|
67
61
|
col_type=ColType.FILTER_ONLY,
|
|
68
62
|
views=[View.GAME, View.DRAFT],
|
|
69
63
|
),
|
|
70
|
-
ColSpec(
|
|
71
|
-
name=ColName.DRAFT_DATE,
|
|
64
|
+
ColName.DRAFT_DATE: ColSpec(
|
|
72
65
|
col_type=ColType.GROUP_BY,
|
|
73
66
|
expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
|
|
74
67
|
),
|
|
75
|
-
ColSpec(
|
|
76
|
-
name=ColName.DRAFT_DAY_OF_WEEK,
|
|
68
|
+
ColName.DRAFT_DAY_OF_WEEK: ColSpec(
|
|
77
69
|
col_type=ColType.GROUP_BY,
|
|
78
70
|
expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
|
|
79
71
|
),
|
|
80
|
-
ColSpec(
|
|
81
|
-
name=ColName.DRAFT_HOUR,
|
|
72
|
+
ColName.DRAFT_HOUR: ColSpec(
|
|
82
73
|
col_type=ColType.GROUP_BY,
|
|
83
74
|
expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.hour(),
|
|
84
75
|
),
|
|
85
|
-
ColSpec(
|
|
86
|
-
name=ColName.DRAFT_WEEK,
|
|
76
|
+
ColName.DRAFT_WEEK: ColSpec(
|
|
87
77
|
col_type=ColType.GROUP_BY,
|
|
88
78
|
expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
|
|
89
79
|
),
|
|
90
|
-
ColSpec(
|
|
91
|
-
name=ColName.RANK,
|
|
80
|
+
ColName.RANK: ColSpec(
|
|
92
81
|
col_type=ColType.GROUP_BY,
|
|
93
82
|
views=[View.GAME, View.DRAFT],
|
|
94
83
|
),
|
|
95
|
-
ColSpec(
|
|
96
|
-
name=ColName.USER_N_GAMES_BUCKET,
|
|
84
|
+
ColName.USER_N_GAMES_BUCKET: ColSpec(
|
|
97
85
|
col_type=ColType.GROUP_BY,
|
|
98
86
|
views=[View.DRAFT, View.GAME],
|
|
99
87
|
),
|
|
100
|
-
ColSpec(
|
|
101
|
-
name=ColName.USER_GAME_WIN_RATE_BUCKET,
|
|
88
|
+
ColName.USER_GAME_WIN_RATE_BUCKET: ColSpec(
|
|
102
89
|
col_type=ColType.GROUP_BY,
|
|
103
90
|
views=[View.DRAFT, View.GAME],
|
|
104
91
|
),
|
|
105
|
-
ColSpec(
|
|
106
|
-
name=ColName.PLAYER_COHORT,
|
|
92
|
+
ColName.PLAYER_COHORT: ColSpec(
|
|
107
93
|
col_type=ColType.GROUP_BY,
|
|
108
94
|
expr=pl.when(pl.col(ColName.USER_N_GAMES_BUCKET) < 100)
|
|
109
95
|
.then(pl.lit("Other"))
|
|
@@ -117,309 +103,249 @@ _column_specs = [
|
|
|
117
103
|
)
|
|
118
104
|
),
|
|
119
105
|
),
|
|
120
|
-
ColSpec(
|
|
121
|
-
name=ColName.EVENT_MATCH_WINS,
|
|
106
|
+
ColName.EVENT_MATCH_WINS: ColSpec(
|
|
122
107
|
col_type=ColType.GROUP_BY,
|
|
123
108
|
views=[View.DRAFT],
|
|
124
109
|
),
|
|
125
|
-
ColSpec(
|
|
126
|
-
name=ColName.EVENT_MATCH_WINS_SUM,
|
|
110
|
+
ColName.EVENT_MATCH_WINS_SUM: ColSpec(
|
|
127
111
|
col_type=ColType.PICK_SUM,
|
|
128
112
|
views=[View.DRAFT],
|
|
129
113
|
expr=pl.col(ColName.EVENT_MATCH_WINS),
|
|
130
114
|
),
|
|
131
|
-
ColSpec(
|
|
132
|
-
name=ColName.EVENT_MATCH_LOSSES,
|
|
115
|
+
ColName.EVENT_MATCH_LOSSES: ColSpec(
|
|
133
116
|
col_type=ColType.GROUP_BY,
|
|
134
117
|
views=[View.DRAFT],
|
|
135
118
|
),
|
|
136
|
-
ColSpec(
|
|
137
|
-
name=ColName.EVENT_MATCH_LOSSES_SUM,
|
|
119
|
+
ColName.EVENT_MATCH_LOSSES_SUM: ColSpec(
|
|
138
120
|
col_type=ColType.PICK_SUM,
|
|
139
121
|
expr=pl.col(ColName.EVENT_MATCH_LOSSES),
|
|
140
122
|
),
|
|
141
|
-
ColSpec(
|
|
142
|
-
name=ColName.EVENT_MATCHES,
|
|
123
|
+
ColName.EVENT_MATCHES: ColSpec(
|
|
143
124
|
col_type=ColType.GROUP_BY,
|
|
144
125
|
expr=pl.col(ColName.EVENT_MATCH_WINS) + pl.col(ColName.EVENT_MATCH_LOSSES),
|
|
145
126
|
),
|
|
146
|
-
ColSpec(
|
|
147
|
-
name=ColName.EVENT_MATCHES_SUM,
|
|
127
|
+
ColName.EVENT_MATCHES_SUM: ColSpec(
|
|
148
128
|
col_type=ColType.PICK_SUM,
|
|
149
129
|
expr=pl.col(ColName.EVENT_MATCHES),
|
|
150
130
|
),
|
|
151
|
-
ColSpec(
|
|
152
|
-
name=ColName.IS_TROPHY,
|
|
131
|
+
ColName.IS_TROPHY: ColSpec(
|
|
153
132
|
col_type=ColType.GROUP_BY,
|
|
154
133
|
expr=pl.when(pl.col(ColName.EVENT_TYPE) == "Traditional")
|
|
155
134
|
.then(pl.col(ColName.EVENT_MATCH_WINS) == 3)
|
|
156
135
|
.otherwise(pl.col(ColName.EVENT_MATCH_WINS) == 7),
|
|
157
136
|
),
|
|
158
|
-
ColSpec(
|
|
159
|
-
name=ColName.IS_TROPHY_SUM,
|
|
137
|
+
ColName.IS_TROPHY_SUM: ColSpec(
|
|
160
138
|
col_type=ColType.PICK_SUM,
|
|
161
139
|
expr=pl.col(ColName.IS_TROPHY),
|
|
162
140
|
),
|
|
163
|
-
ColSpec(
|
|
164
|
-
name=ColName.PACK_NUMBER,
|
|
141
|
+
ColName.PACK_NUMBER: ColSpec(
|
|
165
142
|
col_type=ColType.FILTER_ONLY, # use pack_num
|
|
166
143
|
views=[View.DRAFT],
|
|
167
144
|
),
|
|
168
|
-
ColSpec(
|
|
169
|
-
name=ColName.PACK_NUM,
|
|
145
|
+
ColName.PACK_NUM: ColSpec(
|
|
170
146
|
col_type=ColType.GROUP_BY,
|
|
171
147
|
expr=pl.col(ColName.PACK_NUMBER) + 1,
|
|
172
148
|
),
|
|
173
|
-
ColSpec(
|
|
174
|
-
name=ColName.PICK_NUMBER,
|
|
149
|
+
ColName.PICK_NUMBER: ColSpec(
|
|
175
150
|
col_type=ColType.FILTER_ONLY, # use pick_num
|
|
176
151
|
views=[View.DRAFT],
|
|
177
152
|
),
|
|
178
|
-
ColSpec(
|
|
179
|
-
name=ColName.PICK_NUM,
|
|
153
|
+
ColName.PICK_NUM: ColSpec(
|
|
180
154
|
col_type=ColType.GROUP_BY,
|
|
181
155
|
expr=pl.col(ColName.PICK_NUMBER) + 1,
|
|
182
156
|
),
|
|
183
|
-
ColSpec(
|
|
184
|
-
name=ColName.TAKEN_AT,
|
|
157
|
+
ColName.TAKEN_AT: ColSpec(
|
|
185
158
|
col_type=ColType.PICK_SUM,
|
|
186
159
|
expr=pl.col(ColName.PICK_NUM),
|
|
187
160
|
),
|
|
188
|
-
ColSpec(
|
|
189
|
-
name=ColName.NUM_TAKEN,
|
|
161
|
+
ColName.NUM_TAKEN: ColSpec(
|
|
190
162
|
col_type=ColType.PICK_SUM,
|
|
191
163
|
expr=pl.when(pl.col(ColName.PICK).is_not_null())
|
|
192
164
|
.then(1)
|
|
193
165
|
.otherwise(0),
|
|
194
166
|
),
|
|
195
|
-
ColSpec(
|
|
196
|
-
name=ColName.NUM_DRAFTS,
|
|
167
|
+
ColName.NUM_DRAFTS: ColSpec(
|
|
197
168
|
col_type=ColType.PICK_SUM,
|
|
198
169
|
expr=pl.when((pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)).then(1).otherwise(0),
|
|
199
170
|
),
|
|
200
|
-
ColSpec(
|
|
201
|
-
name=ColName.PICK,
|
|
171
|
+
ColName.PICK: ColSpec(
|
|
202
172
|
col_type=ColType.FILTER_ONLY,
|
|
203
173
|
views=[View.DRAFT],
|
|
204
174
|
),
|
|
205
|
-
ColSpec(
|
|
206
|
-
name=ColName.PICK_MAINDECK_RATE,
|
|
175
|
+
ColName.PICK_MAINDECK_RATE: ColSpec(
|
|
207
176
|
col_type=ColType.PICK_SUM,
|
|
208
177
|
views=[View.DRAFT],
|
|
209
178
|
),
|
|
210
|
-
ColSpec(
|
|
211
|
-
name=ColName.PICK_SIDEBOARD_IN_RATE,
|
|
179
|
+
ColName.PICK_SIDEBOARD_IN_RATE: ColSpec(
|
|
212
180
|
col_type=ColType.PICK_SUM,
|
|
213
181
|
views=[View.DRAFT],
|
|
214
182
|
),
|
|
215
|
-
ColSpec(
|
|
216
|
-
name=ColName.PACK_CARD,
|
|
183
|
+
ColName.PACK_CARD: ColSpec(
|
|
217
184
|
col_type=ColType.NAME_SUM,
|
|
218
185
|
views=[View.DRAFT],
|
|
219
186
|
),
|
|
220
|
-
ColSpec(
|
|
221
|
-
name=ColName.LAST_SEEN,
|
|
187
|
+
ColName.LAST_SEEN: ColSpec(
|
|
222
188
|
col_type=ColType.NAME_SUM,
|
|
223
189
|
expr=lambda name: pl.col(f"pack_card_{name}")
|
|
224
190
|
* pl.min_horizontal(ColName.PICK_NUM, 8),
|
|
225
191
|
),
|
|
226
|
-
ColSpec(
|
|
227
|
-
name=ColName.NUM_SEEN,
|
|
192
|
+
ColName.NUM_SEEN: ColSpec(
|
|
228
193
|
col_type=ColType.NAME_SUM,
|
|
229
194
|
expr=lambda name: pl.col(f"pack_card_{name}") * (pl.col(ColName.PICK_NUM) <= 8),
|
|
230
195
|
),
|
|
231
|
-
ColSpec(
|
|
232
|
-
name=ColName.POOL,
|
|
196
|
+
ColName.POOL: ColSpec(
|
|
233
197
|
col_type=ColType.NAME_SUM,
|
|
234
198
|
views=[View.DRAFT],
|
|
235
199
|
),
|
|
236
|
-
ColSpec(
|
|
237
|
-
name=ColName.GAME_TIME,
|
|
200
|
+
ColName.GAME_TIME: ColSpec(
|
|
238
201
|
col_type=ColType.FILTER_ONLY,
|
|
239
202
|
views=[View.GAME],
|
|
240
203
|
),
|
|
241
|
-
ColSpec(
|
|
242
|
-
name=ColName.GAME_DATE,
|
|
204
|
+
ColName.GAME_DATE: ColSpec(
|
|
243
205
|
col_type=ColType.GROUP_BY,
|
|
244
206
|
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.date(),
|
|
245
207
|
),
|
|
246
|
-
ColSpec(
|
|
247
|
-
name=ColName.GAME_DAY_OF_WEEK,
|
|
208
|
+
ColName.GAME_DAY_OF_WEEK: ColSpec(
|
|
248
209
|
col_type=ColType.GROUP_BY,
|
|
249
210
|
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
|
|
250
211
|
),
|
|
251
|
-
ColSpec(
|
|
252
|
-
name=ColName.GAME_HOUR,
|
|
212
|
+
ColName.GAME_HOUR: ColSpec(
|
|
253
213
|
col_type=ColType.GROUP_BY,
|
|
254
214
|
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.hour(),
|
|
255
215
|
),
|
|
256
|
-
ColSpec(
|
|
257
|
-
name=ColName.GAME_WEEK,
|
|
216
|
+
ColName.GAME_WEEK: ColSpec(
|
|
258
217
|
col_type=ColType.GROUP_BY,
|
|
259
218
|
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.week(),
|
|
260
219
|
),
|
|
261
|
-
ColSpec(
|
|
262
|
-
name=ColName.BUILD_INDEX,
|
|
220
|
+
ColName.BUILD_INDEX: ColSpec(
|
|
263
221
|
col_type=ColType.GROUP_BY,
|
|
264
222
|
views=[View.GAME],
|
|
265
223
|
),
|
|
266
|
-
ColSpec(
|
|
267
|
-
name=ColName.MATCH_NUMBER,
|
|
224
|
+
ColName.MATCH_NUMBER: ColSpec(
|
|
268
225
|
col_type=ColType.GROUP_BY,
|
|
269
226
|
views=[View.GAME],
|
|
270
227
|
),
|
|
271
|
-
ColSpec(
|
|
272
|
-
name=ColName.GAME_NUMBER,
|
|
228
|
+
ColName.GAME_NUMBER: ColSpec(
|
|
273
229
|
col_type=ColType.GROUP_BY,
|
|
274
230
|
views=[View.GAME],
|
|
275
231
|
),
|
|
276
|
-
ColSpec(
|
|
277
|
-
name=ColName.NUM_GAMES,
|
|
232
|
+
ColName.NUM_GAMES: ColSpec(
|
|
278
233
|
col_type=ColType.GAME_SUM,
|
|
279
234
|
expr=pl.col(ColName.GAME_NUMBER).is_not_null(),
|
|
280
235
|
),
|
|
281
|
-
ColSpec(
|
|
282
|
-
name=ColName.NUM_MATCHES,
|
|
236
|
+
ColName.NUM_MATCHES: ColSpec(
|
|
283
237
|
col_type=ColType.GAME_SUM,
|
|
284
238
|
expr=pl.col(ColName.GAME_NUMBER) == 1,
|
|
285
239
|
),
|
|
286
|
-
ColSpec(
|
|
287
|
-
name=ColName.NUM_EVENTS,
|
|
240
|
+
ColName.NUM_EVENTS: ColSpec(
|
|
288
241
|
col_type=ColType.GAME_SUM,
|
|
289
242
|
expr=(pl.col(ColName.GAME_NUMBER) == 1) & (pl.col(ColName.MATCH_NUMBER) == 1),
|
|
290
243
|
),
|
|
291
|
-
ColSpec(
|
|
292
|
-
name=ColName.OPP_RANK,
|
|
244
|
+
ColName.OPP_RANK: ColSpec(
|
|
293
245
|
col_type=ColType.GROUP_BY,
|
|
294
246
|
views=[View.GAME],
|
|
295
247
|
),
|
|
296
|
-
ColSpec(
|
|
297
|
-
name=ColName.MAIN_COLORS,
|
|
248
|
+
ColName.MAIN_COLORS: ColSpec(
|
|
298
249
|
col_type=ColType.GROUP_BY,
|
|
299
250
|
views=[View.GAME],
|
|
300
251
|
),
|
|
301
|
-
ColSpec(
|
|
302
|
-
name=ColName.NUM_COLORS,
|
|
252
|
+
ColName.NUM_COLORS: ColSpec(
|
|
303
253
|
col_type=ColType.GROUP_BY,
|
|
304
254
|
expr=pl.col(ColName.MAIN_COLORS).str.len_chars(),
|
|
305
255
|
),
|
|
306
|
-
ColSpec(
|
|
307
|
-
name=ColName.SPLASH_COLORS,
|
|
256
|
+
ColName.SPLASH_COLORS: ColSpec(
|
|
308
257
|
col_type=ColType.GROUP_BY,
|
|
309
258
|
views=[View.GAME],
|
|
310
259
|
),
|
|
311
|
-
ColSpec(
|
|
312
|
-
name=ColName.HAS_SPLASH,
|
|
260
|
+
ColName.HAS_SPLASH: ColSpec(
|
|
313
261
|
col_type=ColType.GROUP_BY,
|
|
314
262
|
expr=pl.col(ColName.SPLASH_COLORS).str.len_chars() > 0,
|
|
315
263
|
),
|
|
316
|
-
ColSpec(
|
|
317
|
-
name=ColName.ON_PLAY,
|
|
264
|
+
ColName.ON_PLAY: ColSpec(
|
|
318
265
|
col_type=ColType.GROUP_BY,
|
|
319
266
|
views=[View.GAME],
|
|
320
267
|
),
|
|
321
|
-
ColSpec(
|
|
322
|
-
name=ColName.NUM_ON_PLAY,
|
|
268
|
+
ColName.NUM_ON_PLAY: ColSpec(
|
|
323
269
|
col_type=ColType.GAME_SUM,
|
|
324
270
|
expr=pl.col(ColName.ON_PLAY),
|
|
325
271
|
),
|
|
326
|
-
ColSpec(
|
|
327
|
-
name=ColName.NUM_MULLIGANS,
|
|
272
|
+
ColName.NUM_MULLIGANS: ColSpec(
|
|
328
273
|
col_type=ColType.GROUP_BY,
|
|
329
274
|
views=[View.GAME],
|
|
330
275
|
),
|
|
331
|
-
ColSpec(
|
|
332
|
-
name=ColName.NUM_MULLIGANS_SUM,
|
|
276
|
+
ColName.NUM_MULLIGANS_SUM: ColSpec(
|
|
333
277
|
col_type=ColType.GAME_SUM,
|
|
334
278
|
expr=pl.col(ColName.NUM_MULLIGANS),
|
|
335
279
|
),
|
|
336
|
-
ColSpec(
|
|
337
|
-
name=ColName.OPP_NUM_MULLIGANS,
|
|
280
|
+
ColName.OPP_NUM_MULLIGANS: ColSpec(
|
|
338
281
|
col_type=ColType.GAME_SUM,
|
|
339
282
|
views=[View.GAME],
|
|
340
283
|
),
|
|
341
|
-
ColSpec(
|
|
342
|
-
name=ColName.OPP_NUM_MULLIGANS_SUM,
|
|
284
|
+
ColName.OPP_NUM_MULLIGANS_SUM: ColSpec(
|
|
343
285
|
col_type=ColType.GAME_SUM,
|
|
344
286
|
expr=pl.col(ColName.OPP_NUM_MULLIGANS),
|
|
345
287
|
),
|
|
346
|
-
ColSpec(
|
|
347
|
-
name=ColName.OPP_COLORS,
|
|
288
|
+
ColName.OPP_COLORS: ColSpec(
|
|
348
289
|
col_type=ColType.GROUP_BY,
|
|
349
290
|
views=[View.GAME],
|
|
350
291
|
),
|
|
351
|
-
ColSpec(
|
|
352
|
-
name=ColName.NUM_TURNS,
|
|
292
|
+
ColName.NUM_TURNS: ColSpec(
|
|
353
293
|
col_type=ColType.GROUP_BY,
|
|
354
294
|
views=[View.GAME],
|
|
355
295
|
),
|
|
356
|
-
ColSpec(
|
|
357
|
-
name=ColName.NUM_TURNS_SUM,
|
|
296
|
+
ColName.NUM_TURNS_SUM: ColSpec(
|
|
358
297
|
col_type=ColType.GAME_SUM,
|
|
359
298
|
expr=pl.col(ColName.NUM_TURNS),
|
|
360
299
|
),
|
|
361
|
-
ColSpec(
|
|
362
|
-
name=ColName.WON,
|
|
300
|
+
ColName.WON: ColSpec(
|
|
363
301
|
col_type=ColType.GROUP_BY,
|
|
364
302
|
views=[View.GAME],
|
|
365
303
|
),
|
|
366
|
-
ColSpec(
|
|
367
|
-
name=ColName.NUM_WON,
|
|
304
|
+
ColName.NUM_WON: ColSpec(
|
|
368
305
|
col_type=ColType.GAME_SUM,
|
|
369
306
|
expr=pl.col(ColName.WON),
|
|
370
307
|
),
|
|
371
|
-
ColSpec(
|
|
372
|
-
name=ColName.OPENING_HAND,
|
|
308
|
+
ColName.OPENING_HAND: ColSpec(
|
|
373
309
|
col_type=ColType.NAME_SUM,
|
|
374
310
|
views=[View.GAME],
|
|
375
311
|
),
|
|
376
|
-
ColSpec(
|
|
377
|
-
name=ColName.WON_OPENING_HAND,
|
|
312
|
+
ColName.WON_OPENING_HAND: ColSpec(
|
|
378
313
|
col_type=ColType.NAME_SUM,
|
|
379
314
|
expr=lambda name: pl.col(f"opening_hand_{name}") * pl.col(ColName.WON),
|
|
380
315
|
),
|
|
381
|
-
ColSpec(
|
|
382
|
-
name=ColName.DRAWN,
|
|
316
|
+
ColName.DRAWN: ColSpec(
|
|
383
317
|
col_type=ColType.NAME_SUM,
|
|
384
318
|
views=[View.GAME],
|
|
385
319
|
),
|
|
386
|
-
ColSpec(
|
|
387
|
-
name=ColName.WON_DRAWN,
|
|
320
|
+
ColName.WON_DRAWN: ColSpec(
|
|
388
321
|
col_type=ColType.NAME_SUM,
|
|
389
322
|
expr=lambda name: pl.col(f"drawn_{name}") * pl.col(ColName.WON),
|
|
390
323
|
),
|
|
391
|
-
ColSpec(
|
|
392
|
-
name=ColName.TUTORED,
|
|
324
|
+
ColName.TUTORED: ColSpec(
|
|
393
325
|
col_type=ColType.NAME_SUM,
|
|
394
326
|
views=[View.GAME],
|
|
395
327
|
),
|
|
396
|
-
ColSpec(
|
|
397
|
-
name=ColName.WON_TUTORED,
|
|
328
|
+
ColName.WON_TUTORED: ColSpec(
|
|
398
329
|
col_type=ColType.NAME_SUM,
|
|
399
330
|
expr=lambda name: pl.col(f"tutored_{name}") * pl.col(ColName.WON),
|
|
400
331
|
),
|
|
401
|
-
ColSpec(
|
|
402
|
-
name=ColName.DECK,
|
|
332
|
+
ColName.DECK: ColSpec(
|
|
403
333
|
col_type=ColType.NAME_SUM,
|
|
404
334
|
views=[View.GAME],
|
|
405
335
|
),
|
|
406
|
-
ColSpec(
|
|
407
|
-
name=ColName.WON_DECK,
|
|
336
|
+
ColName.WON_DECK: ColSpec(
|
|
408
337
|
col_type=ColType.NAME_SUM,
|
|
409
338
|
expr=lambda name: pl.col(f"deck_{name}") * pl.col(ColName.WON),
|
|
410
339
|
),
|
|
411
|
-
ColSpec(
|
|
412
|
-
name=ColName.SIDEBOARD,
|
|
340
|
+
ColName.SIDEBOARD: ColSpec(
|
|
413
341
|
col_type=ColType.NAME_SUM,
|
|
414
342
|
views=[View.GAME],
|
|
415
343
|
),
|
|
416
|
-
ColSpec(
|
|
417
|
-
name=ColName.WON_SIDEBOARD,
|
|
344
|
+
ColName.WON_SIDEBOARD: ColSpec(
|
|
418
345
|
col_type=ColType.NAME_SUM,
|
|
419
346
|
expr=lambda name: pl.col(f"sideboard_{name}") * pl.col(ColName.WON),
|
|
420
347
|
),
|
|
421
|
-
ColSpec(
|
|
422
|
-
name=ColName.NUM_GNS,
|
|
348
|
+
ColName.NUM_GNS: ColSpec(
|
|
423
349
|
col_type=ColType.NAME_SUM,
|
|
424
350
|
expr=lambda name: pl.max_horizontal(
|
|
425
351
|
0,
|
|
@@ -429,258 +355,204 @@ _column_specs = [
|
|
|
429
355
|
- pl.col(f"opening_hand_{name}"),
|
|
430
356
|
),
|
|
431
357
|
),
|
|
432
|
-
ColSpec(
|
|
433
|
-
name=ColName.WON_NUM_GNS,
|
|
358
|
+
ColName.WON_NUM_GNS: ColSpec(
|
|
434
359
|
col_type=ColType.NAME_SUM,
|
|
435
360
|
expr=lambda name: pl.col(ColName.WON) * pl.col(f"num_gns_{name}"),
|
|
436
361
|
),
|
|
437
|
-
ColSpec(
|
|
438
|
-
name=ColName.SET_CODE,
|
|
362
|
+
ColName.SET_CODE: ColSpec(
|
|
439
363
|
col_type=ColType.CARD_ATTR,
|
|
440
364
|
),
|
|
441
|
-
ColSpec(
|
|
442
|
-
name=ColName.COLOR,
|
|
365
|
+
ColName.COLOR: ColSpec(
|
|
443
366
|
col_type=ColType.CARD_ATTR,
|
|
444
367
|
),
|
|
445
|
-
ColSpec(
|
|
446
|
-
name=ColName.RARITY,
|
|
368
|
+
ColName.RARITY: ColSpec(
|
|
447
369
|
col_type=ColType.CARD_ATTR,
|
|
448
370
|
),
|
|
449
|
-
ColSpec(
|
|
450
|
-
name=ColName.COLOR_IDENTITY,
|
|
371
|
+
ColName.COLOR_IDENTITY: ColSpec(
|
|
451
372
|
col_type=ColType.CARD_ATTR,
|
|
452
373
|
),
|
|
453
|
-
ColSpec(
|
|
454
|
-
name=ColName.CARD_TYPE,
|
|
374
|
+
ColName.CARD_TYPE: ColSpec(
|
|
455
375
|
col_type=ColType.CARD_ATTR,
|
|
456
376
|
),
|
|
457
|
-
ColSpec(
|
|
458
|
-
name=ColName.SUBTYPE,
|
|
377
|
+
ColName.SUBTYPE: ColSpec(
|
|
459
378
|
col_type=ColType.CARD_ATTR,
|
|
460
379
|
),
|
|
461
|
-
ColSpec(
|
|
462
|
-
name=ColName.MANA_VALUE,
|
|
380
|
+
ColName.MANA_VALUE: ColSpec(
|
|
463
381
|
col_type=ColType.CARD_ATTR,
|
|
464
382
|
),
|
|
465
|
-
ColSpec(
|
|
466
|
-
name=ColName.DECK_MANA_VALUE,
|
|
383
|
+
ColName.DECK_MANA_VALUE: ColSpec(
|
|
467
384
|
col_type=ColType.NAME_SUM,
|
|
468
385
|
expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE] * pl.col(f"deck_{name}"),
|
|
469
386
|
),
|
|
470
|
-
ColSpec(
|
|
471
|
-
name=ColName.DECK_LANDS,
|
|
387
|
+
ColName.DECK_LANDS: ColSpec(
|
|
472
388
|
col_type=ColType.NAME_SUM,
|
|
473
389
|
expr=lambda name, card_context: pl.col(f"deck_{name}") * ( 1 if 'Land' in card_context[name][ColName.CARD_TYPE] else 0 )
|
|
474
390
|
),
|
|
475
|
-
ColSpec(
|
|
476
|
-
name=ColName.DECK_SPELLS,
|
|
391
|
+
ColName.DECK_SPELLS: ColSpec(
|
|
477
392
|
col_type=ColType.NAME_SUM,
|
|
478
393
|
expr=lambda name: pl.col(f"deck_{name}") - pl.col(f"deck_lands_{name}"),
|
|
479
394
|
),
|
|
480
|
-
ColSpec(
|
|
481
|
-
name=ColName.MANA_COST,
|
|
395
|
+
ColName.MANA_COST: ColSpec(
|
|
482
396
|
col_type=ColType.CARD_ATTR,
|
|
483
397
|
),
|
|
484
|
-
ColSpec(
|
|
485
|
-
name=ColName.POWER,
|
|
398
|
+
ColName.POWER: ColSpec(
|
|
486
399
|
col_type=ColType.CARD_ATTR,
|
|
487
400
|
),
|
|
488
|
-
ColSpec(
|
|
489
|
-
name=ColName.TOUGHNESS,
|
|
401
|
+
ColName.TOUGHNESS: ColSpec(
|
|
490
402
|
col_type=ColType.CARD_ATTR,
|
|
491
403
|
),
|
|
492
|
-
ColSpec(
|
|
493
|
-
name=ColName.IS_BONUS_SHEET,
|
|
404
|
+
ColName.IS_BONUS_SHEET: ColSpec(
|
|
494
405
|
col_type=ColType.CARD_ATTR,
|
|
495
406
|
),
|
|
496
|
-
ColSpec(
|
|
497
|
-
name=ColName.IS_DFC,
|
|
407
|
+
ColName.IS_DFC: ColSpec(
|
|
498
408
|
col_type=ColType.CARD_ATTR,
|
|
499
409
|
),
|
|
500
|
-
ColSpec(
|
|
501
|
-
name=ColName.ORACLE_TEXT,
|
|
410
|
+
ColName.ORACLE_TEXT: ColSpec(
|
|
502
411
|
col_type=ColType.CARD_ATTR,
|
|
503
412
|
),
|
|
504
|
-
ColSpec(
|
|
505
|
-
name=ColName.CARD_JSON,
|
|
413
|
+
ColName.CARD_JSON: ColSpec(
|
|
506
414
|
col_type=ColType.CARD_ATTR,
|
|
507
415
|
),
|
|
508
|
-
ColSpec(
|
|
509
|
-
name=ColName.PICKED_MATCH_WR,
|
|
416
|
+
ColName.PICKED_MATCH_WR: ColSpec(
|
|
510
417
|
col_type=ColType.AGG,
|
|
511
418
|
expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES_SUM),
|
|
512
419
|
),
|
|
513
|
-
ColSpec(
|
|
514
|
-
name=ColName.TROPHY_RATE,
|
|
420
|
+
ColName.TROPHY_RATE: ColSpec(
|
|
515
421
|
col_type=ColType.AGG,
|
|
516
422
|
expr=pl.col(ColName.IS_TROPHY_SUM) / pl.col(ColName.NUM_TAKEN),
|
|
517
423
|
),
|
|
518
|
-
ColSpec(
|
|
519
|
-
name=ColName.GAME_WR,
|
|
424
|
+
ColName.GAME_WR: ColSpec(
|
|
520
425
|
col_type=ColType.AGG,
|
|
521
426
|
expr=pl.col(ColName.NUM_WON) / pl.col(ColName.NUM_GAMES),
|
|
522
427
|
),
|
|
523
|
-
ColSpec(
|
|
524
|
-
name=ColName.ALSA,
|
|
428
|
+
ColName.ALSA: ColSpec(
|
|
525
429
|
col_type=ColType.AGG,
|
|
526
430
|
expr=pl.col(ColName.LAST_SEEN) / pl.col(ColName.NUM_SEEN),
|
|
527
431
|
),
|
|
528
|
-
ColSpec(
|
|
529
|
-
name=ColName.ATA,
|
|
432
|
+
ColName.ATA: ColSpec(
|
|
530
433
|
col_type=ColType.AGG,
|
|
531
434
|
expr=pl.col(ColName.TAKEN_AT) / pl.col(ColName.NUM_TAKEN),
|
|
532
435
|
),
|
|
533
|
-
ColSpec(
|
|
534
|
-
name=ColName.NUM_GP,
|
|
436
|
+
ColName.NUM_GP: ColSpec(
|
|
535
437
|
col_type=ColType.AGG,
|
|
536
438
|
expr=pl.col(ColName.DECK),
|
|
537
439
|
),
|
|
538
|
-
ColSpec(
|
|
539
|
-
name=ColName.PCT_GP,
|
|
440
|
+
ColName.PCT_GP: ColSpec(
|
|
540
441
|
col_type=ColType.AGG,
|
|
541
442
|
expr=pl.col(ColName.DECK) / (pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD)),
|
|
542
443
|
),
|
|
543
|
-
ColSpec(
|
|
544
|
-
name=ColName.GP_WR,
|
|
444
|
+
ColName.GP_WR: ColSpec(
|
|
545
445
|
col_type=ColType.AGG,
|
|
546
446
|
expr=pl.col(ColName.WON_DECK) / pl.col(ColName.DECK),
|
|
547
447
|
),
|
|
548
|
-
ColSpec(
|
|
549
|
-
name=ColName.NUM_OH,
|
|
448
|
+
ColName.NUM_OH: ColSpec(
|
|
550
449
|
col_type=ColType.AGG,
|
|
551
450
|
expr=pl.col(ColName.OPENING_HAND),
|
|
552
451
|
),
|
|
553
|
-
ColSpec(
|
|
554
|
-
name=ColName.OH_WR,
|
|
452
|
+
ColName.OH_WR: ColSpec(
|
|
555
453
|
col_type=ColType.AGG,
|
|
556
454
|
expr=pl.col(ColName.WON_OPENING_HAND) / pl.col(ColName.OPENING_HAND),
|
|
557
455
|
),
|
|
558
|
-
ColSpec(
|
|
559
|
-
name=ColName.NUM_GIH,
|
|
456
|
+
ColName.NUM_GIH: ColSpec(
|
|
560
457
|
col_type=ColType.AGG,
|
|
561
458
|
expr=pl.col(ColName.OPENING_HAND) + pl.col(ColName.DRAWN),
|
|
562
459
|
),
|
|
563
|
-
ColSpec(
|
|
564
|
-
name=ColName.NUM_GIH_WON,
|
|
460
|
+
ColName.NUM_GIH_WON: ColSpec(
|
|
565
461
|
col_type=ColType.AGG,
|
|
566
462
|
expr=pl.col(ColName.WON_OPENING_HAND) + pl.col(ColName.WON_DRAWN),
|
|
567
463
|
),
|
|
568
|
-
ColSpec(
|
|
569
|
-
name=ColName.GIH_WR,
|
|
464
|
+
ColName.GIH_WR: ColSpec(
|
|
570
465
|
col_type=ColType.AGG,
|
|
571
466
|
expr=pl.col(ColName.NUM_GIH_WON) / pl.col(ColName.NUM_GIH),
|
|
572
467
|
),
|
|
573
|
-
ColSpec(
|
|
574
|
-
name=ColName.GNS_WR,
|
|
468
|
+
ColName.GNS_WR: ColSpec(
|
|
575
469
|
col_type=ColType.AGG,
|
|
576
470
|
expr=pl.col(ColName.WON_NUM_GNS) / pl.col(ColName.NUM_GNS),
|
|
577
471
|
),
|
|
578
|
-
ColSpec(
|
|
579
|
-
name=ColName.IWD,
|
|
472
|
+
ColName.IWD: ColSpec(
|
|
580
473
|
col_type=ColType.AGG,
|
|
581
474
|
expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GNS_WR),
|
|
582
475
|
),
|
|
583
|
-
ColSpec(
|
|
584
|
-
name=ColName.NUM_IN_POOL,
|
|
476
|
+
ColName.NUM_IN_POOL: ColSpec(
|
|
585
477
|
col_type=ColType.AGG,
|
|
586
478
|
expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
|
|
587
479
|
),
|
|
588
|
-
ColSpec(
|
|
589
|
-
name=ColName.IN_POOL_WR,
|
|
480
|
+
ColName.IN_POOL_WR: ColSpec(
|
|
590
481
|
col_type=ColType.AGG,
|
|
591
482
|
expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
|
|
592
483
|
/ pl.col(ColName.NUM_IN_POOL),
|
|
593
484
|
),
|
|
594
|
-
ColSpec(
|
|
595
|
-
name=ColName.DECK_TOTAL,
|
|
485
|
+
ColName.DECK_TOTAL: ColSpec(
|
|
596
486
|
col_type=ColType.AGG,
|
|
597
487
|
expr=pl.col(ColName.DECK).sum(),
|
|
598
488
|
),
|
|
599
|
-
ColSpec(
|
|
600
|
-
name=ColName.WON_DECK_TOTAL,
|
|
489
|
+
ColName.WON_DECK_TOTAL: ColSpec(
|
|
601
490
|
col_type=ColType.AGG,
|
|
602
491
|
expr=pl.col(ColName.WON_DECK).sum(),
|
|
603
492
|
),
|
|
604
|
-
ColSpec(
|
|
605
|
-
name=ColName.GP_WR_MEAN,
|
|
493
|
+
ColName.GP_WR_MEAN: ColSpec(
|
|
606
494
|
col_type=ColType.AGG,
|
|
607
495
|
expr=pl.col(ColName.WON_DECK_TOTAL) / pl.col(ColName.DECK_TOTAL),
|
|
608
496
|
),
|
|
609
|
-
ColSpec(
|
|
610
|
-
name=ColName.GP_WR_EXCESS,
|
|
497
|
+
ColName.GP_WR_EXCESS: ColSpec(
|
|
611
498
|
col_type=ColType.AGG,
|
|
612
499
|
expr=pl.col(ColName.GP_WR) - pl.col(ColName.GP_WR_MEAN),
|
|
613
500
|
),
|
|
614
|
-
ColSpec(
|
|
615
|
-
name=ColName.GP_WR_VAR,
|
|
501
|
+
ColName.GP_WR_VAR: ColSpec(
|
|
616
502
|
col_type=ColType.AGG,
|
|
617
503
|
expr=(pl.col(ColName.GP_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GP)).sum()
|
|
618
504
|
/ pl.col(ColName.DECK_TOTAL),
|
|
619
505
|
),
|
|
620
|
-
ColSpec(
|
|
621
|
-
name=ColName.GP_WR_STDEV,
|
|
506
|
+
ColName.GP_WR_STDEV: ColSpec(
|
|
622
507
|
col_type=ColType.AGG,
|
|
623
508
|
expr=pl.col(ColName.GP_WR_VAR).sqrt(),
|
|
624
509
|
),
|
|
625
|
-
ColSpec(
|
|
626
|
-
name=ColName.GP_WR_Z,
|
|
510
|
+
ColName.GP_WR_Z: ColSpec(
|
|
627
511
|
col_type=ColType.AGG,
|
|
628
512
|
expr=pl.col(ColName.GP_WR_EXCESS) / pl.col(ColName.GP_WR_STDEV),
|
|
629
513
|
),
|
|
630
|
-
ColSpec(
|
|
631
|
-
name=ColName.GIH_TOTAL,
|
|
514
|
+
ColName.GIH_TOTAL: ColSpec(
|
|
632
515
|
col_type=ColType.AGG,
|
|
633
516
|
expr=pl.col(ColName.NUM_GIH).sum(),
|
|
634
517
|
),
|
|
635
|
-
ColSpec(
|
|
636
|
-
name=ColName.WON_GIH_TOTAL,
|
|
518
|
+
ColName.WON_GIH_TOTAL: ColSpec(
|
|
637
519
|
col_type=ColType.AGG,
|
|
638
520
|
expr=pl.col(ColName.NUM_GIH_WON).sum(),
|
|
639
521
|
),
|
|
640
|
-
ColSpec(
|
|
641
|
-
name=ColName.GIH_WR_MEAN,
|
|
522
|
+
ColName.GIH_WR_MEAN: ColSpec(
|
|
642
523
|
col_type=ColType.AGG,
|
|
643
524
|
expr=pl.col(ColName.WON_GIH_TOTAL) / pl.col(ColName.GIH_TOTAL),
|
|
644
525
|
),
|
|
645
|
-
ColSpec(
|
|
646
|
-
name=ColName.GIH_WR_EXCESS,
|
|
526
|
+
ColName.GIH_WR_EXCESS: ColSpec(
|
|
647
527
|
col_type=ColType.AGG,
|
|
648
528
|
expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GIH_WR_MEAN),
|
|
649
529
|
),
|
|
650
|
-
ColSpec(
|
|
651
|
-
name=ColName.GIH_WR_VAR,
|
|
530
|
+
ColName.GIH_WR_VAR: ColSpec(
|
|
652
531
|
col_type=ColType.AGG,
|
|
653
532
|
expr=(pl.col(ColName.GIH_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GIH)).sum()
|
|
654
533
|
/ pl.col(ColName.GIH_TOTAL),
|
|
655
534
|
),
|
|
656
|
-
ColSpec(
|
|
657
|
-
name=ColName.GIH_WR_STDEV,
|
|
535
|
+
ColName.GIH_WR_STDEV: ColSpec(
|
|
658
536
|
col_type=ColType.AGG,
|
|
659
537
|
expr=pl.col(ColName.GIH_WR_VAR).sqrt(),
|
|
660
538
|
),
|
|
661
|
-
ColSpec(
|
|
662
|
-
name=ColName.GIH_WR_Z,
|
|
539
|
+
ColName.GIH_WR_Z: ColSpec(
|
|
663
540
|
col_type=ColType.AGG,
|
|
664
541
|
expr=pl.col(ColName.GIH_WR_EXCESS) / pl.col(ColName.GIH_WR_STDEV),
|
|
665
542
|
),
|
|
666
|
-
ColSpec(
|
|
667
|
-
name=ColName.DECK_MANA_VALUE_AVG,
|
|
543
|
+
ColName.DECK_MANA_VALUE_AVG: ColSpec(
|
|
668
544
|
col_type=ColType.AGG,
|
|
669
545
|
expr=pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK_SPELLS),
|
|
670
546
|
),
|
|
671
|
-
ColSpec(
|
|
672
|
-
name=ColName.DECK_LANDS_AVG,
|
|
547
|
+
ColName.DECK_LANDS_AVG: ColSpec(
|
|
673
548
|
col_type=ColType.AGG,
|
|
674
549
|
expr=pl.col(ColName.DECK_LANDS) / pl.col(ColName.NUM_GAMES),
|
|
675
550
|
),
|
|
676
|
-
ColSpec(
|
|
677
|
-
name=ColName.DECK_SPELLS_AVG,
|
|
551
|
+
ColName.DECK_SPELLS_AVG: ColSpec(
|
|
678
552
|
col_type=ColType.AGG,
|
|
679
553
|
expr=pl.col(ColName.DECK_SPELLS) / pl.col(ColName.NUM_GAMES),
|
|
680
554
|
),
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
col_spec_map = {col.name: col for col in _column_specs}
|
|
555
|
+
}
|
|
684
556
|
|
|
685
557
|
for item in ColName:
|
|
686
|
-
assert item in
|
|
558
|
+
assert item in specs, f"column {item} enumerated but not specified"
|
|
@@ -54,8 +54,8 @@ def _get_names(set_code: str) -> list[str]:
|
|
|
54
54
|
return names
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
def _get_card_context(set_code: str,
|
|
58
|
-
card_attr_specs = {col:spec for col, spec in
|
|
57
|
+
def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict[str, Any]] | None) -> dict[str, dict[str, Any]]:
|
|
58
|
+
card_attr_specs = {col:spec for col, spec in specs.items() if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME}
|
|
59
59
|
col_def_map = _hydrate_col_defs(set_code, card_attr_specs, card_only=True)
|
|
60
60
|
|
|
61
61
|
columns = list(col_def_map.keys())
|
|
@@ -84,7 +84,7 @@ def _get_card_context(set_code: str, col_spec_map: dict[str, ColSpec], card_cont
|
|
|
84
84
|
return loaded_context
|
|
85
85
|
|
|
86
86
|
|
|
87
|
-
def _determine_expression(spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
|
|
87
|
+
def _determine_expression(col: str, spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
|
|
88
88
|
def seed_params(expr):
|
|
89
89
|
params = {}
|
|
90
90
|
|
|
@@ -97,18 +97,18 @@ def _determine_expression(spec: ColSpec, names: list[str], card_context: dict[st
|
|
|
97
97
|
|
|
98
98
|
if spec.col_type == ColType.NAME_SUM:
|
|
99
99
|
if spec.expr is not None:
|
|
100
|
-
assert isinstance(spec.expr, Callable), f"NAME_SUM column {
|
|
100
|
+
assert isinstance(spec.expr, Callable), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
|
|
101
101
|
unnamed_exprs = [spec.expr(**{'name': name, **seed_params(spec.expr)}) for name in names]
|
|
102
102
|
|
|
103
103
|
expr = tuple(
|
|
104
104
|
map(
|
|
105
|
-
lambda ex, name: ex.alias(f"{
|
|
105
|
+
lambda ex, name: ex.alias(f"{col}_{name}"),
|
|
106
106
|
unnamed_exprs,
|
|
107
107
|
names,
|
|
108
108
|
)
|
|
109
109
|
)
|
|
110
110
|
else:
|
|
111
|
-
expr = tuple(map(lambda name: pl.col(f"{
|
|
111
|
+
expr = tuple(map(lambda name: pl.col(f"{col}_{name}"), names))
|
|
112
112
|
|
|
113
113
|
elif spec.expr is not None:
|
|
114
114
|
if isinstance(spec.expr, Callable):
|
|
@@ -118,25 +118,27 @@ def _determine_expression(spec: ColSpec, names: list[str], card_context: dict[st
|
|
|
118
118
|
for name in names:
|
|
119
119
|
name_params = {'name': name, **params}
|
|
120
120
|
expr = pl.when(pl.col(ColName.PICK) == name).then(spec.expr(**name_params)).otherwise(expr)
|
|
121
|
+
elif spec.col_type == ColType.CARD_ATTR and 'name' in signature(spec.expr).parameters:
|
|
122
|
+
expr = spec.expr(**{'name': pl.col('name'), **params})
|
|
121
123
|
else:
|
|
122
124
|
expr = spec.expr(**params)
|
|
123
125
|
else:
|
|
124
126
|
expr = spec.expr
|
|
125
|
-
expr = expr.alias(
|
|
127
|
+
expr = expr.alias(col)
|
|
126
128
|
else:
|
|
127
|
-
expr = pl.col(
|
|
129
|
+
expr = pl.col(col)
|
|
128
130
|
|
|
129
131
|
return expr
|
|
130
132
|
|
|
131
133
|
|
|
132
|
-
def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...],
|
|
134
|
+
def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: dict[str, ColSpec], names: list[str]) -> set[str]:
|
|
133
135
|
dependencies = set()
|
|
134
136
|
tricky_ones = set()
|
|
135
137
|
|
|
136
138
|
if isinstance(expr, pl.Expr):
|
|
137
139
|
dep_cols = [c for c in expr.meta.root_names() if c != name]
|
|
138
140
|
for dep_col in dep_cols:
|
|
139
|
-
if dep_col in
|
|
141
|
+
if dep_col in specs.keys():
|
|
140
142
|
dependencies.add(dep_col)
|
|
141
143
|
else:
|
|
142
144
|
tricky_ones.add(dep_col)
|
|
@@ -145,9 +147,9 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
|
|
|
145
147
|
pattern = f"_{names[idx]}$"
|
|
146
148
|
dep_cols = [c for c in exp.meta.root_names() if c != name]
|
|
147
149
|
for dep_col in dep_cols:
|
|
148
|
-
if dep_col in
|
|
150
|
+
if dep_col in specs.keys():
|
|
149
151
|
dependencies.add(dep_col)
|
|
150
|
-
elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in
|
|
152
|
+
elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs:
|
|
151
153
|
dependencies.add(split[0])
|
|
152
154
|
else:
|
|
153
155
|
tricky_ones.add(dep_col)
|
|
@@ -156,7 +158,7 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
|
|
|
156
158
|
found = False
|
|
157
159
|
for n in names:
|
|
158
160
|
pattern = f"_{n}$"
|
|
159
|
-
if not found and len(split := re.split(pattern, item)) == 2 and split[0] in
|
|
161
|
+
if not found and len(split := re.split(pattern, item)) == 2 and split[0] in specs:
|
|
160
162
|
dependencies.add(split[0])
|
|
161
163
|
found = True
|
|
162
164
|
assert found, f"Could not locate column spec for root col {item}"
|
|
@@ -164,49 +166,50 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
|
|
|
164
166
|
return dependencies
|
|
165
167
|
|
|
166
168
|
|
|
167
|
-
def _hydrate_col_defs(set_code: str,
|
|
169
|
+
def _hydrate_col_defs(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict] | None = None, card_only: bool =False):
|
|
168
170
|
names = _get_names(set_code)
|
|
169
171
|
|
|
170
172
|
if card_only:
|
|
171
173
|
card_context = {}
|
|
172
174
|
else:
|
|
173
|
-
card_context = _get_card_context(set_code,
|
|
175
|
+
card_context = _get_card_context(set_code, specs, card_context)
|
|
174
176
|
|
|
175
177
|
assert len(names) > 0, "there should be names"
|
|
176
178
|
hydrated = {}
|
|
177
|
-
for
|
|
178
|
-
expr = _determine_expression(spec, names, card_context)
|
|
179
|
-
dependencies = _infer_dependencies(
|
|
179
|
+
for col, spec in specs.items():
|
|
180
|
+
expr = _determine_expression(col, spec, names, card_context)
|
|
181
|
+
dependencies = _infer_dependencies(col, expr, specs, names)
|
|
180
182
|
|
|
183
|
+
sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
|
|
181
184
|
try:
|
|
182
|
-
sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
|
|
183
185
|
expr_sig = sig_expr.meta.serialize(
|
|
184
186
|
format="json"
|
|
185
|
-
)
|
|
187
|
+
)
|
|
186
188
|
except pl.exceptions.ComputeError:
|
|
187
189
|
if spec.version is not None:
|
|
188
|
-
expr_sig =
|
|
190
|
+
expr_sig = col + spec.version
|
|
189
191
|
else:
|
|
190
|
-
|
|
192
|
+
print(f"Using session-only signature for non-serializable column {col}, please provide a version value")
|
|
193
|
+
expr_sig = str(sig_expr)
|
|
191
194
|
|
|
192
195
|
signature = str(
|
|
193
196
|
(
|
|
194
|
-
|
|
197
|
+
col,
|
|
195
198
|
spec.col_type.value,
|
|
196
199
|
expr_sig,
|
|
197
|
-
dependencies,
|
|
200
|
+
sorted(dependencies),
|
|
198
201
|
)
|
|
199
202
|
)
|
|
200
203
|
|
|
201
204
|
cdef = ColDef(
|
|
202
|
-
name=
|
|
205
|
+
name=col,
|
|
203
206
|
col_type=spec.col_type,
|
|
204
207
|
views=set(spec.views or set()),
|
|
205
208
|
expr=expr,
|
|
206
209
|
dependencies=dependencies,
|
|
207
210
|
signature=signature,
|
|
208
211
|
)
|
|
209
|
-
hydrated[
|
|
212
|
+
hydrated[col] = cdef
|
|
210
213
|
return hydrated
|
|
211
214
|
|
|
212
215
|
|
|
@@ -353,18 +356,18 @@ def summon(
|
|
|
353
356
|
columns: list[str] | None = None,
|
|
354
357
|
group_by: list[str] | None = None,
|
|
355
358
|
filter_spec: dict | None = None,
|
|
356
|
-
extensions:
|
|
359
|
+
extensions: dict[str, ColSpec] | None = None,
|
|
357
360
|
use_streaming: bool = False,
|
|
358
361
|
read_cache: bool = True,
|
|
359
362
|
write_cache: bool = True,
|
|
360
363
|
card_context: pl.DataFrame | dict[str, dict] | None = None
|
|
361
364
|
) -> pl.DataFrame:
|
|
362
|
-
|
|
365
|
+
specs = dict(spells.columns.specs)
|
|
366
|
+
|
|
363
367
|
if extensions is not None:
|
|
364
|
-
|
|
365
|
-
col_spec_map[spec.name] = spec
|
|
368
|
+
specs.update(extensions)
|
|
366
369
|
|
|
367
|
-
col_def_map = _hydrate_col_defs(set_code,
|
|
370
|
+
col_def_map = _hydrate_col_defs(set_code, specs, card_context)
|
|
368
371
|
m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
|
|
369
372
|
|
|
370
373
|
calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
|
|
3
|
+
from spells.enums import ColType
|
|
4
|
+
from spells.columns import ColSpec
|
|
5
|
+
|
|
6
|
+
def attr_metrics(attr):
|
|
7
|
+
return {
|
|
8
|
+
f"seen_{attr}": ColSpec(
|
|
9
|
+
col_type=ColType.NAME_SUM,
|
|
10
|
+
expr=(lambda name, card_context: pl.when(pl.col(f"pack_card_{name}") > 0)
|
|
11
|
+
.then(card_context[name][attr])
|
|
12
|
+
.otherwise(None)),
|
|
13
|
+
),
|
|
14
|
+
f"pick_{attr}": ColSpec(
|
|
15
|
+
col_type=ColType.PICK_SUM,
|
|
16
|
+
expr=lambda name, card_context: card_context[name][attr]
|
|
17
|
+
),
|
|
18
|
+
f"least_{attr}_taken": ColSpec(
|
|
19
|
+
col_type=ColType.PICK_SUM,
|
|
20
|
+
expr=(lambda names: pl.col(f'pick_{attr}')
|
|
21
|
+
<= pl.min_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
|
|
22
|
+
),
|
|
23
|
+
f"least_{attr}_taken_rate": ColSpec(
|
|
24
|
+
col_type=ColType.AGG,
|
|
25
|
+
expr=pl.col(f"least_{attr}_taken") / pl.col("num_taken"),
|
|
26
|
+
),
|
|
27
|
+
f"greatest_{attr}_taken": ColSpec(
|
|
28
|
+
col_type=ColType.PICK_SUM,
|
|
29
|
+
expr=(lambda names: pl.col(f'pick_{attr}')
|
|
30
|
+
>= pl.max_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
|
|
31
|
+
),
|
|
32
|
+
f"greatest_{attr}_taken_rate": ColSpec(
|
|
33
|
+
col_type=ColType.AGG,
|
|
34
|
+
expr=pl.col(f"greatest_{attr}_taken") / pl.col("num_taken"),
|
|
35
|
+
),
|
|
36
|
+
f"pick_{attr}_mean": ColSpec(
|
|
37
|
+
col_type=ColType.AGG,
|
|
38
|
+
expr=pl.col(f"pick_{attr}") / pl.col("num_taken")
|
|
39
|
+
)
|
|
40
|
+
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|