spells-mtg 0.4.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spells-mtg might be problematic. Click here for more details.
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/PKG-INFO +49 -24
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/README.md +48 -23
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/pyproject.toml +1 -1
- spells_mtg-0.5.1/spells/__init__.py +5 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/columns.py +134 -257
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/draft_data.py +54 -36
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/enums.py +1 -0
- spells_mtg-0.5.1/spells/extension.py +40 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/manifest.py +4 -4
- spells_mtg-0.4.0/spells/__init__.py +0 -5
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/LICENSE +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/cache.py +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/cards.py +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/external.py +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/filter.py +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/spells/schema.py +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/tests/__init__.py +0 -0
- {spells_mtg-0.4.0 → spells_mtg-0.5.1}/tests/filter_test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spells-mtg
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: analysis of 17Lands.com public datasets
|
|
5
5
|
Author-Email: Joel Barnes <oelarnes@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -74,8 +74,9 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
74
74
|
- Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
|
|
75
75
|
- Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
|
|
76
76
|
- Manages grouping and filtering by built-in and custom columns at the row level
|
|
77
|
-
- Provides
|
|
77
|
+
- Provides 122 explicitly specified, enumerated, documented column definitions
|
|
78
78
|
- Supports "Deck Color Data" aggregations with built-in column definitions.
|
|
79
|
+
- Lets you feed card metrics back into column definitions to support scientific workflows like MLE
|
|
79
80
|
- Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
|
|
80
81
|
- Downloads and manages public datasets from 17Lands
|
|
81
82
|
- Retrieves and models booster configuration and card data from [MTGJSON](https://mtgjson.com/)
|
|
@@ -87,7 +88,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
87
88
|
|
|
88
89
|
## summon
|
|
89
90
|
|
|
90
|
-
`summon` takes
|
|
91
|
+
`summon` takes five optional arguments, allowing a fully declarative specification of your desired analysis. Basic functionality not provided by this api can often be managed by simple chained calls using the polars API, e.g. sorting and post-agg filtering.
|
|
91
92
|
- `columns` specifies the desired output columns
|
|
92
93
|
```python
|
|
93
94
|
>>> spells.summon('DSK', columns=["num_gp", "pct_gp", "gp_wr", "gp_wr_z"])
|
|
@@ -133,7 +134,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
133
134
|
```
|
|
134
135
|
- `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
|
|
135
136
|
```python
|
|
136
|
-
>>> from spells
|
|
137
|
+
>>> from spells import ColName
|
|
137
138
|
>>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
|
|
138
139
|
shape: (4, 2)
|
|
139
140
|
┌───────────────┬──────────┐
|
|
@@ -150,14 +151,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
150
151
|
- `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
|
|
151
152
|
```python
|
|
152
153
|
>>> import polars as pl
|
|
153
|
-
>>> from spells
|
|
154
|
-
>>>
|
|
155
|
-
|
|
156
|
-
...
|
|
157
|
-
...
|
|
158
|
-
...
|
|
159
|
-
...
|
|
160
|
-
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=
|
|
154
|
+
>>> from spells import ColSpec, ColType
|
|
155
|
+
>>> ext = {
|
|
156
|
+
... 'deq_base': ColSpec(
|
|
157
|
+
... col_type=ColType.AGG,
|
|
158
|
+
... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
|
|
159
|
+
... )
|
|
160
|
+
... }
|
|
161
|
+
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
161
162
|
... .filter(pl.col('deq_base').is_finite())
|
|
162
163
|
... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
|
|
163
164
|
... .sort('deq_base', descending=True)
|
|
@@ -180,8 +181,32 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
180
181
|
│ Oblivious Bookworm ┆ 0.028605 ┆ uncommon ┆ GU │
|
|
181
182
|
└──────────────────────────┴──────────┴──────────┴───────┘
|
|
182
183
|
```
|
|
183
|
-
|
|
184
|
-
|
|
184
|
+
- `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
|
|
185
|
+
```python
|
|
186
|
+
>>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
187
|
+
>>> ext = {
|
|
188
|
+
... 'picked_deq_base': ColSpec(
|
|
189
|
+
... col_type=ColType.PICK_SUM,
|
|
190
|
+
... expr=lambda name, card_context: card_context[name]['deq_base']
|
|
191
|
+
... ),
|
|
192
|
+
... 'picked_deq_base_avg': ColSpec(
|
|
193
|
+
... col_type=ColType.AGG,
|
|
194
|
+
... expr=pl.col('picked_deq_base') / pl.col('num_taken')
|
|
195
|
+
... ),
|
|
196
|
+
... }
|
|
197
|
+
>>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
|
|
198
|
+
shape: (4, 2)
|
|
199
|
+
┌───────────────┬─────────────────────┐
|
|
200
|
+
│ player_cohort ┆ picked_deq_base_avg │
|
|
201
|
+
│ --- ┆ --- │
|
|
202
|
+
│ str ┆ f64 │
|
|
203
|
+
╞═══════════════╪═════════════════════╡
|
|
204
|
+
│ Bottom ┆ 0.004826 │
|
|
205
|
+
│ Middle ┆ 0.00532 │
|
|
206
|
+
│ Other ┆ 0.004895 │
|
|
207
|
+
│ Top ┆ 0.005659 │
|
|
208
|
+
└───────────────┴─────────────────────┘
|
|
209
|
+
```
|
|
185
210
|
|
|
186
211
|
## Installation
|
|
187
212
|
|
|
@@ -223,7 +248,7 @@ Spells caches the results of expensive aggregations in the local file system as
|
|
|
223
248
|
|
|
224
249
|
### Memory Usage
|
|
225
250
|
|
|
226
|
-
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor
|
|
251
|
+
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
|
|
227
252
|
|
|
228
253
|
When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
|
|
229
254
|
|
|
@@ -289,25 +314,26 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
|
|
|
289
314
|
- `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
|
|
290
315
|
- `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
|
|
291
316
|
|
|
292
|
-
- extensions: a
|
|
317
|
+
- extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the built-in column definitions described below.
|
|
318
|
+
|
|
319
|
+
- card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
|
|
293
320
|
|
|
294
321
|
- read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
|
|
295
322
|
|
|
296
323
|
### Enums
|
|
297
324
|
|
|
298
325
|
```python
|
|
299
|
-
from spells
|
|
326
|
+
from spells import ColName, ColType
|
|
300
327
|
```
|
|
301
328
|
|
|
302
329
|
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
303
330
|
|
|
304
|
-
###
|
|
331
|
+
### ColSpec
|
|
305
332
|
|
|
306
333
|
```python
|
|
307
|
-
from spells
|
|
334
|
+
from spells import ColSpec
|
|
308
335
|
|
|
309
|
-
|
|
310
|
-
name: str,
|
|
336
|
+
ColSpec(
|
|
311
337
|
col_type: ColType,
|
|
312
338
|
expr: pl.Expr | Callable[..., pl.Expr] | None = None,
|
|
313
339
|
version: str | None = None
|
|
@@ -318,8 +344,6 @@ Used to define extensions in `summon`
|
|
|
318
344
|
|
|
319
345
|
#### parameters
|
|
320
346
|
|
|
321
|
-
- `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
|
|
322
|
-
|
|
323
347
|
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
|
|
324
348
|
and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
|
|
325
349
|
summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
|
|
@@ -328,7 +352,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
|
|
|
328
352
|
- For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
|
|
329
353
|
- `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
|
|
330
354
|
- `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
|
|
331
|
-
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions
|
|
355
|
+
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
|
|
332
356
|
|
|
333
357
|
- `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
|
|
334
358
|
for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
|
|
@@ -366,6 +390,7 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
366
390
|
| `PICK_NUMBER` | `"pick_number"` | `DRAFT` | `FILTER_ONLY` | Dataset Column | Int |
|
|
367
391
|
| `PICK_NUM` | `"pick_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
|
|
368
392
|
| `TAKEN_AT` | `"taken_at"` | `DRAFT` | `PICK_SUM` | Summable alias of `PICK_NUM` | Int |
|
|
393
|
+
| `NUM_DRAFTS` | `"num_drafts"` | `DRAFT` | `PICK_SUM` | | Int |
|
|
369
394
|
| `NUM_TAKEN` | `"num_taken"` | `DRAFT` | `PICK_SUM` | Sum 1 over rows | Int |
|
|
370
395
|
| `PICK` | `"pick"` | `DRAFT` | `FILTER_ONLY` | Dataset Column, joined as "name" | String |
|
|
371
396
|
| `PICK_MAINDECK_RATE` | `"pick_maindeck_rate"` | `DRAFT` | `PICK_SUM` | Dataset Column | Float |
|
|
@@ -63,8 +63,9 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
63
63
|
- Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
|
|
64
64
|
- Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
|
|
65
65
|
- Manages grouping and filtering by built-in and custom columns at the row level
|
|
66
|
-
- Provides
|
|
66
|
+
- Provides 122 explicitly specified, enumerated, documented column definitions
|
|
67
67
|
- Supports "Deck Color Data" aggregations with built-in column definitions.
|
|
68
|
+
- Lets you feed card metrics back into column definitions to support scientific workflows like MLE
|
|
68
69
|
- Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
|
|
69
70
|
- Downloads and manages public datasets from 17Lands
|
|
70
71
|
- Retrieves and models booster configuration and card data from [MTGJSON](https://mtgjson.com/)
|
|
@@ -76,7 +77,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
76
77
|
|
|
77
78
|
## summon
|
|
78
79
|
|
|
79
|
-
`summon` takes
|
|
80
|
+
`summon` takes five optional arguments, allowing a fully declarative specification of your desired analysis. Basic functionality not provided by this api can often be managed by simple chained calls using the polars API, e.g. sorting and post-agg filtering.
|
|
80
81
|
- `columns` specifies the desired output columns
|
|
81
82
|
```python
|
|
82
83
|
>>> spells.summon('DSK', columns=["num_gp", "pct_gp", "gp_wr", "gp_wr_z"])
|
|
@@ -122,7 +123,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
122
123
|
```
|
|
123
124
|
- `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
|
|
124
125
|
```python
|
|
125
|
-
>>> from spells
|
|
126
|
+
>>> from spells import ColName
|
|
126
127
|
>>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
|
|
127
128
|
shape: (4, 2)
|
|
128
129
|
┌───────────────┬──────────┐
|
|
@@ -139,14 +140,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
139
140
|
- `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
|
|
140
141
|
```python
|
|
141
142
|
>>> import polars as pl
|
|
142
|
-
>>> from spells
|
|
143
|
-
>>>
|
|
144
|
-
|
|
145
|
-
...
|
|
146
|
-
...
|
|
147
|
-
...
|
|
148
|
-
...
|
|
149
|
-
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=
|
|
143
|
+
>>> from spells import ColSpec, ColType
|
|
144
|
+
>>> ext = {
|
|
145
|
+
... 'deq_base': ColSpec(
|
|
146
|
+
... col_type=ColType.AGG,
|
|
147
|
+
... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
|
|
148
|
+
... )
|
|
149
|
+
... }
|
|
150
|
+
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
150
151
|
... .filter(pl.col('deq_base').is_finite())
|
|
151
152
|
... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
|
|
152
153
|
... .sort('deq_base', descending=True)
|
|
@@ -169,8 +170,32 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
169
170
|
│ Oblivious Bookworm ┆ 0.028605 ┆ uncommon ┆ GU │
|
|
170
171
|
└──────────────────────────┴──────────┴──────────┴───────┘
|
|
171
172
|
```
|
|
172
|
-
|
|
173
|
-
|
|
173
|
+
- `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
|
|
174
|
+
```python
|
|
175
|
+
>>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
|
|
176
|
+
>>> ext = {
|
|
177
|
+
... 'picked_deq_base': ColSpec(
|
|
178
|
+
... col_type=ColType.PICK_SUM,
|
|
179
|
+
... expr=lambda name, card_context: card_context[name]['deq_base']
|
|
180
|
+
... ),
|
|
181
|
+
... 'picked_deq_base_avg': ColSpec(
|
|
182
|
+
... col_type=ColType.AGG,
|
|
183
|
+
... expr=pl.col('picked_deq_base') / pl.col('num_taken')
|
|
184
|
+
... ),
|
|
185
|
+
... }
|
|
186
|
+
>>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
|
|
187
|
+
shape: (4, 2)
|
|
188
|
+
┌───────────────┬─────────────────────┐
|
|
189
|
+
│ player_cohort ┆ picked_deq_base_avg │
|
|
190
|
+
│ --- ┆ --- │
|
|
191
|
+
│ str ┆ f64 │
|
|
192
|
+
╞═══════════════╪═════════════════════╡
|
|
193
|
+
│ Bottom ┆ 0.004826 │
|
|
194
|
+
│ Middle ┆ 0.00532 │
|
|
195
|
+
│ Other ┆ 0.004895 │
|
|
196
|
+
│ Top ┆ 0.005659 │
|
|
197
|
+
└───────────────┴─────────────────────┘
|
|
198
|
+
```
|
|
174
199
|
|
|
175
200
|
## Installation
|
|
176
201
|
|
|
@@ -212,7 +237,7 @@ Spells caches the results of expensive aggregations in the local file system as
|
|
|
212
237
|
|
|
213
238
|
### Memory Usage
|
|
214
239
|
|
|
215
|
-
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor
|
|
240
|
+
One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
|
|
216
241
|
|
|
217
242
|
When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
|
|
218
243
|
|
|
@@ -278,25 +303,26 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
|
|
|
278
303
|
- `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
|
|
279
304
|
- `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
|
|
280
305
|
|
|
281
|
-
- extensions: a
|
|
306
|
+
- extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the built-in column definitions described below.
|
|
307
|
+
|
|
308
|
+
- card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
|
|
282
309
|
|
|
283
310
|
- read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
|
|
284
311
|
|
|
285
312
|
### Enums
|
|
286
313
|
|
|
287
314
|
```python
|
|
288
|
-
from spells
|
|
315
|
+
from spells import ColName, ColType
|
|
289
316
|
```
|
|
290
317
|
|
|
291
318
|
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
292
319
|
|
|
293
|
-
###
|
|
320
|
+
### ColSpec
|
|
294
321
|
|
|
295
322
|
```python
|
|
296
|
-
from spells
|
|
323
|
+
from spells import ColSpec
|
|
297
324
|
|
|
298
|
-
|
|
299
|
-
name: str,
|
|
325
|
+
ColSpec(
|
|
300
326
|
col_type: ColType,
|
|
301
327
|
expr: pl.Expr | Callable[..., pl.Expr] | None = None,
|
|
302
328
|
version: str | None = None
|
|
@@ -307,8 +333,6 @@ Used to define extensions in `summon`
|
|
|
307
333
|
|
|
308
334
|
#### parameters
|
|
309
335
|
|
|
310
|
-
- `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
|
|
311
|
-
|
|
312
336
|
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
|
|
313
337
|
and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
|
|
314
338
|
summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
|
|
@@ -317,7 +341,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
|
|
|
317
341
|
- For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
|
|
318
342
|
- `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
|
|
319
343
|
- `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
|
|
320
|
-
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions
|
|
344
|
+
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
|
|
321
345
|
|
|
322
346
|
- `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
|
|
323
347
|
for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
|
|
@@ -355,6 +379,7 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
355
379
|
| `PICK_NUMBER` | `"pick_number"` | `DRAFT` | `FILTER_ONLY` | Dataset Column | Int |
|
|
356
380
|
| `PICK_NUM` | `"pick_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
|
|
357
381
|
| `TAKEN_AT` | `"taken_at"` | `DRAFT` | `PICK_SUM` | Summable alias of `PICK_NUM` | Int |
|
|
382
|
+
| `NUM_DRAFTS` | `"num_drafts"` | `DRAFT` | `PICK_SUM` | | Int |
|
|
358
383
|
| `NUM_TAKEN` | `"num_taken"` | `DRAFT` | `PICK_SUM` | Sum 1 over rows | Int |
|
|
359
384
|
| `PICK` | `"pick"` | `DRAFT` | `FILTER_ONLY` | Dataset Column, joined as "name" | String |
|
|
360
385
|
| `PICK_MAINDECK_RATE` | `"pick_maindeck_rate"` | `DRAFT` | `PICK_SUM` | Dataset Column | Float |
|