spells-mtg 0.3.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spells-mtg might be problematic. Click here for more details.
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/PKG-INFO +77 -49
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/README.md +76 -48
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/pyproject.toml +1 -1
- spells_mtg-0.5.0/spells/__init__.py +5 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/columns.py +173 -254
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/draft_data.py +126 -79
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/enums.py +1 -1
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/manifest.py +51 -33
- spells_mtg-0.3.1/spells/__init__.py +0 -5
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/LICENSE +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/cache.py +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/cards.py +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/external.py +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/filter.py +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/spells/schema.py +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/tests/__init__.py +0 -0
- {spells_mtg-0.3.1 → spells_mtg-0.5.0}/tests/filter_test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spells-mtg
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: analysis of 17Lands.com public datasets
|
|
5
5
|
Author-Email: Joel Barnes <oelarnes@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -74,8 +74,9 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
74
74
|
- Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
|
|
75
75
|
- Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
|
|
76
76
|
- Manages grouping and filtering by built-in and custom columns at the row level
|
|
77
|
-
- Provides
|
|
77
|
+
- Provides 122 explicitly specified, enumerated, documented column definitions
|
|
78
78
|
- Supports "Deck Color Data" aggregations with built-in column definitions.
|
|
79
|
+
- Lets you feed card metrics back into column definitions to support scientific workflows like MLE
|
|
79
80
|
- Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
|
|
80
81
|
- Downloads and manages public datasets from 17Lands
|
|
81
82
|
- Retrieves and models booster configuration and card data from [MTGJSON](https://mtgjson.com/)
|
|
@@ -87,7 +88,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
87
88
|
|
|
88
89
|
## summon
|
|
89
90
|
|
|
90
|
-
`summon` takes
|
|
91
|
+
`summon` takes five optional arguments, allowing a fully declarative specification of your desired analysis. Basic functionality not provided by this api can often be managed by simple chained calls using the polars API, e.g. sorting and post-agg filtering.
|
|
91
92
|
- `columns` specifies the desired output columns
|
|
92
93
|
```python
|
|
93
94
|
>>> spells.summon('DSK', columns=["num_gp", "pct_gp", "gp_wr", "gp_wr_z"])
|
|
@@ -112,24 +113,24 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
112
113
|
```
|
|
113
114
|
- `group_by` specifies the grouping by one or more columns. By default, group by card names, but optionally group by any of a large set of fundamental and derived columns, including card attributes and your own custom extension.
|
|
114
115
|
```python
|
|
115
|
-
>>>
|
|
116
|
-
shape: (10,
|
|
117
|
-
|
|
118
|
-
│ main_colors ┆ num_won ┆ num_games ┆ game_wr │
|
|
119
|
-
│ --- ┆ --- ┆ --- ┆ --- │
|
|
120
|
-
│ str ┆ u32 ┆ u32 ┆ f64 │
|
|
121
|
-
|
|
122
|
-
│ BG ┆ 85022 ┆ 152863 ┆ 0.556197 │
|
|
123
|
-
│ BR ┆ 45900 ┆ 81966 ┆ 0.559988 │
|
|
124
|
-
│ RG ┆ 34641 ┆ 64428 ┆ 0.53767 │
|
|
125
|
-
│ UB ┆ 30922 ┆ 57698 ┆ 0.535928 │
|
|
126
|
-
│ UG ┆ 59879 ┆ 109145 ┆ 0.548619 │
|
|
127
|
-
│ UR ┆ 19638 ┆ 38679 ┆ 0.507717 │
|
|
128
|
-
│ WB ┆ 59480 ┆ 107443 ┆ 0.553596 │
|
|
129
|
-
│ WG ┆ 76134 ┆ 136832 ┆ 0.556405 │
|
|
130
|
-
│ WR ┆ 49712 ┆ 91224 ┆ 0.544944 │
|
|
131
|
-
│ WU ┆ 16483 ┆ 31450 ┆ 0.524102 │
|
|
132
|
-
|
|
116
|
+
>>> summon('BLB', columns=["num_won", "num_games", "game_wr", "deck_mana_value_avg"], group_by=["main_colors"], filter_spec={"num_colors": 2})
|
|
117
|
+
shape: (10, 5)
|
|
118
|
+
┌─────────────┬─────────┬───────────┬──────────┬─────────────────────┐
|
|
119
|
+
│ main_colors ┆ num_won ┆ num_games ┆ game_wr ┆ deck_mana_value_avg │
|
|
120
|
+
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
121
|
+
│ str ┆ u32 ┆ u32 ┆ f64 ┆ f64 │
|
|
122
|
+
╞═════════════╪═════════╪═══════════╪══════════╪═════════════════════╡
|
|
123
|
+
│ BG ┆ 85022 ┆ 152863 ┆ 0.556197 ┆ 2.862305 │
|
|
124
|
+
│ BR ┆ 45900 ┆ 81966 ┆ 0.559988 ┆ 2.76198 │
|
|
125
|
+
│ RG ┆ 34641 ┆ 64428 ┆ 0.53767 ┆ 2.852182 │
|
|
126
|
+
│ UB ┆ 30922 ┆ 57698 ┆ 0.535928 ┆ 3.10409 │
|
|
127
|
+
│ UG ┆ 59879 ┆ 109145 ┆ 0.548619 ┆ 2.861026 │
|
|
128
|
+
│ UR ┆ 19638 ┆ 38679 ┆ 0.507717 ┆ 2.908215 │
|
|
129
|
+
│ WB ┆ 59480 ┆ 107443 ┆ 0.553596 ┆ 2.9217 │
|
|
130
|
+
│ WG ┆ 76134 ┆ 136832 ┆ 0.556405 ┆ 2.721064 │
|
|
131
|
+
│ WR ┆ 49712 ┆ 91224 ┆ 0.544944 ┆ 2.5222 │
|
|
132
|
+
│ WU ┆ 16483 ┆ 31450 ┆ 0.524102 ┆ 2.930967 │
|
|
133
|
+
└─────────────┴─────────┴───────────┴──────────┴─────────────────────┘
|
|
133
134
|
```
|
|
134
135
|
- `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
|
|
135
136
|
```python
|
|
@@ -150,14 +151,13 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
150
151
|
- `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
|
|
151
152
|
```python
|
|
152
153
|
>>> import polars as pl
|
|
153
|
-
>>> from spells.columns import
|
|
154
|
+
>>> from spells.columns import ColSpec
|
|
154
155
|
>>> from spells.enums import ColType, View, ColName
|
|
155
|
-
>>> ext =
|
|
156
|
+
>>> ext = ColSpec(
|
|
156
157
|
... name='deq_base',
|
|
157
158
|
... col_type=ColType.AGG,
|
|
158
159
|
... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
|
|
159
|
-
...
|
|
160
|
-
...)
|
|
160
|
+
... )
|
|
161
161
|
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
|
|
162
162
|
... .filter(pl.col('deq_base').is_finite())
|
|
163
163
|
... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
|
|
@@ -181,8 +181,34 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
181
181
|
│ Oblivious Bookworm ┆ 0.028605 ┆ uncommon ┆ GU │
|
|
182
182
|
└──────────────────────────┴──────────┴──────────┴───────┘
|
|
183
183
|
```
|
|
184
|
-
|
|
185
|
-
|
|
184
|
+
- `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
|
|
185
|
+
```python
|
|
186
|
+
>>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
|
|
187
|
+
>>> ext = [
|
|
188
|
+
... ColSpec(
|
|
189
|
+
... name='picked_deq_base',
|
|
190
|
+
... col_type=ColType.PICK_SUM,
|
|
191
|
+
... expr=lambda name, card_context: card_context[name]['deq_base']
|
|
192
|
+
... ),
|
|
193
|
+
... ColSpec(
|
|
194
|
+
... name='picked_deq_base_avg',
|
|
195
|
+
... col_type=ColType.AGG,
|
|
196
|
+
... expr=pl.col('picked_deq_base') / pl.col('num_taken')
|
|
197
|
+
... ),
|
|
198
|
+
... ]
|
|
199
|
+
>>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
|
|
200
|
+
shape: (4, 2)
|
|
201
|
+
┌───────────────┬─────────────────────┐
|
|
202
|
+
│ player_cohort ┆ picked_deq_base_avg │
|
|
203
|
+
│ --- ┆ --- │
|
|
204
|
+
│ str ┆ f64 │
|
|
205
|
+
╞═══════════════╪═════════════════════╡
|
|
206
|
+
│ Bottom ┆ 0.004826 │
|
|
207
|
+
│ Middle ┆ 0.00532 │
|
|
208
|
+
│ Other ┆ 0.004895 │
|
|
209
|
+
│ Top ┆ 0.005659 │
|
|
210
|
+
└───────────────┴─────────────────────┘
|
|
211
|
+
```
|
|
186
212
|
|
|
187
213
|
## Installation
|
|
188
214
|
|
|
@@ -278,43 +304,40 @@ summon(
|
|
|
278
304
|
|
|
279
305
|
#### parameters
|
|
280
306
|
|
|
281
|
-
- columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`,
|
|
307
|
+
- columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. Min/Max/Unique
|
|
282
308
|
aggregations of non-numeric (or numeric) data types are not supported. If `None`, use a set of columns modeled on the commonly used values on 17Lands.com/card_data.
|
|
283
309
|
|
|
284
310
|
- group_by: a list of string or `ColName` values to display as grouped columns. Valid `ColTypes` are `GROUP_BY` and `CARD_ATTR`. By default, group by "name" (card name).
|
|
285
311
|
|
|
286
312
|
- filter_spec: a dictionary specifying a filter, using a small number of paradigms. Columns used must be in each base view ("draft" and "game") that the `columns` and `group_by` columns depend on, so
|
|
287
|
-
`AGG
|
|
313
|
+
`AGG` and `CARD_ATTR` columns are not valid. Functions of card attributes in the base views can be filtered on, see the documentation for `expr` for details. `NAME_SUM` columns are also not supported. Derived columns are supported. No filter is applied by default. Yes, I should rewrite it to use the mongo query language. The specification is best understood with examples:
|
|
288
314
|
|
|
289
315
|
- `{'player_cohort': 'Top'}` "player_cohort" value equals "Top".
|
|
290
316
|
- `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
|
|
291
317
|
- `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
|
|
292
318
|
|
|
293
|
-
- extensions: a list of `spells.columns.
|
|
319
|
+
- extensions: a list of `spells.columns.ColSpec` objects, which are appended to the built-in column definitions described below. A name not in the enum `ColName` can be used in this way if it is the name of a provided extension. Existing names can also be redefined using extensions.
|
|
294
320
|
|
|
295
321
|
- read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
|
|
296
322
|
|
|
297
323
|
### Enums
|
|
298
324
|
|
|
299
325
|
```python
|
|
300
|
-
from spells.enums import ColName, ColType
|
|
326
|
+
from spells.enums import ColName, ColType
|
|
301
327
|
```
|
|
302
328
|
|
|
303
|
-
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
329
|
+
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
304
330
|
|
|
305
|
-
###
|
|
331
|
+
### ColSpec
|
|
306
332
|
|
|
307
333
|
```python
|
|
308
|
-
from spells.columns import
|
|
334
|
+
from spells.columns import ColSpec
|
|
309
335
|
|
|
310
|
-
|
|
336
|
+
ColSpec(
|
|
311
337
|
name: str,
|
|
312
338
|
col_type: ColType,
|
|
313
|
-
expr: pl.Expr | None = None,
|
|
314
|
-
exprMap: Callable[[str], pl.Expr] | None = None
|
|
315
|
-
dependencies: list[str] | None = None
|
|
339
|
+
expr: pl.Expr | Callable[..., pl.Expr] | None = None,
|
|
316
340
|
version: str | None = None
|
|
317
|
-
views: list[View] | None = None,
|
|
318
341
|
)
|
|
319
342
|
```
|
|
320
343
|
|
|
@@ -324,19 +347,19 @@ Used to define extensions in `summon`
|
|
|
324
347
|
|
|
325
348
|
- `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
|
|
326
349
|
|
|
327
|
-
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`,
|
|
328
|
-
|
|
329
|
-
|
|
350
|
+
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
|
|
351
|
+
and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
|
|
352
|
+
summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
|
|
330
353
|
|
|
331
|
-
- `
|
|
332
|
-
|
|
333
|
-
- `
|
|
354
|
+
- `expr`: A polars expression or function returning a polars expression giving the derivation of the column value at the first level where it is defined.
|
|
355
|
+
- For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
|
|
356
|
+
- `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
|
|
357
|
+
- `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
|
|
358
|
+
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions. See example notebooks for more details.
|
|
334
359
|
|
|
335
360
|
- `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
|
|
336
361
|
for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
|
|
337
362
|
|
|
338
|
-
- `views`: Not needed for custom columns.
|
|
339
|
-
|
|
340
363
|
### Columns
|
|
341
364
|
|
|
342
365
|
A table of all included columns. Columns can be referenced by enum or by string value in arguments and filter specs. The string value is always the lowercase version of the enum attribute.
|
|
@@ -370,6 +393,7 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
370
393
|
| `PICK_NUMBER` | `"pick_number"` | `DRAFT` | `FILTER_ONLY` | Dataset Column | Int |
|
|
371
394
|
| `PICK_NUM` | `"pick_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
|
|
372
395
|
| `TAKEN_AT` | `"taken_at"` | `DRAFT` | `PICK_SUM` | Summable alias of `PICK_NUM` | Int |
|
|
396
|
+
| `NUM_DRAFTS` | `"num_drafts"` | `DRAFT` | `PICK_SUM` | | Int |
|
|
373
397
|
| `NUM_TAKEN` | `"num_taken"` | `DRAFT` | `PICK_SUM` | Sum 1 over rows | Int |
|
|
374
398
|
| `PICK` | `"pick"` | `DRAFT` | `FILTER_ONLY` | Dataset Column, joined as "name" | String |
|
|
375
399
|
| `PICK_MAINDECK_RATE` | `"pick_maindeck_rate"` | `DRAFT` | `PICK_SUM` | Dataset Column | Float |
|
|
@@ -424,7 +448,9 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
424
448
|
| `CARD_TYPE` | `"card_type"` | `CARD` | `CARD_ATTR` | | String |
|
|
425
449
|
| `SUBTYPE` | `"subtype"` | `CARD` | `CARD_ATTR` | | String |
|
|
426
450
|
| `MANA_VALUE` | `"mana_value"` | `CARD` | `CARD_ATTR` | | Float |
|
|
427
|
-
| `DECK_MANA_VALUE` | `"deck_mana_value"` | | `
|
|
451
|
+
| `DECK_MANA_VALUE` | `"deck_mana_value"` | | `NAME_SUM` | `DECK` * `MANA_VALUE` | Float |
|
|
452
|
+
| `DECK_LANDS` | `"deck_lands"` | | `NAME_SUM` | Number of lands in deck | Float |
|
|
453
|
+
| `DECK_SPELLS` | `"deck_spells"` | | `NAME_SUM` | Number of spells in deck | Float |
|
|
428
454
|
| `MANA_COST` | `"mana_cost"` | `CARD` | `CARD_ATTR` | | String |
|
|
429
455
|
| `POWER` | `"power"` | `CARD` | `CARD_ATTR` | | Float |
|
|
430
456
|
| `TOUGHNESS` | `"toughness"` | `CARD` | `CARD_ATTR` | | Float |
|
|
@@ -463,7 +489,9 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
463
489
|
| `GIH_WR_VAR` | `"gih_wr_var"` | | `AGG` | Game-weighted Variance | Float |
|
|
464
490
|
| `GIH_WR_STDEV` | `"gih_wr_stdev"` | | `AGG` | Sqrt of `GIH_WR_VAR` | Float |
|
|
465
491
|
| `GIH_WR_Z` | `"gih_wr_z"` | | `AGG` |`GIH_WR_EXCESS` / `GIH_WR_STDEV` | Float |
|
|
466
|
-
| `DECK_MANA_VALUE_AVG` | `"deck_mana_value_avg"` | | `AGG` | `DECK_MANA_VALUE ` / `
|
|
492
|
+
| `DECK_MANA_VALUE_AVG` | `"deck_mana_value_avg"` | | `AGG` | `DECK_MANA_VALUE ` / `DECK_SPELLS` | Float |
|
|
493
|
+
| `DECK_LANDS_AVG` | `"deck_lands_avg"` | | `AGG` | `DECK_LANDS ` / `NUM_GAMES` | Float |
|
|
494
|
+
| `DECK_SPELLS_AVG` | `"deck_spells_avg"` | | `AGG` | `DECK_SPELLS ` / `NUM_GAMES` | Float |
|
|
467
495
|
|
|
468
496
|
# Roadmap to 1.0
|
|
469
497
|
|
|
@@ -63,8 +63,9 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
63
63
|
- Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
|
|
64
64
|
- Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
|
|
65
65
|
- Manages grouping and filtering by built-in and custom columns at the row level
|
|
66
|
-
- Provides
|
|
66
|
+
- Provides 122 explicitly specified, enumerated, documented column definitions
|
|
67
67
|
- Supports "Deck Color Data" aggregations with built-in column definitions.
|
|
68
|
+
- Lets you feed card metrics back into column definitions to support scientific workflows like MLE
|
|
68
69
|
- Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
|
|
69
70
|
- Downloads and manages public datasets from 17Lands
|
|
70
71
|
- Retrieves and models booster configuration and card data from [MTGJSON](https://mtgjson.com/)
|
|
@@ -76,7 +77,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
76
77
|
|
|
77
78
|
## summon
|
|
78
79
|
|
|
79
|
-
`summon` takes
|
|
80
|
+
`summon` takes five optional arguments, allowing a fully declarative specification of your desired analysis. Basic functionality not provided by this api can often be managed by simple chained calls using the polars API, e.g. sorting and post-agg filtering.
|
|
80
81
|
- `columns` specifies the desired output columns
|
|
81
82
|
```python
|
|
82
83
|
>>> spells.summon('DSK', columns=["num_gp", "pct_gp", "gp_wr", "gp_wr_z"])
|
|
@@ -101,24 +102,24 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
101
102
|
```
|
|
102
103
|
- `group_by` specifies the grouping by one or more columns. By default, group by card names, but optionally group by any of a large set of fundamental and derived columns, including card attributes and your own custom extension.
|
|
103
104
|
```python
|
|
104
|
-
>>>
|
|
105
|
-
shape: (10,
|
|
106
|
-
|
|
107
|
-
│ main_colors ┆ num_won ┆ num_games ┆ game_wr │
|
|
108
|
-
│ --- ┆ --- ┆ --- ┆ --- │
|
|
109
|
-
│ str ┆ u32 ┆ u32 ┆ f64 │
|
|
110
|
-
|
|
111
|
-
│ BG ┆ 85022 ┆ 152863 ┆ 0.556197 │
|
|
112
|
-
│ BR ┆ 45900 ┆ 81966 ┆ 0.559988 │
|
|
113
|
-
│ RG ┆ 34641 ┆ 64428 ┆ 0.53767 │
|
|
114
|
-
│ UB ┆ 30922 ┆ 57698 ┆ 0.535928 │
|
|
115
|
-
│ UG ┆ 59879 ┆ 109145 ┆ 0.548619 │
|
|
116
|
-
│ UR ┆ 19638 ┆ 38679 ┆ 0.507717 │
|
|
117
|
-
│ WB ┆ 59480 ┆ 107443 ┆ 0.553596 │
|
|
118
|
-
│ WG ┆ 76134 ┆ 136832 ┆ 0.556405 │
|
|
119
|
-
│ WR ┆ 49712 ┆ 91224 ┆ 0.544944 │
|
|
120
|
-
│ WU ┆ 16483 ┆ 31450 ┆ 0.524102 │
|
|
121
|
-
|
|
105
|
+
>>> summon('BLB', columns=["num_won", "num_games", "game_wr", "deck_mana_value_avg"], group_by=["main_colors"], filter_spec={"num_colors": 2})
|
|
106
|
+
shape: (10, 5)
|
|
107
|
+
┌─────────────┬─────────┬───────────┬──────────┬─────────────────────┐
|
|
108
|
+
│ main_colors ┆ num_won ┆ num_games ┆ game_wr ┆ deck_mana_value_avg │
|
|
109
|
+
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
110
|
+
│ str ┆ u32 ┆ u32 ┆ f64 ┆ f64 │
|
|
111
|
+
╞═════════════╪═════════╪═══════════╪══════════╪═════════════════════╡
|
|
112
|
+
│ BG ┆ 85022 ┆ 152863 ┆ 0.556197 ┆ 2.862305 │
|
|
113
|
+
│ BR ┆ 45900 ┆ 81966 ┆ 0.559988 ┆ 2.76198 │
|
|
114
|
+
│ RG ┆ 34641 ┆ 64428 ┆ 0.53767 ┆ 2.852182 │
|
|
115
|
+
│ UB ┆ 30922 ┆ 57698 ┆ 0.535928 ┆ 3.10409 │
|
|
116
|
+
│ UG ┆ 59879 ┆ 109145 ┆ 0.548619 ┆ 2.861026 │
|
|
117
|
+
│ UR ┆ 19638 ┆ 38679 ┆ 0.507717 ┆ 2.908215 │
|
|
118
|
+
│ WB ┆ 59480 ┆ 107443 ┆ 0.553596 ┆ 2.9217 │
|
|
119
|
+
│ WG ┆ 76134 ┆ 136832 ┆ 0.556405 ┆ 2.721064 │
|
|
120
|
+
│ WR ┆ 49712 ┆ 91224 ┆ 0.544944 ┆ 2.5222 │
|
|
121
|
+
│ WU ┆ 16483 ┆ 31450 ┆ 0.524102 ┆ 2.930967 │
|
|
122
|
+
└─────────────┴─────────┴───────────┴──────────┴─────────────────────┘
|
|
122
123
|
```
|
|
123
124
|
- `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
|
|
124
125
|
```python
|
|
@@ -139,14 +140,13 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
139
140
|
- `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
|
|
140
141
|
```python
|
|
141
142
|
>>> import polars as pl
|
|
142
|
-
>>> from spells.columns import
|
|
143
|
+
>>> from spells.columns import ColSpec
|
|
143
144
|
>>> from spells.enums import ColType, View, ColName
|
|
144
|
-
>>> ext =
|
|
145
|
+
>>> ext = ColSpec(
|
|
145
146
|
... name='deq_base',
|
|
146
147
|
... col_type=ColType.AGG,
|
|
147
148
|
... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
|
|
148
|
-
...
|
|
149
|
-
...)
|
|
149
|
+
... )
|
|
150
150
|
>>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
|
|
151
151
|
... .filter(pl.col('deq_base').is_finite())
|
|
152
152
|
... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
|
|
@@ -170,8 +170,34 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
|
|
|
170
170
|
│ Oblivious Bookworm ┆ 0.028605 ┆ uncommon ┆ GU │
|
|
171
171
|
└──────────────────────────┴──────────┴──────────┴───────┘
|
|
172
172
|
```
|
|
173
|
-
|
|
174
|
-
|
|
173
|
+
- `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
|
|
174
|
+
```python
|
|
175
|
+
>>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
|
|
176
|
+
>>> ext = [
|
|
177
|
+
... ColSpec(
|
|
178
|
+
... name='picked_deq_base',
|
|
179
|
+
... col_type=ColType.PICK_SUM,
|
|
180
|
+
... expr=lambda name, card_context: card_context[name]['deq_base']
|
|
181
|
+
... ),
|
|
182
|
+
... ColSpec(
|
|
183
|
+
... name='picked_deq_base_avg',
|
|
184
|
+
... col_type=ColType.AGG,
|
|
185
|
+
... expr=pl.col('picked_deq_base') / pl.col('num_taken')
|
|
186
|
+
... ),
|
|
187
|
+
... ]
|
|
188
|
+
>>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
|
|
189
|
+
shape: (4, 2)
|
|
190
|
+
┌───────────────┬─────────────────────┐
|
|
191
|
+
│ player_cohort ┆ picked_deq_base_avg │
|
|
192
|
+
│ --- ┆ --- │
|
|
193
|
+
│ str ┆ f64 │
|
|
194
|
+
╞═══════════════╪═════════════════════╡
|
|
195
|
+
│ Bottom ┆ 0.004826 │
|
|
196
|
+
│ Middle ┆ 0.00532 │
|
|
197
|
+
│ Other ┆ 0.004895 │
|
|
198
|
+
│ Top ┆ 0.005659 │
|
|
199
|
+
└───────────────┴─────────────────────┘
|
|
200
|
+
```
|
|
175
201
|
|
|
176
202
|
## Installation
|
|
177
203
|
|
|
@@ -267,43 +293,40 @@ summon(
|
|
|
267
293
|
|
|
268
294
|
#### parameters
|
|
269
295
|
|
|
270
|
-
- columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`,
|
|
296
|
+
- columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. Min/Max/Unique
|
|
271
297
|
aggregations of non-numeric (or numeric) data types are not supported. If `None`, use a set of columns modeled on the commonly used values on 17Lands.com/card_data.
|
|
272
298
|
|
|
273
299
|
- group_by: a list of string or `ColName` values to display as grouped columns. Valid `ColTypes` are `GROUP_BY` and `CARD_ATTR`. By default, group by "name" (card name).
|
|
274
300
|
|
|
275
301
|
- filter_spec: a dictionary specifying a filter, using a small number of paradigms. Columns used must be in each base view ("draft" and "game") that the `columns` and `group_by` columns depend on, so
|
|
276
|
-
`AGG
|
|
302
|
+
`AGG` and `CARD_ATTR` columns are not valid. Functions of card attributes in the base views can be filtered on, see the documentation for `expr` for details. `NAME_SUM` columns are also not supported. Derived columns are supported. No filter is applied by default. Yes, I should rewrite it to use the mongo query language. The specification is best understood with examples:
|
|
277
303
|
|
|
278
304
|
- `{'player_cohort': 'Top'}` "player_cohort" value equals "Top".
|
|
279
305
|
- `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
|
|
280
306
|
- `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
|
|
281
307
|
|
|
282
|
-
- extensions: a list of `spells.columns.
|
|
308
|
+
- extensions: a list of `spells.columns.ColSpec` objects, which are appended to the built-in column definitions described below. A name not in the enum `ColName` can be used in this way if it is the name of a provided extension. Existing names can also be redefined using extensions.
|
|
283
309
|
|
|
284
310
|
- read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
|
|
285
311
|
|
|
286
312
|
### Enums
|
|
287
313
|
|
|
288
314
|
```python
|
|
289
|
-
from spells.enums import ColName, ColType
|
|
315
|
+
from spells.enums import ColName, ColType
|
|
290
316
|
```
|
|
291
317
|
|
|
292
|
-
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
318
|
+
Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
|
|
293
319
|
|
|
294
|
-
###
|
|
320
|
+
### ColSpec
|
|
295
321
|
|
|
296
322
|
```python
|
|
297
|
-
from spells.columns import
|
|
323
|
+
from spells.columns import ColSpec
|
|
298
324
|
|
|
299
|
-
|
|
325
|
+
ColSpec(
|
|
300
326
|
name: str,
|
|
301
327
|
col_type: ColType,
|
|
302
|
-
expr: pl.Expr | None = None,
|
|
303
|
-
exprMap: Callable[[str], pl.Expr] | None = None
|
|
304
|
-
dependencies: list[str] | None = None
|
|
328
|
+
expr: pl.Expr | Callable[..., pl.Expr] | None = None,
|
|
305
329
|
version: str | None = None
|
|
306
|
-
views: list[View] | None = None,
|
|
307
330
|
)
|
|
308
331
|
```
|
|
309
332
|
|
|
@@ -313,19 +336,19 @@ Used to define extensions in `summon`
|
|
|
313
336
|
|
|
314
337
|
- `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
|
|
315
338
|
|
|
316
|
-
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`,
|
|
317
|
-
|
|
318
|
-
|
|
339
|
+
- `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
|
|
340
|
+
and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
|
|
341
|
+
summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
|
|
319
342
|
|
|
320
|
-
- `
|
|
321
|
-
|
|
322
|
-
- `
|
|
343
|
+
- `expr`: A polars expression or function returning a polars expression giving the derivation of the column value at the first level where it is defined.
|
|
344
|
+
- For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
|
|
345
|
+
- `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
|
|
346
|
+
- `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
|
|
347
|
+
- The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions. See example notebooks for more details.
|
|
323
348
|
|
|
324
349
|
- `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
|
|
325
350
|
for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
|
|
326
351
|
|
|
327
|
-
- `views`: Not needed for custom columns.
|
|
328
|
-
|
|
329
352
|
### Columns
|
|
330
353
|
|
|
331
354
|
A table of all included columns. Columns can be referenced by enum or by string value in arguments and filter specs. The string value is always the lowercase version of the enum attribute.
|
|
@@ -359,6 +382,7 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
359
382
|
| `PICK_NUMBER` | `"pick_number"` | `DRAFT` | `FILTER_ONLY` | Dataset Column | Int |
|
|
360
383
|
| `PICK_NUM` | `"pick_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
|
|
361
384
|
| `TAKEN_AT` | `"taken_at"` | `DRAFT` | `PICK_SUM` | Summable alias of `PICK_NUM` | Int |
|
|
385
|
+
| `NUM_DRAFTS` | `"num_drafts"` | `DRAFT` | `PICK_SUM` | | Int |
|
|
362
386
|
| `NUM_TAKEN` | `"num_taken"` | `DRAFT` | `PICK_SUM` | Sum 1 over rows | Int |
|
|
363
387
|
| `PICK` | `"pick"` | `DRAFT` | `FILTER_ONLY` | Dataset Column, joined as "name" | String |
|
|
364
388
|
| `PICK_MAINDECK_RATE` | `"pick_maindeck_rate"` | `DRAFT` | `PICK_SUM` | Dataset Column | Float |
|
|
@@ -413,7 +437,9 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
413
437
|
| `CARD_TYPE` | `"card_type"` | `CARD` | `CARD_ATTR` | | String |
|
|
414
438
|
| `SUBTYPE` | `"subtype"` | `CARD` | `CARD_ATTR` | | String |
|
|
415
439
|
| `MANA_VALUE` | `"mana_value"` | `CARD` | `CARD_ATTR` | | Float |
|
|
416
|
-
| `DECK_MANA_VALUE` | `"deck_mana_value"` | | `
|
|
440
|
+
| `DECK_MANA_VALUE` | `"deck_mana_value"` | | `NAME_SUM` | `DECK` * `MANA_VALUE` | Float |
|
|
441
|
+
| `DECK_LANDS` | `"deck_lands"` | | `NAME_SUM` | Number of lands in deck | Float |
|
|
442
|
+
| `DECK_SPELLS` | `"deck_spells"` | | `NAME_SUM` | Number of spells in deck | Float |
|
|
417
443
|
| `MANA_COST` | `"mana_cost"` | `CARD` | `CARD_ATTR` | | String |
|
|
418
444
|
| `POWER` | `"power"` | `CARD` | `CARD_ATTR` | | Float |
|
|
419
445
|
| `TOUGHNESS` | `"toughness"` | `CARD` | `CARD_ATTR` | | Float |
|
|
@@ -452,7 +478,9 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
452
478
|
| `GIH_WR_VAR` | `"gih_wr_var"` | | `AGG` | Game-weighted Variance | Float |
|
|
453
479
|
| `GIH_WR_STDEV` | `"gih_wr_stdev"` | | `AGG` | Sqrt of `GIH_WR_VAR` | Float |
|
|
454
480
|
| `GIH_WR_Z` | `"gih_wr_z"` | | `AGG` |`GIH_WR_EXCESS` / `GIH_WR_STDEV` | Float |
|
|
455
|
-
| `DECK_MANA_VALUE_AVG` | `"deck_mana_value_avg"` | | `AGG` | `DECK_MANA_VALUE ` / `
|
|
481
|
+
| `DECK_MANA_VALUE_AVG` | `"deck_mana_value_avg"` | | `AGG` | `DECK_MANA_VALUE ` / `DECK_SPELLS` | Float |
|
|
482
|
+
| `DECK_LANDS_AVG` | `"deck_lands_avg"` | | `AGG` | `DECK_LANDS ` / `NUM_GAMES` | Float |
|
|
483
|
+
| `DECK_SPELLS_AVG` | `"deck_spells_avg"` | | `AGG` | `DECK_SPELLS ` / `NUM_GAMES` | Float |
|
|
456
484
|
|
|
457
485
|
# Roadmap to 1.0
|
|
458
486
|
|