spells-mtg 0.5.0__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: analysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -134,7 +134,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
134
134
  ```
135
135
  - `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
136
136
  ```python
137
- >>> from spells.enums import ColName
137
+ >>> from spells import ColName
138
138
  >>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
139
139
  shape: (4, 2)
140
140
  ┌───────────────┬──────────┐
@@ -151,14 +151,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
151
151
  - `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
152
152
  ```python
153
153
  >>> import polars as pl
154
- >>> from spells.columns import ColSpec
155
- >>> from spells.enums import ColType, View, ColName
156
- >>> ext = ColSpec(
157
- ... name='deq_base',
158
- ... col_type=ColType.AGG,
159
- ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
160
- ... )
161
- >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
154
+ >>> from spells import ColSpec, ColType
155
+ >>> ext = {
156
+ ... 'deq_base': ColSpec(
157
+ ... col_type=ColType.AGG,
158
+ ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
159
+ ... )
160
+ ... }
161
+ >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
162
162
  ... .filter(pl.col('deq_base').is_finite())
163
163
  ... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
164
164
  ... .sort('deq_base', descending=True)
@@ -184,18 +184,16 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
184
184
  - `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
185
185
  ```python
186
186
  >>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
187
- >>> ext = [
188
- ... ColSpec(
189
- ... name='picked_deq_base',
187
+ >>> ext = {
188
+ ... 'picked_deq_base': ColSpec(
190
189
  ... col_type=ColType.PICK_SUM,
191
190
  ... expr=lambda name, card_context: card_context[name]['deq_base']
192
191
  ... ),
193
- ... ColSpec(
194
- ... name='picked_deq_base_avg',
192
+ ... 'picked_deq_base_avg': ColSpec(
195
193
  ... col_type=ColType.AGG,
196
194
  ... expr=pl.col('picked_deq_base') / pl.col('num_taken')
197
195
  ... ),
198
- ... ]
196
+ ... }
199
197
  >>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
200
198
  shape: (4, 2)
201
199
  ┌───────────────┬─────────────────────┐
@@ -250,7 +248,7 @@ Spells caches the results of expensive aggregations in the local file system as
250
248
 
251
249
  ### Memory Usage
252
250
 
253
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor, including poor garbage collection. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than memory computations is on my roadmap, so check back periodically to see if I've made any progress.
251
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
254
252
 
255
253
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
256
254
 
@@ -296,7 +294,8 @@ summon(
296
294
  columns: list[str] | None = None,
297
295
  group_by: list[str] | None = None,
298
296
  filter_spec: dict | None = None,
299
- extensions: list[str] | None = None,
297
+ extensions: dict[str, ColSpec] | None = None,
298
+ card_context: pl.DataFrame | dict[str, dict[str, Any]] | None = None,
300
299
  read_cache: bool = True,
301
300
  write_cache: bool = True,
302
301
  ) -> polars.DataFrame
@@ -316,14 +315,16 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
316
315
  - `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
317
316
  - `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
318
317
 
319
- - extensions: a list of `spells.columns.ColSpec` objects, which are appended to the definitions built-in columns described below. A name not in the enum `ColName` can be used in this way if it is the name of a provided extension. Existing names can also be redefined using extensions.
318
+ - extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the definitions of the built-in columns described below.
319
+
320
+ - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
320
321
 
321
322
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
322
323
 
323
324
  ### Enums
324
325
 
325
326
  ```python
326
- from spells.enums import ColName, ColType
327
+ from spells import ColName, ColType
327
328
  ```
328
329
 
329
330
  Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
@@ -331,10 +332,9 @@ Recommended to import `ColName` for any usage of `summon`, and to import `ColTyp
331
332
  ### ColSpec
332
333
 
333
334
  ```python
334
- from spells.columns import ColSpec
335
+ from spells import ColSpec
335
336
 
336
337
  ColSpec(
337
- name: str,
338
338
  col_type: ColType,
339
339
  expr: pl.Expr | Callable[..., pl.Expr] | None = None,
340
340
  version: str | None = None
@@ -345,8 +345,6 @@ Used to define extensions in `summon`
345
345
 
346
346
  #### parameters
347
347
 
348
- - `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
349
-
350
348
  - `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
351
349
  and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
352
350
  summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
@@ -355,7 +353,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
355
353
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
356
354
  - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
357
355
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
358
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions. See example notebooks for more details.
356
+ - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
359
357
 
360
358
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
361
359
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -123,7 +123,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
123
123
  ```
124
124
  - `filter_spec` specifies a row-level filter for the dataset, using an intuitive custom query formulation
125
125
  ```python
126
- >>> from spells.enums import ColName
126
+ >>> from spells import ColName
127
127
  >>> spells.summon('BLB', columns=[ColName.GAME_WR], group_by=[ColName.PLAYER_COHORT], filter_spec={'lhs': ColName.NUM_MULLIGANS, 'op': '>', 'rhs': 0})
128
128
  shape: (4, 2)
129
129
  ┌───────────────┬──────────┐
@@ -140,14 +140,14 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
140
140
  - `extensions` allows for the specification of arbitrarily complex derived columns and aggregations, including custom columns built on top of custom columns.
141
141
  ```python
142
142
  >>> import polars as pl
143
- >>> from spells.columns import ColSpec
144
- >>> from spells.enums import ColType, View, ColName
145
- >>> ext = ColSpec(
146
- ... name='deq_base',
147
- ... col_type=ColType.AGG,
148
- ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
149
- ... )
150
- >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=[ext])
143
+ >>> from spells import ColSpec, ColType
144
+ >>> ext = {
145
+ ... 'deq_base': ColSpec(
146
+ ... col_type=ColType.AGG,
147
+ ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
148
+ ... )
149
+ ... }
150
+ >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
151
151
  ... .filter(pl.col('deq_base').is_finite())
152
152
  ... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
153
153
  ... .sort('deq_base', descending=True)
@@ -173,18 +173,16 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
173
173
  - `card_context` takes a name-indexed DataFrame or name-keyed dict and allows the construction of column definitions based on the results.
174
174
  ```python
175
175
  >>> deq = spells.summon('DSK', columns=['deq_base'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
176
- >>> ext = [
177
- ... ColSpec(
178
- ... name='picked_deq_base',
176
+ >>> ext = {
177
+ ... 'picked_deq_base': ColSpec(
179
178
  ... col_type=ColType.PICK_SUM,
180
179
  ... expr=lambda name, card_context: card_context[name]['deq_base']
181
180
  ... ),
182
- ... ColSpec(
183
- ... name='picked_deq_base_avg',
181
+ ... 'picked_deq_base_avg': ColSpec(
184
182
  ... col_type=ColType.AGG,
185
183
  ... expr=pl.col('picked_deq_base') / pl.col('num_taken')
186
184
  ... ),
187
- ... ]
185
+ ... }
188
186
  >>> spells.summon('DSK', columns=['picked_deq_base_avg'], group_by=['player_cohort'], extensions=ext, card_context=deq)
189
187
  shape: (4, 2)
190
188
  ┌───────────────┬─────────────────────┐
@@ -239,7 +237,7 @@ Spells caches the results of expensive aggregations in the local file system as
239
237
 
240
238
  ### Memory Usage
241
239
 
242
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larget-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor, including poor garbage collection. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than memory computations is on my roadmap, so check back periodically to see if I've made any progress.
240
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Unfortunately, that feature does not seem to work for my queries and the memory performance can be quite poor. The one feature that may assist in memory management is the local caching, since you can restart the kernel without losing all of your progress. In particular, be careful about opening multiple Jupyter tabs unless you have at least 32 GB. In general I have not run into issues on my 16 GB MacBook Air except with running multiple kernels at once. Supporting larger-than-memory computations is on my roadmap, so check back periodically to see if I've made any progress.
243
241
 
244
242
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
245
243
 
@@ -285,7 +283,8 @@ summon(
285
283
  columns: list[str] | None = None,
286
284
  group_by: list[str] | None = None,
287
285
  filter_spec: dict | None = None,
288
- extensions: list[str] | None = None,
286
+ extensions: dict[str, ColSpec] | None = None,
287
+ card_context: pl.DataFrame | dict[str, dict[str, Any]] | None = None,
289
288
  read_cache: bool = True,
290
289
  write_cache: bool = True,
291
290
  ) -> polars.DataFrame
@@ -305,14 +304,16 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
305
304
  - `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
306
305
  - `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
307
306
 
308
- - extensions: a list of `spells.columns.ColSpec` objects, which are appended to the definitions built-in columns described below. A name not in the enum `ColName` can be used in this way if it is the name of a provided extension. Existing names can also be redefined using extensions.
307
+ - extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the definitions of the built-in columns described below.
308
+
309
+ - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
309
310
 
310
311
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
311
312
 
312
313
  ### Enums
313
314
 
314
315
  ```python
315
- from spells.enums import ColName, ColType
316
+ from spells import ColName, ColType
316
317
  ```
317
318
 
318
319
  Recommended to import `ColName` for any usage of `summon`, and to import `ColType` when defining custom extensions.
@@ -320,10 +321,9 @@ Recommended to import `ColName` for any usage of `summon`, and to import `ColTyp
320
321
  ### ColSpec
321
322
 
322
323
  ```python
323
- from spells.columns import ColSpec
324
+ from spells import ColSpec
324
325
 
325
326
  ColSpec(
326
- name: str,
327
327
  col_type: ColType,
328
328
  expr: pl.Expr | Callable[..., pl.Expr] | None = None,
329
329
  version: str | None = None
@@ -334,8 +334,6 @@ Used to define extensions in `summon`
334
334
 
335
335
  #### parameters
336
336
 
337
- - `name`: any string, including existing columns, although this is very likely to break dependent columns, so don't do it. For `NAME_SUM` columns, the name is the prefix without the underscore, e.g. "drawn".
338
-
339
337
  - `col_type`: one of the `ColType` enum values, `FILTER_ONLY`, `GROUP_BY`, `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. See documentation for `summon` for usage. All columns except `CARD_ATTR`
340
338
  and `AGG` must be derivable at the individual row level on one or both base views. `CARD_ATTR` must be derivable at the individual row level from the card file. `AGG` can depend on any column present after
341
339
  summing over groups, and can include polars Expression aggregations. Arbitrarily long chains of aggregate dependencies are supported.
@@ -344,7 +342,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
344
342
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
345
343
  - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
346
344
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
347
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions. See example notebooks for more details.
345
+ - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
348
346
 
349
347
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
350
348
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -11,7 +11,7 @@ dependencies = [
11
11
  ]
12
12
  requires-python = ">=3.11"
13
13
  readme = "README.md"
14
- version = "0.5.0"
14
+ version = "0.5.2"
15
15
 
16
16
  [project.license]
17
17
  text = "MIT"
@@ -0,0 +1,5 @@
1
+ from spells.columns import ColSpec
2
+ from spells.enums import ColType, ColName
3
+ from spells.draft_data import summon
4
+
5
+ __all__ = ["summon", "ColSpec", "ColType", "ColName"]
@@ -8,7 +8,6 @@ from spells.enums import View, ColName, ColType
8
8
 
9
9
  @dataclass(frozen=True)
10
10
  class ColSpec:
11
- name: str
12
11
  col_type: ColType
13
12
  expr: pl.Expr | Callable[..., pl.Expr] | None = None
14
13
  views: list[View] | None = None
@@ -41,69 +40,56 @@ default_columns = [
41
40
  ColName.GIH_WR,
42
41
  ]
43
42
 
44
- _column_specs = [
45
- ColSpec(
46
- name=ColName.NAME,
43
+ specs: dict[str, ColSpec] = {
44
+ ColName.NAME: ColSpec(
47
45
  col_type=ColType.GROUP_BY,
48
46
  views=[View.CARD],
49
47
  ),
50
- ColSpec(
51
- name=ColName.EXPANSION,
48
+ ColName.EXPANSION: ColSpec(
52
49
  col_type=ColType.GROUP_BY,
53
50
  views=[View.GAME, View.DRAFT],
54
51
  ),
55
- ColSpec(
56
- name=ColName.EVENT_TYPE,
52
+ ColName.EVENT_TYPE: ColSpec(
57
53
  col_type=ColType.GROUP_BY,
58
54
  views=[View.GAME, View.DRAFT],
59
55
  ),
60
- ColSpec(
61
- name=ColName.DRAFT_ID,
56
+ ColName.DRAFT_ID: ColSpec(
62
57
  views=[View.GAME, View.DRAFT],
63
58
  col_type=ColType.FILTER_ONLY,
64
59
  ),
65
- ColSpec(
66
- name=ColName.DRAFT_TIME,
60
+ ColName.DRAFT_TIME: ColSpec(
67
61
  col_type=ColType.FILTER_ONLY,
68
62
  views=[View.GAME, View.DRAFT],
69
63
  ),
70
- ColSpec(
71
- name=ColName.DRAFT_DATE,
64
+ ColName.DRAFT_DATE: ColSpec(
72
65
  col_type=ColType.GROUP_BY,
73
66
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
74
67
  ),
75
- ColSpec(
76
- name=ColName.DRAFT_DAY_OF_WEEK,
68
+ ColName.DRAFT_DAY_OF_WEEK: ColSpec(
77
69
  col_type=ColType.GROUP_BY,
78
70
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
79
71
  ),
80
- ColSpec(
81
- name=ColName.DRAFT_HOUR,
72
+ ColName.DRAFT_HOUR: ColSpec(
82
73
  col_type=ColType.GROUP_BY,
83
74
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.hour(),
84
75
  ),
85
- ColSpec(
86
- name=ColName.DRAFT_WEEK,
76
+ ColName.DRAFT_WEEK: ColSpec(
87
77
  col_type=ColType.GROUP_BY,
88
78
  expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
89
79
  ),
90
- ColSpec(
91
- name=ColName.RANK,
80
+ ColName.RANK: ColSpec(
92
81
  col_type=ColType.GROUP_BY,
93
82
  views=[View.GAME, View.DRAFT],
94
83
  ),
95
- ColSpec(
96
- name=ColName.USER_N_GAMES_BUCKET,
84
+ ColName.USER_N_GAMES_BUCKET: ColSpec(
97
85
  col_type=ColType.GROUP_BY,
98
86
  views=[View.DRAFT, View.GAME],
99
87
  ),
100
- ColSpec(
101
- name=ColName.USER_GAME_WIN_RATE_BUCKET,
88
+ ColName.USER_GAME_WIN_RATE_BUCKET: ColSpec(
102
89
  col_type=ColType.GROUP_BY,
103
90
  views=[View.DRAFT, View.GAME],
104
91
  ),
105
- ColSpec(
106
- name=ColName.PLAYER_COHORT,
92
+ ColName.PLAYER_COHORT: ColSpec(
107
93
  col_type=ColType.GROUP_BY,
108
94
  expr=pl.when(pl.col(ColName.USER_N_GAMES_BUCKET) < 100)
109
95
  .then(pl.lit("Other"))
@@ -117,309 +103,249 @@ _column_specs = [
117
103
  )
118
104
  ),
119
105
  ),
120
- ColSpec(
121
- name=ColName.EVENT_MATCH_WINS,
106
+ ColName.EVENT_MATCH_WINS: ColSpec(
122
107
  col_type=ColType.GROUP_BY,
123
108
  views=[View.DRAFT],
124
109
  ),
125
- ColSpec(
126
- name=ColName.EVENT_MATCH_WINS_SUM,
110
+ ColName.EVENT_MATCH_WINS_SUM: ColSpec(
127
111
  col_type=ColType.PICK_SUM,
128
112
  views=[View.DRAFT],
129
113
  expr=pl.col(ColName.EVENT_MATCH_WINS),
130
114
  ),
131
- ColSpec(
132
- name=ColName.EVENT_MATCH_LOSSES,
115
+ ColName.EVENT_MATCH_LOSSES: ColSpec(
133
116
  col_type=ColType.GROUP_BY,
134
117
  views=[View.DRAFT],
135
118
  ),
136
- ColSpec(
137
- name=ColName.EVENT_MATCH_LOSSES_SUM,
119
+ ColName.EVENT_MATCH_LOSSES_SUM: ColSpec(
138
120
  col_type=ColType.PICK_SUM,
139
121
  expr=pl.col(ColName.EVENT_MATCH_LOSSES),
140
122
  ),
141
- ColSpec(
142
- name=ColName.EVENT_MATCHES,
123
+ ColName.EVENT_MATCHES: ColSpec(
143
124
  col_type=ColType.GROUP_BY,
144
125
  expr=pl.col(ColName.EVENT_MATCH_WINS) + pl.col(ColName.EVENT_MATCH_LOSSES),
145
126
  ),
146
- ColSpec(
147
- name=ColName.EVENT_MATCHES_SUM,
127
+ ColName.EVENT_MATCHES_SUM: ColSpec(
148
128
  col_type=ColType.PICK_SUM,
149
129
  expr=pl.col(ColName.EVENT_MATCHES),
150
130
  ),
151
- ColSpec(
152
- name=ColName.IS_TROPHY,
131
+ ColName.IS_TROPHY: ColSpec(
153
132
  col_type=ColType.GROUP_BY,
154
133
  expr=pl.when(pl.col(ColName.EVENT_TYPE) == "Traditional")
155
134
  .then(pl.col(ColName.EVENT_MATCH_WINS) == 3)
156
135
  .otherwise(pl.col(ColName.EVENT_MATCH_WINS) == 7),
157
136
  ),
158
- ColSpec(
159
- name=ColName.IS_TROPHY_SUM,
137
+ ColName.IS_TROPHY_SUM: ColSpec(
160
138
  col_type=ColType.PICK_SUM,
161
139
  expr=pl.col(ColName.IS_TROPHY),
162
140
  ),
163
- ColSpec(
164
- name=ColName.PACK_NUMBER,
141
+ ColName.PACK_NUMBER: ColSpec(
165
142
  col_type=ColType.FILTER_ONLY, # use pack_num
166
143
  views=[View.DRAFT],
167
144
  ),
168
- ColSpec(
169
- name=ColName.PACK_NUM,
145
+ ColName.PACK_NUM: ColSpec(
170
146
  col_type=ColType.GROUP_BY,
171
147
  expr=pl.col(ColName.PACK_NUMBER) + 1,
172
148
  ),
173
- ColSpec(
174
- name=ColName.PICK_NUMBER,
149
+ ColName.PICK_NUMBER: ColSpec(
175
150
  col_type=ColType.FILTER_ONLY, # use pick_num
176
151
  views=[View.DRAFT],
177
152
  ),
178
- ColSpec(
179
- name=ColName.PICK_NUM,
153
+ ColName.PICK_NUM: ColSpec(
180
154
  col_type=ColType.GROUP_BY,
181
155
  expr=pl.col(ColName.PICK_NUMBER) + 1,
182
156
  ),
183
- ColSpec(
184
- name=ColName.TAKEN_AT,
157
+ ColName.TAKEN_AT: ColSpec(
185
158
  col_type=ColType.PICK_SUM,
186
159
  expr=pl.col(ColName.PICK_NUM),
187
160
  ),
188
- ColSpec(
189
- name=ColName.NUM_TAKEN,
161
+ ColName.NUM_TAKEN: ColSpec(
190
162
  col_type=ColType.PICK_SUM,
191
163
  expr=pl.when(pl.col(ColName.PICK).is_not_null())
192
164
  .then(1)
193
165
  .otherwise(0),
194
166
  ),
195
- ColSpec(
196
- name=ColName.NUM_DRAFTS,
167
+ ColName.NUM_DRAFTS: ColSpec(
197
168
  col_type=ColType.PICK_SUM,
198
169
  expr=pl.when((pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)).then(1).otherwise(0),
199
170
  ),
200
- ColSpec(
201
- name=ColName.PICK,
171
+ ColName.PICK: ColSpec(
202
172
  col_type=ColType.FILTER_ONLY,
203
173
  views=[View.DRAFT],
204
174
  ),
205
- ColSpec(
206
- name=ColName.PICK_MAINDECK_RATE,
175
+ ColName.PICK_MAINDECK_RATE: ColSpec(
207
176
  col_type=ColType.PICK_SUM,
208
177
  views=[View.DRAFT],
209
178
  ),
210
- ColSpec(
211
- name=ColName.PICK_SIDEBOARD_IN_RATE,
179
+ ColName.PICK_SIDEBOARD_IN_RATE: ColSpec(
212
180
  col_type=ColType.PICK_SUM,
213
181
  views=[View.DRAFT],
214
182
  ),
215
- ColSpec(
216
- name=ColName.PACK_CARD,
183
+ ColName.PACK_CARD: ColSpec(
217
184
  col_type=ColType.NAME_SUM,
218
185
  views=[View.DRAFT],
219
186
  ),
220
- ColSpec(
221
- name=ColName.LAST_SEEN,
187
+ ColName.LAST_SEEN: ColSpec(
222
188
  col_type=ColType.NAME_SUM,
223
189
  expr=lambda name: pl.col(f"pack_card_{name}")
224
190
  * pl.min_horizontal(ColName.PICK_NUM, 8),
225
191
  ),
226
- ColSpec(
227
- name=ColName.NUM_SEEN,
192
+ ColName.NUM_SEEN: ColSpec(
228
193
  col_type=ColType.NAME_SUM,
229
194
  expr=lambda name: pl.col(f"pack_card_{name}") * (pl.col(ColName.PICK_NUM) <= 8),
230
195
  ),
231
- ColSpec(
232
- name=ColName.POOL,
196
+ ColName.POOL: ColSpec(
233
197
  col_type=ColType.NAME_SUM,
234
198
  views=[View.DRAFT],
235
199
  ),
236
- ColSpec(
237
- name=ColName.GAME_TIME,
200
+ ColName.GAME_TIME: ColSpec(
238
201
  col_type=ColType.FILTER_ONLY,
239
202
  views=[View.GAME],
240
203
  ),
241
- ColSpec(
242
- name=ColName.GAME_DATE,
204
+ ColName.GAME_DATE: ColSpec(
243
205
  col_type=ColType.GROUP_BY,
244
206
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.date(),
245
207
  ),
246
- ColSpec(
247
- name=ColName.GAME_DAY_OF_WEEK,
208
+ ColName.GAME_DAY_OF_WEEK: ColSpec(
248
209
  col_type=ColType.GROUP_BY,
249
210
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
250
211
  ),
251
- ColSpec(
252
- name=ColName.GAME_HOUR,
212
+ ColName.GAME_HOUR: ColSpec(
253
213
  col_type=ColType.GROUP_BY,
254
214
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.hour(),
255
215
  ),
256
- ColSpec(
257
- name=ColName.GAME_WEEK,
216
+ ColName.GAME_WEEK: ColSpec(
258
217
  col_type=ColType.GROUP_BY,
259
218
  expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.week(),
260
219
  ),
261
- ColSpec(
262
- name=ColName.BUILD_INDEX,
220
+ ColName.BUILD_INDEX: ColSpec(
263
221
  col_type=ColType.GROUP_BY,
264
222
  views=[View.GAME],
265
223
  ),
266
- ColSpec(
267
- name=ColName.MATCH_NUMBER,
224
+ ColName.MATCH_NUMBER: ColSpec(
268
225
  col_type=ColType.GROUP_BY,
269
226
  views=[View.GAME],
270
227
  ),
271
- ColSpec(
272
- name=ColName.GAME_NUMBER,
228
+ ColName.GAME_NUMBER: ColSpec(
273
229
  col_type=ColType.GROUP_BY,
274
230
  views=[View.GAME],
275
231
  ),
276
- ColSpec(
277
- name=ColName.NUM_GAMES,
232
+ ColName.NUM_GAMES: ColSpec(
278
233
  col_type=ColType.GAME_SUM,
279
234
  expr=pl.col(ColName.GAME_NUMBER).is_not_null(),
280
235
  ),
281
- ColSpec(
282
- name=ColName.NUM_MATCHES,
236
+ ColName.NUM_MATCHES: ColSpec(
283
237
  col_type=ColType.GAME_SUM,
284
238
  expr=pl.col(ColName.GAME_NUMBER) == 1,
285
239
  ),
286
- ColSpec(
287
- name=ColName.NUM_EVENTS,
240
+ ColName.NUM_EVENTS: ColSpec(
288
241
  col_type=ColType.GAME_SUM,
289
242
  expr=(pl.col(ColName.GAME_NUMBER) == 1) & (pl.col(ColName.MATCH_NUMBER) == 1),
290
243
  ),
291
- ColSpec(
292
- name=ColName.OPP_RANK,
244
+ ColName.OPP_RANK: ColSpec(
293
245
  col_type=ColType.GROUP_BY,
294
246
  views=[View.GAME],
295
247
  ),
296
- ColSpec(
297
- name=ColName.MAIN_COLORS,
248
+ ColName.MAIN_COLORS: ColSpec(
298
249
  col_type=ColType.GROUP_BY,
299
250
  views=[View.GAME],
300
251
  ),
301
- ColSpec(
302
- name=ColName.NUM_COLORS,
252
+ ColName.NUM_COLORS: ColSpec(
303
253
  col_type=ColType.GROUP_BY,
304
254
  expr=pl.col(ColName.MAIN_COLORS).str.len_chars(),
305
255
  ),
306
- ColSpec(
307
- name=ColName.SPLASH_COLORS,
256
+ ColName.SPLASH_COLORS: ColSpec(
308
257
  col_type=ColType.GROUP_BY,
309
258
  views=[View.GAME],
310
259
  ),
311
- ColSpec(
312
- name=ColName.HAS_SPLASH,
260
+ ColName.HAS_SPLASH: ColSpec(
313
261
  col_type=ColType.GROUP_BY,
314
262
  expr=pl.col(ColName.SPLASH_COLORS).str.len_chars() > 0,
315
263
  ),
316
- ColSpec(
317
- name=ColName.ON_PLAY,
264
+ ColName.ON_PLAY: ColSpec(
318
265
  col_type=ColType.GROUP_BY,
319
266
  views=[View.GAME],
320
267
  ),
321
- ColSpec(
322
- name=ColName.NUM_ON_PLAY,
268
+ ColName.NUM_ON_PLAY: ColSpec(
323
269
  col_type=ColType.GAME_SUM,
324
270
  expr=pl.col(ColName.ON_PLAY),
325
271
  ),
326
- ColSpec(
327
- name=ColName.NUM_MULLIGANS,
272
+ ColName.NUM_MULLIGANS: ColSpec(
328
273
  col_type=ColType.GROUP_BY,
329
274
  views=[View.GAME],
330
275
  ),
331
- ColSpec(
332
- name=ColName.NUM_MULLIGANS_SUM,
276
+ ColName.NUM_MULLIGANS_SUM: ColSpec(
333
277
  col_type=ColType.GAME_SUM,
334
278
  expr=pl.col(ColName.NUM_MULLIGANS),
335
279
  ),
336
- ColSpec(
337
- name=ColName.OPP_NUM_MULLIGANS,
280
+ ColName.OPP_NUM_MULLIGANS: ColSpec(
338
281
  col_type=ColType.GAME_SUM,
339
282
  views=[View.GAME],
340
283
  ),
341
- ColSpec(
342
- name=ColName.OPP_NUM_MULLIGANS_SUM,
284
+ ColName.OPP_NUM_MULLIGANS_SUM: ColSpec(
343
285
  col_type=ColType.GAME_SUM,
344
286
  expr=pl.col(ColName.OPP_NUM_MULLIGANS),
345
287
  ),
346
- ColSpec(
347
- name=ColName.OPP_COLORS,
288
+ ColName.OPP_COLORS: ColSpec(
348
289
  col_type=ColType.GROUP_BY,
349
290
  views=[View.GAME],
350
291
  ),
351
- ColSpec(
352
- name=ColName.NUM_TURNS,
292
+ ColName.NUM_TURNS: ColSpec(
353
293
  col_type=ColType.GROUP_BY,
354
294
  views=[View.GAME],
355
295
  ),
356
- ColSpec(
357
- name=ColName.NUM_TURNS_SUM,
296
+ ColName.NUM_TURNS_SUM: ColSpec(
358
297
  col_type=ColType.GAME_SUM,
359
298
  expr=pl.col(ColName.NUM_TURNS),
360
299
  ),
361
- ColSpec(
362
- name=ColName.WON,
300
+ ColName.WON: ColSpec(
363
301
  col_type=ColType.GROUP_BY,
364
302
  views=[View.GAME],
365
303
  ),
366
- ColSpec(
367
- name=ColName.NUM_WON,
304
+ ColName.NUM_WON: ColSpec(
368
305
  col_type=ColType.GAME_SUM,
369
306
  expr=pl.col(ColName.WON),
370
307
  ),
371
- ColSpec(
372
- name=ColName.OPENING_HAND,
308
+ ColName.OPENING_HAND: ColSpec(
373
309
  col_type=ColType.NAME_SUM,
374
310
  views=[View.GAME],
375
311
  ),
376
- ColSpec(
377
- name=ColName.WON_OPENING_HAND,
312
+ ColName.WON_OPENING_HAND: ColSpec(
378
313
  col_type=ColType.NAME_SUM,
379
314
  expr=lambda name: pl.col(f"opening_hand_{name}") * pl.col(ColName.WON),
380
315
  ),
381
- ColSpec(
382
- name=ColName.DRAWN,
316
+ ColName.DRAWN: ColSpec(
383
317
  col_type=ColType.NAME_SUM,
384
318
  views=[View.GAME],
385
319
  ),
386
- ColSpec(
387
- name=ColName.WON_DRAWN,
320
+ ColName.WON_DRAWN: ColSpec(
388
321
  col_type=ColType.NAME_SUM,
389
322
  expr=lambda name: pl.col(f"drawn_{name}") * pl.col(ColName.WON),
390
323
  ),
391
- ColSpec(
392
- name=ColName.TUTORED,
324
+ ColName.TUTORED: ColSpec(
393
325
  col_type=ColType.NAME_SUM,
394
326
  views=[View.GAME],
395
327
  ),
396
- ColSpec(
397
- name=ColName.WON_TUTORED,
328
+ ColName.WON_TUTORED: ColSpec(
398
329
  col_type=ColType.NAME_SUM,
399
330
  expr=lambda name: pl.col(f"tutored_{name}") * pl.col(ColName.WON),
400
331
  ),
401
- ColSpec(
402
- name=ColName.DECK,
332
+ ColName.DECK: ColSpec(
403
333
  col_type=ColType.NAME_SUM,
404
334
  views=[View.GAME],
405
335
  ),
406
- ColSpec(
407
- name=ColName.WON_DECK,
336
+ ColName.WON_DECK: ColSpec(
408
337
  col_type=ColType.NAME_SUM,
409
338
  expr=lambda name: pl.col(f"deck_{name}") * pl.col(ColName.WON),
410
339
  ),
411
- ColSpec(
412
- name=ColName.SIDEBOARD,
340
+ ColName.SIDEBOARD: ColSpec(
413
341
  col_type=ColType.NAME_SUM,
414
342
  views=[View.GAME],
415
343
  ),
416
- ColSpec(
417
- name=ColName.WON_SIDEBOARD,
344
+ ColName.WON_SIDEBOARD: ColSpec(
418
345
  col_type=ColType.NAME_SUM,
419
346
  expr=lambda name: pl.col(f"sideboard_{name}") * pl.col(ColName.WON),
420
347
  ),
421
- ColSpec(
422
- name=ColName.NUM_GNS,
348
+ ColName.NUM_GNS: ColSpec(
423
349
  col_type=ColType.NAME_SUM,
424
350
  expr=lambda name: pl.max_horizontal(
425
351
  0,
@@ -429,258 +355,204 @@ _column_specs = [
429
355
  - pl.col(f"opening_hand_{name}"),
430
356
  ),
431
357
  ),
432
- ColSpec(
433
- name=ColName.WON_NUM_GNS,
358
+ ColName.WON_NUM_GNS: ColSpec(
434
359
  col_type=ColType.NAME_SUM,
435
360
  expr=lambda name: pl.col(ColName.WON) * pl.col(f"num_gns_{name}"),
436
361
  ),
437
- ColSpec(
438
- name=ColName.SET_CODE,
362
+ ColName.SET_CODE: ColSpec(
439
363
  col_type=ColType.CARD_ATTR,
440
364
  ),
441
- ColSpec(
442
- name=ColName.COLOR,
365
+ ColName.COLOR: ColSpec(
443
366
  col_type=ColType.CARD_ATTR,
444
367
  ),
445
- ColSpec(
446
- name=ColName.RARITY,
368
+ ColName.RARITY: ColSpec(
447
369
  col_type=ColType.CARD_ATTR,
448
370
  ),
449
- ColSpec(
450
- name=ColName.COLOR_IDENTITY,
371
+ ColName.COLOR_IDENTITY: ColSpec(
451
372
  col_type=ColType.CARD_ATTR,
452
373
  ),
453
- ColSpec(
454
- name=ColName.CARD_TYPE,
374
+ ColName.CARD_TYPE: ColSpec(
455
375
  col_type=ColType.CARD_ATTR,
456
376
  ),
457
- ColSpec(
458
- name=ColName.SUBTYPE,
377
+ ColName.SUBTYPE: ColSpec(
459
378
  col_type=ColType.CARD_ATTR,
460
379
  ),
461
- ColSpec(
462
- name=ColName.MANA_VALUE,
380
+ ColName.MANA_VALUE: ColSpec(
463
381
  col_type=ColType.CARD_ATTR,
464
382
  ),
465
- ColSpec(
466
- name=ColName.DECK_MANA_VALUE,
383
+ ColName.DECK_MANA_VALUE: ColSpec(
467
384
  col_type=ColType.NAME_SUM,
468
385
  expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE] * pl.col(f"deck_{name}"),
469
386
  ),
470
- ColSpec(
471
- name=ColName.DECK_LANDS,
387
+ ColName.DECK_LANDS: ColSpec(
472
388
  col_type=ColType.NAME_SUM,
473
389
  expr=lambda name, card_context: pl.col(f"deck_{name}") * ( 1 if 'Land' in card_context[name][ColName.CARD_TYPE] else 0 )
474
390
  ),
475
- ColSpec(
476
- name=ColName.DECK_SPELLS,
391
+ ColName.DECK_SPELLS: ColSpec(
477
392
  col_type=ColType.NAME_SUM,
478
393
  expr=lambda name: pl.col(f"deck_{name}") - pl.col(f"deck_lands_{name}"),
479
394
  ),
480
- ColSpec(
481
- name=ColName.MANA_COST,
395
+ ColName.MANA_COST: ColSpec(
482
396
  col_type=ColType.CARD_ATTR,
483
397
  ),
484
- ColSpec(
485
- name=ColName.POWER,
398
+ ColName.POWER: ColSpec(
486
399
  col_type=ColType.CARD_ATTR,
487
400
  ),
488
- ColSpec(
489
- name=ColName.TOUGHNESS,
401
+ ColName.TOUGHNESS: ColSpec(
490
402
  col_type=ColType.CARD_ATTR,
491
403
  ),
492
- ColSpec(
493
- name=ColName.IS_BONUS_SHEET,
404
+ ColName.IS_BONUS_SHEET: ColSpec(
494
405
  col_type=ColType.CARD_ATTR,
495
406
  ),
496
- ColSpec(
497
- name=ColName.IS_DFC,
407
+ ColName.IS_DFC: ColSpec(
498
408
  col_type=ColType.CARD_ATTR,
499
409
  ),
500
- ColSpec(
501
- name=ColName.ORACLE_TEXT,
410
+ ColName.ORACLE_TEXT: ColSpec(
502
411
  col_type=ColType.CARD_ATTR,
503
412
  ),
504
- ColSpec(
505
- name=ColName.CARD_JSON,
413
+ ColName.CARD_JSON: ColSpec(
506
414
  col_type=ColType.CARD_ATTR,
507
415
  ),
508
- ColSpec(
509
- name=ColName.PICKED_MATCH_WR,
416
+ ColName.PICKED_MATCH_WR: ColSpec(
510
417
  col_type=ColType.AGG,
511
418
  expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES_SUM),
512
419
  ),
513
- ColSpec(
514
- name=ColName.TROPHY_RATE,
420
+ ColName.TROPHY_RATE: ColSpec(
515
421
  col_type=ColType.AGG,
516
422
  expr=pl.col(ColName.IS_TROPHY_SUM) / pl.col(ColName.NUM_TAKEN),
517
423
  ),
518
- ColSpec(
519
- name=ColName.GAME_WR,
424
+ ColName.GAME_WR: ColSpec(
520
425
  col_type=ColType.AGG,
521
426
  expr=pl.col(ColName.NUM_WON) / pl.col(ColName.NUM_GAMES),
522
427
  ),
523
- ColSpec(
524
- name=ColName.ALSA,
428
+ ColName.ALSA: ColSpec(
525
429
  col_type=ColType.AGG,
526
430
  expr=pl.col(ColName.LAST_SEEN) / pl.col(ColName.NUM_SEEN),
527
431
  ),
528
- ColSpec(
529
- name=ColName.ATA,
432
+ ColName.ATA: ColSpec(
530
433
  col_type=ColType.AGG,
531
434
  expr=pl.col(ColName.TAKEN_AT) / pl.col(ColName.NUM_TAKEN),
532
435
  ),
533
- ColSpec(
534
- name=ColName.NUM_GP,
436
+ ColName.NUM_GP: ColSpec(
535
437
  col_type=ColType.AGG,
536
438
  expr=pl.col(ColName.DECK),
537
439
  ),
538
- ColSpec(
539
- name=ColName.PCT_GP,
440
+ ColName.PCT_GP: ColSpec(
540
441
  col_type=ColType.AGG,
541
442
  expr=pl.col(ColName.DECK) / (pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD)),
542
443
  ),
543
- ColSpec(
544
- name=ColName.GP_WR,
444
+ ColName.GP_WR: ColSpec(
545
445
  col_type=ColType.AGG,
546
446
  expr=pl.col(ColName.WON_DECK) / pl.col(ColName.DECK),
547
447
  ),
548
- ColSpec(
549
- name=ColName.NUM_OH,
448
+ ColName.NUM_OH: ColSpec(
550
449
  col_type=ColType.AGG,
551
450
  expr=pl.col(ColName.OPENING_HAND),
552
451
  ),
553
- ColSpec(
554
- name=ColName.OH_WR,
452
+ ColName.OH_WR: ColSpec(
555
453
  col_type=ColType.AGG,
556
454
  expr=pl.col(ColName.WON_OPENING_HAND) / pl.col(ColName.OPENING_HAND),
557
455
  ),
558
- ColSpec(
559
- name=ColName.NUM_GIH,
456
+ ColName.NUM_GIH: ColSpec(
560
457
  col_type=ColType.AGG,
561
458
  expr=pl.col(ColName.OPENING_HAND) + pl.col(ColName.DRAWN),
562
459
  ),
563
- ColSpec(
564
- name=ColName.NUM_GIH_WON,
460
+ ColName.NUM_GIH_WON: ColSpec(
565
461
  col_type=ColType.AGG,
566
462
  expr=pl.col(ColName.WON_OPENING_HAND) + pl.col(ColName.WON_DRAWN),
567
463
  ),
568
- ColSpec(
569
- name=ColName.GIH_WR,
464
+ ColName.GIH_WR: ColSpec(
570
465
  col_type=ColType.AGG,
571
466
  expr=pl.col(ColName.NUM_GIH_WON) / pl.col(ColName.NUM_GIH),
572
467
  ),
573
- ColSpec(
574
- name=ColName.GNS_WR,
468
+ ColName.GNS_WR: ColSpec(
575
469
  col_type=ColType.AGG,
576
470
  expr=pl.col(ColName.WON_NUM_GNS) / pl.col(ColName.NUM_GNS),
577
471
  ),
578
- ColSpec(
579
- name=ColName.IWD,
472
+ ColName.IWD: ColSpec(
580
473
  col_type=ColType.AGG,
581
474
  expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GNS_WR),
582
475
  ),
583
- ColSpec(
584
- name=ColName.NUM_IN_POOL,
476
+ ColName.NUM_IN_POOL: ColSpec(
585
477
  col_type=ColType.AGG,
586
478
  expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
587
479
  ),
588
- ColSpec(
589
- name=ColName.IN_POOL_WR,
480
+ ColName.IN_POOL_WR: ColSpec(
590
481
  col_type=ColType.AGG,
591
482
  expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
592
483
  / pl.col(ColName.NUM_IN_POOL),
593
484
  ),
594
- ColSpec(
595
- name=ColName.DECK_TOTAL,
485
+ ColName.DECK_TOTAL: ColSpec(
596
486
  col_type=ColType.AGG,
597
487
  expr=pl.col(ColName.DECK).sum(),
598
488
  ),
599
- ColSpec(
600
- name=ColName.WON_DECK_TOTAL,
489
+ ColName.WON_DECK_TOTAL: ColSpec(
601
490
  col_type=ColType.AGG,
602
491
  expr=pl.col(ColName.WON_DECK).sum(),
603
492
  ),
604
- ColSpec(
605
- name=ColName.GP_WR_MEAN,
493
+ ColName.GP_WR_MEAN: ColSpec(
606
494
  col_type=ColType.AGG,
607
495
  expr=pl.col(ColName.WON_DECK_TOTAL) / pl.col(ColName.DECK_TOTAL),
608
496
  ),
609
- ColSpec(
610
- name=ColName.GP_WR_EXCESS,
497
+ ColName.GP_WR_EXCESS: ColSpec(
611
498
  col_type=ColType.AGG,
612
499
  expr=pl.col(ColName.GP_WR) - pl.col(ColName.GP_WR_MEAN),
613
500
  ),
614
- ColSpec(
615
- name=ColName.GP_WR_VAR,
501
+ ColName.GP_WR_VAR: ColSpec(
616
502
  col_type=ColType.AGG,
617
503
  expr=(pl.col(ColName.GP_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GP)).sum()
618
504
  / pl.col(ColName.DECK_TOTAL),
619
505
  ),
620
- ColSpec(
621
- name=ColName.GP_WR_STDEV,
506
+ ColName.GP_WR_STDEV: ColSpec(
622
507
  col_type=ColType.AGG,
623
508
  expr=pl.col(ColName.GP_WR_VAR).sqrt(),
624
509
  ),
625
- ColSpec(
626
- name=ColName.GP_WR_Z,
510
+ ColName.GP_WR_Z: ColSpec(
627
511
  col_type=ColType.AGG,
628
512
  expr=pl.col(ColName.GP_WR_EXCESS) / pl.col(ColName.GP_WR_STDEV),
629
513
  ),
630
- ColSpec(
631
- name=ColName.GIH_TOTAL,
514
+ ColName.GIH_TOTAL: ColSpec(
632
515
  col_type=ColType.AGG,
633
516
  expr=pl.col(ColName.NUM_GIH).sum(),
634
517
  ),
635
- ColSpec(
636
- name=ColName.WON_GIH_TOTAL,
518
+ ColName.WON_GIH_TOTAL: ColSpec(
637
519
  col_type=ColType.AGG,
638
520
  expr=pl.col(ColName.NUM_GIH_WON).sum(),
639
521
  ),
640
- ColSpec(
641
- name=ColName.GIH_WR_MEAN,
522
+ ColName.GIH_WR_MEAN: ColSpec(
642
523
  col_type=ColType.AGG,
643
524
  expr=pl.col(ColName.WON_GIH_TOTAL) / pl.col(ColName.GIH_TOTAL),
644
525
  ),
645
- ColSpec(
646
- name=ColName.GIH_WR_EXCESS,
526
+ ColName.GIH_WR_EXCESS: ColSpec(
647
527
  col_type=ColType.AGG,
648
528
  expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GIH_WR_MEAN),
649
529
  ),
650
- ColSpec(
651
- name=ColName.GIH_WR_VAR,
530
+ ColName.GIH_WR_VAR: ColSpec(
652
531
  col_type=ColType.AGG,
653
532
  expr=(pl.col(ColName.GIH_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GIH)).sum()
654
533
  / pl.col(ColName.GIH_TOTAL),
655
534
  ),
656
- ColSpec(
657
- name=ColName.GIH_WR_STDEV,
535
+ ColName.GIH_WR_STDEV: ColSpec(
658
536
  col_type=ColType.AGG,
659
537
  expr=pl.col(ColName.GIH_WR_VAR).sqrt(),
660
538
  ),
661
- ColSpec(
662
- name=ColName.GIH_WR_Z,
539
+ ColName.GIH_WR_Z: ColSpec(
663
540
  col_type=ColType.AGG,
664
541
  expr=pl.col(ColName.GIH_WR_EXCESS) / pl.col(ColName.GIH_WR_STDEV),
665
542
  ),
666
- ColSpec(
667
- name=ColName.DECK_MANA_VALUE_AVG,
543
+ ColName.DECK_MANA_VALUE_AVG: ColSpec(
668
544
  col_type=ColType.AGG,
669
545
  expr=pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK_SPELLS),
670
546
  ),
671
- ColSpec(
672
- name=ColName.DECK_LANDS_AVG,
547
+ ColName.DECK_LANDS_AVG: ColSpec(
673
548
  col_type=ColType.AGG,
674
549
  expr=pl.col(ColName.DECK_LANDS) / pl.col(ColName.NUM_GAMES),
675
550
  ),
676
- ColSpec(
677
- name=ColName.DECK_SPELLS_AVG,
551
+ ColName.DECK_SPELLS_AVG: ColSpec(
678
552
  col_type=ColType.AGG,
679
553
  expr=pl.col(ColName.DECK_SPELLS) / pl.col(ColName.NUM_GAMES),
680
554
  ),
681
- ]
682
-
683
- col_spec_map = {col.name: col for col in _column_specs}
555
+ }
684
556
 
685
557
  for item in ColName:
686
- assert item in col_spec_map, f"column {item} enumerated but not specified"
558
+ assert item in specs, f"column {item} enumerated but not specified"
@@ -54,8 +54,8 @@ def _get_names(set_code: str) -> list[str]:
54
54
  return names
55
55
 
56
56
 
57
- def _get_card_context(set_code: str, col_spec_map: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict[str, Any]] | None) -> dict[str, dict[str, Any]]:
58
- card_attr_specs = {col:spec for col, spec in col_spec_map.items() if spec.col_type == ColType.CARD_ATTR or spec.name == ColName.NAME}
57
+ def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict[str, Any]] | None) -> dict[str, dict[str, Any]]:
58
+ card_attr_specs = {col:spec for col, spec in specs.items() if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME}
59
59
  col_def_map = _hydrate_col_defs(set_code, card_attr_specs, card_only=True)
60
60
 
61
61
  columns = list(col_def_map.keys())
@@ -84,7 +84,7 @@ def _get_card_context(set_code: str, col_spec_map: dict[str, ColSpec], card_cont
84
84
  return loaded_context
85
85
 
86
86
 
87
- def _determine_expression(spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
87
+ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
88
88
  def seed_params(expr):
89
89
  params = {}
90
90
 
@@ -97,18 +97,18 @@ def _determine_expression(spec: ColSpec, names: list[str], card_context: dict[st
97
97
 
98
98
  if spec.col_type == ColType.NAME_SUM:
99
99
  if spec.expr is not None:
100
- assert isinstance(spec.expr, Callable), f"NAME_SUM column {spec.name} must have a callable `expr` accepting a `name` argument"
100
+ assert isinstance(spec.expr, Callable), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
101
101
  unnamed_exprs = [spec.expr(**{'name': name, **seed_params(spec.expr)}) for name in names]
102
102
 
103
103
  expr = tuple(
104
104
  map(
105
- lambda ex, name: ex.alias(f"{spec.name}_{name}"),
105
+ lambda ex, name: ex.alias(f"{col}_{name}"),
106
106
  unnamed_exprs,
107
107
  names,
108
108
  )
109
109
  )
110
110
  else:
111
- expr = tuple(map(lambda name: pl.col(f"{spec.name}_{name}"), names))
111
+ expr = tuple(map(lambda name: pl.col(f"{col}_{name}"), names))
112
112
 
113
113
  elif spec.expr is not None:
114
114
  if isinstance(spec.expr, Callable):
@@ -118,25 +118,27 @@ def _determine_expression(spec: ColSpec, names: list[str], card_context: dict[st
118
118
  for name in names:
119
119
  name_params = {'name': name, **params}
120
120
  expr = pl.when(pl.col(ColName.PICK) == name).then(spec.expr(**name_params)).otherwise(expr)
121
+ elif spec.col_type == ColType.CARD_ATTR and 'name' in signature(spec.expr).parameters:
122
+ expr = spec.expr(**{'name': pl.col('name'), **params})
121
123
  else:
122
124
  expr = spec.expr(**params)
123
125
  else:
124
126
  expr = spec.expr
125
- expr = expr.alias(spec.name)
127
+ expr = expr.alias(col)
126
128
  else:
127
- expr = pl.col(spec.name)
129
+ expr = pl.col(col)
128
130
 
129
131
  return expr
130
132
 
131
133
 
132
- def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_map: dict[str, ColSpec], names: list[str]) -> set[str]:
134
+ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: dict[str, ColSpec], names: list[str]) -> set[str]:
133
135
  dependencies = set()
134
136
  tricky_ones = set()
135
137
 
136
138
  if isinstance(expr, pl.Expr):
137
139
  dep_cols = [c for c in expr.meta.root_names() if c != name]
138
140
  for dep_col in dep_cols:
139
- if dep_col in col_spec_map.keys():
141
+ if dep_col in specs.keys():
140
142
  dependencies.add(dep_col)
141
143
  else:
142
144
  tricky_ones.add(dep_col)
@@ -145,9 +147,9 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
145
147
  pattern = f"_{names[idx]}$"
146
148
  dep_cols = [c for c in exp.meta.root_names() if c != name]
147
149
  for dep_col in dep_cols:
148
- if dep_col in col_spec_map.keys():
150
+ if dep_col in specs.keys():
149
151
  dependencies.add(dep_col)
150
- elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in col_spec_map:
152
+ elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs:
151
153
  dependencies.add(split[0])
152
154
  else:
153
155
  tricky_ones.add(dep_col)
@@ -156,7 +158,7 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
156
158
  found = False
157
159
  for n in names:
158
160
  pattern = f"_{n}$"
159
- if not found and len(split := re.split(pattern, item)) == 2 and split[0] in col_spec_map:
161
+ if not found and len(split := re.split(pattern, item)) == 2 and split[0] in specs:
160
162
  dependencies.add(split[0])
161
163
  found = True
162
164
  assert found, f"Could not locate column spec for root col {item}"
@@ -164,49 +166,50 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], col_spec_
164
166
  return dependencies
165
167
 
166
168
 
167
- def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict] | None = None, card_only: bool =False):
169
+ def _hydrate_col_defs(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict] | None = None, card_only: bool =False):
168
170
  names = _get_names(set_code)
169
171
 
170
172
  if card_only:
171
173
  card_context = {}
172
174
  else:
173
- card_context = _get_card_context(set_code, col_spec_map, card_context)
175
+ card_context = _get_card_context(set_code, specs, card_context)
174
176
 
175
177
  assert len(names) > 0, "there should be names"
176
178
  hydrated = {}
177
- for key, spec in col_spec_map.items():
178
- expr = _determine_expression(spec, names, card_context)
179
- dependencies = _infer_dependencies(key, expr, col_spec_map, names)
179
+ for col, spec in specs.items():
180
+ expr = _determine_expression(col, spec, names, card_context)
181
+ dependencies = _infer_dependencies(col, expr, specs, names)
180
182
 
183
+ sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
181
184
  try:
182
- sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
183
185
  expr_sig = sig_expr.meta.serialize(
184
186
  format="json"
185
- ) # not compatible with renaming
187
+ )
186
188
  except pl.exceptions.ComputeError:
187
189
  if spec.version is not None:
188
- expr_sig = spec.name + spec.version
190
+ expr_sig = col + spec.version
189
191
  else:
190
- expr_sig = str(datetime.datetime.now)
192
+ print(f"Using session-only signature for non-serializable column {col}, please provide a version value")
193
+ expr_sig = str(sig_expr)
191
194
 
192
195
  signature = str(
193
196
  (
194
- spec.name,
197
+ col,
195
198
  spec.col_type.value,
196
199
  expr_sig,
197
- dependencies,
200
+ sorted(dependencies),
198
201
  )
199
202
  )
200
203
 
201
204
  cdef = ColDef(
202
- name=spec.name,
205
+ name=col,
203
206
  col_type=spec.col_type,
204
207
  views=set(spec.views or set()),
205
208
  expr=expr,
206
209
  dependencies=dependencies,
207
210
  signature=signature,
208
211
  )
209
- hydrated[key] = cdef
212
+ hydrated[col] = cdef
210
213
  return hydrated
211
214
 
212
215
 
@@ -353,18 +356,18 @@ def summon(
353
356
  columns: list[str] | None = None,
354
357
  group_by: list[str] | None = None,
355
358
  filter_spec: dict | None = None,
356
- extensions: list[ColSpec] | None = None,
359
+ extensions: dict[str, ColSpec] | None = None,
357
360
  use_streaming: bool = False,
358
361
  read_cache: bool = True,
359
362
  write_cache: bool = True,
360
363
  card_context: pl.DataFrame | dict[str, dict] | None = None
361
364
  ) -> pl.DataFrame:
362
- col_spec_map = dict(spells.columns.col_spec_map)
365
+ specs = dict(spells.columns.specs)
366
+
363
367
  if extensions is not None:
364
- for spec in extensions:
365
- col_spec_map[spec.name] = spec
368
+ specs.update(extensions)
366
369
 
367
- col_def_map = _hydrate_col_defs(set_code, col_spec_map, card_context)
370
+ col_def_map = _hydrate_col_defs(set_code, specs, card_context)
368
371
  m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
369
372
 
370
373
  calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
@@ -0,0 +1,40 @@
1
+ import polars as pl
2
+
3
+ from spells.enums import ColType
4
+ from spells.columns import ColSpec
5
+
6
+ def attr_metrics(attr):
7
+ return {
8
+ f"seen_{attr}": ColSpec(
9
+ col_type=ColType.NAME_SUM,
10
+ expr=(lambda name, card_context: pl.when(pl.col(f"pack_card_{name}") > 0)
11
+ .then(card_context[name][attr])
12
+ .otherwise(None)),
13
+ ),
14
+ f"pick_{attr}": ColSpec(
15
+ col_type=ColType.PICK_SUM,
16
+ expr=lambda name, card_context: card_context[name][attr]
17
+ ),
18
+ f"least_{attr}_taken": ColSpec(
19
+ col_type=ColType.PICK_SUM,
20
+ expr=(lambda names: pl.col(f'pick_{attr}')
21
+ <= pl.min_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
22
+ ),
23
+ f"least_{attr}_taken_rate": ColSpec(
24
+ col_type=ColType.AGG,
25
+ expr=pl.col(f"least_{attr}_taken") / pl.col("num_taken"),
26
+ ),
27
+ f"greatest_{attr}_taken": ColSpec(
28
+ col_type=ColType.PICK_SUM,
29
+ expr=(lambda names: pl.col(f'pick_{attr}')
30
+ >= pl.max_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
31
+ ),
32
+ f"greatest_{attr}_taken_rate": ColSpec(
33
+ col_type=ColType.AGG,
34
+ expr=pl.col(f"greatest_{attr}_taken") / pl.col("num_taken"),
35
+ ),
36
+ f"pick_{attr}_mean": ColSpec(
37
+ col_type=ColType.AGG,
38
+ expr=pl.col(f"pick_{attr}") / pl.col("num_taken")
39
+ )
40
+ }
@@ -1,5 +0,0 @@
1
- from spells import columns
2
- from spells import enums
3
- from spells.draft_data import summon
4
-
5
- __all__ = ["summon", "enums", "columns"]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes