spells-mtg 0.5.3__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: spells-mtg
- Version: 0.5.3
+ Version: 0.6.1
  Summary: analysis of 17Lands.com public datasets
  Author-Email: Joel Barnes <oelarnes@gmail.com>
  License: MIT
@@ -74,7 +74,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
  - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
  - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
  - Manages grouping and filtering by built-in and custom columns at the row level
- - Provides 122 explicitly specified, enumerated, documented column definitions
+ - Provides 124 explicitly specified, enumerated, documented column definitions
  - Supports "Deck Color Data" aggregations with built-in column definitions.
  - Lets you feed card metrics back into column definitions to support scientific workflows like MLE
  - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -296,6 +296,7 @@ summon(
      filter_spec: dict | None = None,
      extensions: dict[str, ColSpec] | None = None,
      card_context: pl.DataFrame | dict[str, dict[str, Any]] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
      read_cache: bool = True,
      write_cache: bool = True,
  ) -> polars.DataFrame
@@ -319,6 +320,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
  
  - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Usually this is the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
  
+ - set_context: Typically a dict of arbitrary values to use in column definitions. For example, you could provide the quick draft release date and define a column that depends on it.
+ 
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
  
  ### Enums
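To make the two context arguments concrete, here is a hedged sketch of how they might be combined; the set code, metric column, and context key are invented for illustration:

```python
from spells import summon  # assumed top-level export

# First pass: card-level metrics, fed back in below as card_context.
card_metrics = summon("OTJ", columns=["gp_wr"], group_by=["name"])  # hypothetical set/metric

# Second pass: arbitrary per-set values supplied via set_context.
df = summon(
    "OTJ",
    group_by=["name"],
    card_context=card_metrics,
    set_context={"qd_release": "2024-05-07"},  # hypothetical key for a custom column
)
```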
@@ -353,7 +356,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
  - `PICK_SUM` columns can also be functions of `name`, in which case the value will be a function of the value of the `PICK` field.
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+ - The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+   - `names`: An array of all card names in the canonical order.
+   - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+   - `set_context`: A dictionary of arbitrary fields provided via the `set_context` argument. It has two built-in attributes: `picks_per_pack` (e.g. 13 or 14) and `release_time`, the minimum value of the `draft_time` field.
  
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
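For instance, a custom extension column could read the built-in `picks_per_pack` attribute; the column name and threshold here are invented:

```python
import polars as pl
from spells.columns import ColSpec
from spells.enums import ColType

# Hypothetical GROUP_BY column flagging the last three picks of each pack.
ext = {
    "late_pick": ColSpec(
        col_type=ColType.GROUP_BY,
        expr=lambda set_context: pl.col("pick_number")
        >= set_context["picks_per_pack"] - 3,
        version="1",  # python-function exprs need a version for cache signatures
    )
}
```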
@@ -371,9 +377,11 @@ A table of all included columns. Columns can be referenced by enum or by string
  | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+ | `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
  | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
  | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
  | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+ | `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1-7, etc. | Int |
  | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
  | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
  | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
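The new columns behave like any other `GROUP_BY`; a hedged usage sketch (set code invented):

```python
from spells import summon  # assumed top-level export

# Picks taken per format week, using the new FORMAT_WEEK column.
df = summon("OTJ", columns=["num_taken"], group_by=["format_week"])
```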
@@ -504,5 +512,3 @@ A table of all included columns. Columns can be referenced by enum or by string
  - [ ] Helper functions for common plotting paradigms
  - [ ] Example notebooks
  - [ ] Scientific workflows: regression, MLE, etc
- 
- 
@@ -63,7 +63,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
  - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
  - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
  - Manages grouping and filtering by built-in and custom columns at the row level
- - Provides 122 explicitly specified, enumerated, documented column definitions
+ - Provides 124 explicitly specified, enumerated, documented column definitions
  - Supports "Deck Color Data" aggregations with built-in column definitions.
  - Lets you feed card metrics back into column definitions to support scientific workflows like MLE
  - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -285,6 +285,7 @@ summon(
      filter_spec: dict | None = None,
      extensions: dict[str, ColSpec] | None = None,
      card_context: pl.DataFrame | dict[str, dict[str, Any]] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
      read_cache: bool = True,
      write_cache: bool = True,
  ) -> polars.DataFrame
@@ -308,6 +309,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
  
  - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Usually this is the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
  
+ - set_context: Typically a dict of arbitrary values to use in column definitions. For example, you could provide the quick draft release date and define a column that depends on it.
+ 
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
  
  ### Enums
@@ -342,7 +345,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
  - `PICK_SUM` columns can also be functions of `name`, in which case the value will be a function of the value of the `PICK` field.
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+ - The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+   - `names`: An array of all card names in the canonical order.
+   - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+   - `set_context`: A dictionary of arbitrary fields provided via the `set_context` argument. It has two built-in attributes: `picks_per_pack` (e.g. 13 or 14) and `release_time`, the minimum value of the `draft_time` field.
  
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -360,9 +366,11 @@ A table of all included columns. Columns can be referenced by enum or by string
  | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+ | `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
  | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
  | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
  | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+ | `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1-7, etc. | Int |
  | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
  | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
  | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
@@ -493,5 +501,3 @@ A table of all included columns. Columns can be referenced by enum or by string
  - [ ] Helper functions for common plotting paradigms
  - [ ] Example notebooks
  - [ ] Scientific workflows: regression, MLE, etc
- 
- 
@@ -11,7 +11,7 @@ dependencies = [
  ]
  requires-python = ">=3.11"
  readme = "README.md"
- version = "0.5.3"
+ version = "0.6.1"
  
  [project.license]
  text = "MIT"
@@ -5,6 +5,7 @@ import polars as pl
  
  from spells.enums import View, ColName, ColType
  
+ 
  @dataclass(frozen=True)
  class ColSpec:
      col_type: ColType
@@ -64,9 +65,21 @@ _specs: dict[str, ColSpec] = {
          col_type=ColType.GROUP_BY,
          expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
      ),
+     ColName.FORMAT_DAY: ColSpec(
+         col_type=ColType.GROUP_BY,
+         expr=lambda set_context: (
+             pl.col(ColName.DRAFT_DATE)
+             - pl.lit(set_context["release_time"])
+             .str.to_datetime("%Y-%m-%d %H:%M:%S")
+             .dt.date()
+         ).dt.total_days()
+         + 1,
+     ),
      ColName.DRAFT_DAY_OF_WEEK: ColSpec(
          col_type=ColType.GROUP_BY,
-         expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
+         expr=pl.col(ColName.DRAFT_TIME)
+         .str.to_datetime("%Y-%m-%d %H:%M:%S")
+         .dt.weekday(),
      ),
      ColName.DRAFT_HOUR: ColSpec(
          col_type=ColType.GROUP_BY,
@@ -76,6 +89,9 @@ _specs: dict[str, ColSpec] = {
          col_type=ColType.GROUP_BY,
          expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
      ),
+     ColName.FORMAT_WEEK: ColSpec(
+         col_type=ColType.GROUP_BY, expr=(pl.col(ColName.FORMAT_DAY) - 1) // 7 + 1
+     ),
      ColName.RANK: ColSpec(
          col_type=ColType.GROUP_BY,
          views=[View.GAME, View.DRAFT],
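The integer arithmetic maps `FORMAT_DAY` 1-7 to week 1, 8-14 to week 2, and so on; a quick check in plain Python:

```python
for day in (1, 7, 8, 14, 15):
    print(day, (day - 1) // 7 + 1)
# prints: 1 1, 7 1, 8 2, 14 2, 15 3
```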
@@ -159,13 +175,15 @@ _specs: dict[str, ColSpec] = {
      ),
      ColName.NUM_TAKEN: ColSpec(
          col_type=ColType.PICK_SUM,
-         expr=pl.when(pl.col(ColName.PICK).is_not_null())
-         .then(1)
-         .otherwise(0),
+         expr=pl.when(pl.col(ColName.PICK).is_not_null()).then(1).otherwise(0),
      ),
      ColName.NUM_DRAFTS: ColSpec(
          col_type=ColType.PICK_SUM,
-         expr=pl.when((pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)).then(1).otherwise(0),
+         expr=pl.when(
+             (pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)
+         )
+         .then(1)
+         .otherwise(0),
      ),
      ColName.PICK: ColSpec(
          col_type=ColType.FILTER_ONLY,
@@ -206,7 +224,9 @@ _specs: dict[str, ColSpec] = {
      ),
      ColName.GAME_DAY_OF_WEEK: ColSpec(
          col_type=ColType.GROUP_BY,
-         expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
+         expr=pl.col(ColName.GAME_TIME)
+         .str.to_datetime("%Y-%m-%d %H-%M-%S")
+         .dt.weekday(),
      ),
      ColName.GAME_HOUR: ColSpec(
          col_type=ColType.GROUP_BY,
@@ -381,11 +401,13 @@ _specs: dict[str, ColSpec] = {
      ),
      ColName.DECK_MANA_VALUE: ColSpec(
          col_type=ColType.NAME_SUM,
-         expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE] * pl.col(f"deck_{name}"),
+         expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE]
+         * pl.col(f"deck_{name}"),
      ),
      ColName.DECK_LANDS: ColSpec(
          col_type=ColType.NAME_SUM,
-         expr=lambda name, card_context: pl.col(f"deck_{name}") * ( 1 if 'Land' in card_context[name][ColName.CARD_TYPE] else 0 )
+         expr=lambda name, card_context: pl.col(f"deck_{name}")
+         * (1 if "Land" in card_context[name][ColName.CARD_TYPE] else 0),
      ),
      ColName.DECK_SPELLS: ColSpec(
          col_type=ColType.NAME_SUM,
@@ -560,11 +582,13 @@ _specs: dict[str, ColSpec] = {
  for item in ColName:
      assert item in _specs, f"column {item} enumerated but not specified"
  
+ 
  class GetSpecs:
      def __init__(self, spec_dict: dict[str, ColSpec]):
          self._specs = spec_dict
+ 
      def __call__(self):
          return dict(self._specs)
  
- get_specs = GetSpecs(_specs)
  
+ get_specs = GetSpecs(_specs)
@@ -10,6 +10,7 @@ import functools
  import hashlib
  import re
  from inspect import signature
+ import os
  from typing import Callable, TypeVar, Any
  
  import polars as pl
@@ -53,9 +54,20 @@ def _get_names(set_code: str) -> list[str]:
      return names
  
  
- def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict[str, Any]] | None) -> dict[str, dict[str, Any]]:
-     card_attr_specs = {col:spec for col, spec in specs.items() if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME}
-     col_def_map = _hydrate_col_defs(set_code, card_attr_specs, card_only=True)
+ def _get_card_context(
+     set_code: str,
+     specs: dict[str, ColSpec],
+     card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
+     set_context: pl.DataFrame | dict[str, Any] | None,
+ ) -> dict[str, dict[str, Any]]:
+     card_attr_specs = {
+         col: spec
+         for col, spec in specs.items()
+         if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
+     }
+     col_def_map = _hydrate_col_defs(
+         set_code, card_attr_specs, set_context=set_context, card_only=True
+     )
  
      columns = list(col_def_map.keys())
  
@@ -70,34 +82,50 @@ def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl
      if card_context is not None:
          if isinstance(card_context, pl.DataFrame):
              try:
-                 card_context = {row[ColName.NAME]: row for row in card_context.to_dicts()}
+                 card_context = {
+                     row[ColName.NAME]: row for row in card_context.to_dicts()
+                 }
              except ColumnNotFoundError:
                  raise ValueError("card_context DataFrame must have column 'name'")
  
          names = list(loaded_context.keys())
          for name in names:
-             assert name in card_context, f"card_context must include a row for each card name. {name} missing."
+             assert (
+                 name in card_context
+             ), f"card_context must include a row for each card name. {name} missing."
              for col, value in card_context[name].items():
                  loaded_context[name][col] = value
  
      return loaded_context
- 
  
- def _determine_expression(col: str, spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
+ 
+ def _determine_expression(
+     col: str,
+     spec: ColSpec,
+     names: list[str],
+     card_context: dict[str, dict],
+     set_context: dict[str, Any],
+ ) -> pl.Expr | tuple[pl.Expr, ...]:
      def seed_params(expr):
          params = {}
  
          sig_params = signature(expr).parameters
-         if 'names' in sig_params:
-             params['names'] = names
-         if 'card_context' in sig_params:
-             params['card_context'] = card_context
+         if "names" in sig_params:
+             params["names"] = names
+         if "card_context" in sig_params:
+             params["card_context"] = card_context
+         if "set_context" in sig_params:
+             params["set_context"] = set_context
          return params
  
      if spec.col_type == ColType.NAME_SUM:
          if spec.expr is not None:
-             assert isinstance(spec.expr, Callable), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
-             unnamed_exprs = [spec.expr(**{'name': name, **seed_params(spec.expr)}) for name in names]
+             assert isinstance(
+                 spec.expr, Callable
+             ), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
+             unnamed_exprs = [
+                 spec.expr(**{"name": name, **seed_params(spec.expr)}) for name in names
+             ]
  
              expr = tuple(
                  map(
@@ -112,13 +140,23 @@ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_contex
      elif spec.expr is not None:
          if isinstance(spec.expr, Callable):
              params = seed_params(spec.expr)
-             if spec.col_type == ColType.PICK_SUM and 'name' in signature(spec.expr).parameters:
+             if (
+                 spec.col_type == ColType.PICK_SUM
+                 and "name" in signature(spec.expr).parameters
+             ):
                  expr = pl.lit(None)
                  for name in names:
-                     name_params = {'name': name, **params}
-                     expr = pl.when(pl.col(ColName.PICK) == name).then(spec.expr(**name_params)).otherwise(expr)
-             elif spec.col_type == ColType.CARD_ATTR and 'name' in signature(spec.expr).parameters:
-                 expr = spec.expr(**{'name': pl.col('name'), **params})
+                     name_params = {"name": name, **params}
+                     expr = (
+                         pl.when(pl.col(ColName.PICK) == name)
+                         .then(spec.expr(**name_params))
+                         .otherwise(expr)
+                     )
+             elif (
+                 spec.col_type == ColType.CARD_ATTR
+                 and "name" in signature(spec.expr).parameters
+             ):
+                 expr = spec.expr(**{"name": pl.col("name"), **params})
              else:
                  expr = spec.expr(**params)
          else:
@@ -130,7 +168,12 @@
      return expr
  
  
- def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: dict[str, ColSpec], names: list[str]) -> set[str]:
+ def _infer_dependencies(
+     name: str,
+     expr: pl.Expr | tuple[pl.Expr, ...],
+     specs: dict[str, ColSpec],
+     names: list[str],
+ ) -> set[str]:
      dependencies = set()
      tricky_ones = set()
  
@@ -139,7 +182,7 @@ def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: di
          for dep_col in dep_cols:
              if dep_col in specs.keys():
                  dependencies.add(dep_col)
-             else:    
+             else:
                  tricky_ones.add(dep_col)
      else:
          for idx, exp in enumerate(expr):
148
191
  for dep_col in dep_cols:
149
192
  if dep_col in specs.keys():
150
193
  dependencies.add(dep_col)
151
- elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs:
194
+ elif (
195
+ len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs
196
+ ):
152
197
  dependencies.add(split[0])
153
198
  else:
154
199
  tricky_ones.add(dep_col)
@@ -157,38 +202,80 @@
          found = False
          for n in names:
              pattern = f"_{n}$"
-             if not found and len(split := re.split(pattern, item)) == 2 and split[0] in specs:
+             if (
+                 not found
+                 and len(split := re.split(pattern, item)) == 2
+                 and split[0] in specs
+             ):
                  dependencies.add(split[0])
                  found = True
-         assert found, f"Could not locate column spec for root col {item}"
+         assert found, f"Could not locate column spec for root col {item}"
  
      return dependencies
  
  
- def _hydrate_col_defs(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict] | None = None, card_only: bool =False):
+ def _get_set_context(
+     set_code: str, set_context: pl.DataFrame | dict[str, Any] | None
+ ) -> dict[str, Any]:
+     context_fp = data_file_path(set_code, "context")
+ 
+     report = functools.partial(
+         spells.cache.spells_print,
+         "report",
+         f"Set context for {set_code} invalid, please investigate!",
+     )
+ 
+     context = {}
+     if not os.path.isfile(context_fp):
+         report()
+     else:
+         context_df = pl.read_parquet(context_fp)
+         if len(context_df) == 1:
+             context.update(context_df.to_dicts()[0])
+         else:
+             report()
+ 
+     if isinstance(set_context, pl.DataFrame):
+         assert len(set_context) == 1, "Invalid set context provided"
+         context.update(set_context.to_dicts()[0])
+     elif isinstance(set_context, dict):
+         context.update(set_context)
+ 
+     return context
+ 
+ 
+ def _hydrate_col_defs(
+     set_code: str,
+     specs: dict[str, ColSpec],
+     card_context: pl.DataFrame | dict[str, dict] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
+     card_only: bool = False,
+ ):
      names = _get_names(set_code)
  
+     set_context = _get_set_context(set_code, set_context)
+ 
      if card_only:
          card_context = {}
      else:
-         card_context = _get_card_context(set_code, specs, card_context)
+         card_context = _get_card_context(set_code, specs, card_context, set_context)
  
      assert len(names) > 0, "there should be names"
      hydrated = {}
      for col, spec in specs.items():
-         expr = _determine_expression(col, spec, names, card_context)
+         expr = _determine_expression(col, spec, names, card_context, set_context)
          dependencies = _infer_dependencies(col, expr, specs, names)
  
          sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
          try:
-             expr_sig = sig_expr.meta.serialize(
-                 format="json"
-             )
+             expr_sig = sig_expr.meta.serialize(format="json")
          except pl.exceptions.ComputeError:
              if spec.version is not None:
                  expr_sig = col + spec.version
              else:
-                 print(f"Using session-only signature for non-serializable column {col}, please provide a version value")
+                 print(
+                     f"Using session-only signature for non-serializable column {col}, please provide a version value"
+                 )
                  expr_sig = str(sig_expr)
  
          signature = str(
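Note the merge order in `_get_set_context`: the cached parquet is read first, then `context.update(...)` overlays whatever the caller passed, so user-supplied values win. In plain dicts (illustrative values only):

```python
# Loaded from <set>_context.parquet: the two built-in attributes.
context = {"picks_per_pack": 14, "release_time": "2024-04-16 15:00:00"}
# Overlaid by the caller's set_context argument.
context.update({"picks_per_pack": 13, "qd_release": "2024-06-18"})
print(context["picks_per_pack"])  # 13, the caller's value wins
```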
@@ -359,7 +446,8 @@ def summon(
      use_streaming: bool = False,
      read_cache: bool = True,
      write_cache: bool = True,
-     card_context: pl.DataFrame | dict[str, dict] | None = None
+     card_context: pl.DataFrame | dict[str, dict] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
  ) -> pl.DataFrame:
      specs = get_specs()
  
@@ -369,7 +457,7 @@ def summon(
      for ext in extensions:
          specs.update(ext)
  
-     col_def_map = _hydrate_col_defs(set_code, specs, card_context)
+     col_def_map = _hydrate_col_defs(set_code, specs, card_context, set_context)
      m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
  
      calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
@@ -38,9 +38,11 @@ class ColName(StrEnum):
      DRAFT_ID = "draft_id"
      DRAFT_TIME = "draft_time"  # modified, cast to time
      DRAFT_DATE = "draft_date"
+     FORMAT_DAY = "format_day"
      DRAFT_DAY_OF_WEEK = "draft_day_of_week"
      DRAFT_HOUR = "draft_hour"
      DRAFT_WEEK = "draft_week"
+     FORMAT_WEEK = "format_week"
      RANK = "rank"
      USER_N_GAMES_BUCKET = "user_n_games_bucket"
      USER_GAME_WIN_RATE_BUCKET = "user_game_win_rate_bucket"
@@ -1,4 +1,4 @@
- import math 
+ import math
  
  import polars as pl
  
@@ -6,23 +6,31 @@ from spells.enums import ColType, ColName
  from spells.columns import ColSpec
  from spells.cache import spells_print
  
+ 
  def print_ext(ext: dict[str, ColSpec]) -> None:
      spells_print("create", "Created extensions:")
      for key in ext:
          print("\t" + key)
  
  
- def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
+ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
      ext = {
          f"seen_{attr}": ColSpec(
              col_type=ColType.NAME_SUM,
-             expr=(lambda name, card_context: pl.lit(None) if card_context[name][attr] is None or math.isnan(card_context[name][attr]) else pl.when(pl.col(f"pack_card_{name}") > 0)
+             expr=(
+                 lambda name, card_context: pl.lit(None)
+                 if card_context[name][attr] is None
+                 or math.isnan(card_context[name][attr])
+                 else pl.when(pl.col(f"pack_card_{name}") > 0)
                  .then(card_context[name][attr])
-                 .otherwise(None)),
+                 .otherwise(None)
+             ),
          ),
          f"pick_{attr}": ColSpec(
              col_type=ColType.PICK_SUM,
-             expr=lambda name, card_context: pl.lit(None) if card_context[name][attr] is None or math.isnan(card_context[name][attr]) else card_context[name][attr]
+             expr=lambda name, card_context: pl.lit(None)
+             if card_context[name][attr] is None or math.isnan(card_context[name][attr])
+             else card_context[name][attr],
          ),
          f"seen_{attr}_greater": ColSpec(
              col_type=ColType.NAME_SUM,
@@ -34,27 +42,35 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
          ),
          f"greatest_{attr}_seen": ColSpec(
              col_type=ColType.PICK_SUM,
-             expr=lambda names: pl.max_horizontal([pl.col(f"seen_{attr}_{name}") for name in names]),
+             expr=lambda names: pl.max_horizontal(
+                 [pl.col(f"seen_{attr}_{name}") for name in names]
+             ),
          ),
          f"least_{attr}_seen": ColSpec(
              col_type=ColType.PICK_SUM,
-             expr=lambda names: pl.min_horizontal([pl.col(f"seen_{attr}_{name}") for name in names]),
+             expr=lambda names: pl.min_horizontal(
+                 [pl.col(f"seen_{attr}_{name}") for name in names]
+             ),
          ),
          f"pick_{attr}_rank_greatest": ColSpec(
              col_type=ColType.GROUP_BY,
-             expr=lambda names: pl.sum_horizontal([pl.col(f"seen_{attr}_greater_{name}") for name in names]) + 1,
+             expr=lambda names: pl.sum_horizontal(
+                 [pl.col(f"seen_{attr}_greater_{name}") for name in names]
+             )
+             + 1,
          ),
          f"pick_{attr}_rank_least": ColSpec(
              col_type=ColType.GROUP_BY,
-             expr=lambda names: pl.sum_horizontal([pl.col(f"seen_{attr}_less_{name}") for name in names]) + 1,
+             expr=lambda names: pl.sum_horizontal(
+                 [pl.col(f"seen_{attr}_less_{name}") for name in names]
+             )
+             + 1,
          ),
          f"pick_{attr}_rank_greatest_sum": ColSpec(
-             col_type=ColType.PICK_SUM,
-             expr=pl.col(f"pick_{attr}_rank_greatest")
+             col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_greatest")
          ),
          f"pick_{attr}_rank_least_sum": ColSpec(
-             col_type=ColType.PICK_SUM,
-             expr=pl.col(f"pick_{attr}_rank_least")
+             col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_least")
          ),
          f"pick_{attr}_vs_least": ColSpec(
              col_type=ColType.PICK_SUM,
@@ -74,31 +90,39 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
          ),
          f"least_{attr}_taken": ColSpec(
              col_type=ColType.PICK_SUM,
-             expr=pl.col(f'pick_{attr}') <= pl.col(f'least_{attr}_seen'),
+             expr=pl.col(f"pick_{attr}") <= pl.col(f"least_{attr}_seen"),
          ),
          f"least_{attr}_taken_rate": ColSpec(
              col_type=ColType.AGG,
-             expr=pl.col(f"least_{attr}_taken") / pl.col(ColName.NUM_TAKEN), 
+             expr=pl.col(f"least_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
          ),
          f"greatest_{attr}_taken": ColSpec(
              col_type=ColType.PICK_SUM,
-             expr=pl.col(f'pick_{attr}') >= pl.col(f"greatest_{attr}_seen")
+             expr=pl.col(f"pick_{attr}") >= pl.col(f"greatest_{attr}_seen"),
          ),
          f"greatest_{attr}_taken_rate": ColSpec(
              col_type=ColType.AGG,
-             expr=pl.col(f"greatest_{attr}_taken") / pl.col(ColName.NUM_TAKEN), 
+             expr=pl.col(f"greatest_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
          ),
          f"pick_{attr}_mean": ColSpec(
              col_type=ColType.AGG,
-             expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN)
+             expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN),
          ),
+     }
+ 
+     if not silent:
+         print_ext(ext)
+ 
+     return ext
+ 
+ 
+ def stat_cols(attr: str, silent: bool = False) -> dict[str, ColSpec]:
+     ext = {
          f"{attr}_deck_weight_group": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"{attr}") * pl.col(ColName.DECK)
+             col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.DECK)
          ),
          f"{attr}_deck_weight_total": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"{attr}_deck_weight_group").sum()
+             col_type=ColType.AGG, expr=pl.col(f"{attr}_deck_weight_group").sum()
          ),
          f"{attr}_dw_mean": ColSpec(
              col_type=ColType.AGG,
@@ -110,7 +134,8 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
          ),
          f"{attr}_dw_var": ColSpec(
              col_type=ColType.AGG,
-             expr=(pl.col(f"{attr}_dw_excess").pow(2) * pl.col(ColName.DECK)) / pl.col(ColName.DECK_TOTAL),
+             expr=(pl.col(f"{attr}_dw_excess").pow(2) * pl.col(ColName.DECK))
+             / pl.col(ColName.DECK_TOTAL),
          ),
          f"{attr}_dw_stdev": ColSpec(
              col_type=ColType.AGG,
@@ -121,16 +146,15 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
              expr=pl.col(f"{attr}_dw_excess") / pl.col(f"{attr}_dw_stdev"),
          ),
          f"{attr}_pool_weight_group": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"{attr}") * pl.col(ColName.NUM_IN_POOL)
+             col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.NUM_IN_POOL)
          ),
          f"{attr}_pool_weight_total": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"{attr}_pool_weight_group").sum()
+             col_type=ColType.AGG, expr=pl.col(f"{attr}_pool_weight_group").sum()
          ),
          f"{attr}_pw_mean": ColSpec(
              col_type=ColType.AGG,
-             expr=pl.col(f"{attr}_pool_weight_total") / pl.col(ColName.NUM_IN_POOL_TOTAL),
+             expr=pl.col(f"{attr}_pool_weight_total")
+             / pl.col(ColName.NUM_IN_POOL_TOTAL),
          ),
          f"{attr}_pw_excess": ColSpec(
              col_type=ColType.AGG,
@@ -138,7 +162,8 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
          ),
          f"{attr}_pw_var": ColSpec(
              col_type=ColType.AGG,
-             expr=(pl.col(f"{attr}_pw_excess").pow(2) * pl.col(ColName.NUM_IN_POOL)) / pl.col(ColName.NUM_IN_POOL_TOTAL),
+             expr=(pl.col(f"{attr}_pw_excess").pow(2) * pl.col(ColName.NUM_IN_POOL))
+             / pl.col(ColName.NUM_IN_POOL_TOTAL),
          ),
          f"{attr}_pw_stdev": ColSpec(
              col_type=ColType.AGG,
@@ -154,29 +179,3 @@ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
          print_ext(ext)
  
      return ext
- 
- def more(silent=True):
-     wr_bucket = pl.col(ColName.USER_GAME_WIN_RATE_BUCKET)
-     gp_bucket = pl.col(ColName.USER_N_GAMES_BUCKET)
-     ext = {
-         'deq_base': ColSpec(
-             col_type=ColType.AGG,
-             expr=(pl.col("gp_wr_excess") + 0.03 * (1 - pl.col("ata") / 14).pow(2)) * pl.col("pct_gp")
-         ),
-         'cohorts_plus': ColSpec(
-             col_type=ColType.GROUP_BY,
-             expr=pl.when((wr_bucket > 0.65) & (gp_bucket >= 500)).then(pl.lit('1 Best')).otherwise(
-                 pl.when((wr_bucket > 0.61) & (gp_bucket >= 500) | (wr_bucket > 0.65) & (gp_bucket >= 100)).then(pl.lit('2 Elite')).otherwise(
-                     pl.when((wr_bucket > 0.57) & (gp_bucket >= 100) | (wr_bucket > 0.61)).then(pl.lit('3 Competitive')).otherwise(
-                         pl.when((wr_bucket > 0.53) & (gp_bucket >= 100) | (wr_bucket > 0.57)).then(pl.lit('4 Solid')).otherwise(pl.lit('5 None'))
-                     )
-                 )
-             )
-         )
-     }
- 
-     if not silent:
-         print_ext(ext)
- 
-     return ext
- 
@@ -102,6 +102,7 @@ def cli() -> int:
  def _add(set_code: str, force_download=False):
      download_data_set(set_code, View.DRAFT, force_download=force_download)
      write_card_file(set_code, force_download=force_download)
+     get_set_context(set_code)
      download_data_set(set_code, View.GAME, force_download=force_download)
      return 0
  
@@ -157,9 +158,9 @@ def _info():
              )
              print(f"    {item.name} {sizeof_fmt(os.stat(item).st_size)}")
              file_count += 1
-         if file_count < 3:
+         if file_count < 4:
              suggest_add.add(entry.name)
-         if file_count > 3:
+         if file_count > 4:
              suggest_remove.add(entry.name)
      else:
          cache.spells_print(
@@ -209,6 +210,9 @@ def _external_set_path(set_code):
  
  
  def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
+     if dataset_type == "context":
+         return os.path.join(_external_set_path(set_code), f"{set_code}_context.parquet")
+ 
      if dataset_type == "card":
          return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
  
@@ -314,3 +318,31 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
  
      cache.spells_print(mode, f"Wrote file {card_filepath}")
      return 0
+ 
+ 
+ def get_set_context(set_code: str, force_download=False) -> int:
+     mode = "refresh" if force_download else "add"
+ 
+     context_fp = data_file_path(set_code, "context")
+     cache.spells_print(mode, "Calculating set context")
+     if os.path.isfile(context_fp) and not force_download:
+         cache.spells_print(
+             mode,
+             f"File {context_fp} already exists, use `spells refresh {set_code}` to overwrite",
+         )
+         return 1
+ 
+     draft_fp = data_file_path(set_code, View.DRAFT)
+     draft_view = pl.scan_parquet(draft_fp)
+ 
+     context_df = draft_view.select(
+         [
+             (pl.max("pick_number") + 1).alias("picks_per_pack"),
+             pl.min("draft_time").alias("release_time"),
+         ]
+     ).collect()
+ 
+     context_df.write_parquet(context_fp)
+ 
+     cache.spells_print(mode, f"Wrote file {context_fp}")
+     return 0
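After `spells add [SET]` (or `spells refresh [SET]`), the one-row context parquet sits alongside the set's other files and can be inspected directly; the path root and values below are invented:

```python
import polars as pl

# Path pattern from data_file_path; the cache root is hypothetical.
ctx = pl.read_parquet("/path/to/external/OTJ/OTJ_context.parquet")
print(ctx.to_dicts()[0])
# e.g. {"picks_per_pack": 14, "release_time": "2024-04-16 15:00:00"}
```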
@@ -107,7 +107,9 @@ def _resolve_view_cols(
              View.DRAFT, frozenset()
          ).union({ColName.PICK})
      if cdef.col_type == ColType.CARD_ATTR:
-         view_resolution[View.CARD] = view_resolution.get(View.CARD, frozenset()).union({col})
+         view_resolution[View.CARD] = view_resolution.get(
+             View.CARD, frozenset()
+         ).union({col})
      elif cdef.views:
          for view in cdef.views:
              view_resolution[view] = view_resolution.get(
@@ -132,7 +134,9 @@ def _resolve_view_cols(
          else:
              col_views = col_views.intersection(dep_views)
      if fully_resolved:
-         assert len(col_views), f"Column {col} can't be defined in any views!"
+         assert len(
+             col_views
+         ), f"Column {col} can't be defined in any views!"
          for view in col_views:
              if view not in view_resolution:
                  print(cdef)
@@ -162,7 +166,9 @@ def create(
      if columns is None:
          cols = tuple(spells.columns.default_columns)
          if ColName.NAME not in gbs:
-             cols = tuple(c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR)
+             cols = tuple(
+                 c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR
+             )
      else:
          cols = tuple(columns)
  