spells-mtg 0.5.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spells-mtg might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: spells-mtg
- Version: 0.5.2
+ Version: 0.6.0
  Summary: analysis of 17Lands.com public datasets
  Author-Email: Joel Barnes <oelarnes@gmail.com>
  License: MIT
@@ -74,7 +74,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
  - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
  - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
  - Manages grouping and filtering by built-in and custom columns at the row level
- - Provides 122 explicitly specified, enumerated, documented column definitions
+ - Provides 124 explicitly specified, enumerated, documented column definitions
  - Supports "Deck Color Data" aggregations with built-in column definitions.
  - Lets you feed card metrics back into column definitions to support scientific workflows like MLE
  - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -296,6 +296,7 @@ summon(
      filter_spec: dict | None = None,
      extensions: dict[str, ColSpec] | None = None,
      card_context: pl.DataFrame | dict[str, dict[str, Any]] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
      read_cache: bool = True,
      write_cache: bool = True,
  ) -> polars.DataFrame
@@ -319,6 +320,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
 
  - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
 
+ - set_context: Typically a dict of arbitrary values to use in column definitions. For example, you could provide the quick draft release date and define a column that depends on it.
+
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
 
  ### Enums
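
To make the new argument concrete, here is a minimal sketch of a `summon` call that supplies a `set_context`. The set code and the extra context key are illustrative, and the top-level import is an assumption not shown in this diff:

```python
import datetime

from spells import summon  # assumed top-level export

# "qd_release_date" is a hypothetical user-supplied key, made available
# to column definitions through the set_context expr parameter.
df = summon(
    "BLB",  # illustrative set code
    columns=["gih_wr"],
    group_by=["name", "format_week"],
    set_context={"qd_release_date": datetime.date(2024, 8, 13)},
)
```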
@@ -353,7 +356,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
  - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+ - The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+   - `names`: An array of all card names in the canonical order.
+   - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+   - `set_context`: A dictionary with arbitrary fields provided via the `set_context` argument. Has two built-in attributes, `picks_per_pack` (e.g. 13 or 14), and `release_time`, which is the minimum value of the `draft_time` field.
 
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
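
In practice, `set_context` surfaces as a lambda parameter on a column spec, exactly as the built-in `FORMAT_DAY` column does further down in this diff. A hedged sketch of a custom extension column (the `qd_release_date` key is illustrative, not a built-in attribute):

```python
import polars as pl

from spells.columns import ColSpec
from spells.enums import ColType

# Days elapsed since a user-supplied quick draft release date, counting
# release day as day 1. Pass the date to summon via
# set_context={"qd_release_date": <datetime.date>}.
qd_ext = {
    "qd_format_day": ColSpec(
        col_type=ColType.GROUP_BY,
        expr=lambda set_context: (
            pl.col("draft_date") - pl.lit(set_context["qd_release_date"])
        ).dt.total_days()
        + 1,
    ),
}
```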
@@ -371,9 +377,11 @@ A table of all included columns. Columns can be referenced by enum or by string
  | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+ | `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
  | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
  | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
  | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+ | `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1 - 7, etc. | Int |
  | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
  | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
  | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
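
The `FORMAT_WEEK` bucketing in the table follows directly from `FORMAT_DAY` by integer division, matching the column definition shown later in this diff; a quick standalone check:

```python
# Days 1-7 map to week 1, days 8-14 to week 2, and so on.
for format_day in (1, 7, 8, 14, 15):
    print(format_day, (format_day - 1) // 7 + 1)
```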
@@ -504,5 +512,3 @@ A table of all included columns. Columns can be referenced by enum or by string
  - [ ] Helper functions for common plotting paradigms
  - [ ] Example notebooks
  - [ ] Scientific workflows: regression, MLE, etc
-
-
@@ -63,7 +63,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
  - Supports calculating the standard aggregations and measures out of the box with no arguments (ALSA, GIH WR, etc)
  - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
  - Manages grouping and filtering by built-in and custom columns at the row level
- - Provides 122 explicitly specified, enumerated, documented column definitions
+ - Provides 124 explicitly specified, enumerated, documented column definitions
  - Supports "Deck Color Data" aggregations with built-in column definitions.
  - Lets you feed card metrics back into column definitions to support scientific workflows like MLE
  - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -285,6 +285,7 @@ summon(
      filter_spec: dict | None = None,
      extensions: dict[str, ColSpec] | None = None,
      card_context: pl.DataFrame | dict[str, dict[str, Any]] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
      read_cache: bool = True,
      write_cache: bool = True,
  ) -> polars.DataFrame
@@ -308,6 +309,8 @@ aggregations of non-numeric (or numeric) data types are not supported. If `None`
 
  - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting card metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
 
+ - set_context: Typically a dict of arbitrary values to use in column definitions. For example, you could provide the quick draft release date and define a column that depends on it.
+
  - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
 
  ### Enums
@@ -342,7 +345,10 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
  - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
- - The possible arguments to `expr`, in addition to `name` when appropriate, include the full `names` array as well as a dictionary called `card_context` which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+ - The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
+   - `names`: An array of all card names in the canonical order.
+   - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
+   - `set_context`: A dictionary with arbitrary fields provided via the `set_context` argument. Has two built-in attributes, `picks_per_pack` (e.g. 13 or 14), and `release_time`, which is the minimum value of the `draft_time` field.
 
  - `version`: When defining a column using a python function, as opposed to Polars expressions, add a unique version number so that the unique hashed signature of the column specification can be derived
  for caching purposes, since Polars cannot generate a serialization natively. When changing the definition, be sure to increment the version value. Otherwise you do not need to use this parameter.
@@ -360,9 +366,11 @@ A table of all included columns. Columns can be referenced by enum or by string
  | `DRAFT_ID` | `"draft_id"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_TIME` | `"draft_time"` | `DRAFT, GAME` | `FILTER_ONLY` | Dataset column | String |
  | `DRAFT_DATE` | `"draft_date"` | `DRAFT, GAME` | `GROUP_BY` | | `datetime.date` |
+ | `FORMAT_DAY` | `"format_day"` | `DRAFT, GAME` | `GROUP_BY` | 1 for release day, 2, 3, etc. | Int |
  | `DRAFT_DAY_OF_WEEK` | `"draft_day_of_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-7 (Mon-Sun) | Int |
  | `DRAFT_HOUR` | `"draft_hour"` | `DRAFT, GAME` | `GROUP_BY` | 0-23 | Int |
  | `DRAFT_WEEK` | `"draft_week"` | `DRAFT, GAME` | `GROUP_BY` | 1-53 | Int |
+ | `FORMAT_WEEK` | `"format_week"` | `DRAFT, GAME` | `GROUP_BY` | 1 for `FORMAT_DAY` 1 - 7, etc. | Int |
  | `RANK` | `"rank"` | `DRAFT, GAME` | `GROUP_BY` | Dataset column | String |
  | `USER_N_GAMES_BUCKET` | `"user_n_games_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Int |
  | `USER_GAME_WIN_RATE_BUCKET` | `"user_game_win_rate_bucket"` | `DRAFT, GAME` | `GROUP_BY` | Dataset Column | Float |
@@ -493,5 +501,3 @@ A table of all included columns. Columns can be referenced by enum or by string
  - [ ] Helper functions for common plotting paradigms
  - [ ] Example notebooks
  - [ ] Scientific workflows: regression, MLE, etc
-
-
@@ -11,7 +11,7 @@ dependencies = [
  ]
  requires-python = ">=3.11"
  readme = "README.md"
- version = "0.5.2"
+ version = "0.6.0"
 
  [project.license]
  text = "MIT"
@@ -40,7 +40,7 @@ default_columns = [
      ColName.GIH_WR,
  ]
 
- specs: dict[str, ColSpec] = {
+ _specs: dict[str, ColSpec] = {
      ColName.NAME: ColSpec(
          col_type=ColType.GROUP_BY,
          views=[View.CARD],
@@ -65,9 +65,21 @@ specs: dict[str, ColSpec] = {
          col_type=ColType.GROUP_BY,
          expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
      ),
+     ColName.FORMAT_DAY: ColSpec(
+         col_type=ColType.GROUP_BY,
+         expr=lambda set_context: (
+             pl.col(ColName.DRAFT_DATE)
+             - pl.lit(set_context["release_time"])
+             .str.to_datetime("%Y-%m-%d %H:%M:%S")
+             .dt.date()
+         ).dt.total_days()
+         + 1,
+     ),
      ColName.DRAFT_DAY_OF_WEEK: ColSpec(
          col_type=ColType.GROUP_BY,
-         expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.weekday(),
+         expr=pl.col(ColName.DRAFT_TIME)
+         .str.to_datetime("%Y-%m-%d %H:%M:%S")
+         .dt.weekday(),
      ),
      ColName.DRAFT_HOUR: ColSpec(
          col_type=ColType.GROUP_BY,
@@ -77,6 +89,9 @@ specs: dict[str, ColSpec] = {
          col_type=ColType.GROUP_BY,
          expr=pl.col(ColName.DRAFT_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
      ),
+     ColName.FORMAT_WEEK: ColSpec(
+         col_type=ColType.GROUP_BY, expr=(pl.col(ColName.FORMAT_DAY) - 1) // 7 + 1
+     ),
      ColName.RANK: ColSpec(
          col_type=ColType.GROUP_BY,
          views=[View.GAME, View.DRAFT],
@@ -160,13 +175,15 @@ specs: dict[str, ColSpec] = {
      ),
      ColName.NUM_TAKEN: ColSpec(
          col_type=ColType.PICK_SUM,
-         expr=pl.when(pl.col(ColName.PICK).is_not_null())
-         .then(1)
-         .otherwise(0),
+         expr=pl.when(pl.col(ColName.PICK).is_not_null()).then(1).otherwise(0),
      ),
      ColName.NUM_DRAFTS: ColSpec(
          col_type=ColType.PICK_SUM,
-         expr=pl.when((pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)).then(1).otherwise(0),
+         expr=pl.when(
+             (pl.col(ColName.PACK_NUMBER) == 0) & (pl.col(ColName.PICK_NUMBER) == 0)
+         )
+         .then(1)
+         .otherwise(0),
      ),
      ColName.PICK: ColSpec(
          col_type=ColType.FILTER_ONLY,
@@ -207,7 +224,9 @@ specs: dict[str, ColSpec] = {
      ),
      ColName.GAME_DAY_OF_WEEK: ColSpec(
          col_type=ColType.GROUP_BY,
-         expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H-%M-%S").dt.weekday(),
+         expr=pl.col(ColName.GAME_TIME)
+         .str.to_datetime("%Y-%m-%d %H-%M-%S")
+         .dt.weekday(),
      ),
      ColName.GAME_HOUR: ColSpec(
          col_type=ColType.GROUP_BY,
@@ -382,11 +401,13 @@ specs: dict[str, ColSpec] = {
      ),
      ColName.DECK_MANA_VALUE: ColSpec(
          col_type=ColType.NAME_SUM,
-         expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE] * pl.col(f"deck_{name}"),
+         expr=lambda name, card_context: card_context[name][ColName.MANA_VALUE]
+         * pl.col(f"deck_{name}"),
      ),
      ColName.DECK_LANDS: ColSpec(
          col_type=ColType.NAME_SUM,
-         expr=lambda name, card_context: pl.col(f"deck_{name}") * ( 1 if 'Land' in card_context[name][ColName.CARD_TYPE] else 0 )
+         expr=lambda name, card_context: pl.col(f"deck_{name}")
+         * (1 if "Land" in card_context[name][ColName.CARD_TYPE] else 0),
      ),
      ColName.DECK_SPELLS: ColSpec(
          col_type=ColType.NAME_SUM,
@@ -477,6 +498,10 @@ specs: dict[str, ColSpec] = {
          col_type=ColType.AGG,
          expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
      ),
+     ColName.NUM_IN_POOL_TOTAL: ColSpec(
+         col_type=ColType.AGG,
+         expr=pl.col(ColName.NUM_IN_POOL).sum(),
+     ),
      ColName.IN_POOL_WR: ColSpec(
          col_type=ColType.AGG,
          expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
@@ -555,4 +580,15 @@ specs: dict[str, ColSpec] = {
  }
 
  for item in ColName:
-     assert item in specs, f"column {item} enumerated but not specified"
+     assert item in _specs, f"column {item} enumerated but not specified"
+
+
+ class GetSpecs:
+     def __init__(self, spec_dict: dict[str, ColSpec]):
+         self._specs = spec_dict
+
+     def __call__(self):
+         return dict(self._specs)
+
+
+ get_specs = GetSpecs(_specs)
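
Since `get_specs()` returns a fresh `dict` on every call, callers can add or override columns locally without mutating the shared registry. A small sketch (the extra column is illustrative, not part of the package):

```python
import polars as pl

from spells.columns import ColSpec, get_specs
from spells.enums import ColName, ColType

specs = get_specs()  # a copy; edits here don't leak into the module registry
specs["is_first_pick"] = ColSpec(  # hypothetical custom column
    col_type=ColType.GROUP_BY,
    expr=pl.col(ColName.PICK_NUMBER) == 0,
)
```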
@@ -6,11 +6,11 @@ Aggregate dataframes containing raw counts are cached in the local file system
  for performance.
  """
 
- import datetime
  import functools
  import hashlib
  import re
  from inspect import signature
+ import os
  from typing import Callable, TypeVar, Any
 
  import polars as pl
@@ -20,7 +20,7 @@ from spells.external import data_file_path
  import spells.cache
  import spells.filter
  import spells.manifest
- from spells.columns import ColDef, ColSpec
+ from spells.columns import ColDef, ColSpec, get_specs
  from spells.enums import View, ColName, ColType
 
 
@@ -54,9 +54,20 @@ def _get_names(set_code: str) -> list[str]:
      return names
 
 
- def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict[str, Any]] | None) -> dict[str, dict[str, Any]]:
-     card_attr_specs = {col:spec for col, spec in specs.items() if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME}
-     col_def_map = _hydrate_col_defs(set_code, card_attr_specs, card_only=True)
+ def _get_card_context(
+     set_code: str,
+     specs: dict[str, ColSpec],
+     card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
+     set_context: pl.DataFrame | dict[str, Any] | None,
+ ) -> dict[str, dict[str, Any]]:
+     card_attr_specs = {
+         col: spec
+         for col, spec in specs.items()
+         if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
+     }
+     col_def_map = _hydrate_col_defs(
+         set_code, card_attr_specs, set_context=set_context, card_only=True
+     )
 
      columns = list(col_def_map.keys())
 
@@ -71,34 +82,50 @@ def _get_card_context(set_code: str, specs: dict[str, ColSpec], card_context: pl
      if card_context is not None:
          if isinstance(card_context, pl.DataFrame):
              try:
-                 card_context = {row[ColName.NAME]: row for row in card_context.to_dicts()}
+                 card_context = {
+                     row[ColName.NAME]: row for row in card_context.to_dicts()
+                 }
              except ColumnNotFoundError:
                  raise ValueError("card_context DataFrame must have column 'name'")
 
          names = list(loaded_context.keys())
          for name in names:
-             assert name in card_context, f"card_context must include a row for each card name. {name} missing."
+             assert (
+                 name in card_context
+             ), f"card_context must include a row for each card name. {name} missing."
              for col, value in card_context[name].items():
                  loaded_context[name][col] = value
 
      return loaded_context
-
 
- def _determine_expression(col: str, spec: ColSpec, names: list[str], card_context: dict[str, dict]) -> pl.Expr | tuple[pl.Expr, ...]:
+
+ def _determine_expression(
+     col: str,
+     spec: ColSpec,
+     names: list[str],
+     card_context: dict[str, dict],
+     set_context: dict[str, Any],
+ ) -> pl.Expr | tuple[pl.Expr, ...]:
      def seed_params(expr):
          params = {}
 
          sig_params = signature(expr).parameters
-         if 'names' in sig_params:
-             params['names'] = names
-         if 'card_context' in sig_params:
-             params['card_context'] = card_context
+         if "names" in sig_params:
+             params["names"] = names
+         if "card_context" in sig_params:
+             params["card_context"] = card_context
+         if "set_context" in sig_params:
+             params["set_context"] = set_context
          return params
 
      if spec.col_type == ColType.NAME_SUM:
          if spec.expr is not None:
-             assert isinstance(spec.expr, Callable), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
-             unnamed_exprs = [spec.expr(**{'name': name, **seed_params(spec.expr)}) for name in names]
+             assert isinstance(
+                 spec.expr, Callable
+             ), f"NAME_SUM column {col} must have a callable `expr` accepting a `name` argument"
+             unnamed_exprs = [
+                 spec.expr(**{"name": name, **seed_params(spec.expr)}) for name in names
+             ]
 
              expr = tuple(
                  map(
  map(
@@ -113,13 +140,23 @@ def _determine_expression(col: str, spec: ColSpec, names: list[str], card_contex
113
140
  elif spec.expr is not None:
114
141
  if isinstance(spec.expr, Callable):
115
142
  params = seed_params(spec.expr)
116
- if spec.col_type == ColType.PICK_SUM and 'name' in signature(spec.expr).parameters:
143
+ if (
144
+ spec.col_type == ColType.PICK_SUM
145
+ and "name" in signature(spec.expr).parameters
146
+ ):
117
147
  expr = pl.lit(None)
118
148
  for name in names:
119
- name_params = {'name': name, **params}
120
- expr = pl.when(pl.col(ColName.PICK) == name).then(spec.expr(**name_params)).otherwise(expr)
121
- elif spec.col_type == ColType.CARD_ATTR and 'name' in signature(spec.expr).parameters:
122
- expr = spec.expr(**{'name': pl.col('name'), **params})
149
+ name_params = {"name": name, **params}
150
+ expr = (
151
+ pl.when(pl.col(ColName.PICK) == name)
152
+ .then(spec.expr(**name_params))
153
+ .otherwise(expr)
154
+ )
155
+ elif (
156
+ spec.col_type == ColType.CARD_ATTR
157
+ and "name" in signature(spec.expr).parameters
158
+ ):
159
+ expr = spec.expr(**{"name": pl.col("name"), **params})
123
160
  else:
124
161
  expr = spec.expr(**params)
125
162
  else:
@@ -131,7 +168,12 @@
      return expr
 
 
- def _infer_dependencies(name: str, expr: pl.Expr | tuple[pl.Expr,...], specs: dict[str, ColSpec], names: list[str]) -> set[str]:
+ def _infer_dependencies(
+     name: str,
+     expr: pl.Expr | tuple[pl.Expr, ...],
+     specs: dict[str, ColSpec],
+     names: list[str],
+ ) -> set[str]:
      dependencies = set()
      tricky_ones = set()
@@ -140,7 +182,7 @@
          for dep_col in dep_cols:
              if dep_col in specs.keys():
                  dependencies.add(dep_col)
-             else:
+             else:
                  tricky_ones.add(dep_col)
      else:
          for idx, exp in enumerate(expr):
@@ -149,7 +191,9 @@
              for dep_col in dep_cols:
                  if dep_col in specs.keys():
                      dependencies.add(dep_col)
-                 elif len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs:
+                 elif (
+                     len(split := re.split(pattern, dep_col)) == 2 and split[0] in specs
+                 ):
                      dependencies.add(split[0])
                  else:
                      tricky_ones.add(dep_col)
@@ -158,38 +202,80 @@
          found = False
          for n in names:
              pattern = f"_{n}$"
-             if not found and len(split := re.split(pattern, item)) == 2 and split[0] in specs:
+             if (
+                 not found
+                 and len(split := re.split(pattern, item)) == 2
+                 and split[0] in specs
+             ):
                  dependencies.add(split[0])
                  found = True
-         assert found, f"Could not locate column spec for root col {item}"
+         assert found, f"Could not locate column spec for root col {item}"
 
      return dependencies
 
 
- def _hydrate_col_defs(set_code: str, specs: dict[str, ColSpec], card_context: pl.DataFrame | dict[str, dict] | None = None, card_only: bool =False):
+ def _get_set_context(
+     set_code: str, set_context: pl.DataFrame | dict[str, Any] | None
+ ) -> dict[str, Any]:
+     context_fp = data_file_path(set_code, "context")
+
+     report = functools.partial(
+         spells.cache.spells_print,
+         "report",
+         f"Set context for {set_code} invalid, please investigate!",
+     )
+
+     context = {}
+     if not os.path.isfile(context_fp):
+         report()
+     else:
+         context_df = pl.read_parquet(context_fp)
+         if len(context_df) == 1:
+             context.update(context_df.to_dicts()[0])
+         else:
+             report()
+
+     if isinstance(set_context, pl.DataFrame):
+         assert len(set_context) == 1, "Invalid set context provided"
+         context.update(set_context.to_dicts()[0])
+     elif isinstance(set_context, dict):
+         context.update(set_context)
+
+     return context
+
+
+ def _hydrate_col_defs(
+     set_code: str,
+     specs: dict[str, ColSpec],
+     card_context: pl.DataFrame | dict[str, dict] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
+     card_only: bool = False,
+ ):
      names = _get_names(set_code)
 
+     set_context = _get_set_context(set_code, set_context)
+
      if card_only:
          card_context = {}
      else:
-         card_context = _get_card_context(set_code, specs, card_context)
+         card_context = _get_card_context(set_code, specs, card_context, set_context)
 
      assert len(names) > 0, "there should be names"
      hydrated = {}
      for col, spec in specs.items():
-         expr = _determine_expression(col, spec, names, card_context)
+         expr = _determine_expression(col, spec, names, card_context, set_context)
          dependencies = _infer_dependencies(col, expr, specs, names)
 
          sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
          try:
-             expr_sig = sig_expr.meta.serialize(
-                 format="json"
-             )
+             expr_sig = sig_expr.meta.serialize(format="json")
          except pl.exceptions.ComputeError:
              if spec.version is not None:
                  expr_sig = col + spec.version
              else:
-                 print(f"Using session-only signature for non-serializable column {col}, please provide a version value")
+                 print(
+                     f"Using session-only signature for non-serializable column {col}, please provide a version value"
+                 )
                  expr_sig = str(sig_expr)
 
          signature = str(
@@ -356,18 +442,22 @@ def summon(
      columns: list[str] | None = None,
      group_by: list[str] | None = None,
      filter_spec: dict | None = None,
-     extensions: dict[str, ColSpec] | None = None,
+     extensions: dict[str, ColSpec] | list[dict[str, ColSpec]] | None = None,
      use_streaming: bool = False,
      read_cache: bool = True,
      write_cache: bool = True,
-     card_context: pl.DataFrame | dict[str, dict] | None = None
+     card_context: pl.DataFrame | dict[str, dict] | None = None,
+     set_context: pl.DataFrame | dict[str, Any] | None = None,
  ) -> pl.DataFrame:
-     specs = dict(spells.columns.specs)
+     specs = get_specs()
 
      if extensions is not None:
-         specs.update(extensions)
+         if not isinstance(extensions, list):
+             extensions = [extensions]
+         for ext in extensions:
+             specs.update(ext)
 
-     col_def_map = _hydrate_col_defs(set_code, specs, card_context)
+     col_def_map = _hydrate_col_defs(set_code, specs, card_context, set_context)
      m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
 
      calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
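
With `extensions` now accepting a list, independently built extension dicts can be layered in order; later dicts win on key collisions since each is applied with `dict.update`. A hedged sketch (the column names and the top-level import are illustrative):

```python
import polars as pl

from spells import summon  # assumed top-level export
from spells.columns import ColSpec
from spells.enums import ColType

ext_a = {
    "is_p1p1": ColSpec(  # hypothetical: pack 1, pick 1 indicator
        col_type=ColType.GROUP_BY,
        expr=(pl.col("pack_number") == 0) & (pl.col("pick_number") == 0),
    ),
}
ext_b = {
    "taken_p1p1": ColSpec(  # hypothetical: counts picks made at P1P1
        col_type=ColType.PICK_SUM,
        expr=pl.when(pl.col("is_p1p1")).then(1).otherwise(0),
    ),
}
df = summon("BLB", columns=["taken_p1p1"], extensions=[ext_a, ext_b])
```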
@@ -38,9 +38,11 @@ class ColName(StrEnum):
      DRAFT_ID = "draft_id"
      DRAFT_TIME = "draft_time"  # modified, cast to time
      DRAFT_DATE = "draft_date"
+     FORMAT_DAY = "format_day"
      DRAFT_DAY_OF_WEEK = "draft_day_of_week"
      DRAFT_HOUR = "draft_hour"
      DRAFT_WEEK = "draft_week"
+     FORMAT_WEEK = "format_week"
      RANK = "rank"
      USER_N_GAMES_BUCKET = "user_n_games_bucket"
      USER_GAME_WIN_RATE_BUCKET = "user_game_win_rate_bucket"
@@ -143,6 +145,7 @@ class ColName(StrEnum):
      GNS_WR = "gns_wr"
      IWD = "iwd"
      NUM_IN_POOL = "num_in_pool"
+     NUM_IN_POOL_TOTAL = "num_in_pool_total"
      IN_POOL_WR = "in_pool_wr"
      DECK_TOTAL = "deck_total"
      WON_DECK_TOTAL = "won_deck_total"
@@ -0,0 +1,213 @@
+ import math
+
+ import polars as pl
+
+ from spells.enums import ColType, ColName
+ from spells.columns import ColSpec
+ from spells.cache import spells_print
+
+
+ def print_ext(ext: dict[str, ColSpec]) -> None:
+     spells_print("create", "Created extensions:")
+     for key in ext:
+         print("\t" + key)
+
+
+ def attr_cols(attr, silent=False) -> dict[str, ColSpec]:
+     ext = {
+         f"seen_{attr}": ColSpec(
+             col_type=ColType.NAME_SUM,
+             expr=(
+                 lambda name, card_context: pl.lit(None)
+                 if card_context[name][attr] is None
+                 or math.isnan(card_context[name][attr])
+                 else pl.when(pl.col(f"pack_card_{name}") > 0)
+                 .then(card_context[name][attr])
+                 .otherwise(None)
+             ),
+         ),
+         f"pick_{attr}": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=lambda name, card_context: pl.lit(None)
+             if card_context[name][attr] is None or math.isnan(card_context[name][attr])
+             else card_context[name][attr],
+         ),
+         f"seen_{attr}_greater": ColSpec(
+             col_type=ColType.NAME_SUM,
+             expr=lambda name: pl.col(f"seen_{attr}_{name}") > pl.col(f"pick_{attr}"),
+         ),
+         f"seen_{attr}_less": ColSpec(
+             col_type=ColType.NAME_SUM,
+             expr=lambda name: pl.col(f"seen_{attr}_{name}") < pl.col(f"pick_{attr}"),
+         ),
+         f"greatest_{attr}_seen": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=lambda names: pl.max_horizontal(
+                 [pl.col(f"seen_{attr}_{name}") for name in names]
+             ),
+         ),
+         f"least_{attr}_seen": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=lambda names: pl.min_horizontal(
+                 [pl.col(f"seen_{attr}_{name}") for name in names]
+             ),
+         ),
+         f"pick_{attr}_rank_greatest": ColSpec(
+             col_type=ColType.GROUP_BY,
+             expr=lambda names: pl.sum_horizontal(
+                 [pl.col(f"seen_{attr}_greater_{name}") for name in names]
+             )
+             + 1,
+         ),
+         f"pick_{attr}_rank_least": ColSpec(
+             col_type=ColType.GROUP_BY,
+             expr=lambda names: pl.sum_horizontal(
+                 [pl.col(f"seen_{attr}_less_{name}") for name in names]
+             )
+             + 1,
+         ),
+         f"pick_{attr}_rank_greatest_sum": ColSpec(
+             col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_greatest")
+         ),
+         f"pick_{attr}_rank_least_sum": ColSpec(
+             col_type=ColType.PICK_SUM, expr=pl.col(f"pick_{attr}_rank_least")
+         ),
+         f"pick_{attr}_vs_least": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=pl.col(f"pick_{attr}") - pl.col(f"least_{attr}_seen"),
+         ),
+         f"pick_{attr}_vs_greatest": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=pl.col(f"pick_{attr}") - pl.col(f"greatest_{attr}_seen"),
+         ),
+         f"pick_{attr}_vs_least_mean": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"pick_{attr}_vs_least") / pl.col(ColName.NUM_TAKEN),
+         ),
+         f"pick_{attr}_vs_greatest_mean": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"pick_{attr}_vs_greatest") / pl.col(ColName.NUM_TAKEN),
+         ),
+         f"least_{attr}_taken": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=pl.col(f"pick_{attr}") <= pl.col(f"least_{attr}_seen"),
+         ),
+         f"least_{attr}_taken_rate": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"least_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
+         ),
+         f"greatest_{attr}_taken": ColSpec(
+             col_type=ColType.PICK_SUM,
+             expr=pl.col(f"pick_{attr}") >= pl.col(f"greatest_{attr}_seen"),
+         ),
+         f"greatest_{attr}_taken_rate": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"greatest_{attr}_taken") / pl.col(ColName.NUM_TAKEN),
+         ),
+         f"pick_{attr}_mean": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN),
+         ),
+         f"{attr}_deck_weight_group": ColSpec(
+             col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.DECK)
+         ),
+         f"{attr}_deck_weight_total": ColSpec(
+             col_type=ColType.AGG, expr=pl.col(f"{attr}_deck_weight_group").sum()
+         ),
+         f"{attr}_dw_mean": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_deck_weight_total") / pl.col(ColName.DECK_TOTAL),
+         ),
+         f"{attr}_dw_excess": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_dw_mean") - pl.col(f"{attr}"),
+         ),
+         f"{attr}_dw_var": ColSpec(
+             col_type=ColType.AGG,
+             expr=(pl.col(f"{attr}_dw_excess").pow(2) * pl.col(ColName.DECK))
+             / pl.col(ColName.DECK_TOTAL),
+         ),
+         f"{attr}_dw_stdev": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_dw_var").sqrt(),
+         ),
+         f"{attr}_dwz": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_dw_excess") / pl.col(f"{attr}_dw_stdev"),
+         ),
+         f"{attr}_pool_weight_group": ColSpec(
+             col_type=ColType.AGG, expr=pl.col(f"{attr}") * pl.col(ColName.NUM_IN_POOL)
+         ),
+         f"{attr}_pool_weight_total": ColSpec(
+             col_type=ColType.AGG, expr=pl.col(f"{attr}_pool_weight_group").sum()
+         ),
+         f"{attr}_pw_mean": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_pool_weight_total")
+             / pl.col(ColName.NUM_IN_POOL_TOTAL),
+         ),
+         f"{attr}_pw_excess": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_pw_mean") - pl.col(f"{attr}"),
+         ),
+         f"{attr}_pw_var": ColSpec(
+             col_type=ColType.AGG,
+             expr=(pl.col(f"{attr}_pw_excess").pow(2) * pl.col(ColName.NUM_IN_POOL))
+             / pl.col(ColName.NUM_IN_POOL_TOTAL),
+         ),
+         f"{attr}_pw_stdev": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_pw_var").sqrt(),
+         ),
+         f"{attr}_pwz": ColSpec(
+             col_type=ColType.AGG,
+             expr=pl.col(f"{attr}_pw_excess") / pl.col(f"{attr}_pw_stdev"),
+         ),
+     }
+
+     if not silent:
+         print_ext(ext)
+
+     return ext
+
+
+ def more(silent=True):
+     wr_bucket = pl.col(ColName.USER_GAME_WIN_RATE_BUCKET)
+     gp_bucket = pl.col(ColName.USER_N_GAMES_BUCKET)
+     ext = {
+         "deq_base": ColSpec(
+             col_type=ColType.AGG,
+             expr=(pl.col("gp_wr_excess") + 0.03 * (1 - pl.col("ata") / 14).pow(2))
+             * pl.col("pct_gp"),
+         ),
+         "cohorts_plus": ColSpec(
+             col_type=ColType.GROUP_BY,
+             expr=pl.when((wr_bucket > 0.65) & (gp_bucket >= 500))
+             .then(pl.lit("1 Best"))
+             .otherwise(
+                 pl.when(
+                     (wr_bucket > 0.61) & (gp_bucket >= 500)
+                     | (wr_bucket > 0.65) & (gp_bucket >= 100)
+                 )
+                 .then(pl.lit("2 Elite"))
+                 .otherwise(
+                     pl.when(
+                         (wr_bucket > 0.57) & (gp_bucket >= 100) | (wr_bucket > 0.61)
+                     )
+                     .then(pl.lit("3 Competitive"))
+                     .otherwise(
+                         pl.when(
+                             (wr_bucket > 0.53) & (gp_bucket >= 100) | (wr_bucket > 0.57)
+                         )
+                         .then(pl.lit("4 Solid"))
+                         .otherwise(pl.lit("5 None"))
+                     )
+                 )
+             ),
+         ),
+     }
+
+     if not silent:
+         print_ext(ext)
+
+     return ext
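
A sketch of how the new `attr_cols` generator is meant to compose with `card_context`: compute a card metric with one `summon` call, then feed it back in so the generated rank columns can reference it. The module path for the new file is not shown in this diff, so the import is an assumption:

```python
from spells import summon  # assumed top-level export
from spells.extensions import attr_cols  # hypothetical module path; adjust to the new file's name

metric_df = summon("BLB", columns=["gp_wr"], group_by=["name"])
df = summon(
    "BLB",
    columns=["greatest_gp_wr_taken_rate"],
    extensions=[attr_cols("gp_wr", silent=True)],
    card_context=metric_df,
)
```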
@@ -102,6 +102,7 @@ def cli() -> int:
  def _add(set_code: str, force_download=False):
      download_data_set(set_code, View.DRAFT, force_download=force_download)
      write_card_file(set_code, force_download=force_download)
+     get_set_context(set_code)
      download_data_set(set_code, View.GAME, force_download=force_download)
      return 0
 
@@ -157,9 +158,9 @@ def _info():
                  )
                  print(f" {item.name} {sizeof_fmt(os.stat(item).st_size)}")
                  file_count += 1
-             if file_count < 3:
+             if file_count < 4:
                  suggest_add.add(entry.name)
-             if file_count > 3:
+             if file_count > 4:
                  suggest_remove.add(entry.name)
          else:
              cache.spells_print(
@@ -209,6 +210,9 @@ def _external_set_path(set_code):
 
 
  def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
+     if dataset_type == "context":
+         return os.path.join(_external_set_path(set_code), f"{set_code}_context.parquet")
+
      if dataset_type == "card":
          return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
 
@@ -314,3 +318,31 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
 
      cache.spells_print(mode, f"Wrote file {card_filepath}")
      return 0
+
+
+ def get_set_context(set_code: str, force_download=False) -> int:
+     mode = "refresh" if force_download else "add"
+
+     context_fp = data_file_path(set_code, "context")
+     cache.spells_print(mode, "Calculating set context")
+     if os.path.isfile(context_fp) and not force_download:
+         cache.spells_print(
+             mode,
+             f"File {context_fp} already exists, use `spells refresh {set_code}` to overwrite",
+         )
+         return 1
+
+     draft_fp = data_file_path(set_code, View.DRAFT)
+     draft_view = pl.scan_parquet(draft_fp)
+
+     context_df = draft_view.select(
+         [
+             pl.max("pick_number").alias("picks_per_pack") + 1,
+             pl.min("draft_time").alias("release_time"),
+         ]
+     ).collect()
+
+     context_df.write_parquet(context_fp)
+
+     cache.spells_print(mode, f"Wrote file {context_fp}")
+     return 0
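
The derived context file holds a single row with the two built-in attributes. An equivalent standalone computation on toy data, with the alias applied after the arithmetic for clarity:

```python
import polars as pl

df = pl.DataFrame(
    {
        "pick_number": [0, 1, 13],
        "draft_time": ["2024-08-13 09:00:00", "2024-08-13 08:00:00", "2024-08-14 10:00:00"],
    }
)
ctx = df.select(
    (pl.max("pick_number") + 1).alias("picks_per_pack"),  # 0-indexed picks -> 14
    pl.min("draft_time").alias("release_time"),  # lexicographic min == earliest timestamp
)
print(ctx.to_dicts()[0])
# {'picks_per_pack': 14, 'release_time': '2024-08-13 08:00:00'}
```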
@@ -107,7 +107,9 @@ def _resolve_view_cols(
                  View.DRAFT, frozenset()
              ).union({ColName.PICK})
          if cdef.col_type == ColType.CARD_ATTR:
-             view_resolution[View.CARD] = view_resolution.get(View.CARD, frozenset()).union({col})
+             view_resolution[View.CARD] = view_resolution.get(
+                 View.CARD, frozenset()
+             ).union({col})
          elif cdef.views:
              for view in cdef.views:
                  view_resolution[view] = view_resolution.get(
@@ -132,7 +134,9 @@ def _resolve_view_cols(
              else:
                  col_views = col_views.intersection(dep_views)
          if fully_resolved:
-             assert len(col_views), f"Column {col} can't be defined in any views!"
+             assert len(
+                 col_views
+             ), f"Column {col} can't be defined in any views!"
              for view in col_views:
                  if view not in view_resolution:
                      print(cdef)
@@ -162,7 +166,9 @@ def create(
      if columns is None:
          cols = tuple(spells.columns.default_columns)
          if ColName.NAME not in gbs:
-             cols = tuple(c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR)
+             cols = tuple(
+                 c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR
+             )
      else:
          cols = tuple(columns)
 
@@ -1,40 +0,0 @@
- import polars as pl
-
- from spells.enums import ColType
- from spells.columns import ColSpec
-
- def attr_metrics(attr):
-     return {
-         f"seen_{attr}": ColSpec(
-             col_type=ColType.NAME_SUM,
-             expr=(lambda name, card_context: pl.when(pl.col(f"pack_card_{name}") > 0)
-                 .then(card_context[name][attr])
-                 .otherwise(None)),
-         ),
-         f"pick_{attr}": ColSpec(
-             col_type=ColType.PICK_SUM,
-             expr=lambda name, card_context: card_context[name][attr]
-         ),
-         f"least_{attr}_taken": ColSpec(
-             col_type=ColType.PICK_SUM,
-             expr=(lambda names: pl.col(f'pick_{attr}')
-                 <= pl.min_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
-         ),
-         f"least_{attr}_taken_rate": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"least_{attr}_taken") / pl.col("num_taken"),
-         ),
-         f"greatest_{attr}_taken": ColSpec(
-             col_type=ColType.PICK_SUM,
-             expr=(lambda names: pl.col(f'pick_{attr}')
-                 >= pl.max_horizontal([pl.col(f"seen_{attr}_{name}") for name in names])),
-         ),
-         f"greatest_{attr}_taken_rate": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"greatest_{attr}_taken") / pl.col("num_taken"),
-         ),
-         f"pick_{attr}_mean": ColSpec(
-             col_type=ColType.AGG,
-             expr=pl.col(f"pick_{attr}") / pl.col("num_taken")
-         )
-     }
6 files without changes