PyPI - spells-mtg - Versions diffs - 0.0.1__py3-none-any.whl - Mend

spells-mtg 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of spells-mtg might be problematic. Click here for more details.

Files changed (15) hide show

spells/__init__.py +5 -0
spells/cache.py +106 -0
spells/cards.py +96 -0
spells/columns.py +771 -0
spells/draft_data.py +300 -0
spells/enums.py +154 -0
spells/external.py +314 -0
spells/filter.py +137 -0
spells/manifest.py +184 -0
spells/schema.py +157 -0
spells_mtg-0.0.1.dist-info/METADATA +465 -0
spells_mtg-0.0.1.dist-info/RECORD +15 -0
spells_mtg-0.0.1.dist-info/WHEEL +4 -0
spells_mtg-0.0.1.dist-info/entry_points.txt +5 -0
spells_mtg-0.0.1.dist-info/licenses/LICENSE +21 -0

spells/draft_data.py ADDED Viewed

@@ -0,0 +1,300 @@
+"""
+This module performs calculations on the 17Lands public data sets and
+returns the aggregated results.
+Aggregate dataframes containing raw counts are cached in the local file system
+for performance.
+"""
+import datetime
+import functools
+import hashlib
+import re
+from typing import Callable, TypeVar
+import polars as pl
+from spells.external import data_file_path
+import spells.cache
+import spells.filter
+import spells.manifest
+from spells.columns import ColumnDefinition, ColumnSpec
+from spells.enums import View, ColName, ColType
+# Type variable constrained to the two polars frame kinds, so a helper
+# annotated with it returns the same kind of frame (lazy or eager) it was given.
+DF = TypeVar("DF", pl.LazyFrame, pl.DataFrame)
+def _cache_key(args) -> str:
+    """
+    cache arguments by __str__ (based on the current value of a mutable, so be careful)
+    """
+    return hashlib.md5(str(args).encode("utf-8")).hexdigest()
+@functools.lru_cache(maxsize=None)
+def _get_names(set_code: str) -> tuple[str, ...]:
+    card_fp = data_file_path(set_code, View.CARD)
+    card_view = pl.read_parquet(card_fp)
+    card_names_set = frozenset(card_view.get_column("name").to_list())
+    draft_fp = data_file_path(set_code, View.DRAFT)
+    draft_view = pl.scan_parquet(draft_fp)
+    cols = draft_view.collect_schema().names()
+    prefix = "pack_card_"
+    names = tuple(col[len(prefix) :] for col in cols if col.startswith(prefix))
+    draft_names_set = frozenset(names)
+    assert (
+        draft_names_set == card_names_set
+    ), "names mismatch between card and draft file"
+    return names
+def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec]):
+    names = _get_names(set_code)
+    assert len(names) > 0, "there should be names"
+    hydrated = {}
+    for key, spec in col_spec_map.items():
+        if spec.col_type == ColType.NAME_SUM and spec.exprMap is not None:
+            unnamed_exprs = map(spec.exprMap, names)
+            expr = tuple(
+                map(
+                    lambda ex, name: ex.alias(f"{spec.name}_{name}"),
+                    unnamed_exprs,
+                    names,
+                )
+            )
+        elif spec.expr is not None:
+            expr = spec.expr.alias(spec.name)
+        else:
+            if spec.col_type == ColType.NAME_SUM:
+                expr = tuple(map(lambda name: pl.col(f"{spec.name}_{name}"), names))
+            else:
+                expr = pl.col(spec.name)
+        try:
+            sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
+            expr_sig = sig_expr.meta.serialize(
+                format="json"
+            )  # not compatible with renaming
+        except pl.exceptions.ComputeError:
+            if spec.version is not None:
+                expr_sig = spec.name + spec.version
+            else:
+                expr_sig = str(datetime.datetime.now)
+        dependencies = tuple(spec.dependencies or ())
+        signature = str(
+            (
+                spec.name,
+                spec.col_type.value,
+                expr_sig,
+                tuple(view.value for view in spec.views),
+                dependencies,
+            )
+        )
+        cdef = ColumnDefinition(
+            name=spec.name,
+            col_type=spec.col_type,
+            views=spec.views,
+            expr=expr,
+            dependencies=dependencies,
+            signature=signature,
+        )
+        hydrated[key] = cdef
+    return hydrated
+def _view_select(
+    df: DF,
+    view_cols: frozenset[str],
+    col_def_map: dict[str, ColumnDefinition],
+    is_agg_view: bool,
+) -> DF:
+    base_cols = frozenset()
+    cdefs = [col_def_map[c] for c in view_cols]
+    select = []
+    for cdef in cdefs:
+        if is_agg_view:
+            if cdef.col_type == ColType.AGG:
+                base_cols = base_cols.union(cdef.dependencies)
+                select.append(cdef.expr)
+            else:
+                base_cols = base_cols.union(frozenset({cdef.name}))
+                select.append(cdef.name)
+        else:
+            if cdef.dependencies:
+                base_cols = base_cols.union(cdef.dependencies)
+            else:
+                base_cols = base_cols.union(frozenset({cdef.name}))
+            if isinstance(cdef.expr, tuple):
+                select.extend(cdef.expr)
+            else:
+                select.append(cdef.expr)
+    if base_cols != view_cols:
+        df = _view_select(df, base_cols, col_def_map, is_agg_view)
+    return df.select(select)
+def _fetch_or_cache(
+    calc_fn: Callable,
+    set_code: str,
+    cache_args,
+    read_cache: bool = True,
+    write_cache: bool = True,
+):
+    key = _cache_key(cache_args)
+    if read_cache:
+        if spells.cache.cache_exists(set_code, key):
+            return spells.cache.read_cache(set_code, key)
+    df = calc_fn()
+    if write_cache:
+        spells.cache.write_cache(set_code, key, df)
+    return df
+def _base_agg_df(
+    set_code: str,
+    m: spells.manifest.Manifest,
+    use_streaming: bool = False,
+) -> pl.DataFrame:
+    """
+    Compute the base aggregate frame for a manifest from the non-card views.
+
+    For every view in `m.view_cols` except `View.CARD`, the view's parquet file
+    is scanned lazily, reduced to the requested columns, optionally filtered
+    with `m.filter.expr`, and summed per group. One frame is produced for the
+    PICK_SUM/GAME_SUM columns of a view and one for each NAME_SUM column; all
+    frames are then joined on `m.base_view_group_by`.
+
+    `use_streaming` is forwarded to every polars `collect` call.
+
+    NOTE(review): the final `functools.reduce` has no initial value, so it
+    raises TypeError when no frame was produced; presumably the manifest
+    guarantees at least one summed column -- confirm.
+    """
+    # aggregated frames, joined together at the end
+    join_dfs = []
+    group_by = m.base_view_group_by
+    # the card-name grouping is handled separately from the other group-by columns
+    is_name_gb = ColName.NAME in group_by
+    nonname_gb = tuple(gb for gb in group_by if gb != ColName.NAME)
+    for view, cols_for_view in m.view_cols.items():
+        # the card view is not aggregated here
+        if view == View.CARD:
+            continue
+        df_path = data_file_path(set_code, view)
+        base_view_df = pl.scan_parquet(df_path)
+        base_df_prefilter = _view_select(
+            base_view_df, cols_for_view, m.col_def_map, is_agg_view=False
+        )
+        if m.filter is not None:
+            base_df = base_df_prefilter.filter(m.filter.expr)
+        else:
+            base_df = base_df_prefilter
+        sum_cols = tuple(
+            c
+            for c in cols_for_view
+            if m.col_def_map[c].col_type in (ColType.PICK_SUM, ColType.GAME_SUM)
+        )
+        if sum_cols:
+            # manifest will verify that GAME_SUM manifests do not use NAME grouping
+            # when grouping by name, the pick column is exposed under the name column
+            name_col_tuple = (
+                (pl.col(ColName.PICK).alias(ColName.NAME),) if is_name_gb else ()
+            )
+            sum_col_df = base_df.select(nonname_gb + name_col_tuple + sum_cols)
+            join_dfs.append(
+                sum_col_df.group_by(group_by).sum().collect(streaming=use_streaming)
+            )
+        name_sum_cols = tuple(
+            c for c in cols_for_view if m.col_def_map[c].col_type == ColType.NAME_SUM
+        )
+        for col in name_sum_cols:
+            cdef = m.col_def_map[col]
+            pattern = f"^{cdef.name}_"
+            # renames "<column>_<rest>" to "<rest>" by splitting off the prefix
+            name_map = functools.partial(
+                lambda patt, name: re.split(patt, name)[1], pattern
+            )
+            expr = pl.col(f"^{cdef.name}_.*$").name.map(name_map)
+            pre_agg_df = base_df.select((expr,) + nonname_gb)
+            if nonname_gb:
+                agg_df = pre_agg_df.group_by(nonname_gb).sum()
+            else:
+                agg_df = pre_agg_df.sum()
+            index = nonname_gb if nonname_gb else None
+            # wide (one column per renamed column) -> long (one row per name)
+            unpivoted = agg_df.unpivot(
+                index=index,
+                value_name=m.col_def_map[col].name,
+                variable_name=ColName.NAME,
+            )
+            if not is_name_gb:
+                # name is not a grouping column: drop it and total the values
+                # within each remaining group
+                df = (
+                    unpivoted.drop("name")
+                    .group_by(nonname_gb)
+                    .sum()
+                    .collect(streaming=use_streaming)
+                )
+            else:
+                df = unpivoted.collect(streaming=use_streaming)
+            join_dfs.append(df)
+    # combine all partial aggregates into one frame keyed by the group-by columns
+    return functools.reduce(
+        lambda prev, curr: prev.join(curr, on=group_by, how="outer", coalesce=True),
+        join_dfs,
+    )
+def summon(
+    set_code: str,
+    columns: list[str] | None = None,
+    group_by: list[str] | None = None,
+    filter_spec: dict | None = None,
+    extensions: list[ColumnSpec] | None = None,
+    use_streaming: bool = False,
+    read_cache: bool = True,
+    write_cache: bool = True,
+) -> pl.DataFrame:
+    col_spec_map = dict(spells.columns.col_spec_map)
+    if extensions is not None:
+        for spec in extensions:
+            col_spec_map[spec.name] = spec
+    col_def_map = _hydrate_col_defs(set_code, col_spec_map)
+    m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
+    calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
+    agg_df = _fetch_or_cache(
+        calc_fn,
+        set_code,
+        (
+            set_code,
+            sorted(m.view_cols.get(View.DRAFT, set())),
+            sorted(m.view_cols.get(View.GAME, set())),
+            sorted(c.signature or "" for c in m.col_def_map.values()),
+            sorted(m.base_view_group_by),
+            filter_spec,
+        ),
+        read_cache=read_cache,
+        write_cache=write_cache,
+    )
+    if View.CARD in m.view_cols:
+        card_cols = m.view_cols[View.CARD].union({ColName.NAME})
+        fp = data_file_path(set_code, View.CARD)
+        card_df = pl.read_parquet(fp)
+        select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
+        agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
+        if ColName.NAME not in m.group_by:
+            agg_df = agg_df.group_by(m.group_by).sum()
+    ret_cols = m.group_by + m.columns
+    ret_df = (
+        _view_select(agg_df, frozenset(ret_cols), m.col_def_map, is_agg_view=True)
+        .select(ret_cols)
+        .sort(m.group_by)
+    )
+    return ret_df

spells/enums.py ADDED Viewed

@@ -0,0 +1,154 @@
+"""
+enums
+"""
+from enum import StrEnum
+class View(StrEnum):
+    """
+    Identifiers for the data views.
+
+    `spells.draft_data` passes these to `data_file_path` to locate a view's
+    data file.
+    """
+    GAME = "game"
+    DRAFT = "draft"
+    CARD = "card"
+class ColType(StrEnum):
+    """
+    Kinds of column.
+
+    `spells.draft_data` uses the kind to decide how a column is selected and
+    aggregated.
+    """
+    FILTER_ONLY = "filter_only"
+    GROUP_BY = "group_by"
+    # PICK_SUM and GAME_SUM columns are summed per group in the base aggregation
+    PICK_SUM = "pick_sum"
+    GAME_SUM = "game_sum"
+    # NAME_SUM columns expand to one "<column>_<card name>" column per card
+    NAME_SUM = "name_sum"
+    # AGG columns are computed from their dependencies in the aggregated view
+    AGG = "agg"
+    CARD_ATTR = "card_attr"
+class ColName(StrEnum):
+    """
+    A list of all available columns, including built-in extensions.
+    "Name-mapped" columns like "deck_<card name>" are identified by the prefix only.
+    Those columns can be referenced simply as e.g. "deck" in formulas for the post-agg stage.
+    The definitions of the columns and how they may be used is defined in `column_defs`
+    """
+    # shared
+    NAME = "name"  # special column for card name index
+    EXPANSION = "expansion"
+    EVENT_TYPE = "event_type"
+    DRAFT_ID = "draft_id"
+    DRAFT_TIME = "draft_time"  # modified, cast to time
+    DRAFT_DATE = "draft_date"
+    DRAFT_DAY_OF_WEEK = "draft_day_of_week"
+    DRAFT_HOUR = "draft_hour"
+    DRAFT_WEEK = "draft_week"
+    RANK = "rank"
+    USER_N_GAMES_BUCKET = "user_n_games_bucket"
+    USER_GAME_WIN_RATE_BUCKET = "user_game_win_rate_bucket"
+    PLAYER_COHORT = "player_cohort"
+    # draft
+    EVENT_MATCH_WINS = "event_match_wins"
+    EVENT_MATCH_WINS_SUM = "event_match_wins_sum"
+    EVENT_MATCH_LOSSES = "event_match_losses"
+    EVENT_MATCH_LOSSES_SUM = "event_match_losses_sum"
+    EVENT_MATCHES = "event_matches"
+    EVENT_MATCHES_SUM = "event_matches_sum"
+    IS_TROPHY = "is_trophy"
+    IS_TROPHY_SUM = "is_trophy_sum"
+    PACK_NUMBER = "pack_number"
+    PACK_NUM = "pack_num"  # pack_number plus 1
+    PICK_NUMBER = "pick_number"
+    PICK_NUM = "pick_num"  # pick_number plus 1
+    TAKEN_AT = "taken_at"
+    NUM_TAKEN = "num_taken"
+    PICK = "pick"
+    PICK_MAINDECK_RATE = "pick_maindeck_rate"
+    PICK_SIDEBOARD_IN_RATE = "pick_sideboard_in_rate"
+    PACK_CARD = "pack_card"
+    LAST_SEEN = "last_seen"
+    NUM_SEEN = "num_seen"
+    POOL = "pool"
+    # game
+    GAME_TIME = "game_time"
+    GAME_DATE = "game_date"
+    GAME_DAY_OF_WEEK = "game_day_of_week"
+    GAME_HOUR = "game_hour"
+    GAME_WEEK = "game_week"
+    BUILD_INDEX = "build_index"
+    MATCH_NUMBER = "match_number"
+    GAME_NUMBER = "game_number"
+    NUM_EVENTS = "num_events"
+    NUM_MATCHES = "num_matches"
+    NUM_GAMES = "num_games"
+    OPP_RANK = "opp_rank"  # not populated for recent sets
+    MAIN_COLORS = "main_colors"
+    NUM_COLORS = "num_colors"
+    SPLASH_COLORS = "splash_colors"
+    HAS_SPLASH = "has_splash"
+    ON_PLAY = "on_play"
+    NUM_ON_PLAY = "num_on_play"
+    NUM_MULLIGANS = "num_mulligans"
+    NUM_MULLIGANS_SUM = "num_mulligans_sum"
+    OPP_NUM_MULLIGANS = "opp_num_mulligans"
+    OPP_NUM_MULLIGANS_SUM = "opp_num_mulligans_sum"
+    OPP_COLORS = "opp_colors"
+    NUM_TURNS = "num_turns"
+    NUM_TURNS_SUM = "num_turns_sum"
+    WON = "won"
+    NUM_WON = "num_won"
+    OPENING_HAND = "opening_hand"
+    WON_OPENING_HAND = "won_opening_hand"
+    DRAWN = "drawn"
+    WON_DRAWN = "won_drawn"
+    TUTORED = "tutored"
+    WON_TUTORED = "won_tutored"
+    DECK = "deck"
+    WON_DECK = "won_deck"
+    SIDEBOARD = "sideboard"
+    WON_SIDEBOARD = "won_sideboard"
+    NUM_GNS = "num_gns"
+    WON_NUM_GNS = "won_num_gns"
+    # card
+    SET_CODE = "set_code"
+    COLOR = "color"
+    RARITY = "rarity"
+    COLOR_IDENTITY = "color_identity"
+    CARD_TYPE = "card_type"
+    SUBTYPE = "subtype"
+    MANA_VALUE = "mana_value"
+    MANA_COST = "mana_cost"
+    POWER = "power"
+    TOUGHNESS = "toughness"
+    IS_BONUS_SHEET = "is_bonus_sheet"
+    IS_DFC = "is_dfc"
+    # agg extensions
+    PICKED_MATCH_WR = "picked_match_wr"
+    TROPHY_RATE = "trophy_rate"
+    GAME_WR = "game_wr"
+    ALSA = "alsa"
+    ATA = "ata"
+    NUM_GP = "num_gp"
+    PCT_GP = "pct_gp"
+    GP_WR = "gp_wr"
+    NUM_OH = "num_oh"
+    OH_WR = "oh_wr"
+    NUM_GIH = "num_gih"
+    NUM_GIH_WON = "num_gih_won"
+    GIH_WR = "gih_wr"
+    GNS_WR = "gns_wr"
+    IWD = "iwd"
+    NUM_IN_POOL = "num_in_pool"
+    IN_POOL_WR = "in_pool_wr"
+    DECK_TOTAL = "deck_total"
+    WON_DECK_TOTAL = "won_deck_total"
+    GP_WR_MEAN = "gp_wr_mean"
+    GP_WR_EXCESS = "gp_wr_excess"
+    GP_WR_VAR = "gp_wr_var"
+    GP_WR_STDEV = "gp_wr_stdev"
+    GP_WR_Z = "gp_wr_z"
+    GIH_TOTAL = "gih_total"
+    WON_GIH_TOTAL = "won_gih_total"
+    GIH_WR_MEAN = "gih_wr_mean"
+    GIH_WR_EXCESS = "gih_wr_excess"
+    GIH_WR_VAR = "gih_wr_var"
+    GIH_WR_STDEV = "gh_wr_stdev"
+    GIH_WR_Z = "gih_wr_z"