spells-mtg 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spells-mtg might be problematic.

spells/external.py ADDED
@@ -0,0 +1,314 @@
+ """
+ download public data sets from 17Lands.com and generate a card
+ file containing card attributes using MTGJSON
+
+ cli tool `spells`
+ """
+
+ import functools
+ import gzip
+ import os
+ import re
+ import shutil
+ import sys
+ from enum import StrEnum
+
+ import wget
+ import polars as pl
+
+ from spells import cards
+ from spells import cache
+ from spells.enums import View
+ from spells.schema import schema
+
+
+ DATASET_TEMPLATE = "{dataset_type}_data_public.{set_code}.{event_type}.csv.gz"
+ RESOURCE_TEMPLATE = (
+     "https://17lands-public.s3.amazonaws.com/analysis_data/{dataset_type}_data/"
+ )
+
+
+ class FileFormat(StrEnum):
+     CSV = "csv"
+     PARQUET = "parquet"
+
+
+ class EventType(StrEnum):
+     PREMIER = "PremierDraft"
+     TRADITIONAL = "TradDraft"
+
+
+ # Fred Cirera via https://stackoverflow.com/questions/1094841/get-a-human-readable-version-of-a-file-size
+ def sizeof_fmt(num, suffix="B"):
+     for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
+         if abs(num) < 1024.0:
+             return f"{num:3.1f}{unit}{suffix}"
+         num /= 1024.0
+     return f"{num:.1f}Yi{suffix}"
+
+
+ def cli() -> int:
+     data_dir = cache.data_home()
+     cache.spells_print("spells", f"[data home]={data_dir}")
+     print()
+     usage = """spells [add|refresh|remove|clean] [set_code]
+ spells info
+
+ add: Download draft and game data files from 17Lands.com and a card file from MTGJSON.com, and save them to
+ [data home]/external/[set code] (use $SPELLS_DATA_HOME or $XDG_DATA_HOME to configure).
+ Does not overwrite existing files. If any files are downloaded, the existing local cache is cleared.
+ [set code] should be the capitalized official set code for the draft release.
+
+ e.g. $ spells add OTJ
+
+ refresh: Force download and overwrite of existing files (for new data drops; use sparingly!). Clears the
+ local cache.
+
+ remove: Delete the [data home]/external/[set code] and [data home]/local/[set code] directories and their contents.
+
+ clean: Delete the [data home]/local/[set code] data directory (your cache of aggregate parquet files).
+
+ info: No set code argument. Prints info on all external and local files.
+ """
+     print_usage = functools.partial(cache.spells_print, "usage", usage)
+
+     if len(sys.argv) < 2:
+         print_usage()
+         return 1
+
+     mode = sys.argv[1]
+
+     if mode == "info":
+         return _info()
+
+     if len(sys.argv) != 3:
+         print_usage()
+         return 1
+
+     match mode:
+         case "add":
+             return _add(sys.argv[2])
+         case "refresh":
+             return _refresh(sys.argv[2])
+         case "remove":
+             return _remove(sys.argv[2])
+         case "clean":
+             return cache.clear(sys.argv[2])
+         case _:
+             print_usage()
+             return 1
+
+
+ def _add(set_code: str, force_download=False):
+     download_data_set(set_code, View.DRAFT, force_download=force_download)
+     write_card_file(set_code, force_download=force_download)
+     download_data_set(set_code, View.GAME, force_download=force_download)
+     return 0
+
+
+ def _refresh(set_code: str):
+     return _add(set_code, force_download=True)
+
+
+ def _remove(set_code: str):
+     mode = "remove"
+     dir_path = _external_set_path(set_code)
+     if os.path.isdir(dir_path):
+         with os.scandir(dir_path) as set_dir:
+             count = 0
+             for entry in set_dir:
+                 if not entry.name.endswith(".parquet"):
+                     cache.spells_print(
+                         mode,
+                         f"Unexpected file {entry.name} found in external cache, please sort that out!",
+                     )
+                     return 1
+                 count += 1
+                 os.remove(entry)
+         cache.spells_print(
+             mode, f"Removed {count} files from external cache for set {set_code}"
+         )
+         os.rmdir(dir_path)
+     else:
+         cache.spells_print(mode, f"No external cache found for set {set_code}")
+
+     return cache.clear(set_code)
+
+
+ def _info():
+     mode = "info"
+     external_path = cache.data_dir_path(cache.DataDir.EXTERNAL)
+
+     suggest_add = set()
+     suggest_remove = set()
+     all_external = set()
+     if os.path.isdir(external_path):
+         cache.spells_print(mode, f"External archives found {external_path}")
+         with os.scandir(external_path) as ext_dir:
+             for entry in ext_dir:
+                 if entry.is_dir():
+                     all_external.add(entry.name)
+                     file_count = 0
+                     cache.spells_print(mode, f"Archive {entry.name} contents:")
+                     for item in os.scandir(entry):
+                         if not re.match(f"^{entry.name}_.*\\.parquet", item.name):
+                             print(
+                                 f"!!! imposter file {item.name}! Please sort that out"
+                             )
+                         print(f" {item.name} {sizeof_fmt(os.stat(item).st_size)}")
+                         file_count += 1
+                     if file_count < 3:
+                         suggest_add.add(entry.name)
+                     if file_count > 3:
+                         suggest_remove.add(entry.name)
+                 else:
+                     cache.spells_print(
+                         mode, f"Imposter file {entry.name}! Please sort that out"
+                     )
+
+     else:
+         cache.spells_print(mode, "No external archives found")
+
+     cache_path = cache.data_dir_path(cache.DataDir.CACHE)
+
+     if os.path.isdir(cache_path):
+         print()
+         cache.spells_print(mode, f"Local cache found {cache_path}")
+         with os.scandir(cache_path) as cache_dir:
+             for entry in cache_dir:
+                 if entry.name not in all_external:
+                     suggest_remove.add(entry.name)
+                 if entry.is_dir():
+                     cache.spells_print(mode, f"Cache {entry.name} contents:")
+                     parquet_num = 0
+                     parquet_size = 0
+                     for item in os.scandir(entry):
+                         if item.name.endswith(".parquet"):
+                             parquet_num += 1
+                             parquet_size += os.stat(item).st_size
+                         else:
+                             print(
+                                 f"!!! imposter file {item.name}! Please sort that out"
+                             )
+                     print(f" {parquet_num} cache files: {sizeof_fmt(parquet_size)}")
+     else:
+         print()
+         cache.spells_print(mode, "No local cache found")
+
+     print()
+     for name in suggest_add:
+         cache.spells_print(mode, f"Suggest `spells add {name}`")
+     for name in suggest_remove:
+         cache.spells_print(mode, f"Suggest `spells remove {name}`")
+
+     return 0
+
+
+ def _external_set_path(set_code):
+     return os.path.join(cache.data_dir_path(cache.DataDir.EXTERNAL), set_code)
+
+
+ def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
+     if dataset_type == "card":
+         return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
+
+     return os.path.join(
+         _external_set_path(set_code), f"{set_code}_{event_type}_{dataset_type}.parquet"
+     )
+
+
+ def _process_zipped_file(gzip_path, target_path):
+     csv_path = gzip_path[:-3]
+     # if polars supports streaming from file obj, we can just stream straight
+     # from urllib.Request through GzipFile to sink_parquet without intermediate files
+     with gzip.open(gzip_path, "rb") as f_in:
+         with open(csv_path, "wb") as f_out:
+             shutil.copyfileobj(f_in, f_out)  # type: ignore
+
+     os.remove(gzip_path)
+     df = pl.scan_csv(csv_path, schema=schema(csv_path))
+     df.sink_parquet(target_path)
+
+     os.remove(csv_path)
+
+
+ def download_data_set(
+     set_code,
+     dataset_type: View,
+     event_type=EventType.PREMIER,
+     force_download=False,
+     clear_set_cache=True,
+ ):
+     mode = "refresh" if force_download else "add"
+     cache.spells_print(mode, f"Downloading {dataset_type} dataset from 17Lands.com")
+
+     if not os.path.isdir(set_dir := _external_set_path(set_code)):
+         os.makedirs(set_dir)
+
+     target_path = data_file_path(set_code, dataset_type)
+
+     if os.path.isfile(target_path) and not force_download:
+         cache.spells_print(
+             mode,
+             f"File {target_path} already exists, use `spells refresh {set_code}` to overwrite",
+         )
+         return 1
+
+     dataset_file = DATASET_TEMPLATE.format(
+         set_code=set_code, dataset_type=dataset_type, event_type=event_type
+     )
+     dataset_path = os.path.join(_external_set_path(set_code), dataset_file)
+     wget.download(
+         RESOURCE_TEMPLATE.format(dataset_type=dataset_type) + dataset_file,
+         out=dataset_path,
+     )
+     print()
+
+     cache.spells_print(mode, "Unzipping and transforming to parquet...")
+     _process_zipped_file(dataset_path, target_path)
+     cache.spells_print(mode, f"File {target_path} written")
+     if clear_set_cache:
+         cache.clear(set_code)
+
+     return 0
+
+
+ def write_card_file(draft_set_code: str, force_download=False) -> int:
+     """
+     Write a parquet file containing basic information about draftable cards,
+     such as rarity, set symbol, color, mana cost, and type.
+     """
+     mode = "refresh" if force_download else "add"
+
+     cache.spells_print(
+         mode, "Fetching card data from mtgjson.com and writing card file"
+     )
+     card_filepath = data_file_path(draft_set_code, View.CARD)
+     if os.path.isfile(card_filepath) and not force_download:
+         cache.spells_print(
+             mode,
+             f"File {card_filepath} already exists, use `spells refresh {draft_set_code}` to overwrite",
+         )
+         return 1
+
+     draft_filepath = data_file_path(draft_set_code, View.DRAFT)
+
+     if not os.path.isfile(draft_filepath):
+         cache.spells_print(mode, f"Error: No draft file for set {draft_set_code}")
+         return 1
+
+     columns = pl.scan_parquet(draft_filepath).collect_schema().names()
+
+     pattern = "^pack_card_"
+     names = [
+         re.split(pattern, name)[1]
+         for name in columns
+         if re.search(pattern, name) is not None
+     ]
+
+     card_df = cards.card_df(draft_set_code, names)
+
+     card_df.write_parquet(card_filepath)
+
+     cache.spells_print(mode, f"Wrote file {card_filepath}")
+     return 0
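
Taken together, external.py maintains three parquet files per set (draft, game, card) under [data home]/external/[set code]. The same flow can be driven from Python rather than the CLI; a minimal sketch, assuming the package is installed, network access to both services, and that View.DRAFT and View.GAME stringify to "draft" and "game" (the enums module is not shown in this diff):

# Hypothetical driver mirroring `spells add OTJ` (not part of the package).
from spells.enums import View
from spells.external import data_file_path, download_data_set, write_card_file

set_code = "OTJ"
download_data_set(set_code, View.DRAFT)  # 17Lands draft csv.gz -> parquet
write_card_file(set_code)                # MTGJSON card attrs -> parquet (needs the draft file)
download_data_set(set_code, View.GAME)   # 17Lands game csv.gz -> parquet

# Assuming View.DRAFT stringifies to "draft", this prints something like
# [data home]/external/OTJ/OTJ_PremierDraft_draft.parquet
print(data_file_path(set_code, View.DRAFT))
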
spells/filter.py ADDED
@@ -0,0 +1,137 @@
+ """
+ spells.filter (take care not to shadow the builtin `filter` on import) provides
+ from_spec, which takes a dict-specified filter and returns a Filter object that
+ records the dependent column names and contains a filter expression for use in polars.
+ """
+
+ from dataclasses import dataclass
+ import functools
+
+ import polars as pl
+
+
+ @dataclass(frozen=True)
+ class Filter:
+     expr: pl.Expr
+     lhs: frozenset[str]
+
+
+ def __negate(f: Filter) -> Filter:
+     return Filter(expr=~f.expr, lhs=f.lhs)
+
+
+ def _or(f1: Filter, f2: Filter) -> Filter:
+     return Filter(expr=f1.expr | f2.expr, lhs=f1.lhs.union(f2.lhs))
+
+
+ def _and(f1: Filter, f2: Filter) -> Filter:
+     return Filter(expr=f1.expr & f2.expr, lhs=f1.lhs.union(f2.lhs))
+
+
+ def _filter_eq(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs) == rhs, lhs=frozenset({lhs}))
+
+
+ def _filter_leq(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs) <= rhs, lhs=frozenset({lhs}))
+
+
+ def _filter_geq(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs) >= rhs, lhs=frozenset({lhs}))
+
+
+ def _filter_in(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs).is_in(rhs), lhs=frozenset({lhs}))
+
+
+ def _filter_gt(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_leq(lhs, rhs))
+
+
+ def _filter_nin(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_in(lhs, rhs))
+
+
+ def _filter_neq(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_eq(lhs, rhs))
+
+
+ def _filter_lt(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_geq(lhs, rhs))
+
+
+ filter_fn_map = {
+     "=": _filter_eq,
+     "<=": _filter_leq,
+     ">=": _filter_geq,
+     "in": _filter_in,
+     ">": _filter_gt,
+     "nin": _filter_nin,
+     "!=": _filter_neq,
+     "<": _filter_lt,
+ }
+
+
+ def _base(lhs, rhs, op="=") -> Filter:
+     return filter_fn_map[op](lhs, rhs)
+
+
+ def _all_of(filters) -> Filter:
+     return functools.reduce(_and, filters)
+
+
+ def _any_of(filters) -> Filter:
+     return functools.reduce(_or, filters)
+
+
+ def _negate(fil) -> Filter:
+     return __negate(fil)
+
+
+ BUILDER_MAP = {"$and": _all_of, "$or": _any_of, "$not": _negate}
+
+
+ def from_spec(filter_spec: dict | None) -> Filter | None:
+     """
+     filter_spec is a nested dictionary with the leaf level consisting of specs of the form
+     {'lhs': 'a', 'rhs': [1, 2, 3], 'op': 'in'}
+     or
+     {'a': 5}
+
+     higher-level keys can be `$and`, `$or`, or `$not`
+
+     e.g.
+
+     {
+         '$and': [
+             {'$not': {'A': 5}},
+             {'lhs': 'B', 'rhs': [1, 2], 'op': 'in'}
+         ]
+     }
+
+     an empty input returns None, which represents a trivial filter
+     """
+     if not filter_spec:
+         return None
+
+     for filter_type, filter_fn in BUILDER_MAP.items():
+         if filter_value := filter_spec.get(filter_type):
+             assert (
+                 len(filter_spec) == 1
+             ), f"Operator {filter_type} incompatible with additional keys."
+             if isinstance(filter_value, list):
+                 arg = tuple(map(from_spec, filter_value))
+             else:
+                 arg = from_spec(filter_value)
+             return filter_fn(arg)
+
+     if len(filter_spec) == 1:
+         for lhs, rhs in filter_spec.items():
+             return _base(lhs, rhs)
+
+     assert "lhs" in filter_spec and "rhs" in filter_spec
+     return _base(filter_spec["lhs"], filter_spec["rhs"], filter_spec.get("op", "="))
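
As a usage sketch (not part of the package), the spec below combines the shorthand and explicit leaf forms under $and/$not; the column names here are hypothetical:

import polars as pl

from spells.filter import from_spec

fil = from_spec({
    "$and": [
        {"$not": {"rank": "bronze"}},  # shorthand leaf {'rank': 'bronze'}, negated
        {"lhs": "user_n_games_bucket", "rhs": [50, 100], "op": "in"},
    ]
})

assert fil is not None
print(fil.lhs)  # frozenset({'rank', 'user_n_games_bucket'}): the dependent columns
df = pl.DataFrame({"rank": ["bronze", "gold"], "user_n_games_bucket": [50, 50]})
print(df.filter(fil.expr))  # keeps only the 'gold' row
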
spells/manifest.py ADDED
@@ -0,0 +1,184 @@
+ from dataclasses import dataclass
+
+ import spells.columns
+ import spells.filter
+ from spells.enums import View, ColName, ColType
+ from spells.columns import ColumnDefinition
+
+
+ @dataclass(frozen=True)
+ class Manifest:
+     columns: tuple[str, ...]
+     col_def_map: dict[str, ColumnDefinition]
+     base_view_group_by: frozenset[str]
+     view_cols: dict[View, frozenset[str]]
+     group_by: tuple[str, ...]
+     filter: spells.filter.Filter | None
+
+     def __post_init__(self):
+         # no filtering on name
+         if self.filter is not None:
+             assert (
+                 "name" not in self.filter.lhs
+             ), "Don't filter on 'name', include 'name' in groupbys and filter the final result instead"
+
+         # every column is in col_def_map and is summonable
+         for col in self.columns:
+             assert col in self.col_def_map, f"Undefined column {col}!"
+             assert (
+                 self.col_def_map[col].col_type != ColType.GROUP_BY
+             ), f"group_by column {col} must be passed as group_by"
+             assert (
+                 self.col_def_map[col].col_type != ColType.FILTER_ONLY
+             ), f"filter_only column {col} cannot be summoned"
+
+         # base_view_group_by columns have col_type GROUP_BY
+         for col in self.base_view_group_by:
+             assert (
+                 self.col_def_map[col].col_type == ColType.GROUP_BY
+             ), f"Invalid groupby {col}!"
+
+         for view, cols_for_view in self.view_cols.items():
+             # cols_for_view are actually in view
+             for col in cols_for_view:
+                 assert (
+                     view in self.col_def_map[col].views
+                 ), f"View cols generated incorrectly, {col} not in view {view}"
+                 # GAME_SUM cols only in the game view, and never with a NAME groupby
+                 assert self.col_def_map[col].col_type != ColType.GAME_SUM or (
+                     view == View.GAME and ColName.NAME not in self.base_view_group_by
+                 ), f"Invalid manifest for GAME_SUM column {col}"
+             if view != View.CARD:
+                 for col in self.base_view_group_by:
+                     # base_view_group_by columns exist in the view
+                     assert (
+                         col == ColName.NAME or view in self.col_def_map[col].views
+                     ), f"Groupby {col} not in view {view}!"
+                     # base_view_group_by columns appear in view_cols for the view
+                     assert (
+                         col == ColName.NAME or col in cols_for_view
+                     ), f"Groupby {col} not in view_cols[view]"
+                 # filter cols are present in both base views
+                 if self.filter is not None:
+                     for col in self.filter.lhs:
+                         assert (
+                             col in cols_for_view
+                         ), f"filter col {col} not found in base view"
+
+             if view == View.CARD:
+                 # name must be in the groupbys to join card attrs
+                 assert (
+                     ColName.NAME in self.base_view_group_by
+                 ), "base views must group by name to join card attrs"
+
+     def test_str(self):
+         result = "{\n" + 2 * " " + "columns:\n"
+         for c in sorted(self.columns):
+             result += 4 * " " + c + "\n"
+         result += 2 * " " + "base_view_group_by:\n"
+         for c in sorted(self.base_view_group_by):
+             result += 4 * " " + c + "\n"
+         result += 2 * " " + "view_cols:\n"
+         for v, view_cols in sorted(self.view_cols.items()):
+             result += 4 * " " + v + ":\n"
+             for c in sorted(view_cols):
+                 result += 6 * " " + c + "\n"
+         result += 2 * " " + "group_by:\n"
+         for c in sorted(self.group_by):
+             result += 4 * " " + c + "\n"
+         result += "}\n"
+
+         return result
+
+
+ def _resolve_view_cols(
+     col_set: frozenset[str],
+     col_def_map: dict[str, ColumnDefinition],
+ ) -> dict[View, frozenset[str]]:
+     """
+     For each view ('game', 'draft', and 'card'), return the columns
+     that must be present at the aggregation step. 'name' need not be
+     included, and 'pick' will be added if needed.
+
+     Dependencies within base views will be resolved by `col_df`.
+     """
+     unresolved_cols = col_set
+     view_resolution = {}
+
+     iter_num = 0
+     while unresolved_cols and iter_num < 100:
+         iter_num += 1
+         next_cols = frozenset()
+         for col in unresolved_cols:
+             cdef = col_def_map[col]
+             if cdef.col_type == ColType.PICK_SUM:
+                 view_resolution[View.DRAFT] = view_resolution.get(
+                     View.DRAFT, frozenset()
+                 ).union({ColName.PICK})
+             if cdef.views:
+                 for view in cdef.views:
+                     view_resolution[view] = view_resolution.get(
+                         view, frozenset()
+                     ).union({col})
+             else:
+                 if cdef.dependencies is None:
+                     raise ValueError(
+                         f"Invalid column def: {col} has neither views nor dependencies!"
+                     )
+                 for dep in cdef.dependencies:
+                     next_cols = next_cols.union({dep})
+         unresolved_cols = next_cols
+
+     if iter_num >= 100:
+         raise ValueError("broken dependency chain in column spec, loop probable")
+
+     return view_resolution
+
+
+ def create(
+     col_def_map: dict[str, ColumnDefinition],
+     columns: list[str] | None = None,
+     group_by: list[str] | None = None,
+     filter_spec: dict | None = None,
+ ):
+     gbs = (ColName.NAME,) if group_by is None else tuple(group_by)
+     if columns is None:
+         cols = tuple(spells.columns.default_columns)
+         if ColName.NAME not in gbs:
+             cols = tuple(c for c in cols if c not in [ColName.COLOR, ColName.RARITY])
+     else:
+         cols = tuple(columns)
+
+     base_view_group_by = frozenset()
+     for col in gbs:
+         cdef = col_def_map[col]
+         if cdef.col_type == ColType.GROUP_BY:
+             base_view_group_by = base_view_group_by.union({col})
+         elif cdef.col_type == ColType.CARD_ATTR:
+             base_view_group_by = base_view_group_by.union({ColName.NAME})
+
+     m_filter = spells.filter.from_spec(filter_spec)
+
+     col_set = frozenset(cols)
+     col_set = col_set.union(frozenset(gbs) - {ColName.NAME})
+     if m_filter is not None:
+         col_set = col_set.union(m_filter.lhs)
+
+     view_cols = _resolve_view_cols(col_set, col_def_map)
+
+     needed_views = frozenset()
+     for view, cols_for_view in view_cols.items():
+         for col in cols_for_view:
+             if col_def_map[col].views == (view,):  # only found in this view
+                 needed_views = needed_views.union({view})
+
+     view_cols = {v: view_cols[v] for v in needed_views}
+
+     return Manifest(
+         columns=cols,
+         col_def_map=col_def_map,
+         base_view_group_by=base_view_group_by,
+         view_cols=view_cols,
+         group_by=gbs,
+         filter=m_filter,
+     )
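
To make the dependency walk in _resolve_view_cols concrete, here is a toy sketch (not part of the package): the column names are hypothetical, the module-private helper is imported directly for demonstration, and SimpleNamespace stands in for ColumnDefinition, since only col_type, views, and dependencies are read here (col_type is compared only against ColType.PICK_SUM, so a placeholder suffices for the derived column):

from types import SimpleNamespace

from spells.enums import ColName, ColType, View
from spells.manifest import _resolve_view_cols

col_def_map = {
    # a pick-level column that lives in the draft view
    "num_seen": SimpleNamespace(
        col_type=ColType.PICK_SUM, views=(View.DRAFT,), dependencies=None
    ),
    # a game-level column that lives in the game view
    "num_gp": SimpleNamespace(
        col_type=ColType.GAME_SUM, views=(View.GAME,), dependencies=None
    ),
    # a derived column: no views, so it resolves through its dependencies
    "gp_wr": SimpleNamespace(col_type=None, views=(), dependencies=["num_gp"]),
}

# num_seen forces ColName.PICK into the draft view; gp_wr resolves to num_gp in
# the game view. Roughly:
# {View.DRAFT: frozenset({ColName.PICK, 'num_seen'}), View.GAME: frozenset({'num_gp'})}
print(_resolve_view_cols(frozenset({"num_seen", "gp_wr"}), col_def_map))

In create, this same resolution runs over the requested columns, group-bys, and filter dependencies, after which views contributing no exclusive columns are pruned.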