spells-mtg 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of spells-mtg has been flagged as potentially problematic.

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spells-mtg
-Version: 0.7.0
+Version: 0.7.2
 Summary: analaysis of 17Lands.com public datasets
 Author-Email: Joel Barnes <oelarnes@gmail.com>
 License: MIT
```
```diff
@@ -75,6 +75,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
 - Provides 124 explicitly specified, enumerated, documented column definitions
+- Can aggregate over multiple sets at once, even all of them, if you want.
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -64,6 +64,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
 - Provides 124 explicitly specified, enumerated, documented column definitions
+- Can aggregate over multiple sets at once, even all of them, if you want.
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
```
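The new feature bullet refers to multi-set aggregation in `summon`, whose per-set loop appears further down in this diff. A minimal usage sketch, assuming `summon` is exported at the package top level; the set codes are illustrative, not documented API:

```python
import spells

# Hypothetical call: the diff shows summon() looping over a list of set
# codes, aggregating each set, and concatenating the results vertically
# before re-aggregating on the group-by columns.
df = spells.summon(["OTJ", "MKM"])
```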
```diff
@@ -11,7 +11,7 @@ dependencies = [
 ]
 requires-python = ">=3.11"
 readme = "README.md"
-version = "0.7.0"
+version = "0.7.2"
 
 [project.license]
 text = "MIT"
```
```diff
@@ -78,8 +78,16 @@ def write_cache(set_code: str, cache_key: str, df: pl.DataFrame) -> None:
     df.write_parquet(cache_path_for_key(set_code, cache_key))
 
 
-def clear(set_code: str) -> int:
+def clean(set_code: str) -> int:
     mode = "clean"
+
+    if set_code == "all":
+        cache_dir = data_dir_path(DataDir.CACHE)
+        with os.scandir(cache_dir) as set_dir:
+            for entry in set_dir:
+                clean(entry.name)
+        return 0
+
     cache_dir = cache_dir_for_set(set_code)
     if os.path.isdir(cache_dir):
         with os.scandir(cache_dir) as set_dir:
```
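The renamed `clean` function gains a recursive "all" branch. A sketch of the equivalent Python-level calls, assuming the `from spells import cache` import path shown in the CLI changes below:

```python
from spells import cache

# "all" takes the new branch: clean() re-invokes itself once per entry
# under the cache root directory, then returns 0.
cache.clean("all")

# A single set's cache is still cleaned by set code, as before.
cache.clean("OTJ")  # illustrative set code
```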
```diff
@@ -59,25 +59,35 @@ def _get_card_context(
     specs: dict[str, ColSpec],
     card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
     set_context: pl.DataFrame | dict[str, Any] | None,
+    card_only: bool = False,
 ) -> dict[str, dict[str, Any]]:
     card_attr_specs = {
         col: spec
         for col, spec in specs.items()
         if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
     }
-    col_def_map = _hydrate_col_defs(
-        set_code, card_attr_specs, set_context=set_context, card_only=True
-    )
 
-    columns = list(col_def_map.keys())
+    if not card_only:
+        col_def_map = _hydrate_col_defs(
+            set_code,
+            card_attr_specs,
+            set_context=set_context,
+            card_context=card_context,
+            card_only=True,
+        )
+
+        columns = list(col_def_map.keys())
 
-    fp = data_file_path(set_code, View.CARD)
-    card_df = pl.read_parquet(fp)
-    select_rows = _view_select(
-        card_df, frozenset(columns), col_def_map, is_agg_view=False
-    ).to_dicts()
+        fp = data_file_path(set_code, View.CARD)
+        card_df = pl.read_parquet(fp)
+        select_rows = _view_select(
+            card_df, frozenset(columns), col_def_map, is_agg_view=False
+        ).to_dicts()
 
-    loaded_context = {row[ColName.NAME]: row for row in select_rows}
+        loaded_context = {row[ColName.NAME]: row for row in select_rows}
+    else:
+        names = _get_names(set_code)
+        loaded_context = {name: {} for name in names}
 
     if card_context is not None:
         if isinstance(card_context, pl.DataFrame):
```
```diff
@@ -139,25 +149,25 @@ def _determine_expression(
 
     elif spec.expr is not None:
         if isinstance(spec.expr, Callable):
-            assert not spec.col_type == ColType.AGG, f"AGG column {col} must be a pure spells expression"
+            assert (
+                not spec.col_type == ColType.AGG
+            ), f"AGG column {col} must be a pure spells expression"
             params = seed_params(spec.expr)
             if (
-                spec.col_type == ColType.PICK_SUM
+                spec.col_type in (ColType.PICK_SUM, ColType.CARD_ATTR)
                 and "name" in signature(spec.expr).parameters
             ):
+                condition_col = (
+                    ColName.PICK if spec.col_type == ColType.PICK_SUM else ColName.NAME
+                )
                 expr = pl.lit(None)
                 for name in names:
                     name_params = {"name": name, **params}
                     expr = (
-                        pl.when(pl.col(ColName.PICK) == name)
+                        pl.when(pl.col(condition_col) == name)
                         .then(spec.expr(**name_params))
                         .otherwise(expr)
                     )
-            elif (
-                spec.col_type == ColType.CARD_ATTR
-                and "name" in signature(spec.expr).parameters
-            ):
-                expr = spec.expr(**{"name": pl.col("name"), **params})
             else:
                 expr = spec.expr(**params)
     else:
```
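This refactor merges the old `CARD_ATTR` branch into the `PICK_SUM` branch by parameterizing the condition column, so both column types now build the same chained expression. The chaining pattern itself is ordinary Polars; here is a self-contained sketch with made-up names and data:

```python
import polars as pl

df = pl.DataFrame({"name": ["Llanowar Elves", "Shock", "Divination"]})

# A stand-in for a name-parameterized spec.expr.
def per_name_expr(name: str) -> pl.Expr:
    return pl.lit(len(name))

# Build the expression exactly as the hunk does: start from a null
# literal and wrap each name in when/then/otherwise, so the final
# expression dispatches on the condition column row by row.
expr = pl.lit(None)
for name in df["name"]:
    expr = (
        pl.when(pl.col("name") == name)
        .then(per_name_expr(name))
        .otherwise(expr)
    )

print(df.with_columns(expr.alias("name_len")))
```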
```diff
@@ -256,10 +266,9 @@ def _hydrate_col_defs(
 
     set_context = _get_set_context(set_code, set_context)
 
-    if card_only:
-        card_context = {}
-    else:
-        card_context = _get_card_context(set_code, specs, card_context, set_context)
+    card_context = _get_card_context(
+        set_code, specs, card_context, set_context, card_only=card_only
+    )
 
     assert len(names) > 0, "there should be names"
     hydrated = {}
```
```diff
@@ -393,9 +402,7 @@ def _base_agg_df(
     sum_col_df = base_df.select(nonname_gb + name_col_tuple + sum_cols)
 
     grouped = sum_col_df.group_by(group_by) if group_by else sum_col_df
-    join_dfs.append(
-        grouped.sum().collect(streaming=use_streaming)
-    )
+    join_dfs.append(grouped.sum().collect(streaming=use_streaming))
 
     name_sum_cols = tuple(
         c for c in cols_for_view if m.col_def_map[c].col_type == ColType.NAME_SUM
```
```diff
@@ -423,8 +430,12 @@
     )
 
     if not is_name_gb:
-        grouped = unpivoted.drop("name").group_by(nonname_gb) if nonname_gb else unpivoted.drop("name")
-        df = grouped.sum() .collect(streaming=use_streaming)
+        grouped = (
+            unpivoted.drop("name").group_by(nonname_gb)
+            if nonname_gb
+            else unpivoted.drop("name")
+        )
+        df = grouped.sum().collect(streaming=use_streaming)
     else:
         df = unpivoted.collect(streaming=use_streaming)
 
```
```diff
@@ -436,7 +447,7 @@
             join_dfs,
         )
     else:
-        joined_df = pl.concat(join_dfs, how='horizontal')
+        joined_df = pl.concat(join_dfs, how="horizontal")
 
     return joined_df
 
```
```diff
@@ -475,14 +486,14 @@ def summon(
     concat_dfs = []
     for code in codes:
         if isinstance(card_context, pl.DataFrame):
-            set_card_context = card_context.filter(pl.col('expansion') == code)
+            set_card_context = card_context.filter(pl.col("expansion") == code)
         elif isinstance(card_context, dict):
             set_card_context = card_context[code]
         else:
             set_card_context = None
 
         if isinstance(set_context, pl.DataFrame):
-            this_set_context = set_context.filter(pl.col('expansion') == code)
+            this_set_context = set_context.filter(pl.col("expansion") == code)
         elif isinstance(set_context, dict):
             this_set_context = set_context[code]
         else:
```
```diff
@@ -511,13 +522,17 @@
             card_cols = m.view_cols[View.CARD].union({ColName.NAME})
             fp = data_file_path(code, View.CARD)
             card_df = pl.read_parquet(fp)
-            select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
+            select_df = _view_select(
+                card_df, card_cols, m.col_def_map, is_agg_view=False
+            )
             agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
         concat_dfs.append(agg_df)
 
-    full_agg_df = pl.concat(concat_dfs, how='vertical')
+    full_agg_df = pl.concat(concat_dfs, how="vertical")
 
-    assert m is not None, "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
+    assert (
+        m is not None
+    ), "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
 
     if m.group_by:
         full_agg_df = full_agg_df.group_by(m.group_by).sum()
```
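Note the `expansion` filter in the loop two hunks up: when `card_context` (or `set_context`) arrives as a DataFrame, `summon` slices it per set code, so a single frame can carry context for every set in the call. A hypothetical shape for such a frame; only the `expansion` column is implied by the diff, the other columns are assumptions:

```python
import polars as pl

# Illustrative card_context frame spanning two sets.
card_context = pl.DataFrame(
    {
        "expansion": ["OTJ", "OTJ", "MKM"],
        "name": ["Card A", "Card B", "Card C"],
        "my_metric": [0.51, 0.48, 0.55],
    }
)

# The per-set slice summon() takes when code == "OTJ".
otj_context = card_context.filter(pl.col("expansion") == "OTJ")
```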
```diff
@@ -15,6 +15,7 @@ from enum import StrEnum
 
 import wget
 import polars as pl
+from polars.exceptions import ComputeError
 
 from spells import cards
 from spells import cache
```
```diff
@@ -52,6 +53,7 @@ def cli() -> int:
     cache.spells_print("spells", f"[data home]={data_dir}")
     print()
     usage = """spells [add|refresh|remove|clean] [set_code]
+spells clean all
 spells info
 
 add: Download draft and game files from 17Lands.com and card file from MTGJSON.com and save to path
@@ -66,7 +68,7 @@ def cli() -> int:
 
 remove: Delete the [data home]/external/[set code] and [data home]/local/[set code] directories and their contents
 
-clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files).
+clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files), or all of them.
 
 info: No set code argument. Print info on all external and local files.
 """
```
```diff
@@ -93,7 +95,7 @@ def cli() -> int:
         case "remove":
             return _remove(sys.argv[2])
         case "clean":
-            return cache.clear(sys.argv[2])
+            return cache.clean(sys.argv[2])
         case _:
             print_usage()
             return 1
```
```diff
@@ -231,7 +233,18 @@ def _process_zipped_file(gzip_path, target_path):
 
     os.remove(gzip_path)
     df = pl.scan_csv(csv_path, schema=schema(csv_path))
-    df.sink_parquet(target_path)
+    try:
+        df.sink_parquet(target_path)
+    except ComputeError:
+        df = pl.scan_csv(csv_path)
+        cache.spells_print(
+            "error",
+            "Bad schema found, loading dataset into memory"
+            + " and attempting to cast to correct schema",
+        )
+        select = [pl.col(name).cast(dtype) for name, dtype in schema(csv_path).items()]
+        cast_df = df.select(select).collect()
+        cast_df.write_parquet(target_path)
 
     os.remove(csv_path)
 
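```
This fallback handles CSV files that do not stream cleanly under the expected schema: the file is re-scanned with inferred dtypes, cast column by column, and materialized in memory before writing. A generic sketch of the same pattern, with hypothetical paths and a hypothetical schema:

```python
import polars as pl
from polars.exceptions import ComputeError

csv_path, target_path = "data.csv", "data.parquet"  # illustrative paths
expected = {"draft_id": pl.Utf8, "pick_number": pl.Int64}  # assumed schema

try:
    # Fast path: stream straight to parquet under the expected schema.
    pl.scan_csv(csv_path, schema=expected).sink_parquet(target_path)
except ComputeError:
    # Fallback: let polars infer dtypes, then cast and collect in memory.
    lf = pl.scan_csv(csv_path)
    cast = [pl.col(name).cast(dtype) for name, dtype in expected.items()]
    lf.select(cast).collect().write_parquet(target_path)
```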
```diff
@@ -162,7 +162,11 @@ def create(
     group_by: list[str] | None = None,
     filter_spec: dict | None = None,
 ):
-    gbs = (ColName.NAME, ColName.COLOR, ColName.RARITY) if group_by is None else tuple(group_by)
+    gbs = (
+        (ColName.NAME, ColName.COLOR, ColName.RARITY)
+        if group_by is None
+        else tuple(group_by)
+    )
 
     if columns is None:
         cols = tuple(spells.columns.default_columns)
```
```diff
@@ -139,13 +139,12 @@ COLUMN_TYPES = (
 
 def schema(
     filename: str, print_missing: bool = False
-) -> Dict[str, pl.datatypes.DataType] | None:
+) -> Dict[str, pl.datatypes.DataType]:
     dtypes: Dict[str, pl.datatypes.DataType] = {}
     with open(filename, encoding="utf-8") as f:
         columns = csv.DictReader(f).fieldnames
     if columns is None:
-        print(f"Could not read fieldnames from {filename}")
-        return None
+        raise ValueError(f"Could not read fieldnames from {filename}")
     for column in columns:
         for regex, column_type in COLUMN_TYPES:
             if regex.match(column):
```
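`schema` now always returns a mapping, raising instead of returning `None`, so callers such as `_process_zipped_file` can use it without a null check. Its regex-to-dtype dispatch is truncated at the hunk boundary above; a hypothetical reconstruction of the full pattern, with invented patterns since the real `COLUMN_TYPES` table is not shown in this diff:

```python
import csv
import re
from typing import Dict

import polars as pl

# Hypothetical (regex, dtype) table; the real COLUMN_TYPES is not shown.
COLUMN_TYPES = (
    (re.compile(r".*_id$"), pl.Utf8),
    (re.compile(r".*_number$"), pl.Int64),
)

def schema(filename: str) -> Dict[str, pl.datatypes.DataType]:
    dtypes: Dict[str, pl.datatypes.DataType] = {}
    with open(filename, encoding="utf-8") as f:
        columns = csv.DictReader(f).fieldnames
    if columns is None:
        raise ValueError(f"Could not read fieldnames from {filename}")
    # Assign each fieldname the dtype of the first pattern it matches.
    for column in columns:
        for regex, column_type in COLUMN_TYPES:
            if regex.match(column):
                dtypes[column] = column_type
                break
    return dtypes
```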
6 files without changes.