spells-mtg 0.9.8__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.9.8
3
+ Version: 0.10.1
4
4
  Summary: analysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -252,7 +252,7 @@ Spells caches the results of expensive aggregations in the local file system as
252
252
 
253
253
  ### Memory Usage
254
254
 
255
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Further testing is needed to determine the performance impacts, but this is the first thing you should try if you run into memory issues.
255
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming` and defaulted to `True` in Spells. Further testing is needed to determine the performance impacts, so you could try turning it off if you have expansive virtual memory. My 16 GB MacBook Air is fine using 60 GB of memory, but my 32 GB homelab is not.
256
256
 
257
257
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
258
258
 
@@ -241,7 +241,7 @@ Spells caches the results of expensive aggregations in the local file system as
241
241
 
242
242
  ### Memory Usage
243
243
 
244
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Further testing is needed to determine the performance impacts, but this is the first thing you should try if you run into memory issues.
244
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming` and defaulted to `True` in Spells. Further testing is needed to determine the performance impacts, so you could try turning it off if you have expansive virtual memory. My 16 GB MacBook Air is fine using 60 GB of memory, but my 32 GB homelab is not.
245
245
 
246
246
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
247
247
 
@@ -11,7 +11,7 @@ dependencies = [
11
11
  ]
12
12
  requires-python = ">=3.11"
13
13
  readme = "README.md"
14
- version = "0.9.8"
14
+ version = "0.10.1"
15
15
 
16
16
  [project.license]
17
17
  text = "MIT"
@@ -42,6 +42,4 @@ dev = [
42
42
  "jupyter>=1.1.1",
43
43
  "matplotlib>=3.10.0",
44
44
  "numpy>=2.2.0",
45
- "jupyter-book>=1.0.3",
46
- "ghp-import>=2.1.0",
47
45
  ]
@@ -23,6 +23,8 @@ class CardAttr(StrEnum):
23
23
  IS_DFC = ColName.IS_DFC
24
24
  ORACLE_TEXT = ColName.ORACLE_TEXT
25
25
  CARD_JSON = ColName.CARD_JSON
26
+ SCRYFALL_ID = ColName.SCRYFALL_ID
27
+ IMAGE_URL = ColName.IMAGE_URL
26
28
 
27
29
 
28
30
  MTG_JSON_TEMPLATE = "https://mtgjson.com/api/v5/{set_code}.json"
@@ -41,6 +43,13 @@ def _fetch_mtg_json(set_code: str) -> dict:
41
43
 
42
44
 
43
45
  def _extract_value(set_code: str, name: str, card_dict: dict, field: CardAttr):
46
+ scryfall_id = card_dict.get("identifiers", {}).get("scryfallId", "")
47
+ if scryfall_id:
48
+ d1 = scryfall_id[0]
49
+ d2 = scryfall_id[1]
50
+ img_url = f"https://cards.scryfall.io/large/front/{d1}/{d2}/{scryfall_id}.jpg"
51
+ else:
52
+ img_url = ""
44
53
  match field:
45
54
  case CardAttr.NAME:
46
55
  return name
@@ -72,7 +81,10 @@ def _extract_value(set_code: str, name: str, card_dict: dict, field: CardAttr):
72
81
  return card_dict.get("text", "")
73
82
  case CardAttr.CARD_JSON:
74
83
  return card_dict.get("json", "")
75
-
84
+ case CardAttr.SCRYFALL_ID:
85
+ return scryfall_id
86
+ case CardAttr.IMAGE_URL:
87
+ return img_url
76
88
 
77
89
  def card_df(draft_set_code: str, names: list[str]) -> pl.DataFrame:
78
90
  draft_set_json = _fetch_mtg_json(draft_set_code)
@@ -431,6 +431,12 @@ _specs: dict[str, ColSpec] = {
431
431
  ColName.CARD_JSON: ColSpec(
432
432
  col_type=ColType.CARD_ATTR,
433
433
  ),
434
+ ColName.SCRYFALL_ID: ColSpec(
435
+ col_type=ColType.CARD_ATTR,
436
+ ),
437
+ ColName.IMAGE_URL: ColSpec(
438
+ col_type=ColType.CARD_ATTR,
439
+ ),
434
440
  ColName.PICKED_MATCH_WR: ColSpec(
435
441
  col_type=ColType.AGG,
436
442
  expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES_SUM),
@@ -380,7 +380,7 @@ def _fetch_or_cache(
380
380
  def _base_agg_df(
381
381
  set_code: str,
382
382
  m: spells.manifest.Manifest,
383
- use_streaming: bool = False,
383
+ use_streaming: bool = True,
384
384
  ) -> pl.DataFrame:
385
385
  join_dfs = []
386
386
  group_by = m.base_view_group_by
@@ -470,7 +470,7 @@ def summon(
470
470
  group_by: list[str] | None = None,
471
471
  filter_spec: dict | None = None,
472
472
  extensions: dict[str, ColSpec] | list[dict[str, ColSpec]] | None = None,
473
- use_streaming: bool = False,
473
+ use_streaming: bool = True,
474
474
  read_cache: bool = True,
475
475
  write_cache: bool = True,
476
476
  card_context: pl.DataFrame | dict[str, Any] | None = None,
@@ -128,6 +128,8 @@ class ColName(StrEnum):
128
128
  IS_DFC = "is_dfc"
129
129
  ORACLE_TEXT = "oracle_text"
130
130
  CARD_JSON = "card_json"
131
+ SCRYFALL_ID = "scryfall_id"
132
+ IMAGE_URL = "image_url"
131
133
  # agg extensions
132
134
  PICKED_MATCH_WR = "picked_match_wr"
133
135
  TROPHY_RATE = "trophy_rate"
@@ -69,6 +69,8 @@ def cli() -> int:
69
69
  clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files), or all of them.
70
70
 
71
71
  info: No set code argument. Print info on all external and local files.
72
+
73
+ context: Refresh context files
72
74
  """
73
75
  print_usage = functools.partial(cache.spells_print, "usage", usage)
74
76
 
@@ -81,6 +83,9 @@ def cli() -> int:
81
83
  if mode == "info":
82
84
  return _info()
83
85
 
86
+ if mode == "context":
87
+ return _context()
88
+
84
89
  if len(sys.argv) != 3:
85
90
  print_usage()
86
91
  return 1
@@ -99,7 +104,7 @@ def cli() -> int:
99
104
  return 1
100
105
 
101
106
 
102
- def _add(set_code: str, force_download=False):
107
+ def _add(set_code: str, force_download: bool = False) -> int:
103
108
  if set_code == "all":
104
109
  for code in all_sets:
105
110
  _add(code, force_download=force_download)
@@ -111,6 +116,13 @@ def _add(set_code: str, force_download=False):
111
116
  return 0
112
117
 
113
118
 
119
+ def _context() -> int:
120
+ cache.spells_print("context", "Refreshing all context files")
121
+ for code in all_sets:
122
+ write_card_file(code, force_download=True)
123
+ get_set_context(code, force_download=True)
124
+ return 0
125
+
114
126
  def _refresh(set_code: str):
115
127
  return _add(set_code, force_download=True)
116
128
 
@@ -0,0 +1,55 @@
1
+ import polars as pl
2
+
3
+ def convert_to_expr_list(
4
+ input: str | pl.Expr | list[str | pl.Expr] | None
5
+ ):
6
+ if input is None:
7
+ return []
8
+
9
+ input_list = [input] if isinstance(input, str | pl.Expr) else input
10
+ return [pl.col(i) if isinstance(i, str) else i for i in input_list]
11
+
12
+
13
+ def wavg(
14
+ df: pl.DataFrame,
15
+ cols: str | pl.Expr | list[str | pl.Expr],
16
+ weights: str | pl.Expr | list[str | pl.Expr],
17
+ group_by: str | pl.Expr | list[str | pl.Expr] | None = None,
18
+ new_names: str | list[str] | None = None,
19
+ ) -> pl.DataFrame:
20
+ col_list = convert_to_expr_list(cols)
21
+ weight_list = convert_to_expr_list(weights)
22
+ gbs = convert_to_expr_list(group_by)
23
+
24
+ name_list: list[str]
25
+ if isinstance(new_names, str):
26
+ name_list = [new_names]
27
+ elif new_names is None:
28
+ name_list = [c.meta.output_name() for c in col_list]
29
+ else:
30
+ name_list = list(new_names)
31
+
32
+ assert len(name_list) == len(col_list), f"{len(name_list)} names provided for {len(col_list)} columns"
33
+ assert len(name_list) == len(set(name_list)), "Output names must be unique"
34
+ assert len(weight_list) == len(col_list) or len(weight_list) == 1, f"{len(weight_list)} weights provided for {len(col_list)} columns"
35
+
36
+ enum_wl = weight_list * int(len(col_list) / len(weight_list))
37
+ wl_names = [w.meta.output_name() for w in weight_list]
38
+ assert len(wl_names) == len(set(wl_names)), "Weights must have unique names. Send one weight column or n uniquely named ones"
39
+
40
+ to_group = df.select(gbs + weight_list + [
41
+ (c * enum_wl[i]).alias(name_list[i]) for i, c in enumerate(col_list)
42
+ ])
43
+
44
+ grouped = to_group if not gbs else to_group.group_by(gbs)
45
+
46
+ ret_df = grouped.sum().select(
47
+ gbs +
48
+ wl_names +
49
+ [(pl.col(name) / pl.col(enum_wl[i].meta.output_name())) for i, name in enumerate(name_list)]
50
+ )
51
+
52
+ if gbs:
53
+ ret_df = ret_df.sort(by=gbs)
54
+
55
+ return ret_df
@@ -0,0 +1,174 @@
1
+
2
+ """
3
+ Test behavior of wavg utility for Polars DataFrames
4
+ """
5
+
6
+ import pytest
7
+ import polars as pl
8
+
9
+ import spells.utils as utils
10
+
11
+ def format_test_string(test_string: str) -> str:
12
+ """
13
+ strip whitespace from each line to test pasted dataframe outputs
14
+ """
15
+ return "\n".join(
16
+ [line.strip() for line in test_string.splitlines() if line.strip()]
17
+ )
18
+
19
+ test_df = pl.DataFrame({
20
+ 'cat': ['a', 'a', 'b', 'b', 'b', 'c' ],
21
+ 'va1': [1.0, -1.0, 0.2, 0.4, 0.0, 10.0 ],
22
+ 'va2': [4.0, 3.0, 1.0, -2.0, 2.0, 1.0 ],
23
+ 'wt1': [1, 2, 0, 2, 3, 1 ],
24
+ 'wt2': [2, 4, 1, 1, 1, 2, ],
25
+ })
26
+
27
+
28
+ # test wavg with default args
29
+ @pytest.mark.parametrize(
30
+ "cols, weights, expected",
31
+ [
32
+ (
33
+ 'va1',
34
+ 'wt1',
35
+ """
36
+ shape: (1, 2)
37
+ ┌─────┬──────────┐
38
+ │ wt1 ┆ va1 │
39
+ │ --- ┆ --- │
40
+ │ i64 ┆ f64 │
41
+ ╞═════╪══════════╡
42
+ │ 9 ┆ 1.088889 │
43
+ └─────┴──────────┘
44
+ """
45
+ ),
46
+ (
47
+ ['va1', 'va2'],
48
+ 'wt1',
49
+ """
50
+ shape: (1, 3)
51
+ ┌─────┬──────────┬──────────┐
52
+ │ wt1 ┆ va1 ┆ va2 │
53
+ │ --- ┆ --- ┆ --- │
54
+ │ i64 ┆ f64 ┆ f64 │
55
+ ╞═════╪══════════╪══════════╡
56
+ │ 9 ┆ 1.088889 ┆ 1.444444 │
57
+ └─────┴──────────┴──────────┘
58
+ """
59
+ ),
60
+ (
61
+ ['va1', 'va2'],
62
+ ['wt1', 'wt2'],
63
+ """
64
+ shape: (1, 4)
65
+ ┌─────┬─────┬──────────┬──────────┐
66
+ │ wt1 ┆ wt2 ┆ va1 ┆ va2 │
67
+ │ --- ┆ --- ┆ --- ┆ --- │
68
+ │ i64 ┆ i64 ┆ f64 ┆ f64 │
69
+ ╞═════╪═════╪══════════╪══════════╡
70
+ │ 9 ┆ 11 ┆ 1.088889 ┆ 2.090909 │
71
+ └─────┴─────┴──────────┴──────────┘
72
+ """
73
+ ),
74
+ (
75
+ [pl.col('va1') + 1, 'va2'],
76
+ ['wt1', pl.col('wt2') + 1],
77
+ """
78
+ shape: (1, 4)
79
+ ┌─────┬─────┬──────────┬──────────┐
80
+ │ wt1 ┆ wt2 ┆ va1 ┆ va2 │
81
+ │ --- ┆ --- ┆ --- ┆ --- │
82
+ │ i64 ┆ i64 ┆ f64 ┆ f64 │
83
+ ╞═════╪═════╪══════════╪══════════╡
84
+ │ 9 ┆ 17 ┆ 2.088889 ┆ 1.882353 │
85
+ └─────┴─────┴──────────┴──────────┘
86
+ """
87
+ ),
88
+ ]
89
+ )
90
+ def test_wavg_defaults(cols: str | pl.Expr | list[str | pl.Expr], weights: str | pl.Expr | list[str | pl.Expr], expected: str):
91
+ result = utils.wavg(test_df, cols, weights)
92
+
93
+ test_str = str(result)
94
+ print(test_str)
95
+ assert test_str == format_test_string(expected)
96
+
97
+
98
+ # test wavg with named args
99
+ @pytest.mark.parametrize(
100
+ "cols, weights, group_by, new_names, expected",
101
+ [
102
+ (
103
+ "va1",
104
+ "wt1",
105
+ [],
106
+ "v1",
107
+ """
108
+ shape: (1, 2)
109
+ ┌─────┬──────────┐
110
+ │ wt1 ┆ v1 │
111
+ │ --- ┆ --- │
112
+ │ i64 ┆ f64 │
113
+ ╞═════╪══════════╡
114
+ │ 9 ┆ 1.088889 │
115
+ └─────┴──────────┘
116
+ """
117
+ ),
118
+ (
119
+ "va1",
120
+ "wt1",
121
+ "cat",
122
+ "va1",
123
+ """
124
+ shape: (3, 3)
125
+ ┌─────┬─────┬───────────┐
126
+ │ cat ┆ wt1 ┆ va1 │
127
+ │ --- ┆ --- ┆ --- │
128
+ │ str ┆ i64 ┆ f64 │
129
+ ╞═════╪═════╪═══════════╡
130
+ │ a ┆ 3 ┆ -0.333333 │
131
+ │ b ┆ 5 ┆ 0.16 │
132
+ │ c ┆ 1 ┆ 10.0 │
133
+ └─────┴─────┴───────────┘
134
+ """
135
+ ),
136
+ (
137
+ ["va1", "va1"],
138
+ ["wt1", "wt2"],
139
+ ["cat"],
140
+ ["v@1", "v@2"],
141
+ """
142
+ shape: (3, 5)
143
+ ┌─────┬─────┬─────┬───────────┬───────────┐
144
+ │ cat ┆ wt1 ┆ wt2 ┆ v@1 ┆ v@2 │
145
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
146
+ │ str ┆ i64 ┆ i64 ┆ f64 ┆ f64 │
147
+ ╞═════╪═════╪═════╪═══════════╪═══════════╡
148
+ │ a ┆ 3 ┆ 6 ┆ -0.333333 ┆ -0.333333 │
149
+ │ b ┆ 5 ┆ 3 ┆ 0.16 ┆ 0.2 │
150
+ │ c ┆ 1 ┆ 2 ┆ 10.0 ┆ 10.0 │
151
+ └─────┴─────┴─────┴───────────┴───────────┘
152
+ """
153
+ )
154
+ ]
155
+ )
156
+ def test_wavg(
157
+ cols: str | pl.Expr | list[str | pl.Expr],
158
+ weights: str | pl.Expr | list[str | pl.Expr],
159
+ group_by: str | pl.Expr | list[str | pl.Expr],
160
+ new_names: str | list[str],
161
+ expected: str,
162
+ ):
163
+ result = utils.wavg(
164
+ test_df,
165
+ cols,
166
+ weights,
167
+ group_by=group_by,
168
+ new_names=new_names,
169
+ )
170
+
171
+ test_str = str(result)
172
+ print(test_str)
173
+ assert test_str == format_test_string(expected)
174
+
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes