spells-mtg 0.9.8__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.9.8
3
+ Version: 0.10.1
4
4
  Summary: analysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -252,7 +252,7 @@ Spells caches the results of expensive aggregations in the local file system as
252
252
 
253
253
  ### Memory Usage
254
254
 
255
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Further testing is needed to determine the performance impacts, but this is the first thing you should try if you run into memory issues.
255
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming` and defaulted to `True` in Spells. Further testing is needed to determine the performance impacts, so you could try turning it off if you have expansive virtual memory. My 16 GB MacBook Air is fine using 60 GB of memory, but my 32 GB homelab is not.
256
256
 
257
257
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
258
258
 
@@ -241,7 +241,7 @@ Spells caches the results of expensive aggregations in the local file system as
241
241
 
242
242
  ### Memory Usage
243
243
 
244
- One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming`. Further testing is needed to determine the performance impacts, but this is the first thing you should try if you run into memory issues.
244
+ One of my goals in creating Spells was to eliminate issues with memory pressure by exclusively using the map-reduce paradigm and a technology that supports partitioned/streaming aggregation of larger-than-memory datasets. By default, Polars loads the entire dataset in memory, but the API exposes a parameter `streaming` which I have exposed as `use_streaming` and defaulted to `True` in Spells. Further testing is needed to determine the performance impacts, so you could try turning it off if you have expansive virtual memory. My 16 GB MacBook Air is fine using 60 GB of memory, but my 32 GB homelab is not.
245
245
 
246
246
  When refreshing a given set's data files from 17Lands using the provided cli, the cache for that set is automatically cleared. The `spells` CLI gives additional tools for managing the local and external caches.
247
247
 
@@ -11,7 +11,7 @@ dependencies = [
11
11
  ]
12
12
  requires-python = ">=3.11"
13
13
  readme = "README.md"
14
- version = "0.9.8"
14
+ version = "0.10.1"
15
15
 
16
16
  [project.license]
17
17
  text = "MIT"
@@ -42,6 +42,4 @@ dev = [
42
42
  "jupyter>=1.1.1",
43
43
  "matplotlib>=3.10.0",
44
44
  "numpy>=2.2.0",
45
- "jupyter-book>=1.0.3",
46
- "ghp-import>=2.1.0",
47
45
  ]
@@ -23,6 +23,8 @@ class CardAttr(StrEnum):
23
23
  IS_DFC = ColName.IS_DFC
24
24
  ORACLE_TEXT = ColName.ORACLE_TEXT
25
25
  CARD_JSON = ColName.CARD_JSON
26
+ SCRYFALL_ID = ColName.SCRYFALL_ID
27
+ IMAGE_URL = ColName.IMAGE_URL
26
28
 
27
29
 
28
30
  MTG_JSON_TEMPLATE = "https://mtgjson.com/api/v5/{set_code}.json"
@@ -41,6 +43,13 @@ def _fetch_mtg_json(set_code: str) -> dict:
41
43
 
42
44
 
43
45
  def _extract_value(set_code: str, name: str, card_dict: dict, field: CardAttr):
46
+ scryfall_id = card_dict.get("identifiers", {}).get("scryfallId", "")
47
+ if scryfall_id:
48
+ d1 = scryfall_id[0]
49
+ d2 = scryfall_id[1]
50
+ img_url = f"https://cards.scryfall.io/large/front/{d1}/{d2}/{scryfall_id}.jpg"
51
+ else:
52
+ img_url = ""
44
53
  match field:
45
54
  case CardAttr.NAME:
46
55
  return name
@@ -72,7 +81,10 @@ def _extract_value(set_code: str, name: str, card_dict: dict, field: CardAttr):
72
81
  return card_dict.get("text", "")
73
82
  case CardAttr.CARD_JSON:
74
83
  return card_dict.get("json", "")
75
-
84
+ case CardAttr.SCRYFALL_ID:
85
+ return scryfall_id
86
+ case CardAttr.IMAGE_URL:
87
+ return img_url
76
88
 
77
89
  def card_df(draft_set_code: str, names: list[str]) -> pl.DataFrame:
78
90
  draft_set_json = _fetch_mtg_json(draft_set_code)
@@ -431,6 +431,12 @@ _specs: dict[str, ColSpec] = {
431
431
  ColName.CARD_JSON: ColSpec(
432
432
  col_type=ColType.CARD_ATTR,
433
433
  ),
434
+ ColName.SCRYFALL_ID: ColSpec(
435
+ col_type=ColType.CARD_ATTR,
436
+ ),
437
+ ColName.IMAGE_URL: ColSpec(
438
+ col_type=ColType.CARD_ATTR,
439
+ ),
434
440
  ColName.PICKED_MATCH_WR: ColSpec(
435
441
  col_type=ColType.AGG,
436
442
  expr=pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES_SUM),
@@ -380,7 +380,7 @@ def _fetch_or_cache(
380
380
  def _base_agg_df(
381
381
  set_code: str,
382
382
  m: spells.manifest.Manifest,
383
- use_streaming: bool = False,
383
+ use_streaming: bool = True,
384
384
  ) -> pl.DataFrame:
385
385
  join_dfs = []
386
386
  group_by = m.base_view_group_by
@@ -470,7 +470,7 @@ def summon(
470
470
  group_by: list[str] | None = None,
471
471
  filter_spec: dict | None = None,
472
472
  extensions: dict[str, ColSpec] | list[dict[str, ColSpec]] | None = None,
473
- use_streaming: bool = False,
473
+ use_streaming: bool = True,
474
474
  read_cache: bool = True,
475
475
  write_cache: bool = True,
476
476
  card_context: pl.DataFrame | dict[str, Any] | None = None,
@@ -128,6 +128,8 @@ class ColName(StrEnum):
128
128
  IS_DFC = "is_dfc"
129
129
  ORACLE_TEXT = "oracle_text"
130
130
  CARD_JSON = "card_json"
131
+ SCRYFALL_ID = "scryfall_id"
132
+ IMAGE_URL = "image_url"
131
133
  # agg extensions
132
134
  PICKED_MATCH_WR = "picked_match_wr"
133
135
  TROPHY_RATE = "trophy_rate"
@@ -69,6 +69,8 @@ def cli() -> int:
69
69
  clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files), or all of them.
70
70
 
71
71
  info: No set code argument. Print info on all external and local files.
72
+
73
+ context: Refresh context files
72
74
  """
73
75
  print_usage = functools.partial(cache.spells_print, "usage", usage)
74
76
 
@@ -81,6 +83,9 @@ def cli() -> int:
81
83
  if mode == "info":
82
84
  return _info()
83
85
 
86
+ if mode == "context":
87
+ return _context()
88
+
84
89
  if len(sys.argv) != 3:
85
90
  print_usage()
86
91
  return 1
@@ -99,7 +104,7 @@ def cli() -> int:
99
104
  return 1
100
105
 
101
106
 
102
- def _add(set_code: str, force_download=False):
107
+ def _add(set_code: str, force_download: bool = False) -> int:
103
108
  if set_code == "all":
104
109
  for code in all_sets:
105
110
  _add(code, force_download=force_download)
@@ -111,6 +116,13 @@ def _add(set_code: str, force_download=False):
111
116
  return 0
112
117
 
113
118
 
119
+ def _context() -> int:
120
+ cache.spells_print("context", "Refreshing all context files")
121
+ for code in all_sets:
122
+ write_card_file(code, force_download=True)
123
+ get_set_context(code, force_download=True)
124
+ return 0
125
+
114
126
  def _refresh(set_code: str):
115
127
  return _add(set_code, force_download=True)
116
128
 
@@ -0,0 +1,55 @@
1
+ import polars as pl
2
+
3
+ def convert_to_expr_list(
4
+ input: str | pl.Expr | list[str | pl.Expr] | None
5
+ ):
6
+ if input is None:
7
+ return []
8
+
9
+ input_list = [input] if isinstance(input, str | pl.Expr) else input
10
+ return [pl.col(i) if isinstance(i, str) else i for i in input_list]
11
+
12
+
13
+ def wavg(
14
+ df: pl.DataFrame,
15
+ cols: str | pl.Expr | list[str | pl.Expr],
16
+ weights: str | pl.Expr | list[str | pl.Expr],
17
+ group_by: str | pl.Expr | list[str | pl.Expr] | None = None,
18
+ new_names: str | list[str] | None = None,
19
+ ) -> pl.DataFrame:
20
+ col_list = convert_to_expr_list(cols)
21
+ weight_list = convert_to_expr_list(weights)
22
+ gbs = convert_to_expr_list(group_by)
23
+
24
+ name_list: list[str]
25
+ if isinstance(new_names, str):
26
+ name_list = [new_names]
27
+ elif new_names is None:
28
+ name_list = [c.meta.output_name() for c in col_list]
29
+ else:
30
+ name_list = list(new_names)
31
+
32
+ assert len(name_list) == len(col_list), f"{len(name_list)} names provided for {len(col_list)} columns"
33
+ assert len(name_list) == len(set(name_list)), "Output names must be unique"
34
+ assert len(weight_list) == len(col_list) or len(weight_list) == 1, f"{len(weight_list)} weights provided for {len(col_list)} columns"
35
+
36
+ enum_wl = weight_list * int(len(col_list) / len(weight_list))
37
+ wl_names = [w.meta.output_name() for w in weight_list]
38
+ assert len(wl_names) == len(set(wl_names)), "Weights must have unique names. Send one weight column or n uniquely named ones"
39
+
40
+ to_group = df.select(gbs + weight_list + [
41
+ (c * enum_wl[i]).alias(name_list[i]) for i, c in enumerate(col_list)
42
+ ])
43
+
44
+ grouped = to_group if not gbs else to_group.group_by(gbs)
45
+
46
+ ret_df = grouped.sum().select(
47
+ gbs +
48
+ wl_names +
49
+ [(pl.col(name) / pl.col(enum_wl[i].meta.output_name())) for i, name in enumerate(name_list)]
50
+ )
51
+
52
+ if gbs:
53
+ ret_df = ret_df.sort(by=gbs)
54
+
55
+ return ret_df
@@ -0,0 +1,174 @@
1
+
2
+ """
3
+ Test behavior of wavg utility for Polars DataFrames
4
+ """
5
+
6
+ import pytest
7
+ import polars as pl
8
+
9
+ import spells.utils as utils
10
+
11
+ def format_test_string(test_string: str) -> str:
12
+ """
13
+ strip whitespace from each line to test pasted dataframe outputs
14
+ """
15
+ return "\n".join(
16
+ [line.strip() for line in test_string.splitlines() if line.strip()]
17
+ )
18
+
19
+ test_df = pl.DataFrame({
20
+ 'cat': ['a', 'a', 'b', 'b', 'b', 'c' ],
21
+ 'va1': [1.0, -1.0, 0.2, 0.4, 0.0, 10.0 ],
22
+ 'va2': [4.0, 3.0, 1.0, -2.0, 2.0, 1.0 ],
23
+ 'wt1': [1, 2, 0, 2, 3, 1 ],
24
+ 'wt2': [2, 4, 1, 1, 1, 2, ],
25
+ })
26
+
27
+
28
+ # test wavg with default args
29
+ @pytest.mark.parametrize(
30
+ "cols, weights, expected",
31
+ [
32
+ (
33
+ 'va1',
34
+ 'wt1',
35
+ """
36
+ shape: (1, 2)
37
+ ┌─────┬──────────┐
38
+ │ wt1 ┆ va1 │
39
+ │ --- ┆ --- │
40
+ │ i64 ┆ f64 │
41
+ ╞═════╪══════════╡
42
+ │ 9 ┆ 1.088889 │
43
+ └─────┴──────────┘
44
+ """
45
+ ),
46
+ (
47
+ ['va1', 'va2'],
48
+ 'wt1',
49
+ """
50
+ shape: (1, 3)
51
+ ┌─────┬──────────┬──────────┐
52
+ │ wt1 ┆ va1 ┆ va2 │
53
+ │ --- ┆ --- ┆ --- │
54
+ │ i64 ┆ f64 ┆ f64 │
55
+ ╞═════╪══════════╪══════════╡
56
+ │ 9 ┆ 1.088889 ┆ 1.444444 │
57
+ └─────┴──────────┴──────────┘
58
+ """
59
+ ),
60
+ (
61
+ ['va1', 'va2'],
62
+ ['wt1', 'wt2'],
63
+ """
64
+ shape: (1, 4)
65
+ ┌─────┬─────┬──────────┬──────────┐
66
+ │ wt1 ┆ wt2 ┆ va1 ┆ va2 │
67
+ │ --- ┆ --- ┆ --- ┆ --- │
68
+ │ i64 ┆ i64 ┆ f64 ┆ f64 │
69
+ ╞═════╪═════╪══════════╪══════════╡
70
+ │ 9 ┆ 11 ┆ 1.088889 ┆ 2.090909 │
71
+ └─────┴─────┴──────────┴──────────┘
72
+ """
73
+ ),
74
+ (
75
+ [pl.col('va1') + 1, 'va2'],
76
+ ['wt1', pl.col('wt2') + 1],
77
+ """
78
+ shape: (1, 4)
79
+ ┌─────┬─────┬──────────┬──────────┐
80
+ │ wt1 ┆ wt2 ┆ va1 ┆ va2 │
81
+ │ --- ┆ --- ┆ --- ┆ --- │
82
+ │ i64 ┆ i64 ┆ f64 ┆ f64 │
83
+ ╞═════╪═════╪══════════╪══════════╡
84
+ │ 9 ┆ 17 ┆ 2.088889 ┆ 1.882353 │
85
+ └─────┴─────┴──────────┴──────────┘
86
+ """
87
+ ),
88
+ ]
89
+ )
90
+ def test_wavg_defaults(cols: str | pl.Expr | list[str | pl.Expr], weights: str | pl.Expr | list[str | pl.Expr], expected: str):
91
+ result = utils.wavg(test_df, cols, weights)
92
+
93
+ test_str = str(result)
94
+ print(test_str)
95
+ assert test_str == format_test_string(expected)
96
+
97
+
98
+ # test wavg with named args
99
+ @pytest.mark.parametrize(
100
+ "cols, weights, group_by, new_names, expected",
101
+ [
102
+ (
103
+ "va1",
104
+ "wt1",
105
+ [],
106
+ "v1",
107
+ """
108
+ shape: (1, 2)
109
+ ┌─────┬──────────┐
110
+ │ wt1 ┆ v1 │
111
+ │ --- ┆ --- │
112
+ │ i64 ┆ f64 │
113
+ ╞═════╪══════════╡
114
+ │ 9 ┆ 1.088889 │
115
+ └─────┴──────────┘
116
+ """
117
+ ),
118
+ (
119
+ "va1",
120
+ "wt1",
121
+ "cat",
122
+ "va1",
123
+ """
124
+ shape: (3, 3)
125
+ ┌─────┬─────┬───────────┐
126
+ │ cat ┆ wt1 ┆ va1 │
127
+ │ --- ┆ --- ┆ --- │
128
+ │ str ┆ i64 ┆ f64 │
129
+ ╞═════╪═════╪═══════════╡
130
+ │ a ┆ 3 ┆ -0.333333 │
131
+ │ b ┆ 5 ┆ 0.16 │
132
+ │ c ┆ 1 ┆ 10.0 │
133
+ └─────┴─────┴───────────┘
134
+ """
135
+ ),
136
+ (
137
+ ["va1", "va1"],
138
+ ["wt1", "wt2"],
139
+ ["cat"],
140
+ ["v@1", "v@2"],
141
+ """
142
+ shape: (3, 5)
143
+ ┌─────┬─────┬─────┬───────────┬───────────┐
144
+ │ cat ┆ wt1 ┆ wt2 ┆ v@1 ┆ v@2 │
145
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
146
+ │ str ┆ i64 ┆ i64 ┆ f64 ┆ f64 │
147
+ ╞═════╪═════╪═════╪═══════════╪═══════════╡
148
+ │ a ┆ 3 ┆ 6 ┆ -0.333333 ┆ -0.333333 │
149
+ │ b ┆ 5 ┆ 3 ┆ 0.16 ┆ 0.2 │
150
+ │ c ┆ 1 ┆ 2 ┆ 10.0 ┆ 10.0 │
151
+ └─────┴─────┴─────┴───────────┴───────────┘
152
+ """
153
+ )
154
+ ]
155
+ )
156
+ def test_wavg(
157
+ cols: str | pl.Expr | list[str | pl.Expr],
158
+ weights: str | pl.Expr | list[str | pl.Expr],
159
+ group_by: str | pl.Expr | list[str | pl.Expr],
160
+ new_names: str | list[str],
161
+ expected: str,
162
+ ):
163
+ result = utils.wavg(
164
+ test_df,
165
+ cols,
166
+ weights,
167
+ group_by=group_by,
168
+ new_names=new_names,
169
+ )
170
+
171
+ test_str = str(result)
172
+ print(test_str)
173
+ assert test_str == format_test_string(expected)
174
+
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes