spells-mtg 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of spells-mtg has been flagged as potentially problematic.

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spells-mtg
-Version: 0.7.0
+Version: 0.7.2
 Summary: analaysis of 17Lands.com public datasets
 Author-Email: Joel Barnes <oelarnes@gmail.com>
 License: MIT
```
```diff
@@ -75,6 +75,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
 - Provides 124 explicitly specified, enumerated, documented column definitions
+- Can aggregate over multiple sets at once, even all of them, if you want.
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
@@ -64,6 +64,7 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
 - Caches aggregate DataFrames in the local file system automatically for instantaneous reproduction of previous analysis
 - Manages grouping and filtering by built-in and custom columns at the row level
 - Provides 124 explicitly specified, enumerated, documented column definitions
+- Can aggregate over multiple sets at once, even all of them, if you want.
 - Supports "Deck Color Data" aggregations with built-in column definitions.
 - Lets you feed card metrics back in to column definitions to support scientific workflows like MLE
 - Provides a CLI tool `spells [add|refresh|clean|remove|info] [SET]` to download and manage external files
```
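The new feature bullet refers to multi-set aggregation in `summon`, whose per-set loop appears further down in this diff. A minimal usage sketch, assuming `summon` is exported at the package top level; the set codes are illustrative, not documented API:

```python
import spells

# Hypothetical call: the diff shows summon() looping over a list of set
# codes, aggregating each set, and concatenating the results vertically
# before re-aggregating on the group-by columns.
df = spells.summon(["OTJ", "MKM"])
```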
```diff
@@ -11,7 +11,7 @@ dependencies = [
 ]
 requires-python = ">=3.11"
 readme = "README.md"
-version = "0.7.0"
+version = "0.7.2"
 
 [project.license]
 text = "MIT"
```
```diff
@@ -78,8 +78,16 @@ def write_cache(set_code: str, cache_key: str, df: pl.DataFrame) -> None:
     df.write_parquet(cache_path_for_key(set_code, cache_key))
 
 
-def clear(set_code: str) -> int:
+def clean(set_code: str) -> int:
     mode = "clean"
+
+    if set_code == "all":
+        cache_dir = data_dir_path(DataDir.CACHE)
+        with os.scandir(cache_dir) as set_dir:
+            for entry in set_dir:
+                clean(entry.name)
+        return 0
+
     cache_dir = cache_dir_for_set(set_code)
     if os.path.isdir(cache_dir):
         with os.scandir(cache_dir) as set_dir:
```
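The renamed `clean` function gains a recursive "all" branch. A sketch of the equivalent Python-level calls, assuming the `from spells import cache` import path shown in the CLI changes below:

```python
from spells import cache

# "all" takes the new branch: clean() re-invokes itself once per entry
# under the cache root directory, then returns 0.
cache.clean("all")

# A single set's cache is still cleaned by set code, as before.
cache.clean("OTJ")  # illustrative set code
```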
```diff
@@ -59,25 +59,35 @@ def _get_card_context(
     specs: dict[str, ColSpec],
     card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
     set_context: pl.DataFrame | dict[str, Any] | None,
+    card_only: bool = False,
 ) -> dict[str, dict[str, Any]]:
     card_attr_specs = {
         col: spec
         for col, spec in specs.items()
         if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
     }
-    col_def_map = _hydrate_col_defs(
-        set_code, card_attr_specs, set_context=set_context, card_only=True
-    )
 
-    columns = list(col_def_map.keys())
+    if not card_only:
+        col_def_map = _hydrate_col_defs(
+            set_code,
+            card_attr_specs,
+            set_context=set_context,
+            card_context=card_context,
+            card_only=True,
+        )
+
+        columns = list(col_def_map.keys())
 
-    fp = data_file_path(set_code, View.CARD)
-    card_df = pl.read_parquet(fp)
-    select_rows = _view_select(
-        card_df, frozenset(columns), col_def_map, is_agg_view=False
-    ).to_dicts()
+        fp = data_file_path(set_code, View.CARD)
+        card_df = pl.read_parquet(fp)
+        select_rows = _view_select(
+            card_df, frozenset(columns), col_def_map, is_agg_view=False
+        ).to_dicts()
 
-    loaded_context = {row[ColName.NAME]: row for row in select_rows}
+        loaded_context = {row[ColName.NAME]: row for row in select_rows}
+    else:
+        names = _get_names(set_code)
+        loaded_context = {name: {} for name in names}
 
     if card_context is not None:
         if isinstance(card_context, pl.DataFrame):
```
```diff
@@ -139,25 +149,25 @@ def _determine_expression(
 
     elif spec.expr is not None:
         if isinstance(spec.expr, Callable):
-            assert not spec.col_type == ColType.AGG, f"AGG column {col} must be a pure spells expression"
+            assert (
+                not spec.col_type == ColType.AGG
+            ), f"AGG column {col} must be a pure spells expression"
             params = seed_params(spec.expr)
             if (
-                spec.col_type == ColType.PICK_SUM
+                spec.col_type in (ColType.PICK_SUM, ColType.CARD_ATTR)
                 and "name" in signature(spec.expr).parameters
             ):
+                condition_col = (
+                    ColName.PICK if spec.col_type == ColType.PICK_SUM else ColName.NAME
+                )
                 expr = pl.lit(None)
                 for name in names:
                     name_params = {"name": name, **params}
                     expr = (
-                        pl.when(pl.col(ColName.PICK) == name)
+                        pl.when(pl.col(condition_col) == name)
                         .then(spec.expr(**name_params))
                         .otherwise(expr)
                     )
-            elif (
-                spec.col_type == ColType.CARD_ATTR
-                and "name" in signature(spec.expr).parameters
-            ):
-                expr = spec.expr(**{"name": pl.col("name"), **params})
             else:
                 expr = spec.expr(**params)
     else:
```
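This refactor merges the old `CARD_ATTR` branch into the `PICK_SUM` branch by parameterizing the condition column, so both column types now build the same chained expression. The chaining pattern itself is ordinary Polars; here is a self-contained sketch with made-up names and data:

```python
import polars as pl

df = pl.DataFrame({"name": ["Llanowar Elves", "Shock", "Divination"]})

# A stand-in for a name-parameterized spec.expr.
def per_name_expr(name: str) -> pl.Expr:
    return pl.lit(len(name))

# Build the expression exactly as the hunk does: start from a null
# literal and wrap each name in when/then/otherwise, so the final
# expression dispatches on the condition column row by row.
expr = pl.lit(None)
for name in df["name"]:
    expr = (
        pl.when(pl.col("name") == name)
        .then(per_name_expr(name))
        .otherwise(expr)
    )

print(df.with_columns(expr.alias("name_len")))
```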
```diff
@@ -256,10 +266,9 @@ def _hydrate_col_defs(
 
     set_context = _get_set_context(set_code, set_context)
 
-    if card_only:
-        card_context = {}
-    else:
-        card_context = _get_card_context(set_code, specs, card_context, set_context)
+    card_context = _get_card_context(
+        set_code, specs, card_context, set_context, card_only=card_only
+    )
 
     assert len(names) > 0, "there should be names"
     hydrated = {}
```
```diff
@@ -393,9 +402,7 @@ def _base_agg_df(
     sum_col_df = base_df.select(nonname_gb + name_col_tuple + sum_cols)
 
     grouped = sum_col_df.group_by(group_by) if group_by else sum_col_df
-    join_dfs.append(
-        grouped.sum().collect(streaming=use_streaming)
-    )
+    join_dfs.append(grouped.sum().collect(streaming=use_streaming))
 
     name_sum_cols = tuple(
         c for c in cols_for_view if m.col_def_map[c].col_type == ColType.NAME_SUM
```
```diff
@@ -423,8 +430,12 @@
     )
 
     if not is_name_gb:
-        grouped = unpivoted.drop("name").group_by(nonname_gb) if nonname_gb else unpivoted.drop("name")
-        df = grouped.sum() .collect(streaming=use_streaming)
+        grouped = (
+            unpivoted.drop("name").group_by(nonname_gb)
+            if nonname_gb
+            else unpivoted.drop("name")
+        )
+        df = grouped.sum().collect(streaming=use_streaming)
     else:
         df = unpivoted.collect(streaming=use_streaming)
 
```
```diff
@@ -436,7 +447,7 @@
             join_dfs,
         )
     else:
-        joined_df = pl.concat(join_dfs, how='horizontal')
+        joined_df = pl.concat(join_dfs, how="horizontal")
 
     return joined_df
 
```
```diff
@@ -475,14 +486,14 @@ def summon(
     concat_dfs = []
     for code in codes:
         if isinstance(card_context, pl.DataFrame):
-            set_card_context = card_context.filter(pl.col('expansion') == code)
+            set_card_context = card_context.filter(pl.col("expansion") == code)
         elif isinstance(card_context, dict):
             set_card_context = card_context[code]
         else:
             set_card_context = None
 
         if isinstance(set_context, pl.DataFrame):
-            this_set_context = set_context.filter(pl.col('expansion') == code)
+            this_set_context = set_context.filter(pl.col("expansion") == code)
         elif isinstance(set_context, dict):
             this_set_context = set_context[code]
         else:
```
```diff
@@ -511,13 +522,17 @@
             card_cols = m.view_cols[View.CARD].union({ColName.NAME})
             fp = data_file_path(code, View.CARD)
             card_df = pl.read_parquet(fp)
-            select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
+            select_df = _view_select(
+                card_df, card_cols, m.col_def_map, is_agg_view=False
+            )
             agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
         concat_dfs.append(agg_df)
 
-    full_agg_df = pl.concat(concat_dfs, how='vertical')
+    full_agg_df = pl.concat(concat_dfs, how="vertical")
 
-    assert m is not None, "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
+    assert (
+        m is not None
+    ), "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
 
     if m.group_by:
         full_agg_df = full_agg_df.group_by(m.group_by).sum()
```
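Note the `expansion` filter in the loop two hunks up: when `card_context` (or `set_context`) arrives as a DataFrame, `summon` slices it per set code, so a single frame can carry context for every set in the call. A hypothetical shape for such a frame; only the `expansion` column is implied by the diff, the other columns are assumptions:

```python
import polars as pl

# Illustrative card_context frame spanning two sets.
card_context = pl.DataFrame(
    {
        "expansion": ["OTJ", "OTJ", "MKM"],
        "name": ["Card A", "Card B", "Card C"],
        "my_metric": [0.51, 0.48, 0.55],
    }
)

# The per-set slice summon() takes when code == "OTJ".
otj_context = card_context.filter(pl.col("expansion") == "OTJ")
```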
```diff
@@ -15,6 +15,7 @@ from enum import StrEnum
 
 import wget
 import polars as pl
+from polars.exceptions import ComputeError
 
 from spells import cards
 from spells import cache
```
```diff
@@ -52,6 +53,7 @@ def cli() -> int:
     cache.spells_print("spells", f"[data home]={data_dir}")
     print()
     usage = """spells [add|refresh|remove|clean] [set_code]
+spells clean all
 spells info
 
 add: Download draft and game files from 17Lands.com and card file from MTGJSON.com and save to path
@@ -66,7 +68,7 @@ def cli() -> int:
 
 remove: Delete the [data home]/external/[set code] and [data home]/local/[set code] directories and their contents
 
-clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files).
+clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files), or all of them.
 
 info: No set code argument. Print info on all external and local files.
 """
```
```diff
@@ -93,7 +95,7 @@ def cli() -> int:
         case "remove":
             return _remove(sys.argv[2])
         case "clean":
-            return cache.clear(sys.argv[2])
+            return cache.clean(sys.argv[2])
         case _:
             print_usage()
             return 1
```
```diff
@@ -231,7 +233,18 @@ def _process_zipped_file(gzip_path, target_path):
 
     os.remove(gzip_path)
     df = pl.scan_csv(csv_path, schema=schema(csv_path))
-    df.sink_parquet(target_path)
+    try:
+        df.sink_parquet(target_path)
+    except ComputeError:
+        df = pl.scan_csv(csv_path)
+        cache.spells_print(
+            "error",
+            "Bad schema found, loading dataset into memory"
+            + " and attempting to cast to correct schema",
+        )
+        select = [pl.col(name).cast(dtype) for name, dtype in schema(csv_path).items()]
+        cast_df = df.select(select).collect()
+        cast_df.write_parquet(target_path)
 
     os.remove(csv_path)
 
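```
This fallback handles CSV files that do not stream cleanly under the expected schema: the file is re-scanned with inferred dtypes, cast column by column, and materialized in memory before writing. A generic sketch of the same pattern, with hypothetical paths and a hypothetical schema:

```python
import polars as pl
from polars.exceptions import ComputeError

csv_path, target_path = "data.csv", "data.parquet"  # illustrative paths
expected = {"draft_id": pl.Utf8, "pick_number": pl.Int64}  # assumed schema

try:
    # Fast path: stream straight to parquet under the expected schema.
    pl.scan_csv(csv_path, schema=expected).sink_parquet(target_path)
except ComputeError:
    # Fallback: let polars infer dtypes, then cast and collect in memory.
    lf = pl.scan_csv(csv_path)
    cast = [pl.col(name).cast(dtype) for name, dtype in expected.items()]
    lf.select(cast).collect().write_parquet(target_path)
```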
```diff
@@ -162,7 +162,11 @@ def create(
     group_by: list[str] | None = None,
     filter_spec: dict | None = None,
 ):
-    gbs = (ColName.NAME, ColName.COLOR, ColName.RARITY) if group_by is None else tuple(group_by)
+    gbs = (
+        (ColName.NAME, ColName.COLOR, ColName.RARITY)
+        if group_by is None
+        else tuple(group_by)
+    )
 
     if columns is None:
         cols = tuple(spells.columns.default_columns)
```
```diff
@@ -139,13 +139,12 @@ COLUMN_TYPES = (
 
 def schema(
     filename: str, print_missing: bool = False
-) -> Dict[str, pl.datatypes.DataType] | None:
+) -> Dict[str, pl.datatypes.DataType]:
     dtypes: Dict[str, pl.datatypes.DataType] = {}
     with open(filename, encoding="utf-8") as f:
         columns = csv.DictReader(f).fieldnames
     if columns is None:
-        print(f"Could not read fieldnames from {filename}")
-        return None
+        raise ValueError(f"Could not read fieldnames from {filename}")
     for column in columns:
         for regex, column_type in COLUMN_TYPES:
             if regex.match(column):
```
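`schema` now always returns a mapping, raising instead of returning `None`, so callers such as `_process_zipped_file` can use it without a null check. Its regex-to-dtype dispatch is truncated at the hunk boundary above; a hypothetical reconstruction of the full pattern, with invented patterns since the real `COLUMN_TYPES` table is not shown in this diff:

```python
import csv
import re
from typing import Dict

import polars as pl

# Hypothetical (regex, dtype) table; the real COLUMN_TYPES is not shown.
COLUMN_TYPES = (
    (re.compile(r".*_id$"), pl.Utf8),
    (re.compile(r".*_number$"), pl.Int64),
)

def schema(filename: str) -> Dict[str, pl.datatypes.DataType]:
    dtypes: Dict[str, pl.datatypes.DataType] = {}
    with open(filename, encoding="utf-8") as f:
        columns = csv.DictReader(f).fieldnames
    if columns is None:
        raise ValueError(f"Could not read fieldnames from {filename}")
    # Assign each fieldname the dtype of the first pattern it matches.
    for column in columns:
        for regex, column_type in COLUMN_TYPES:
            if regex.match(column):
                dtypes[column] = column_type
                break
    return dtypes
```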
6 files without changes.