spells-mtg 0.7.1__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spells-mtg
-Version: 0.7.1
+Version: 0.7.3
 Summary: analaysis of 17Lands.com public datasets
 Author-Email: Joel Barnes <oelarnes@gmail.com>
 License: MIT
@@ -11,7 +11,7 @@ dependencies = [
 ]
 requires-python = ">=3.11"
 readme = "README.md"
-version = "0.7.1"
+version = "0.7.3"
 
 [project.license]
 text = "MIT"
@@ -78,8 +78,16 @@ def write_cache(set_code: str, cache_key: str, df: pl.DataFrame) -> None:
     df.write_parquet(cache_path_for_key(set_code, cache_key))
 
 
-def clear(set_code: str) -> int:
+def clean(set_code: str) -> int:
     mode = "clean"
+
+    if set_code == "all":
+        cache_dir = data_dir_path(DataDir.CACHE)
+        with os.scandir(cache_dir) as set_dir:
+            for entry in set_dir:
+                clean(entry.name)
+        return 0
+
     cache_dir = cache_dir_for_set(set_code)
     if os.path.isdir(cache_dir):
         with os.scandir(cache_dir) as set_dir:
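
The renamed `clean` gains an "all" branch that recurses once per cached set, pairing with the new `spells clean all` usage documented further down. A minimal sketch of the same scan-and-recurse pattern on a throwaway directory (the helper name and set codes here are illustrative, not part of the package):

import os
import tempfile

def clean_all(cache_root: str) -> int:
    # mirror the branch above: scan the cache root and visit
    # each per-set subdirectory in turn
    with os.scandir(cache_root) as entries:
        for entry in entries:
            print(f"would clean cache for {entry.name}")
    return 0

# throwaway directory standing in for the real cache dir
with tempfile.TemporaryDirectory() as root:
    for code in ("BLB", "DSK"):  # hypothetical set codes
        os.mkdir(os.path.join(root, code))
    clean_all(root)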
@@ -59,25 +59,35 @@ def _get_card_context(
     specs: dict[str, ColSpec],
     card_context: pl.DataFrame | dict[str, dict[str, Any]] | None,
     set_context: pl.DataFrame | dict[str, Any] | None,
+    card_only: bool = False,
 ) -> dict[str, dict[str, Any]]:
     card_attr_specs = {
         col: spec
         for col, spec in specs.items()
         if spec.col_type == ColType.CARD_ATTR or col == ColName.NAME
     }
-    col_def_map = _hydrate_col_defs(
-        set_code, card_attr_specs, set_context=set_context, card_only=True
-    )
 
-    columns = list(col_def_map.keys())
+    if not card_only:
+        col_def_map = _hydrate_col_defs(
+            set_code,
+            card_attr_specs,
+            set_context=set_context,
+            card_context=card_context,
+            card_only=True,
+        )
+
+        columns = list(col_def_map.keys())
 
-    fp = data_file_path(set_code, View.CARD)
-    card_df = pl.read_parquet(fp)
-    select_rows = _view_select(
-        card_df, frozenset(columns), col_def_map, is_agg_view=False
-    ).to_dicts()
+        fp = data_file_path(set_code, View.CARD)
+        card_df = pl.read_parquet(fp)
+        select_rows = _view_select(
+            card_df, frozenset(columns), col_def_map, is_agg_view=False
+        ).to_dicts()
 
-    loaded_context = {row[ColName.NAME]: row for row in select_rows}
+        loaded_context = {row[ColName.NAME]: row for row in select_rows}
+    else:
+        names = _get_names(set_code)
+        loaded_context = {name: {} for name in names}
 
     if card_context is not None:
         if isinstance(card_context, pl.DataFrame):
@@ -139,30 +149,33 @@ def _determine_expression(
 
     elif spec.expr is not None:
         if isinstance(spec.expr, Callable):
-            assert not spec.col_type == ColType.AGG, f"AGG column {col} must be a pure spells expression"
+            assert (
+                not spec.col_type == ColType.AGG
+            ), f"AGG column {col} must be a pure spells expression"
             params = seed_params(spec.expr)
             if (
-                spec.col_type == ColType.PICK_SUM
+                spec.col_type in (ColType.PICK_SUM, ColType.CARD_ATTR)
                 and "name" in signature(spec.expr).parameters
             ):
+                condition_col = (
+                    ColName.PICK if spec.col_type == ColType.PICK_SUM else ColName.NAME
+                )
                 expr = pl.lit(None)
                 for name in names:
                     name_params = {"name": name, **params}
                     expr = (
-                        pl.when(pl.col(ColName.PICK) == name)
+                        pl.when(pl.col(condition_col) == name)
                         .then(spec.expr(**name_params))
                         .otherwise(expr)
                     )
-            elif (
-                spec.col_type == ColType.CARD_ATTR
-                and "name" in signature(spec.expr).parameters
-            ):
-                expr = spec.expr(**{"name": pl.col("name"), **params})
             else:
                 expr = spec.expr(**params)
         else:
             expr = spec.expr
         expr = expr.alias(col)
+
+        if spec.col_type == ColType.AGG:
+            expr = expr.fill_nan(None)
     else:
         expr = pl.col(col)
 
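
One behavioral addition hides in this mostly cosmetic hunk: AGG expressions are now passed through fill_nan(None). Polars keeps floating-point NaN distinct from null, and only null is skipped by aggregations, so the fill converts NaN ratios into proper missing values. A minimal illustration of that polars behavior (the frame is toy data, not from the package):

import polars as pl

df = pl.DataFrame({"rate": [0.5, float("nan"), 0.25]})
# fill_nan(None) turns NaN into null, which mean() then ignores
cleaned = df.with_columns(pl.col("rate").fill_nan(None))
print(cleaned.select(pl.col("rate").mean()))  # 0.375 rather than NaN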
@@ -256,10 +269,9 @@ def _hydrate_col_defs(
 
     set_context = _get_set_context(set_code, set_context)
 
-    if card_only:
-        card_context = {}
-    else:
-        card_context = _get_card_context(set_code, specs, card_context, set_context)
+    card_context = _get_card_context(
+        set_code, specs, card_context, set_context, card_only=card_only
+    )
 
     assert len(names) > 0, "there should be names"
     hydrated = {}
@@ -393,9 +405,7 @@ def _base_agg_df(
     sum_col_df = base_df.select(nonname_gb + name_col_tuple + sum_cols)
 
     grouped = sum_col_df.group_by(group_by) if group_by else sum_col_df
-    join_dfs.append(
-        grouped.sum().collect(streaming=use_streaming)
-    )
+    join_dfs.append(grouped.sum().collect(streaming=use_streaming))
 
     name_sum_cols = tuple(
         c for c in cols_for_view if m.col_def_map[c].col_type == ColType.NAME_SUM
@@ -423,8 +433,12 @@ def _base_agg_df(
     )
 
     if not is_name_gb:
-        grouped = unpivoted.drop("name").group_by(nonname_gb) if nonname_gb else unpivoted.drop("name")
-        df = grouped.sum() .collect(streaming=use_streaming)
+        grouped = (
+            unpivoted.drop("name").group_by(nonname_gb)
+            if nonname_gb
+            else unpivoted.drop("name")
+        )
+        df = grouped.sum().collect(streaming=use_streaming)
     else:
         df = unpivoted.collect(streaming=use_streaming)
 
@@ -436,7 +450,7 @@ def _base_agg_df(
             join_dfs,
         )
     else:
-        joined_df = pl.concat(join_dfs, how='horizontal')
+        joined_df = pl.concat(join_dfs, how="horizontal")
 
     return joined_df
 
@@ -475,14 +489,14 @@ def summon(
     concat_dfs = []
     for code in codes:
         if isinstance(card_context, pl.DataFrame):
-            set_card_context = card_context.filter(pl.col('expansion') == code)
+            set_card_context = card_context.filter(pl.col("expansion") == code)
         elif isinstance(card_context, dict):
             set_card_context = card_context[code]
         else:
             set_card_context = None
 
         if isinstance(set_context, pl.DataFrame):
-            this_set_context = set_context.filter(pl.col('expansion') == code)
+            this_set_context = set_context.filter(pl.col("expansion") == code)
         elif isinstance(set_context, dict):
             this_set_context = set_context[code]
         else:
@@ -511,13 +525,17 @@ def summon(
         card_cols = m.view_cols[View.CARD].union({ColName.NAME})
         fp = data_file_path(code, View.CARD)
         card_df = pl.read_parquet(fp)
-        select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
+        select_df = _view_select(
+            card_df, card_cols, m.col_def_map, is_agg_view=False
+        )
         agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
         concat_dfs.append(agg_df)
 
-    full_agg_df = pl.concat(concat_dfs, how='vertical')
+    full_agg_df = pl.concat(concat_dfs, how="vertical")
 
-    assert m is not None, "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
+    assert (
+        m is not None
+    ), "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
 
     if m.group_by:
         full_agg_df = full_agg_df.group_by(m.group_by).sum()
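
The quote changes around pl.concat are purely stylistic, but the two how= modes differ: "vertical" stacks frames with identical schemas row-wise (as with the per-set aggregate frames here), while "horizontal" from the earlier hunk pastes equal-height frames side by side without a join key. A toy sketch (hypothetical card names):

import polars as pl

a = pl.DataFrame({"name": ["Opt"], "picks": [10]})
b = pl.DataFrame({"name": ["Shock"], "picks": [7]})
print(pl.concat([a, b], how="vertical"))  # 2 rows, same columns

left = pl.DataFrame({"name": ["Opt", "Shock"]})
right = pl.DataFrame({"picks": [10, 7]})
print(pl.concat([left, right], how="horizontal"))  # columns side by side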
@@ -53,6 +53,7 @@ def cli() -> int:
     cache.spells_print("spells", f"[data home]={data_dir}")
     print()
     usage = """spells [add|refresh|remove|clean] [set_code]
+    spells clean all
     spells info
 
     add: Download draft and game files from 17Lands.com and card file from MTGJSON.com and save to path
@@ -67,7 +68,7 @@ def cli() -> int:
 
     remove: Delete the [data home]/external/[set code] and [data home]/local/[set code] directories and their contents
 
-    clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files).
+    clean: Delete [data home]/local/[set code] data directory (your cache of aggregate parquet files), or all of them.
 
     info: No set code argument. Print info on all external and local files.
     """
@@ -94,7 +95,7 @@ def cli() -> int:
         case "remove":
             return _remove(sys.argv[2])
         case "clean":
-            return cache.clear(sys.argv[2])
+            return cache.clean(sys.argv[2])
         case _:
             print_usage()
             return 1
@@ -236,8 +237,11 @@ def _process_zipped_file(gzip_path, target_path):
         df.sink_parquet(target_path)
     except ComputeError:
         df = pl.scan_csv(csv_path)
-        cache.spells_print('error', 'Bad schema found, loading dataset into memory'\
-            + ' and attempting to cast to correct schema')
+        cache.spells_print(
+            "error",
+            "Bad schema found, loading dataset into memory"
+            + " and attempting to cast to correct schema",
+        )
         select = [pl.col(name).cast(dtype) for name, dtype in schema(csv_path).items()]
         cast_df = df.select(select).collect()
         cast_df.write_parquet(target_path)
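
The reformatted message wraps an unchanged fallback: when sink_parquet raises a ComputeError, the CSV is re-scanned and each column is cast to the dtype the schema helper expects before writing. A minimal sketch of that cast step, with hypothetical column names and dtypes standing in for schema(csv_path):

import polars as pl

# toy frame standing in for a CSV whose columns were inferred as strings
df = pl.DataFrame({"draft_id": ["a1", "b2"], "pick_number": ["1", "2"]})
expected = {"draft_id": pl.Utf8, "pick_number": pl.Int8}  # hypothetical schema
select = [pl.col(name).cast(dtype) for name, dtype in expected.items()]
print(df.select(select).schema)  # pick_number is now Int8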
@@ -106,6 +106,7 @@ def _resolve_view_cols(
         view_resolution[View.DRAFT] = view_resolution.get(
             View.DRAFT, frozenset()
         ).union({ColName.PICK})
+        # now determine views and deps
         if cdef.col_type == ColType.CARD_ATTR:
             view_resolution[View.CARD] = view_resolution.get(
                 View.CARD, frozenset()
@@ -162,7 +163,11 @@ def create(
     group_by: list[str] | None = None,
     filter_spec: dict | None = None,
 ):
-    gbs = (ColName.NAME, ColName.COLOR, ColName.RARITY) if group_by is None else tuple(group_by)
+    gbs = (
+        (ColName.NAME, ColName.COLOR, ColName.RARITY)
+        if group_by is None
+        else tuple(group_by)
+    )
 
     if columns is None:
         cols = tuple(spells.columns.default_columns)
@@ -136,6 +136,7 @@ COLUMN_TYPES = (
     (re.compile(r"^oppo_total_cards_drawn_or_tutored$"), pl.Int8),
 )
 
+
 def schema(
     filename: str, print_missing: bool = False
 ) -> Dict[str, pl.datatypes.DataType]:
7 files without changes.