spells-mtg 0.8.4__tar.gz → 0.9.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.8.4
3
+ Version: 0.9.0
4
4
  Summary: analaysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -11,7 +11,7 @@ dependencies = [
11
11
  ]
12
12
  requires-python = ">=3.11"
13
13
  readme = "README.md"
14
- version = "0.8.4"
14
+ version = "0.9.0"
15
15
 
16
16
  [project.license]
17
17
  text = "MIT"
@@ -40,4 +40,8 @@ dev = [
40
40
  "pytest>=8.3.3",
41
41
  "ruff>=0.7.4",
42
42
  "jupyter>=1.1.1",
43
+ "matplotlib>=3.10.0",
44
+ "numpy>=2.2.0",
45
+ "jupyter-book>=1.0.3",
46
+ "ghp-import>=2.1.0",
43
47
  ]
@@ -14,6 +14,11 @@ import sys
14
14
  import polars as pl
15
15
 
16
16
 
17
+ class EventType(StrEnum):
18
+ PREMIER = "PremierDraft"
19
+ TRADITIONAL = "TradDraft"
20
+
21
+
17
22
  class DataDir(StrEnum):
18
23
  CACHE = "cache"
19
24
  EXTERNAL = "external"
@@ -52,6 +57,22 @@ def data_dir_path(cache_dir: DataDir) -> str:
52
57
  return data_dir
53
58
 
54
59
 
60
+ def external_set_path(set_code):
61
+ return os.path.join(data_dir_path(DataDir.EXTERNAL), set_code)
62
+
63
+
64
+ def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
65
+ if dataset_type == "set_context":
66
+ return os.path.join(external_set_path(set_code), f"{set_code}_context.parquet")
67
+
68
+ if dataset_type == "card":
69
+ return os.path.join(external_set_path(set_code), f"{set_code}_card.parquet")
70
+
71
+ return os.path.join(
72
+ external_set_path(set_code), f"{set_code}_{event_type}_{dataset_type}.parquet"
73
+ )
74
+
75
+
55
76
  def cache_dir_for_set(set_code: str) -> str:
56
77
  return os.path.join(data_dir_path(DataDir.CACHE), set_code)
57
78
 
@@ -68,10 +68,7 @@ _specs: dict[str, ColSpec] = {
68
68
  ColName.FORMAT_DAY: ColSpec(
69
69
  col_type=ColType.GROUP_BY,
70
70
  expr=lambda set_context: (
71
- pl.col(ColName.DRAFT_DATE)
72
- - pl.lit(set_context["release_time"])
73
- .str.to_datetime("%Y-%m-%d %H:%M:%S")
74
- .dt.date()
71
+ pl.col(ColName.DRAFT_DATE) - pl.lit(set_context["release_date"])
75
72
  ).dt.total_days()
76
73
  + 1,
77
74
  ),
@@ -16,8 +16,7 @@ from typing import Callable, TypeVar, Any
16
16
  import polars as pl
17
17
  from polars.exceptions import ColumnNotFoundError
18
18
 
19
- from spells.external import data_file_path
20
- import spells.cache
19
+ from spells import cache
21
20
  import spells.filter
22
21
  import spells.manifest
23
22
  from spells.columns import ColDef, ColSpec, get_specs
@@ -36,11 +35,11 @@ def _cache_key(args) -> str:
36
35
 
37
36
  @functools.lru_cache(maxsize=None)
38
37
  def get_names(set_code: str) -> list[str]:
39
- card_fp = data_file_path(set_code, View.CARD)
38
+ card_fp = cache.data_file_path(set_code, View.CARD)
40
39
  card_view = pl.read_parquet(card_fp)
41
40
  card_names_set = frozenset(card_view.get_column("name").to_list())
42
41
 
43
- draft_fp = data_file_path(set_code, View.DRAFT)
42
+ draft_fp = cache.data_file_path(set_code, View.DRAFT)
44
43
  draft_view = pl.scan_parquet(draft_fp)
45
44
  cols = draft_view.collect_schema().names()
46
45
 
@@ -78,7 +77,7 @@ def _get_card_context(
78
77
 
79
78
  columns = list(col_def_map.keys())
80
79
 
81
- fp = data_file_path(set_code, View.CARD)
80
+ fp = cache.data_file_path(set_code, View.CARD)
82
81
  card_df = pl.read_parquet(fp)
83
82
  select_rows = _view_select(
84
83
  card_df, frozenset(columns), col_def_map, is_agg_view=False
@@ -223,7 +222,7 @@ def _infer_dependencies(
223
222
  ):
224
223
  dependencies.add(split[0])
225
224
  found = True
226
- assert found, f"Could not locate column spec for root col {item}"
225
+ # fail silently here, so that columns can be passed in harmlessly
227
226
 
228
227
  return dependencies
229
228
 
@@ -231,23 +230,13 @@ def _infer_dependencies(
231
230
  def _get_set_context(
232
231
  set_code: str, set_context: pl.DataFrame | dict[str, Any] | None
233
232
  ) -> dict[str, Any]:
234
- context_fp = data_file_path(set_code, "context")
235
-
236
- report = functools.partial(
237
- spells.cache.spells_print,
238
- "report",
239
- f"Set context for {set_code} invalid, please investigate!",
240
- )
233
+ context_fp = cache.data_file_path(set_code, "context")
241
234
 
242
235
  context = {}
243
- if not os.path.isfile(context_fp):
244
- report()
245
- else:
236
+ if os.path.isfile(context_fp):
246
237
  context_df = pl.read_parquet(context_fp)
247
238
  if len(context_df) == 1:
248
239
  context.update(context_df.to_dicts()[0])
249
- else:
250
- report()
251
240
 
252
241
  if isinstance(set_context, pl.DataFrame):
253
242
  assert len(set_context != 1), "Invalid set context provided"
@@ -276,7 +265,10 @@ def _hydrate_col_defs(
276
265
  assert len(names) > 0, "there should be names"
277
266
  hydrated = {}
278
267
  for col, spec in specs.items():
279
- expr = _determine_expression(col, spec, names, card_context, set_context)
268
+ try:
269
+ expr = _determine_expression(col, spec, names, card_context, set_context)
270
+ except KeyError:
271
+ continue
280
272
  dependencies = _infer_dependencies(col, expr, specs, names)
281
273
 
282
274
  sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
@@ -355,13 +347,13 @@ def _fetch_or_cache(
355
347
  key = _cache_key(cache_args)
356
348
 
357
349
  if read_cache:
358
- if spells.cache.cache_exists(set_code, key):
359
- return spells.cache.read_cache(set_code, key)
350
+ if cache.cache_exists(set_code, key):
351
+ return cache.read_cache(set_code, key)
360
352
 
361
353
  df = calc_fn()
362
354
 
363
355
  if write_cache:
364
- spells.cache.write_cache(set_code, key, df)
356
+ cache.write_cache(set_code, key, df)
365
357
 
366
358
  return df
367
359
 
@@ -380,7 +372,7 @@ def _base_agg_df(
380
372
  for view, cols_for_view in m.view_cols.items():
381
373
  if view == View.CARD:
382
374
  continue
383
- df_path = data_file_path(set_code, view)
375
+ df_path = cache.data_file_path(set_code, view)
384
376
  base_view_df = pl.scan_parquet(df_path)
385
377
  base_df_prefilter = _view_select(
386
378
  base_view_df, cols_for_view, m.col_def_map, is_agg_view=False
@@ -411,14 +403,10 @@ def _base_agg_df(
411
403
  c for c in cols_for_view if m.col_def_map[c].col_type == ColType.NAME_SUM
412
404
  )
413
405
  for col in name_sum_cols:
414
- cdef = m.col_def_map[col]
415
- pattern = f"^{cdef.name}_"
416
- name_map = functools.partial(
417
- lambda patt, name: re.split(patt, name)[1], pattern
418
- )
406
+ names = get_names(set_code)
407
+ expr = tuple(pl.col(f"{col}_{name}").alias(name) for name in names)
419
408
 
420
- expr = pl.col(f"^{cdef.name}_.*$").name.map(name_map)
421
- pre_agg_df = base_df.select((expr,) + nonname_gb)
409
+ pre_agg_df = base_df.select(expr + nonname_gb)
422
410
 
423
411
  if nonname_gb:
424
412
  agg_df = pre_agg_df.group_by(nonname_gb).sum()
@@ -428,7 +416,7 @@ def _base_agg_df(
428
416
  index = nonname_gb if nonname_gb else None
429
417
  unpivoted = agg_df.unpivot(
430
418
  index=index,
431
- value_name=m.col_def_map[col].name,
419
+ value_name=col,
432
420
  variable_name=ColName.NAME,
433
421
  )
434
422
 
@@ -523,7 +511,7 @@ def summon(
523
511
 
524
512
  if View.CARD in m.view_cols:
525
513
  card_cols = m.view_cols[View.CARD].union({ColName.NAME})
526
- fp = data_file_path(code, View.CARD)
514
+ fp = cache.data_file_path(code, View.CARD)
527
515
  card_df = pl.read_parquet(fp)
528
516
  select_df = _view_select(
529
517
  card_df, card_cols, m.col_def_map, is_agg_view=False
@@ -578,7 +566,7 @@ def view_select(
578
566
 
579
567
  col_def_map = _hydrate_col_defs(set_code, specs, card_context, set_context)
580
568
 
581
- df_path = data_file_path(set_code, view)
569
+ df_path = cache.data_file_path(set_code, view)
582
570
  base_view_df = pl.scan_parquet(df_path)
583
571
 
584
572
  select_cols = frozenset(columns)
@@ -26,12 +26,15 @@ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
26
26
  .otherwise(None)
27
27
  ),
28
28
  ),
29
- f"pick_{attr}": ColSpec(
29
+ f"pick_{attr}_sum": ColSpec(
30
30
  col_type=ColType.PICK_SUM,
31
31
  expr=lambda name, card_context: pl.lit(None)
32
32
  if card_context[name][attr] is None or math.isnan(card_context[name][attr])
33
33
  else card_context[name][attr],
34
34
  ),
35
+ f"pick_{attr}": ColSpec(
36
+ col_type=ColType.AGG, expr=pl.col(f"pick_{attr}_sum") / pl.col("num_taken")
37
+ ),
35
38
  f"seen_{attr}_is_greatest": ColSpec(
36
39
  col_type=ColType.NAME_SUM,
37
40
  expr=lambda name: pl.col(f"seen_{attr}_{name}")
@@ -39,11 +42,13 @@ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
39
42
  ),
40
43
  f"seen_{attr}_greater": ColSpec(
41
44
  col_type=ColType.NAME_SUM,
42
- expr=lambda name: pl.col(f"seen_{attr}_{name}") > pl.col(f"pick_{attr}"),
45
+ expr=lambda name: pl.col(f"seen_{attr}_{name}")
46
+ > pl.col(f"pick_{attr}_sum"),
43
47
  ),
44
48
  f"seen_{attr}_less": ColSpec(
45
49
  col_type=ColType.NAME_SUM,
46
- expr=lambda name: pl.col(f"seen_{attr}_{name}") < pl.col(f"pick_{attr}"),
50
+ expr=lambda name: pl.col(f"seen_{attr}_{name}")
51
+ < pl.col(f"pick_{attr}_sum"),
47
52
  ),
48
53
  f"greatest_{attr}_seen": ColSpec(
49
54
  col_type=ColType.PICK_SUM,
@@ -79,11 +84,11 @@ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
79
84
  ),
80
85
  f"pick_{attr}_vs_least": ColSpec(
81
86
  col_type=ColType.PICK_SUM,
82
- expr=pl.col(f"pick_{attr}") - pl.col(f"least_{attr}_seen"),
87
+ expr=pl.col(f"pick_{attr}_sum") - pl.col(f"least_{attr}_seen"),
83
88
  ),
84
89
  f"pick_{attr}_vs_greatest": ColSpec(
85
90
  col_type=ColType.PICK_SUM,
86
- expr=pl.col(f"pick_{attr}") - pl.col(f"greatest_{attr}_seen"),
91
+ expr=pl.col(f"pick_{attr}_sum") - pl.col(f"greatest_{attr}_seen"),
87
92
  ),
88
93
  f"pick_{attr}_vs_least_mean": ColSpec(
89
94
  col_type=ColType.AGG,
@@ -95,7 +100,7 @@ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
95
100
  ),
96
101
  f"least_{attr}_taken": ColSpec(
97
102
  col_type=ColType.PICK_SUM,
98
- expr=pl.col(f"pick_{attr}") <= pl.col(f"least_{attr}_seen"),
103
+ expr=pl.col(f"pick_{attr}_sum") <= pl.col(f"least_{attr}_seen"),
99
104
  ),
100
105
  f"least_{attr}_taken_rate": ColSpec(
101
106
  col_type=ColType.AGG,
@@ -103,7 +108,7 @@ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
103
108
  ),
104
109
  f"greatest_{attr}_taken": ColSpec(
105
110
  col_type=ColType.PICK_SUM,
106
- expr=pl.col(f"pick_{attr}") >= pl.col(f"greatest_{attr}_seen"),
111
+ expr=pl.col(f"pick_{attr}_sum") >= pl.col(f"greatest_{attr}_seen"),
107
112
  ),
108
113
  f"greatest_{attr}_taken_rate": ColSpec(
109
114
  col_type=ColType.AGG,
@@ -111,7 +116,7 @@ def context_cols(attr, silent: bool = False) -> dict[str, ColSpec]:
111
116
  ),
112
117
  f"pick_{attr}_mean": ColSpec(
113
118
  col_type=ColType.AGG,
114
- expr=pl.col(f"pick_{attr}") / pl.col(ColName.NUM_TAKEN),
119
+ expr=pl.col(f"pick_{attr}_sum") / pl.col(ColName.NUM_TAKEN),
115
120
  ),
116
121
  }
117
122
 
@@ -19,8 +19,9 @@ from polars.exceptions import ComputeError
19
19
 
20
20
  from spells import cards
21
21
  from spells import cache
22
- from spells.enums import View
22
+ from spells.enums import View, ColName
23
23
  from spells.schema import schema
24
+ from spells.draft_data import summon
24
25
 
25
26
 
26
27
  DATASET_TEMPLATE = "{dataset_type}_data_public.{set_code}.{event_type}.csv.gz"
@@ -28,17 +29,10 @@ RESOURCE_TEMPLATE = (
28
29
  "https://17lands-public.s3.amazonaws.com/analysis_data/{dataset_type}_data/"
29
30
  )
30
31
 
31
-
32
32
  class FileFormat(StrEnum):
33
33
  CSV = "csv"
34
34
  PARQUET = "parquet"
35
35
 
36
-
37
- class EventType(StrEnum):
38
- PREMIER = "PremierDraft"
39
- TRADITIONAL = "TradDraft"
40
-
41
-
42
36
  # Fred Cirera via https://stackoverflow.com/questions/1094841/get-a-human-readable-version-of-a-file-size
43
37
  def sizeof_fmt(num, suffix="B"):
44
38
  for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
@@ -64,7 +58,7 @@ def cli() -> int:
64
58
  e.g. $ spells add OTJ
65
59
 
66
60
  refresh: Force download and overwrite of existing files (for new data drops, use sparingly!). Clear
67
- local cache.
61
+ local
68
62
 
69
63
  remove: Delete the [data home]/external/[set code] and [data home]/local/[set code] directories and their contents
70
64
 
@@ -115,7 +109,7 @@ def _refresh(set_code: str):
115
109
 
116
110
  def _remove(set_code: str):
117
111
  mode = "remove"
118
- dir_path = _external_set_path(set_code)
112
+ dir_path = cache.external_set_path(set_code)
119
113
  if os.path.isdir(dir_path):
120
114
  with os.scandir(dir_path) as set_dir:
121
115
  count = 0
@@ -135,7 +129,7 @@ def _remove(set_code: str):
135
129
  else:
136
130
  cache.spells_print(mode, f"No external cache found for set {set_code}")
137
131
 
138
- return cache.clear(set_code)
132
+ return cache.clean(set_code)
139
133
 
140
134
 
141
135
  def _info():
@@ -207,22 +201,6 @@ def _info():
207
201
  return 0
208
202
 
209
203
 
210
- def _external_set_path(set_code):
211
- return os.path.join(cache.data_dir_path(cache.DataDir.EXTERNAL), set_code)
212
-
213
-
214
- def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
215
- if dataset_type == "set_context":
216
- return os.path.join(_external_set_path(set_code), f"{set_code}_context.parquet")
217
-
218
- if dataset_type == "card":
219
- return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
220
-
221
- return os.path.join(
222
- _external_set_path(set_code), f"{set_code}_{event_type}_{dataset_type}.parquet"
223
- )
224
-
225
-
226
204
  def _process_zipped_file(gzip_path, target_path):
227
205
  csv_path = gzip_path[:-3]
228
206
  # if polars supports streaming from file obj, we can just stream straight
@@ -252,17 +230,17 @@ def _process_zipped_file(gzip_path, target_path):
252
230
  def download_data_set(
253
231
  set_code,
254
232
  dataset_type: View,
255
- event_type=EventType.PREMIER,
233
+ event_type=cache.EventType.PREMIER,
256
234
  force_download=False,
257
235
  clear_set_cache=True,
258
236
  ):
259
237
  mode = "refresh" if force_download else "add"
260
238
  cache.spells_print(mode, f"Downloading {dataset_type} dataset from 17Lands.com")
261
239
 
262
- if not os.path.isdir(set_dir := _external_set_path(set_code)):
240
+ if not os.path.isdir(set_dir := cache.external_set_path(set_code)):
263
241
  os.makedirs(set_dir)
264
242
 
265
- target_path = data_file_path(set_code, dataset_type)
243
+ target_path = cache.data_file_path(set_code, dataset_type)
266
244
 
267
245
  if os.path.isfile(target_path) and not force_download:
268
246
  cache.spells_print(
@@ -274,7 +252,7 @@ def download_data_set(
274
252
  dataset_file = DATASET_TEMPLATE.format(
275
253
  set_code=set_code, dataset_type=dataset_type, event_type=event_type
276
254
  )
277
- dataset_path = os.path.join(_external_set_path(set_code), dataset_file)
255
+ dataset_path = os.path.join(cache.external_set_path(set_code), dataset_file)
278
256
  wget.download(
279
257
  RESOURCE_TEMPLATE.format(dataset_type=dataset_type) + dataset_file,
280
258
  out=dataset_path,
@@ -287,7 +265,7 @@ def download_data_set(
287
265
  _process_zipped_file(dataset_path, target_path)
288
266
  cache.spells_print(mode, f"Wrote file {target_path}")
289
267
  if clear_set_cache:
290
- cache.clear(set_code)
268
+ cache.clean(set_code)
291
269
 
292
270
  return 0
293
271
 
@@ -302,7 +280,7 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
302
280
  cache.spells_print(
303
281
  mode, "Fetching card data from mtgjson.com and writing card file"
304
282
  )
305
- card_filepath = data_file_path(draft_set_code, View.CARD)
283
+ card_filepath = cache.data_file_path(draft_set_code, View.CARD)
306
284
  if os.path.isfile(card_filepath) and not force_download:
307
285
  cache.spells_print(
308
286
  mode,
@@ -310,7 +288,7 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
310
288
  )
311
289
  return 1
312
290
 
313
- draft_filepath = data_file_path(draft_set_code, View.DRAFT)
291
+ draft_filepath = cache.data_file_path(draft_set_code, View.DRAFT)
314
292
 
315
293
  if not os.path.isfile(draft_filepath):
316
294
  cache.spells_print(mode, f"Error: No draft file for set {draft_set_code}")
@@ -336,7 +314,7 @@ def write_card_file(draft_set_code: str, force_download=False) -> int:
336
314
  def get_set_context(set_code: str, force_download=False) -> int:
337
315
  mode = "refresh" if force_download else "add"
338
316
 
339
- context_fp = data_file_path(set_code, "context")
317
+ context_fp = cache.data_file_path(set_code, "context")
340
318
  cache.spells_print(mode, "Calculating set context")
341
319
  if os.path.isfile(context_fp) and not force_download:
342
320
  cache.spells_print(
@@ -345,15 +323,14 @@ def get_set_context(set_code: str, force_download=False) -> int:
345
323
  )
346
324
  return 1
347
325
 
348
- draft_fp = data_file_path(set_code, View.DRAFT)
349
- draft_view = pl.scan_parquet(draft_fp)
326
+ df = summon(set_code, columns=[ColName.NUM_DRAFTS], group_by=[ColName.DRAFT_DATE, ColName.PICK_NUM])
350
327
 
351
- context_df = draft_view.select(
328
+ context_df = df.filter(pl.col(ColName.NUM_DRAFTS) > 1000).select(
352
329
  [
353
- pl.max("pick_number").alias("picks_per_pack") + 1,
354
- pl.min("draft_time").alias("release_time"),
330
+ pl.col(ColName.DRAFT_DATE).min().alias("release_date"),
331
+ pl.col(ColName.PICK_NUM).max().alias("picks_per_pack"),
355
332
  ]
356
- ).collect()
333
+ )
357
334
 
358
335
  context_df.write_parquet(context_fp)
359
336
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes