spells-mtg 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spells-mtg might be problematic.

spells/external.py ADDED
@@ -0,0 +1,314 @@
+ """
+ download public data sets from 17Lands.com and generate a card
+ file containing card attributes using MTGJSON
+
+ cli tool `spells`
+ """
+
+ import functools
+ import gzip
+ import os
+ import re
+ import shutil
+ import sys
+ from enum import StrEnum
+
+ import wget
+ import polars as pl
+
+ from spells import cards
+ from spells import cache
+ from spells.enums import View
+ from spells.schema import schema
+
+
+ DATASET_TEMPLATE = "{dataset_type}_data_public.{set_code}.{event_type}.csv.gz"
+ RESOURCE_TEMPLATE = (
+     "https://17lands-public.s3.amazonaws.com/analysis_data/{dataset_type}_data/"
+ )
+
+
+ class FileFormat(StrEnum):
+     CSV = "csv"
+     PARQUET = "parquet"
+
+
+ class EventType(StrEnum):
+     PREMIER = "PremierDraft"
+     TRADITIONAL = "TradDraft"
+
+
+ # Fred Cirera via https://stackoverflow.com/questions/1094841/get-a-human-readable-version-of-a-file-size
+ def sizeof_fmt(num, suffix="B"):
+     for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
+         if abs(num) < 1024.0:
+             return f"{num:3.1f}{unit}{suffix}"
+         num /= 1024.0
+     return f"{num:.1f}Yi{suffix}"
+
+
+ def cli() -> int:
+     data_dir = cache.data_home()
+     cache.spells_print("spells", f"[data home]={data_dir}")
+     print()
+     usage = """spells [add|refresh|remove|clean] [set_code]
+ spells info
+
+ add: Download draft and game data files from 17Lands.com and a card file from MTGJSON.com, and save them to
+ [data home]/external/[set code] (use $SPELLS_DATA_HOME or $XDG_DATA_HOME to configure).
+ Does not overwrite existing files. If any files are downloaded, the existing local cache is cleared.
+ [set code] should be the capitalized official set code for the draft release.
+
+ e.g. $ spells add OTJ
+
+ refresh: Force download and overwrite of existing files (for new data drops; use sparingly!). Clears the
+ local cache.
+
+ remove: Delete the [data home]/external/[set code] and [data home]/local/[set code] directories and their contents.
+
+ clean: Delete the [data home]/local/[set code] data directory (your cache of aggregate parquet files).
+
+ info: No set code argument. Prints info on all external and local files.
+ """
+     print_usage = functools.partial(cache.spells_print, "usage", usage)
+
+     if len(sys.argv) < 2:
+         print_usage()
+         return 1
+
+     mode = sys.argv[1]
+
+     if mode == "info":
+         return _info()
+
+     if len(sys.argv) != 3:
+         print_usage()
+         return 1
+
+     match mode:
+         case "add":
+             return _add(sys.argv[2])
+         case "refresh":
+             return _refresh(sys.argv[2])
+         case "remove":
+             return _remove(sys.argv[2])
+         case "clean":
+             return cache.clear(sys.argv[2])
+         case _:
+             print_usage()
+             return 1
+
+
+ def _add(set_code: str, force_download=False):
+     download_data_set(set_code, View.DRAFT, force_download=force_download)
+     write_card_file(set_code, force_download=force_download)
+     download_data_set(set_code, View.GAME, force_download=force_download)
+     return 0
+
+
+ def _refresh(set_code: str):
+     return _add(set_code, force_download=True)
+
+
+ def _remove(set_code: str):
+     mode = "remove"
+     dir_path = _external_set_path(set_code)
+     if os.path.isdir(dir_path):
+         with os.scandir(dir_path) as set_dir:
+             count = 0
+             for entry in set_dir:
+                 if not entry.name.endswith(".parquet"):
+                     cache.spells_print(
+                         mode,
+                         f"Unexpected file {entry.name} found in external cache, please sort that out!",
+                     )
+                     return 1
+                 count += 1
+                 os.remove(entry)
+         cache.spells_print(
+             mode, f"Removed {count} files from external cache for set {set_code}"
+         )
+         os.rmdir(dir_path)
+     else:
+         cache.spells_print(mode, f"No external cache found for set {set_code}")
+
+     return cache.clear(set_code)
+
+
+ def _info():
+     mode = "info"
+     external_path = cache.data_dir_path(cache.DataDir.EXTERNAL)
+
+     suggest_add = set()
+     suggest_remove = set()
+     all_external = set()
+     if os.path.isdir(external_path):
+         cache.spells_print(mode, f"External archives found {external_path}")
+         with os.scandir(external_path) as ext_dir:
+             for entry in ext_dir:
+                 if entry.is_dir():
+                     all_external.add(entry.name)
+                     file_count = 0
+                     cache.spells_print(mode, f"Archive {entry.name} contents:")
+                     for item in os.scandir(entry):
+                         if not re.match(f"^{entry.name}_.*\\.parquet", item.name):
+                             print(
+                                 f"!!! imposter file {item.name}! Please sort that out"
+                             )
+                         print(f" {item.name} {sizeof_fmt(os.stat(item).st_size)}")
+                         file_count += 1
+                     if file_count < 3:
+                         suggest_add.add(entry.name)
+                     if file_count > 3:
+                         suggest_remove.add(entry.name)
+                 else:
+                     cache.spells_print(
+                         mode, f"Imposter file {entry.name}! Please sort that out"
+                     )
+
+     else:
+         cache.spells_print(mode, "No external archives found")
+
+     cache_path = cache.data_dir_path(cache.DataDir.CACHE)
+
+     if os.path.isdir(cache_path):
+         print()
+         cache.spells_print(mode, f"Local cache found {cache_path}")
+         with os.scandir(cache_path) as cache_dir:
+             for entry in cache_dir:
+                 if entry.name not in all_external:
+                     suggest_remove.add(entry.name)
+                 if entry.is_dir():
+                     cache.spells_print(mode, f"Cache {entry.name} contents:")
+                     parquet_num = 0
+                     parquet_size = 0
+                     for item in os.scandir(entry):
+                         if item.name.endswith(".parquet"):
+                             parquet_num += 1
+                             parquet_size += os.stat(item).st_size
+                         else:
+                             print(
+                                 f"!!! imposter file {item.name}! Please sort that out"
+                             )
+                     print(f" {parquet_num} cache files: {sizeof_fmt(parquet_size)}")
+     else:
+         print()
+         cache.spells_print(mode, "No local cache found")
+
+     print()
+     for name in suggest_add:
+         cache.spells_print(mode, f"Suggest `spells add {name}`")
+     for name in suggest_remove:
+         cache.spells_print(mode, f"Suggest `spells remove {name}`")
+
+     return 0
+
+
+ def _external_set_path(set_code):
+     return os.path.join(cache.data_dir_path(cache.DataDir.EXTERNAL), set_code)
+
+
+ def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
+     if dataset_type == "card":
+         return os.path.join(_external_set_path(set_code), f"{set_code}_card.parquet")
+
+     return os.path.join(
+         _external_set_path(set_code), f"{set_code}_{event_type}_{dataset_type}.parquet"
+     )
+
+
+ def _process_zipped_file(gzip_path, target_path):
+     csv_path = gzip_path[:-3]
+     # if polars supports streaming from file obj, we can just stream straight
+     # from urllib.Request through GzipFile to sink_parquet without intermediate files
+     with gzip.open(gzip_path, "rb") as f_in:
+         with open(csv_path, "wb") as f_out:
+             shutil.copyfileobj(f_in, f_out)  # type: ignore
+
+     os.remove(gzip_path)
+     df = pl.scan_csv(csv_path, schema=schema(csv_path))
+     df.sink_parquet(target_path)
+
+     os.remove(csv_path)
+
+
+ def download_data_set(
+     set_code,
+     dataset_type: View,
+     event_type=EventType.PREMIER,
+     force_download=False,
+     clear_set_cache=True,
+ ):
+     mode = "refresh" if force_download else "add"
+     cache.spells_print(mode, f"Downloading {dataset_type} dataset from 17Lands.com")
+
+     if not os.path.isdir(set_dir := _external_set_path(set_code)):
+         os.makedirs(set_dir)
+
+     target_path = data_file_path(set_code, dataset_type)
+
+     if os.path.isfile(target_path) and not force_download:
+         cache.spells_print(
+             mode,
+             f"File {target_path} already exists, use `spells refresh {set_code}` to overwrite",
+         )
+         return 1
+
+     dataset_file = DATASET_TEMPLATE.format(
+         set_code=set_code, dataset_type=dataset_type, event_type=event_type
+     )
+     dataset_path = os.path.join(_external_set_path(set_code), dataset_file)
+     wget.download(
+         RESOURCE_TEMPLATE.format(dataset_type=dataset_type) + dataset_file,
+         out=dataset_path,
+     )
+     print()
+
+     cache.spells_print(mode, "Unzipping and transforming to parquet...")
+     _process_zipped_file(dataset_path, target_path)
+     cache.spells_print(mode, f"File {target_path} written")
+     if clear_set_cache:
+         cache.clear(set_code)
+
+     return 0
+
+
+ def write_card_file(draft_set_code: str, force_download=False) -> int:
+     """
+     Write a parquet file containing basic information about draftable cards,
+     such as rarity, set symbol, color, mana cost, and type.
+     """
+     mode = "refresh" if force_download else "add"
+
+     cache.spells_print(
+         mode, "Fetching card data from mtgjson.com and writing card file"
+     )
+     card_filepath = data_file_path(draft_set_code, View.CARD)
+     if os.path.isfile(card_filepath) and not force_download:
+         cache.spells_print(
+             mode,
+             f"File {card_filepath} already exists, use `spells refresh {draft_set_code}` to overwrite",
+         )
+         return 1
+
+     draft_filepath = data_file_path(draft_set_code, View.DRAFT)
+
+     if not os.path.isfile(draft_filepath):
+         cache.spells_print(mode, f"Error: No draft file for set {draft_set_code}")
+         return 1
+
+     columns = pl.scan_parquet(draft_filepath).collect_schema().names()
+
+     pattern = "^pack_card_"
+     names = [
+         re.split(pattern, name)[1]
+         for name in columns
+         if re.search(pattern, name) is not None
+     ]
+
+     card_df = cards.card_df(draft_set_code, names)
+
+     card_df.write_parquet(card_filepath)
+
+     cache.spells_print(mode, f"Wrote file {card_filepath}")
+     return 0
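
Taken together, external.py maintains three parquet files per set (draft, game, card) under [data home]/external/[set code]. The same flow can be driven from Python rather than the CLI; a minimal sketch, assuming the package is installed, network access to both services, and that View.DRAFT and View.GAME stringify to "draft" and "game" (the enums module is not shown in this diff):

# Hypothetical driver mirroring `spells add OTJ` (not part of the package).
from spells.enums import View
from spells.external import data_file_path, download_data_set, write_card_file

set_code = "OTJ"
download_data_set(set_code, View.DRAFT)  # 17Lands draft csv.gz -> parquet
write_card_file(set_code)                # MTGJSON card attrs -> parquet (needs the draft file)
download_data_set(set_code, View.GAME)   # 17Lands game csv.gz -> parquet

# Assuming View.DRAFT stringifies to "draft", this prints something like
# [data home]/external/OTJ/OTJ_PremierDraft_draft.parquet
print(data_file_path(set_code, View.DRAFT))
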
spells/filter.py ADDED
@@ -0,0 +1,137 @@
+ """
+ spells.filter (take care not to shadow the builtin `filter` on import) provides
+ from_spec, which takes a dict-specified filter and returns a Filter object that
+ records the dependent column names and contains a filter expression for use in polars.
+ """
+
+ from dataclasses import dataclass
+ import functools
+
+ import polars as pl
+
+
+ @dataclass(frozen=True)
+ class Filter:
+     expr: pl.Expr
+     lhs: frozenset[str]
+
+
+ def __negate(f: Filter) -> Filter:
+     return Filter(expr=~f.expr, lhs=f.lhs)
+
+
+ def _or(f1: Filter, f2: Filter) -> Filter:
+     return Filter(expr=f1.expr | f2.expr, lhs=f1.lhs.union(f2.lhs))
+
+
+ def _and(f1: Filter, f2: Filter) -> Filter:
+     return Filter(expr=f1.expr & f2.expr, lhs=f1.lhs.union(f2.lhs))
+
+
+ def _filter_eq(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs) == rhs, lhs=frozenset({lhs}))
+
+
+ def _filter_leq(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs) <= rhs, lhs=frozenset({lhs}))
+
+
+ def _filter_geq(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs) >= rhs, lhs=frozenset({lhs}))
+
+
+ def _filter_in(lhs: str, rhs: str) -> Filter:
+     return Filter(expr=pl.col(lhs).is_in(rhs), lhs=frozenset({lhs}))
+
+
+ def _filter_gt(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_leq(lhs, rhs))
+
+
+ def _filter_nin(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_in(lhs, rhs))
+
+
+ def _filter_neq(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_eq(lhs, rhs))
+
+
+ def _filter_lt(lhs: str, rhs: str) -> Filter:
+     return __negate(_filter_geq(lhs, rhs))
+
+
+ filter_fn_map = {
+     "=": _filter_eq,
+     "<=": _filter_leq,
+     ">=": _filter_geq,
+     "in": _filter_in,
+     ">": _filter_gt,
+     "nin": _filter_nin,
+     "!=": _filter_neq,
+     "<": _filter_lt,
+ }
+
+
+ def _base(lhs, rhs, op="=") -> Filter:
+     return filter_fn_map[op](lhs, rhs)
+
+
+ def _all_of(filters) -> Filter:
+     return functools.reduce(_and, filters)
+
+
+ def _any_of(filters) -> Filter:
+     return functools.reduce(_or, filters)
+
+
+ def _negate(fil) -> Filter:
+     return __negate(fil)
+
+
+ BUILDER_MAP = {"$and": _all_of, "$or": _any_of, "$not": _negate}
+
+
+ def from_spec(filter_spec: dict | None) -> Filter | None:
+     """
+     filter_spec is a nested dictionary with the leaf level consisting of specs of the form
+     {'lhs': 'a', 'rhs': [1, 2, 3], 'op': 'in'}
+     or
+     {'a': 5}
+
+     higher-level keys can be `$and`, `$or`, or `$not`
+
+     e.g.
+
+     {
+         '$and': [
+             {'$not': {'A': 5}},
+             {'lhs': 'B', 'rhs': [1, 2], 'op': 'in'}
+         ]
+     }
+
+     an empty input returns None, which represents a trivial filter
+     """
+     if not filter_spec:
+         return None
+
+     for filter_type, filter_fn in BUILDER_MAP.items():
+         if filter_value := filter_spec.get(filter_type):
+             assert (
+                 len(filter_spec) == 1
+             ), f"Operator {filter_type} incompatible with additional keys."
+             if isinstance(filter_value, list):
+                 arg = tuple(map(from_spec, filter_value))
+             else:
+                 arg = from_spec(filter_value)
+             return filter_fn(arg)
+
+     if len(filter_spec) == 1:
+         for lhs, rhs in filter_spec.items():
+             return _base(lhs, rhs)
+
+     assert "lhs" in filter_spec and "rhs" in filter_spec
+     return _base(filter_spec["lhs"], filter_spec["rhs"], filter_spec.get("op", "="))
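
As a usage sketch (not part of the package), the spec below combines the shorthand and explicit leaf forms under $and/$not; the column names here are hypothetical:

import polars as pl

from spells.filter import from_spec

fil = from_spec({
    "$and": [
        {"$not": {"rank": "bronze"}},  # shorthand leaf {'rank': 'bronze'}, negated
        {"lhs": "user_n_games_bucket", "rhs": [50, 100], "op": "in"},
    ]
})

assert fil is not None
print(fil.lhs)  # frozenset({'rank', 'user_n_games_bucket'}): the dependent columns
df = pl.DataFrame({"rank": ["bronze", "gold"], "user_n_games_bucket": [50, 50]})
print(df.filter(fil.expr))  # keeps only the 'gold' row
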
spells/manifest.py ADDED
@@ -0,0 +1,184 @@
+ from dataclasses import dataclass
+
+ import spells.columns
+ import spells.filter
+ from spells.enums import View, ColName, ColType
+ from spells.columns import ColumnDefinition
+
+
+ @dataclass(frozen=True)
+ class Manifest:
+     columns: tuple[str, ...]
+     col_def_map: dict[str, ColumnDefinition]
+     base_view_group_by: frozenset[str]
+     view_cols: dict[View, frozenset[str]]
+     group_by: tuple[str, ...]
+     filter: spells.filter.Filter | None
+
+     def __post_init__(self):
+         # no filtering on name
+         if self.filter is not None:
+             assert (
+                 "name" not in self.filter.lhs
+             ), "Don't filter on 'name', include 'name' in groupbys and filter the final result instead"
+
+         # every column is in col_def_map and is summonable
+         for col in self.columns:
+             assert col in self.col_def_map, f"Undefined column {col}!"
+             assert (
+                 self.col_def_map[col].col_type != ColType.GROUP_BY
+             ), f"group_by column {col} must be passed as group_by"
+             assert (
+                 self.col_def_map[col].col_type != ColType.FILTER_ONLY
+             ), f"filter_only column {col} cannot be summoned"
+
+         # base_view_group_by columns have col_type GROUP_BY
+         for col in self.base_view_group_by:
+             assert (
+                 self.col_def_map[col].col_type == ColType.GROUP_BY
+             ), f"Invalid groupby {col}!"
+
+         for view, cols_for_view in self.view_cols.items():
+             # cols_for_view are actually in view
+             for col in cols_for_view:
+                 assert (
+                     view in self.col_def_map[col].views
+                 ), f"View cols generated incorrectly, {col} not in view {view}"
+                 # GAME_SUM cols only in the game view, and never with a NAME groupby
+                 assert self.col_def_map[col].col_type != ColType.GAME_SUM or (
+                     view == View.GAME and ColName.NAME not in self.base_view_group_by
+                 ), f"Invalid manifest for GAME_SUM column {col}"
+             if view != View.CARD:
+                 for col in self.base_view_group_by:
+                     # base_view_group_by columns exist in the view
+                     assert (
+                         col == ColName.NAME or view in self.col_def_map[col].views
+                     ), f"Groupby {col} not in view {view}!"
+                     # base_view_group_by columns appear in view_cols for the view
+                     assert (
+                         col == ColName.NAME or col in cols_for_view
+                     ), f"Groupby {col} not in view_cols[view]"
+                 # filter cols are present in both base views
+                 if self.filter is not None:
+                     for col in self.filter.lhs:
+                         assert (
+                             col in cols_for_view
+                         ), f"filter col {col} not found in base view"
+
+             if view == View.CARD:
+                 # name must be in the groupbys to join card attrs
+                 assert (
+                     ColName.NAME in self.base_view_group_by
+                 ), "base views must group by name to join card attrs"
+
+     def test_str(self):
+         result = "{\n" + 2 * " " + "columns:\n"
+         for c in sorted(self.columns):
+             result += 4 * " " + c + "\n"
+         result += 2 * " " + "base_view_group_by:\n"
+         for c in sorted(self.base_view_group_by):
+             result += 4 * " " + c + "\n"
+         result += 2 * " " + "view_cols:\n"
+         for v, view_cols in sorted(self.view_cols.items()):
+             result += 4 * " " + v + ":\n"
+             for c in sorted(view_cols):
+                 result += 6 * " " + c + "\n"
+         result += 2 * " " + "group_by:\n"
+         for c in sorted(self.group_by):
+             result += 4 * " " + c + "\n"
+         result += "}\n"
+
+         return result
+
+
+ def _resolve_view_cols(
+     col_set: frozenset[str],
+     col_def_map: dict[str, ColumnDefinition],
+ ) -> dict[View, frozenset[str]]:
+     """
+     For each view ('game', 'draft', and 'card'), return the columns
+     that must be present at the aggregation step. 'name' need not be
+     included, and 'pick' will be added if needed.
+
+     Dependencies within base views will be resolved by `col_df`.
+     """
+     unresolved_cols = col_set
+     view_resolution = {}
+
+     iter_num = 0
+     while unresolved_cols and iter_num < 100:
+         iter_num += 1
+         next_cols = frozenset()
+         for col in unresolved_cols:
+             cdef = col_def_map[col]
+             if cdef.col_type == ColType.PICK_SUM:
+                 view_resolution[View.DRAFT] = view_resolution.get(
+                     View.DRAFT, frozenset()
+                 ).union({ColName.PICK})
+             if cdef.views:
+                 for view in cdef.views:
+                     view_resolution[view] = view_resolution.get(
+                         view, frozenset()
+                     ).union({col})
+             else:
+                 if cdef.dependencies is None:
+                     raise ValueError(
+                         f"Invalid column def: {col} has neither views nor dependencies!"
+                     )
+                 for dep in cdef.dependencies:
+                     next_cols = next_cols.union({dep})
+         unresolved_cols = next_cols
+
+     if iter_num >= 100:
+         raise ValueError("broken dependency chain in column spec, loop probable")
+
+     return view_resolution
+
+
+ def create(
+     col_def_map: dict[str, ColumnDefinition],
+     columns: list[str] | None = None,
+     group_by: list[str] | None = None,
+     filter_spec: dict | None = None,
+ ):
+     gbs = (ColName.NAME,) if group_by is None else tuple(group_by)
+     if columns is None:
+         cols = tuple(spells.columns.default_columns)
+         if ColName.NAME not in gbs:
+             cols = tuple(c for c in cols if c not in [ColName.COLOR, ColName.RARITY])
+     else:
+         cols = tuple(columns)
+
+     base_view_group_by = frozenset()
+     for col in gbs:
+         cdef = col_def_map[col]
+         if cdef.col_type == ColType.GROUP_BY:
+             base_view_group_by = base_view_group_by.union({col})
+         elif cdef.col_type == ColType.CARD_ATTR:
+             base_view_group_by = base_view_group_by.union({ColName.NAME})
+
+     m_filter = spells.filter.from_spec(filter_spec)
+
+     col_set = frozenset(cols)
+     col_set = col_set.union(frozenset(gbs) - {ColName.NAME})
+     if m_filter is not None:
+         col_set = col_set.union(m_filter.lhs)
+
+     view_cols = _resolve_view_cols(col_set, col_def_map)
+
+     needed_views = frozenset()
+     for view, cols_for_view in view_cols.items():
+         for col in cols_for_view:
+             if col_def_map[col].views == (view,):  # only found in this view
+                 needed_views = needed_views.union({view})
+
+     view_cols = {v: view_cols[v] for v in needed_views}
+
+     return Manifest(
+         columns=cols,
+         col_def_map=col_def_map,
+         base_view_group_by=base_view_group_by,
+         view_cols=view_cols,
+         group_by=gbs,
+         filter=m_filter,
+     )
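
To make the dependency walk in _resolve_view_cols concrete, here is a toy sketch (not part of the package): the column names are hypothetical, the module-private helper is imported directly for demonstration, and SimpleNamespace stands in for ColumnDefinition, since only col_type, views, and dependencies are read here (col_type is compared only against ColType.PICK_SUM, so a placeholder suffices for the derived column):

from types import SimpleNamespace

from spells.enums import ColName, ColType, View
from spells.manifest import _resolve_view_cols

col_def_map = {
    # a pick-level column that lives in the draft view
    "num_seen": SimpleNamespace(
        col_type=ColType.PICK_SUM, views=(View.DRAFT,), dependencies=None
    ),
    # a game-level column that lives in the game view
    "num_gp": SimpleNamespace(
        col_type=ColType.GAME_SUM, views=(View.GAME,), dependencies=None
    ),
    # a derived column: no views, so it resolves through its dependencies
    "gp_wr": SimpleNamespace(col_type=None, views=(), dependencies=["num_gp"]),
}

# num_seen forces ColName.PICK into the draft view; gp_wr resolves to num_gp in
# the game view. Roughly:
# {View.DRAFT: frozenset({ColName.PICK, 'num_seen'}), View.GAME: frozenset({'num_gp'})}
print(_resolve_view_cols(frozenset({"num_seen", "gp_wr"}), col_def_map))

In create, this same resolution runs over the requested columns, group-bys, and filter dependencies, after which views contributing no exclusive columns are pruned.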