spells-mtg 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

spells/draft_data.py ADDED
@@ -0,0 +1,300 @@
1
+ """
2
+ this is where calculations are performed on the 17Lands public data sets and
3
+ aggregate calculations are returned.
4
+
5
+ Aggregate dataframes containing raw counts are cached in the local file system
6
+ for performance.
7
+ """
8
+
9
+ import datetime
10
+ import functools
11
+ import hashlib
12
+ import re
13
+ from typing import Callable, TypeVar
14
+
15
+ import polars as pl
16
+
17
+ from spells.external import data_file_path
18
+ import spells.cache
19
+ import spells.filter
20
+ import spells.manifest
21
+ from spells.columns import ColumnDefinition, ColumnSpec
22
+ from spells.enums import View, ColName, ColType
23
+
24
+
25
+ DF = TypeVar("DF", pl.LazyFrame, pl.DataFrame)
26
+
27
+
28
def _cache_key(args) -> str:
    """
    Build a cache key from ``args``.

    The key is the MD5 hex digest of ``str(args)`` encoded as UTF-8. It is
    derived from the string form of the arguments at call time, so mutating
    a mutable argument afterwards yields a different key.
    """
    encoded = str(args).encode("utf-8")
    digest = hashlib.md5(encoded)
    return digest.hexdigest()
33
+
34
+
35
@functools.lru_cache(maxsize=None)
def _get_names(set_code: str) -> tuple[str, ...]:
    """
    Return the card names for ``set_code``, in draft-file column order.

    Names are taken from the draft file's ``pack_card_<name>`` columns and
    checked against the ``name`` column of the card file; an AssertionError
    is raised if the two sets of names differ. Results are memoized per
    ``set_code`` for the lifetime of the process.
    """
    card_frame = pl.read_parquet(data_file_path(set_code, View.CARD))
    names_in_card_file = frozenset(card_frame.get_column("name").to_list())

    # Only the schema of the draft file is needed, so scan it lazily.
    draft_schema = pl.scan_parquet(
        data_file_path(set_code, View.DRAFT)
    ).collect_schema()

    pack_prefix = "pack_card_"
    skip = len(pack_prefix)
    names = tuple(
        column[skip:]
        for column in draft_schema.names()
        if column.startswith(pack_prefix)
    )

    assert (
        frozenset(names) == names_in_card_file
    ), "names mismatch between card and draft file"
    return names
53
+
54
+
55
def _hydrate_col_defs(set_code: str, col_spec_map: dict[str, ColumnSpec]):
    """
    Convert column specs into concrete ``ColumnDefinition`` objects for a set.

    For every spec an expression is resolved as follows:

    * ``NAME_SUM`` with an ``exprMap``: one expression per card name, each
      aliased ``<spec.name>_<card name>``.
    * a spec with ``expr``: that expression aliased to ``spec.name``.
    * ``NAME_SUM`` without either: plain column references
      ``<spec.name>_<card name>``, one per card name.
    * anything else: a plain column reference to ``spec.name``.

    Each definition also carries a signature string built from the name,
    column type, serialized expression, views and dependencies; ``summon``
    folds these signatures into its cache key.

    Returns a dict with the same keys as ``col_spec_map``.
    """
    names = _get_names(set_code)
    assert len(names) > 0, "there should be names"
    hydrated = {}
    for key, spec in col_spec_map.items():
        if spec.col_type == ColType.NAME_SUM and spec.exprMap is not None:
            expr = tuple(
                spec.exprMap(name).alias(f"{spec.name}_{name}") for name in names
            )
        elif spec.expr is not None:
            expr = spec.expr.alias(spec.name)
        elif spec.col_type == ColType.NAME_SUM:
            expr = tuple(pl.col(f"{spec.name}_{name}") for name in names)
        else:
            expr = pl.col(spec.name)

        # For name-mapped columns only the first per-card expression is used
        # to represent the whole family in the signature.
        sig_expr = expr if isinstance(expr, pl.Expr) else expr[0]
        try:
            expr_sig = sig_expr.meta.serialize(
                format="json"
            )  # not compatible with renaming
        except pl.exceptions.ComputeError:
            if spec.version is not None:
                expr_sig = spec.name + spec.version
            else:
                # No way to identify the expression: use the current
                # timestamp so the signature never matches an earlier one.
                # (The method must be *called*; stringifying the bound
                # method itself yields a constant.)
                expr_sig = str(datetime.datetime.now())

        dependencies = tuple(spec.dependencies or ())
        signature = str(
            (
                spec.name,
                spec.col_type.value,
                expr_sig,
                tuple(view.value for view in spec.views),
                dependencies,
            )
        )
        hydrated[key] = ColumnDefinition(
            name=spec.name,
            col_type=spec.col_type,
            views=spec.views,
            expr=expr,
            dependencies=dependencies,
            signature=signature,
        )
    return hydrated
109
+
110
+
111
def _view_select(
    df: DF,
    view_cols: frozenset[str],
    col_def_map: dict[str, ColumnDefinition],
    is_agg_view: bool,
) -> DF:
    """
    Select ``view_cols`` from ``df``, first materializing whatever they need.

    For each requested column the columns it is built from are collected:

    * aggregated view, ``AGG`` column: its dependencies, selecting its
      expression;
    * aggregated view, any other column: the column itself, selected by name;
    * base view: its dependencies if it has any, otherwise the column itself,
      selecting its expression (or every expression when it is a tuple).

    If the collected set differs from ``view_cols`` the function recurses on
    that set first, so prerequisites exist before the final ``select``.
    """
    required = frozenset()
    selection = []
    for col in view_cols:
        cdef = col_def_map[col]
        if is_agg_view:
            if cdef.col_type == ColType.AGG:
                needs, picks = cdef.dependencies, [cdef.expr]
            else:
                needs, picks = (cdef.name,), [cdef.name]
        else:
            needs = cdef.dependencies if cdef.dependencies else (cdef.name,)
            picks = cdef.expr if isinstance(cdef.expr, tuple) else [cdef.expr]
        required = required.union(needs)
        selection.extend(picks)

    if required != view_cols:
        df = _view_select(df, required, col_def_map, is_agg_view)

    return df.select(selection)
142
+
143
+
144
def _fetch_or_cache(
    calc_fn: Callable,
    set_code: str,
    cache_args,
    read_cache: bool = True,
    write_cache: bool = True,
):
    """
    Return the cached result for ``cache_args`` or compute it with ``calc_fn``.

    The cache key is derived from ``cache_args``. When ``read_cache`` is true
    and an entry exists for ``set_code`` and that key, it is returned without
    calling ``calc_fn``. Otherwise ``calc_fn()`` is evaluated and, when
    ``write_cache`` is true, its result is stored before being returned.
    """
    key = _cache_key(cache_args)

    if read_cache and spells.cache.cache_exists(set_code, key):
        return spells.cache.read_cache(set_code, key)

    result = calc_fn()
    if write_cache:
        spells.cache.write_cache(set_code, key, result)
    return result
163
+
164
+
165
def _base_agg_df(
    set_code: str,
    m: spells.manifest.Manifest,
    use_streaming: bool = False,
) -> pl.DataFrame:
    """
    Compute the grouped raw sums for a manifest from the base data files.

    Every view in ``m.view_cols`` except the card view is scanned, reduced to
    the manifest's columns, and filtered with ``m.filter`` when one is set.
    Two kinds of partial result are produced per view:

    * ``PICK_SUM`` / ``GAME_SUM`` columns are summed per ``base_view_group_by``
      group (the ``pick`` column stands in for the card name when grouping by
      name).
    * each ``NAME_SUM`` column family (``<col>_<card name>``) is summed per
      non-name group and unpivoted into a ``name`` column plus one value
      column; when the manifest does not group by name the per-card rows are
      summed away again.

    All partial results are full-joined on the group-by columns.

    ``use_streaming`` is forwarded to every ``collect`` call. ``functools.reduce``
    raises ``TypeError`` if no partial result was produced.
    """
    join_dfs = []
    group_by = m.base_view_group_by

    is_name_gb = ColName.NAME in group_by
    nonname_gb = tuple(gb for gb in group_by if gb != ColName.NAME)

    for view, cols_for_view in m.view_cols.items():
        if view == View.CARD:
            # Card attributes are not aggregated here.
            continue
        df_path = data_file_path(set_code, view)
        base_view_df = pl.scan_parquet(df_path)
        base_df_prefilter = _view_select(
            base_view_df, cols_for_view, m.col_def_map, is_agg_view=False
        )

        if m.filter is not None:
            base_df = base_df_prefilter.filter(m.filter.expr)
        else:
            base_df = base_df_prefilter

        sum_cols = tuple(
            c
            for c in cols_for_view
            if m.col_def_map[c].col_type in (ColType.PICK_SUM, ColType.GAME_SUM)
        )
        if sum_cols:
            # manifest will verify that GAME_SUM manifests do not use NAME grouping
            name_col_tuple = (
                (pl.col(ColName.PICK).alias(ColName.NAME),) if is_name_gb else ()
            )

            sum_col_df = base_df.select(nonname_gb + name_col_tuple + sum_cols)
            join_dfs.append(
                sum_col_df.group_by(group_by).sum().collect(streaming=use_streaming)
            )

        name_sum_cols = tuple(
            c for c in cols_for_view if m.col_def_map[c].col_type == ColType.NAME_SUM
        )
        for col in name_sum_cols:
            cdef = m.col_def_map[col]
            prefix = f"{cdef.name}_"

            # Rename "<col>_<card name>" columns to "<card name>". The prefix
            # is bound as a default so the renamer does not depend on the
            # loop variable.
            expr = pl.col(f"^{cdef.name}_.*$").name.map(
                lambda name, prefix=prefix: name.removeprefix(prefix)
            )
            pre_agg_df = base_df.select((expr,) + nonname_gb)

            if nonname_gb:
                agg_df = pre_agg_df.group_by(nonname_gb).sum()
            else:
                agg_df = pre_agg_df.sum()

            # One row per (group, card name) with the summed value.
            index = nonname_gb if nonname_gb else None
            unpivoted = agg_df.unpivot(
                index=index,
                value_name=cdef.name,
                variable_name=ColName.NAME,
            )

            if not is_name_gb:
                df = (
                    unpivoted.drop("name")
                    .group_by(nonname_gb)
                    .sum()
                    .collect(streaming=use_streaming)
                )
            else:
                df = unpivoted.collect(streaming=use_streaming)

            join_dfs.append(df)

    # how="full" is the current spelling of the deprecated how="outer".
    return functools.reduce(
        lambda prev, curr: prev.join(curr, on=group_by, how="full", coalesce=True),
        join_dfs,
    )
247
+
248
+
249
def summon(
    set_code: str,
    columns: list[str] | None = None,
    group_by: list[str] | None = None,
    filter_spec: dict | None = None,
    extensions: list[ColumnSpec] | None = None,
    use_streaming: bool = False,
    read_cache: bool = True,
    write_cache: bool = True,
) -> pl.DataFrame:
    """
    Return an aggregated dataframe for ``set_code``.

    Args:
        set_code: set whose data files are read.
        columns: columns to return; passed to ``spells.manifest.create``.
        group_by: grouping columns; passed to ``spells.manifest.create``.
        filter_spec: filter description; passed to ``spells.manifest.create``
            and included in the cache key.
        extensions: extra column specs; each is registered under its
            ``name`` and replaces a built-in spec of the same name.
        use_streaming: forwarded to the polars ``collect`` calls of the base
            aggregation.
        read_cache: reuse a cached base aggregation when one exists.
        write_cache: store the base aggregation after computing it.

    Returns:
        A dataframe with the manifest's group-by columns followed by the
        requested columns, sorted by the group-by columns.
    """
    col_spec_map = dict(spells.columns.col_spec_map)
    if extensions is not None:
        for spec in extensions:
            col_spec_map[spec.name] = spec

    col_def_map = _hydrate_col_defs(set_code, col_spec_map)
    m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)

    # The cache key covers everything that influences the base aggregation.
    cache_args = (
        set_code,
        sorted(m.view_cols.get(View.DRAFT, set())),
        sorted(m.view_cols.get(View.GAME, set())),
        sorted(c.signature or "" for c in m.col_def_map.values()),
        sorted(m.base_view_group_by),
        filter_spec,
    )
    calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
    agg_df = _fetch_or_cache(
        calc_fn,
        set_code,
        cache_args,
        read_cache=read_cache,
        write_cache=write_cache,
    )

    if View.CARD in m.view_cols:
        card_cols = m.view_cols[View.CARD].union({ColName.NAME})
        fp = data_file_path(set_code, View.CARD)
        card_df = pl.read_parquet(fp)
        select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)

        # how="full" is the current spelling of the deprecated how="outer".
        agg_df = agg_df.join(select_df, on="name", how="full", coalesce=True)
        if ColName.NAME not in m.group_by:
            # Card attributes were only needed for grouping; collapse the
            # per-card rows into the requested groups.
            agg_df = agg_df.group_by(m.group_by).sum()

    ret_cols = m.group_by + m.columns
    ret_df = (
        _view_select(agg_df, frozenset(ret_cols), m.col_def_map, is_agg_view=True)
        .select(ret_cols)
        .sort(m.group_by)
    )

    return ret_df
spells/enums.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ enums
3
+ """
4
+
5
+ from enum import StrEnum
6
+
7
+
8
+ class View(StrEnum):
9
+ GAME = "game"
10
+ DRAFT = "draft"
11
+ CARD = "card"
12
+
13
+
14
+ class ColType(StrEnum):
15
+ FILTER_ONLY = "filter_only"
16
+ GROUP_BY = "group_by"
17
+ PICK_SUM = "pick_sum"
18
+ GAME_SUM = "game_sum"
19
+ NAME_SUM = "name_sum"
20
+ AGG = "agg"
21
+ CARD_ATTR = "card_attr"
22
+
23
+
24
+ class ColName(StrEnum):
25
+ """
26
+ A list of all available columns, including built-in extensions.
27
+
28
+ "Name-mapped" columns like "deck_<card name>" are identified by the prefix only.
29
+ Those columns can be referenced simply as e.g. "deck" in formulas for the post-agg stage.
30
+
31
+ The definitions of the columns and how they may be used is defined in `column_defs`
32
+ """
33
+
34
+ # shared
35
+ NAME = "name" # special column for card name index
36
+ EXPANSION = "expansion"
37
+ EVENT_TYPE = "event_type"
38
+ DRAFT_ID = "draft_id"
39
+ DRAFT_TIME = "draft_time" # modified, cast to time
40
+ DRAFT_DATE = "draft_date"
41
+ DRAFT_DAY_OF_WEEK = "draft_day_of_week"
42
+ DRAFT_HOUR = "draft_hour"
43
+ DRAFT_WEEK = "draft_week"
44
+ RANK = "rank"
45
+ USER_N_GAMES_BUCKET = "user_n_games_bucket"
46
+ USER_GAME_WIN_RATE_BUCKET = "user_game_win_rate_bucket"
47
+ PLAYER_COHORT = "player_cohort"
48
+ # draft
49
+ EVENT_MATCH_WINS = "event_match_wins"
50
+ EVENT_MATCH_WINS_SUM = "event_match_wins_sum"
51
+ EVENT_MATCH_LOSSES = "event_match_losses"
52
+ EVENT_MATCH_LOSSES_SUM = "event_match_losses_sum"
53
+ EVENT_MATCHES = "event_matches"
54
+ EVENT_MATCHES_SUM = "event_matches_sum"
55
+ IS_TROPHY = "is_trophy"
56
+ IS_TROPHY_SUM = "is_trophy_sum"
57
+ PACK_NUMBER = "pack_number"
58
+ PACK_NUM = "pack_num" # pack_number plus 1
59
+ PICK_NUMBER = "pick_number"
60
+ PICK_NUM = "pick_num" # pick_number plus 1
61
+ TAKEN_AT = "taken_at"
62
+ NUM_TAKEN = "num_taken"
63
+ PICK = "pick"
64
+ PICK_MAINDECK_RATE = "pick_maindeck_rate"
65
+ PICK_SIDEBOARD_IN_RATE = "pick_sideboard_in_rate"
66
+ PACK_CARD = "pack_card"
67
+ LAST_SEEN = "last_seen"
68
+ NUM_SEEN = "num_seen"
69
+ POOL = "pool"
70
+ # game
71
+ GAME_TIME = "game_time"
72
+ GAME_DATE = "game_date"
73
+ GAME_DAY_OF_WEEK = "game_day_of_week"
74
+ GAME_HOUR = "game_hour"
75
+ GAME_WEEK = "game_week"
76
+ BUILD_INDEX = "build_index"
77
+ MATCH_NUMBER = "match_number"
78
+ GAME_NUMBER = "game_number"
79
+ NUM_EVENTS = "num_events"
80
+ NUM_MATCHES = "num_matches"
81
+ NUM_GAMES = "num_games"
82
+ OPP_RANK = "opp_rank" # not populated for recent sets
83
+ MAIN_COLORS = "main_colors"
84
+ NUM_COLORS = "num_colors"
85
+ SPLASH_COLORS = "splash_colors"
86
+ HAS_SPLASH = "has_splash"
87
+ ON_PLAY = "on_play"
88
+ NUM_ON_PLAY = "num_on_play"
89
+ NUM_MULLIGANS = "num_mulligans"
90
+ NUM_MULLIGANS_SUM = "num_mulligans_sum"
91
+ OPP_NUM_MULLIGANS = "opp_num_mulligans"
92
+ OPP_NUM_MULLIGANS_SUM = "opp_num_mulligans_sum"
93
+ OPP_COLORS = "opp_colors"
94
+ NUM_TURNS = "num_turns"
95
+ NUM_TURNS_SUM = "num_turns_sum"
96
+ WON = "won"
97
+ NUM_WON = "num_won"
98
+ OPENING_HAND = "opening_hand"
99
+ WON_OPENING_HAND = "won_opening_hand"
100
+ DRAWN = "drawn"
101
+ WON_DRAWN = "won_drawn"
102
+ TUTORED = "tutored"
103
+ WON_TUTORED = "won_tutored"
104
+ DECK = "deck"
105
+ WON_DECK = "won_deck"
106
+ SIDEBOARD = "sideboard"
107
+ WON_SIDEBOARD = "won_sideboard"
108
+ NUM_GNS = "num_gns"
109
+ WON_NUM_GNS = "won_num_gns"
110
+ # card
111
+ SET_CODE = "set_code"
112
+ COLOR = "color"
113
+ RARITY = "rarity"
114
+ COLOR_IDENTITY = "color_identity"
115
+ CARD_TYPE = "card_type"
116
+ SUBTYPE = "subtype"
117
+ MANA_VALUE = "mana_value"
118
+ MANA_COST = "mana_cost"
119
+ POWER = "power"
120
+ TOUGHNESS = "toughness"
121
+ IS_BONUS_SHEET = "is_bonus_sheet"
122
+ IS_DFC = "is_dfc"
123
+ # agg extensions
124
+ PICKED_MATCH_WR = "picked_match_wr"
125
+ TROPHY_RATE = "trophy_rate"
126
+ GAME_WR = "game_wr"
127
+ ALSA = "alsa"
128
+ ATA = "ata"
129
+ NUM_GP = "num_gp"
130
+ PCT_GP = "pct_gp"
131
+ GP_WR = "gp_wr"
132
+ NUM_OH = "num_oh"
133
+ OH_WR = "oh_wr"
134
+ NUM_GIH = "num_gih"
135
+ NUM_GIH_WON = "num_gih_won"
136
+ GIH_WR = "gih_wr"
137
+ GNS_WR = "gns_wr"
138
+ IWD = "iwd"
139
+ NUM_IN_POOL = "num_in_pool"
140
+ IN_POOL_WR = "in_pool_wr"
141
+ DECK_TOTAL = "deck_total"
142
+ WON_DECK_TOTAL = "won_deck_total"
143
+ GP_WR_MEAN = "gp_wr_mean"
144
+ GP_WR_EXCESS = "gp_wr_excess"
145
+ GP_WR_VAR = "gp_wr_var"
146
+ GP_WR_STDEV = "gp_wr_stdev"
147
+ GP_WR_Z = "gp_wr_z"
148
+ GIH_TOTAL = "gih_total"
149
+ WON_GIH_TOTAL = "won_gih_total"
150
+ GIH_WR_MEAN = "gih_wr_mean"
151
+ GIH_WR_EXCESS = "gih_wr_excess"
152
+ GIH_WR_VAR = "gih_wr_var"
153
+ GIH_WR_STDEV = "gh_wr_stdev"
154
+ GIH_WR_Z = "gih_wr_z"