spells-mtg 0.10.4__py3-none-any.whl → 0.11.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spells/.ruff_cache/.gitignore +2 -0
- spells/.ruff_cache/0.8.6/17785301476771359756 +0 -0
- spells/.ruff_cache/CACHEDIR.TAG +1 -0
- spells/cache.py +127 -0
- spells/card_data_files.py +222 -0
- spells/columns.py +69 -147
- spells/config.py +4 -0
- spells/draft_data.py +78 -39
- spells/enums.py +1 -0
- spells/extension.py +31 -1
- spells/external.py +5 -4
- spells/manifest.py +5 -6
- {spells_mtg-0.10.4.dist-info → spells_mtg-0.11.16.dist-info}/METADATA +3 -2
- spells_mtg-0.11.16.dist-info/RECORD +23 -0
- {spells_mtg-0.10.4.dist-info → spells_mtg-0.11.16.dist-info}/WHEEL +1 -1
- spells_mtg-0.10.4.dist-info/RECORD +0 -19
- {spells_mtg-0.10.4.dist-info → spells_mtg-0.11.16.dist-info}/entry_points.txt +0 -0
- {spells_mtg-0.10.4.dist-info → spells_mtg-0.11.16.dist-info}/licenses/LICENSE +0 -0
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Signature: 8a477f597d28d172789f06886806bc55
|
spells/cache.py
CHANGED
|
@@ -7,13 +7,23 @@ and groupbys.
|
|
|
7
7
|
Caches are cleared per-set when new files are downloaded.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
import datetime as dt
|
|
10
11
|
from enum import StrEnum
|
|
11
12
|
import os
|
|
13
|
+
from pathlib import Path
|
|
12
14
|
import sys
|
|
13
15
|
|
|
14
16
|
import polars as pl
|
|
15
17
|
|
|
16
18
|
|
|
19
|
+
class Env(StrEnum):
|
|
20
|
+
PROD = "prod"
|
|
21
|
+
TEST = "test"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
env = Env.PROD
|
|
25
|
+
|
|
26
|
+
|
|
17
27
|
class EventType(StrEnum):
|
|
18
28
|
PREMIER = "PremierDraft"
|
|
19
29
|
TRADITIONAL = "TradDraft"
|
|
@@ -22,14 +32,38 @@ class EventType(StrEnum):
|
|
|
22
32
|
class DataDir(StrEnum):
|
|
23
33
|
CACHE = "cache"
|
|
24
34
|
EXTERNAL = "external"
|
|
35
|
+
RATINGS = "ratings"
|
|
36
|
+
DECK_COLOR = "deck_color"
|
|
25
37
|
|
|
26
38
|
|
|
27
39
|
def spells_print(mode, content):
|
|
28
40
|
print(f" 🪄 {mode} ✨ {content}")
|
|
29
41
|
|
|
30
42
|
|
|
43
|
+
def set_test_env():
|
|
44
|
+
global env
|
|
45
|
+
env = Env.TEST
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def set_prod_env():
|
|
49
|
+
global env
|
|
50
|
+
env = Env.PROD
|
|
51
|
+
|
|
52
|
+
|
|
31
53
|
def data_home() -> str:
|
|
32
54
|
is_win = sys.platform == "win32"
|
|
55
|
+
global env
|
|
56
|
+
|
|
57
|
+
if env == Env.TEST:
|
|
58
|
+
return os.path.expanduser(
|
|
59
|
+
os.environ.get(
|
|
60
|
+
"SPELLS_TEST_HOME",
|
|
61
|
+
r"~\AppData\Local\SpellsTest"
|
|
62
|
+
if is_win
|
|
63
|
+
else "~/.local/share/spellstest/",
|
|
64
|
+
)
|
|
65
|
+
)
|
|
66
|
+
|
|
33
67
|
return os.path.expanduser(
|
|
34
68
|
os.environ.get(
|
|
35
69
|
"SPELLS_DATA_HOME",
|
|
@@ -41,6 +75,65 @@ def data_home() -> str:
|
|
|
41
75
|
)
|
|
42
76
|
|
|
43
77
|
|
|
78
|
+
def ad_hoc_dir():
|
|
79
|
+
ad_hoc_dir = Path(data_home()) / "ad_hoc"
|
|
80
|
+
if not os.path.isdir(ad_hoc_dir):
|
|
81
|
+
os.makedirs(ad_hoc_dir)
|
|
82
|
+
return ad_hoc_dir
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def save_ad_hoc_dataset(df: pl.DataFrame, key: str):
|
|
86
|
+
df.write_parquet(ad_hoc_dir() / f"{key}.parquet")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def read_ad_hoc_dataset(key: str):
|
|
90
|
+
path = ad_hoc_dir() / f"{key}.parquet"
|
|
91
|
+
if os.path.exists(path):
|
|
92
|
+
return pl.read_parquet(ad_hoc_dir() / f"{key}.parquet")
|
|
93
|
+
else:
|
|
94
|
+
return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def create_test_data(set_code: str, test_num_drafts: int = 100):
|
|
98
|
+
"""
|
|
99
|
+
run from prod environment to write test data for `set_code` into
|
|
100
|
+
the test environment. Then set `SPELLS_DATA_HOME=test_data_home`
|
|
101
|
+
to run from the test environment
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
context_df = pl.scan_parquet(data_file_path(set_code, "context")).collect()
|
|
105
|
+
picks_per_pack = context_df["picks_per_pack"][0]
|
|
106
|
+
|
|
107
|
+
draft_df = (
|
|
108
|
+
pl.scan_parquet(data_file_path(set_code, "draft"))
|
|
109
|
+
.head(50 * (test_num_drafts + 2))
|
|
110
|
+
.collect()
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
sample_draft_ids = (
|
|
114
|
+
draft_df.group_by("draft_id")
|
|
115
|
+
.len()
|
|
116
|
+
.filter(pl.col("len") == picks_per_pack * 3)["draft_id"][0:test_num_drafts]
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
draft_sample_df = draft_df.filter(pl.col("draft_id").is_in(sample_draft_ids))
|
|
120
|
+
game_sample_df = (
|
|
121
|
+
pl.scan_parquet(data_file_path(set_code, "game"))
|
|
122
|
+
.filter(pl.col("draft_id").is_in(sample_draft_ids))
|
|
123
|
+
.collect()
|
|
124
|
+
)
|
|
125
|
+
card_df = pl.scan_parquet(data_file_path(set_code, "card")).collect()
|
|
126
|
+
|
|
127
|
+
set_test_env()
|
|
128
|
+
if not os.path.isdir(set_dir := external_set_path(set_code)):
|
|
129
|
+
os.makedirs(set_dir)
|
|
130
|
+
context_df.write_parquet(data_file_path(set_code, "context"))
|
|
131
|
+
draft_sample_df.write_parquet(data_file_path(set_code, "draft"))
|
|
132
|
+
game_sample_df.write_parquet(data_file_path(set_code, "game"))
|
|
133
|
+
card_df.write_parquet(data_file_path(set_code, "card"))
|
|
134
|
+
set_prod_env()
|
|
135
|
+
|
|
136
|
+
|
|
44
137
|
def data_dir_path(cache_dir: DataDir) -> str:
|
|
45
138
|
"""
|
|
46
139
|
Where 17Lands data is stored. MDU_DATA_DIR environment variable is used, if it exists,
|
|
@@ -51,6 +144,8 @@ def data_dir_path(cache_dir: DataDir) -> str:
|
|
|
51
144
|
ext = {
|
|
52
145
|
DataDir.CACHE: "Cache" if is_win else "cache",
|
|
53
146
|
DataDir.EXTERNAL: "External" if is_win else "external",
|
|
147
|
+
DataDir.RATINGS: "Ratings" if is_win else "ratings",
|
|
148
|
+
DataDir.DECK_COLOR: "DeckColor" if is_win else "deck_color",
|
|
54
149
|
}[cache_dir]
|
|
55
150
|
|
|
56
151
|
data_dir = os.path.join(data_home(), ext)
|
|
@@ -73,6 +168,38 @@ def data_file_path(set_code, dataset_type: str, event_type=EventType.PREMIER):
|
|
|
73
168
|
)
|
|
74
169
|
|
|
75
170
|
|
|
171
|
+
def card_ratings_file_path(
|
|
172
|
+
set_code: str,
|
|
173
|
+
format: str,
|
|
174
|
+
user_group: str,
|
|
175
|
+
deck_color: str,
|
|
176
|
+
start_date: dt.date,
|
|
177
|
+
end_date: dt.date,
|
|
178
|
+
) -> tuple[str, str]:
|
|
179
|
+
return os.path.join(
|
|
180
|
+
data_dir_path(DataDir.RATINGS),
|
|
181
|
+
set_code,
|
|
182
|
+
), (
|
|
183
|
+
f"{format}_{user_group}_{deck_color}_{start_date.strftime('%Y-%m-%d')}"
|
|
184
|
+
f"_{end_date.strftime('%Y-%m-%d')}.json"
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
def deck_color_file_path(
|
|
188
|
+
set_code: str,
|
|
189
|
+
format: str,
|
|
190
|
+
user_group: str,
|
|
191
|
+
start_date: dt.date,
|
|
192
|
+
end_date: dt.date,
|
|
193
|
+
) -> tuple[str, str]:
|
|
194
|
+
return os.path.join(
|
|
195
|
+
data_dir_path(DataDir.DECK_COLOR),
|
|
196
|
+
set_code,
|
|
197
|
+
), (
|
|
198
|
+
f"{format}_{user_group}_{start_date.strftime('%Y-%m-%d')}"
|
|
199
|
+
f"_{end_date.strftime('%Y-%m-%d')}.json"
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
76
203
|
def cache_dir_for_set(set_code: str) -> str:
|
|
77
204
|
return os.path.join(data_dir_path(DataDir.CACHE), set_code)
|
|
78
205
|
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import os
|
|
3
|
+
import wget
|
|
4
|
+
from time import sleep
|
|
5
|
+
|
|
6
|
+
import polars as pl
|
|
7
|
+
|
|
8
|
+
from spells import cache
|
|
9
|
+
from spells.enums import ColName
|
|
10
|
+
|
|
11
|
+
RATINGS_TEMPLATE = (
|
|
12
|
+
"https://www.17lands.com/card_ratings/data?expansion={set_code}&format={format}"
|
|
13
|
+
"{user_group_param}{deck_color_param}&start_date={start_date_str}&end_date={end_date_str}"
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
DECK_COLOR_DATA_TEMPLATE = (
|
|
17
|
+
"https://www.17lands.com/color_ratings/data?expansion={set_code}&event_type={format}"
|
|
18
|
+
"{user_group_param}&start_date={start_date_str}&end_date={end_date_str}&combine_splash=true"
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
START_DATE_MAP = {
|
|
22
|
+
"PIO": dt.date(2024, 12, 10),
|
|
23
|
+
"DFT": dt.date(2025, 2, 11),
|
|
24
|
+
"TDM": dt.date(2025, 4, 8),
|
|
25
|
+
"FIN": dt.date(2025, 6, 10),
|
|
26
|
+
"EOE": dt.date(2025, 7, 29),
|
|
27
|
+
"OM1": dt.date(2025, 9, 23),
|
|
28
|
+
"Cube+-+Powered": dt.date(2025, 10, 28),
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
ratings_col_defs = {
|
|
32
|
+
ColName.NAME: pl.col("name").cast(pl.String),
|
|
33
|
+
ColName.COLOR: pl.col("color").cast(pl.String),
|
|
34
|
+
ColName.RARITY: pl.col("rarity").cast(pl.String),
|
|
35
|
+
ColName.IMAGE_URL: pl.col("url").cast(pl.String),
|
|
36
|
+
ColName.NUM_SEEN: pl.col("seen_count").cast(pl.Int64),
|
|
37
|
+
ColName.LAST_SEEN: pl.col("seen_count") * pl.col("avg_seen").cast(pl.Float64),
|
|
38
|
+
ColName.NUM_TAKEN: pl.col("pick_count").cast(pl.Int64),
|
|
39
|
+
ColName.TAKEN_AT: pl.col("pick_count") * pl.col("avg_pick").cast(pl.Float64),
|
|
40
|
+
ColName.DECK: pl.col("game_count").cast(pl.Int64),
|
|
41
|
+
ColName.WON_DECK: pl.col("win_rate") * pl.col("game_count").cast(pl.Float64),
|
|
42
|
+
ColName.SIDEBOARD: (pl.col("pool_count") - pl.col("game_count")).cast(pl.Int64),
|
|
43
|
+
ColName.OPENING_HAND: pl.col("opening_hand_game_count").cast(pl.Int64),
|
|
44
|
+
ColName.WON_OPENING_HAND: pl.col("opening_hand_game_count")
|
|
45
|
+
* pl.col("opening_hand_win_rate").cast(pl.Float64),
|
|
46
|
+
ColName.DRAWN: pl.col("drawn_game_count").cast(pl.Int64),
|
|
47
|
+
ColName.WON_DRAWN: pl.col("drawn_win_rate")
|
|
48
|
+
* pl.col("drawn_game_count").cast(pl.Float64),
|
|
49
|
+
ColName.NUM_GIH: pl.col("ever_drawn_game_count").cast(pl.Int64),
|
|
50
|
+
ColName.NUM_GIH_WON: pl.col("ever_drawn_game_count")
|
|
51
|
+
* pl.col("ever_drawn_win_rate").cast(pl.Float64),
|
|
52
|
+
ColName.NUM_GNS: pl.col("never_drawn_game_count").cast(pl.Int64),
|
|
53
|
+
ColName.WON_NUM_GNS: pl.col("never_drawn_game_count")
|
|
54
|
+
* pl.col("never_drawn_win_rate").cast(pl.Float64),
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
deck_color_col_defs = {
|
|
58
|
+
ColName.MAIN_COLORS: pl.col("short_name").cast(pl.String),
|
|
59
|
+
ColName.NUM_GAMES: pl.col("games").cast(pl.Int64),
|
|
60
|
+
ColName.NUM_WON: pl.col("wins").cast(pl.Int64),
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def deck_color_df(
|
|
65
|
+
set_code: str,
|
|
66
|
+
format: str = "PremierDraft",
|
|
67
|
+
player_cohort: str = "all",
|
|
68
|
+
start_date: dt.date | None = None,
|
|
69
|
+
end_date: dt.date | None = None,
|
|
70
|
+
):
|
|
71
|
+
if start_date is None:
|
|
72
|
+
start_date = START_DATE_MAP[set_code]
|
|
73
|
+
if end_date is None:
|
|
74
|
+
end_date = dt.date.today() - dt.timedelta(days=1)
|
|
75
|
+
|
|
76
|
+
target_dir, filename = cache.deck_color_file_path(
|
|
77
|
+
set_code,
|
|
78
|
+
format,
|
|
79
|
+
player_cohort,
|
|
80
|
+
start_date,
|
|
81
|
+
end_date,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
if not os.path.isdir(target_dir):
|
|
85
|
+
os.makedirs(target_dir)
|
|
86
|
+
|
|
87
|
+
deck_color_file_path = os.path.join(target_dir, filename)
|
|
88
|
+
|
|
89
|
+
if not os.path.isfile(deck_color_file_path):
|
|
90
|
+
user_group_param = (
|
|
91
|
+
"" if player_cohort == "all" else f"&user_group={player_cohort}"
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
url = DECK_COLOR_DATA_TEMPLATE.format(
|
|
95
|
+
set_code=set_code,
|
|
96
|
+
format=format,
|
|
97
|
+
user_group_param=user_group_param,
|
|
98
|
+
start_date_str=start_date.strftime("%Y-%m-%d"),
|
|
99
|
+
end_date_str=end_date.strftime("%Y-%m-%d"),
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
wget.download(
|
|
103
|
+
url,
|
|
104
|
+
out=deck_color_file_path,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
df = (
|
|
108
|
+
pl.read_json(deck_color_file_path)
|
|
109
|
+
.filter(~pl.col("is_summary"))
|
|
110
|
+
.select(
|
|
111
|
+
[
|
|
112
|
+
pl.lit(set_code).alias(ColName.EXPANSION),
|
|
113
|
+
pl.lit(format).alias(ColName.EVENT_TYPE),
|
|
114
|
+
(pl.lit("Top") if player_cohort == "top" else pl.lit(None))
|
|
115
|
+
.alias(ColName.PLAYER_COHORT)
|
|
116
|
+
.cast(pl.String),
|
|
117
|
+
*[val.alias(key) for key, val in deck_color_col_defs.items()],
|
|
118
|
+
]
|
|
119
|
+
)
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
return df
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def base_ratings_df(
|
|
126
|
+
set_code: str,
|
|
127
|
+
format: str = "PremierDraft",
|
|
128
|
+
player_cohort: str = "all",
|
|
129
|
+
deck_colors: str | list[str] = "any",
|
|
130
|
+
start_date: dt.date | None = None,
|
|
131
|
+
end_date: dt.date | None = None,
|
|
132
|
+
) -> pl.DataFrame:
|
|
133
|
+
if start_date is None:
|
|
134
|
+
start_date = START_DATE_MAP[set_code]
|
|
135
|
+
if end_date is None:
|
|
136
|
+
end_date = dt.date.today() - dt.timedelta(days=1)
|
|
137
|
+
|
|
138
|
+
if isinstance(deck_colors, str):
|
|
139
|
+
deck_colors = [deck_colors]
|
|
140
|
+
|
|
141
|
+
concat_list = []
|
|
142
|
+
for i, deck_color in enumerate(deck_colors):
|
|
143
|
+
ratings_dir, filename = cache.card_ratings_file_path(
|
|
144
|
+
set_code,
|
|
145
|
+
format,
|
|
146
|
+
player_cohort,
|
|
147
|
+
deck_color,
|
|
148
|
+
start_date,
|
|
149
|
+
end_date,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
if not os.path.isdir(ratings_dir):
|
|
153
|
+
os.makedirs(ratings_dir)
|
|
154
|
+
|
|
155
|
+
ratings_file_path = os.path.join(ratings_dir, filename)
|
|
156
|
+
|
|
157
|
+
if not os.path.isfile(ratings_file_path):
|
|
158
|
+
if i > 0:
|
|
159
|
+
sleep(5)
|
|
160
|
+
user_group_param = (
|
|
161
|
+
"" if player_cohort == "all" else f"&user_group={player_cohort}"
|
|
162
|
+
)
|
|
163
|
+
deck_color_param = "" if deck_color == "any" else f"&colors={deck_color}"
|
|
164
|
+
|
|
165
|
+
url = RATINGS_TEMPLATE.format(
|
|
166
|
+
set_code=set_code,
|
|
167
|
+
format=format,
|
|
168
|
+
user_group_param=user_group_param,
|
|
169
|
+
deck_color_param=deck_color_param,
|
|
170
|
+
start_date_str=start_date.strftime("%Y-%m-%d"),
|
|
171
|
+
end_date_str=end_date.strftime("%Y-%m-%d"),
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
wget.download(
|
|
175
|
+
url,
|
|
176
|
+
out=ratings_file_path,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
concat_list.append(
|
|
180
|
+
pl.read_json(ratings_file_path, infer_schema_length=500)
|
|
181
|
+
.with_columns(
|
|
182
|
+
(pl.lit(deck_color) if deck_color != "any" else pl.lit(None))
|
|
183
|
+
.alias(ColName.MAIN_COLORS)
|
|
184
|
+
.cast(pl.String)
|
|
185
|
+
)
|
|
186
|
+
.select(
|
|
187
|
+
[
|
|
188
|
+
pl.lit(set_code).alias(ColName.EXPANSION),
|
|
189
|
+
pl.lit(format).alias(ColName.EVENT_TYPE),
|
|
190
|
+
(pl.lit("Top") if player_cohort == "top" else pl.lit(None))
|
|
191
|
+
.alias(ColName.PLAYER_COHORT)
|
|
192
|
+
.cast(pl.String),
|
|
193
|
+
ColName.MAIN_COLORS,
|
|
194
|
+
*[val.alias(key) for key, val in ratings_col_defs.items()],
|
|
195
|
+
]
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
raw_df = pl.concat(concat_list)
|
|
200
|
+
|
|
201
|
+
group_cols = [
|
|
202
|
+
ColName.NAME,
|
|
203
|
+
ColName.EXPANSION,
|
|
204
|
+
ColName.MAIN_COLORS,
|
|
205
|
+
]
|
|
206
|
+
|
|
207
|
+
attr_cols = [
|
|
208
|
+
ColName.EVENT_TYPE,
|
|
209
|
+
ColName.PLAYER_COHORT,
|
|
210
|
+
ColName.COLOR,
|
|
211
|
+
ColName.RARITY,
|
|
212
|
+
ColName.IMAGE_URL,
|
|
213
|
+
]
|
|
214
|
+
|
|
215
|
+
sum_cols = list(set(ratings_col_defs) - set(group_cols + attr_cols))
|
|
216
|
+
|
|
217
|
+
attr_df = raw_df.select(group_cols + attr_cols).group_by(group_cols).first()
|
|
218
|
+
sum_df = raw_df.select(group_cols + sum_cols).group_by(group_cols).sum()
|
|
219
|
+
|
|
220
|
+
df = attr_df.join(sum_df, on=group_cols, join_nulls=True)
|
|
221
|
+
|
|
222
|
+
return df
|
spells/columns.py
CHANGED
|
@@ -40,6 +40,11 @@ default_columns = [
|
|
|
40
40
|
ColName.GIH_WR,
|
|
41
41
|
]
|
|
42
42
|
|
|
43
|
+
|
|
44
|
+
def agg_col(expr: pl.Expr) -> ColSpec:
|
|
45
|
+
return ColSpec(col_type=ColType.AGG, expr=expr)
|
|
46
|
+
|
|
47
|
+
|
|
43
48
|
_specs: dict[str, ColSpec] = {
|
|
44
49
|
ColName.NAME: ColSpec(
|
|
45
50
|
col_type=ColType.GROUP_BY,
|
|
@@ -166,6 +171,11 @@ _specs: dict[str, ColSpec] = {
|
|
|
166
171
|
col_type=ColType.GROUP_BY,
|
|
167
172
|
expr=pl.col(ColName.PICK_NUMBER) + 1,
|
|
168
173
|
),
|
|
174
|
+
ColName.PICK_INDEX: ColSpec(
|
|
175
|
+
col_type=ColType.GROUP_BY,
|
|
176
|
+
expr=lambda set_context: pl.col(ColName.PICK_NUMBER)
|
|
177
|
+
+ pl.col(ColName.PACK_NUMBER) * set_context["picks_per_pack"],
|
|
178
|
+
),
|
|
169
179
|
ColName.TAKEN_AT: ColSpec(
|
|
170
180
|
col_type=ColType.PICK_SUM,
|
|
171
181
|
expr=pl.col(ColName.PICK_NUM),
|
|
@@ -217,21 +227,21 @@ _specs: dict[str, ColSpec] = {
|
|
|
217
227
|
),
|
|
218
228
|
ColName.GAME_DATE: ColSpec(
|
|
219
229
|
col_type=ColType.GROUP_BY,
|
|
220
|
-
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H
|
|
230
|
+
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.date(),
|
|
221
231
|
),
|
|
222
232
|
ColName.GAME_DAY_OF_WEEK: ColSpec(
|
|
223
233
|
col_type=ColType.GROUP_BY,
|
|
224
234
|
expr=pl.col(ColName.GAME_TIME)
|
|
225
|
-
.str.to_datetime("%Y-%m-%d %H
|
|
235
|
+
.str.to_datetime("%Y-%m-%d %H:%M:%S")
|
|
226
236
|
.dt.weekday(),
|
|
227
237
|
),
|
|
228
238
|
ColName.GAME_HOUR: ColSpec(
|
|
229
239
|
col_type=ColType.GROUP_BY,
|
|
230
|
-
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H
|
|
240
|
+
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.hour(),
|
|
231
241
|
),
|
|
232
242
|
ColName.GAME_WEEK: ColSpec(
|
|
233
243
|
col_type=ColType.GROUP_BY,
|
|
234
|
-
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H
|
|
244
|
+
expr=pl.col(ColName.GAME_TIME).str.to_datetime("%Y-%m-%d %H:%M:%S").dt.week(),
|
|
235
245
|
),
|
|
236
246
|
ColName.BUILD_INDEX: ColSpec(
|
|
237
247
|
col_type=ColType.GROUP_BY,
|
|
@@ -437,149 +447,61 @@ _specs: dict[str, ColSpec] = {
|
|
|
437
447
|
ColName.IMAGE_URL: ColSpec(
|
|
438
448
|
col_type=ColType.CARD_ATTR,
|
|
439
449
|
),
|
|
440
|
-
ColName.PICKED_MATCH_WR:
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
),
|
|
452
|
-
ColName.
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
),
|
|
456
|
-
ColName.
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
),
|
|
460
|
-
ColName.
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
),
|
|
464
|
-
ColName.
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
),
|
|
468
|
-
ColName.
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
),
|
|
476
|
-
ColName.
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
),
|
|
480
|
-
ColName.
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
),
|
|
484
|
-
ColName.
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
),
|
|
492
|
-
ColName.
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
),
|
|
496
|
-
ColName.IWD: ColSpec(
|
|
497
|
-
col_type=ColType.AGG,
|
|
498
|
-
expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GNS_WR),
|
|
499
|
-
),
|
|
500
|
-
ColName.NUM_IN_POOL: ColSpec(
|
|
501
|
-
col_type=ColType.AGG,
|
|
502
|
-
expr=pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD),
|
|
503
|
-
),
|
|
504
|
-
ColName.NUM_IN_POOL_TOTAL: ColSpec(
|
|
505
|
-
col_type=ColType.AGG,
|
|
506
|
-
expr=pl.col(ColName.NUM_IN_POOL).sum(),
|
|
507
|
-
),
|
|
508
|
-
ColName.IN_POOL_WR: ColSpec(
|
|
509
|
-
col_type=ColType.AGG,
|
|
510
|
-
expr=(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
|
|
511
|
-
/ pl.col(ColName.NUM_IN_POOL),
|
|
512
|
-
),
|
|
513
|
-
ColName.DECK_TOTAL: ColSpec(
|
|
514
|
-
col_type=ColType.AGG,
|
|
515
|
-
expr=pl.col(ColName.DECK).sum(),
|
|
516
|
-
),
|
|
517
|
-
ColName.WON_DECK_TOTAL: ColSpec(
|
|
518
|
-
col_type=ColType.AGG,
|
|
519
|
-
expr=pl.col(ColName.WON_DECK).sum(),
|
|
520
|
-
),
|
|
521
|
-
ColName.GP_WR_MEAN: ColSpec(
|
|
522
|
-
col_type=ColType.AGG,
|
|
523
|
-
expr=pl.col(ColName.WON_DECK_TOTAL) / pl.col(ColName.DECK_TOTAL),
|
|
524
|
-
),
|
|
525
|
-
ColName.GP_WR_EXCESS: ColSpec(
|
|
526
|
-
col_type=ColType.AGG,
|
|
527
|
-
expr=pl.col(ColName.GP_WR) - pl.col(ColName.GP_WR_MEAN),
|
|
528
|
-
),
|
|
529
|
-
ColName.GP_WR_VAR: ColSpec(
|
|
530
|
-
col_type=ColType.AGG,
|
|
531
|
-
expr=(pl.col(ColName.GP_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GP)).sum()
|
|
532
|
-
/ pl.col(ColName.DECK_TOTAL),
|
|
533
|
-
),
|
|
534
|
-
ColName.GP_WR_STDEV: ColSpec(
|
|
535
|
-
col_type=ColType.AGG,
|
|
536
|
-
expr=pl.col(ColName.GP_WR_VAR).sqrt(),
|
|
537
|
-
),
|
|
538
|
-
ColName.GP_WR_Z: ColSpec(
|
|
539
|
-
col_type=ColType.AGG,
|
|
540
|
-
expr=pl.col(ColName.GP_WR_EXCESS) / pl.col(ColName.GP_WR_STDEV),
|
|
541
|
-
),
|
|
542
|
-
ColName.GIH_TOTAL: ColSpec(
|
|
543
|
-
col_type=ColType.AGG,
|
|
544
|
-
expr=pl.col(ColName.NUM_GIH).sum(),
|
|
545
|
-
),
|
|
546
|
-
ColName.WON_GIH_TOTAL: ColSpec(
|
|
547
|
-
col_type=ColType.AGG,
|
|
548
|
-
expr=pl.col(ColName.NUM_GIH_WON).sum(),
|
|
549
|
-
),
|
|
550
|
-
ColName.GIH_WR_MEAN: ColSpec(
|
|
551
|
-
col_type=ColType.AGG,
|
|
552
|
-
expr=pl.col(ColName.WON_GIH_TOTAL) / pl.col(ColName.GIH_TOTAL),
|
|
553
|
-
),
|
|
554
|
-
ColName.GIH_WR_EXCESS: ColSpec(
|
|
555
|
-
col_type=ColType.AGG,
|
|
556
|
-
expr=pl.col(ColName.GIH_WR) - pl.col(ColName.GIH_WR_MEAN),
|
|
557
|
-
),
|
|
558
|
-
ColName.GIH_WR_VAR: ColSpec(
|
|
559
|
-
col_type=ColType.AGG,
|
|
560
|
-
expr=(pl.col(ColName.GIH_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GIH)).sum()
|
|
561
|
-
/ pl.col(ColName.GIH_TOTAL),
|
|
562
|
-
),
|
|
563
|
-
ColName.GIH_WR_STDEV: ColSpec(
|
|
564
|
-
col_type=ColType.AGG,
|
|
565
|
-
expr=pl.col(ColName.GIH_WR_VAR).sqrt(),
|
|
566
|
-
),
|
|
567
|
-
ColName.GIH_WR_Z: ColSpec(
|
|
568
|
-
col_type=ColType.AGG,
|
|
569
|
-
expr=pl.col(ColName.GIH_WR_EXCESS) / pl.col(ColName.GIH_WR_STDEV),
|
|
570
|
-
),
|
|
571
|
-
ColName.DECK_MANA_VALUE_AVG: ColSpec(
|
|
572
|
-
col_type=ColType.AGG,
|
|
573
|
-
expr=pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK_SPELLS),
|
|
574
|
-
),
|
|
575
|
-
ColName.DECK_LANDS_AVG: ColSpec(
|
|
576
|
-
col_type=ColType.AGG,
|
|
577
|
-
expr=pl.col(ColName.DECK_LANDS) / pl.col(ColName.NUM_GAMES),
|
|
578
|
-
),
|
|
579
|
-
ColName.DECK_SPELLS_AVG: ColSpec(
|
|
580
|
-
col_type=ColType.AGG,
|
|
581
|
-
expr=pl.col(ColName.DECK_SPELLS) / pl.col(ColName.NUM_GAMES),
|
|
582
|
-
),
|
|
450
|
+
ColName.PICKED_MATCH_WR: agg_col(
|
|
451
|
+
pl.col(ColName.EVENT_MATCH_WINS_SUM) / pl.col(ColName.EVENT_MATCHES_SUM)
|
|
452
|
+
),
|
|
453
|
+
ColName.TROPHY_RATE: agg_col(
|
|
454
|
+
pl.col(ColName.IS_TROPHY_SUM) / pl.col(ColName.NUM_TAKEN),
|
|
455
|
+
),
|
|
456
|
+
ColName.GAME_WR: agg_col(
|
|
457
|
+
pl.col(ColName.NUM_WON) / pl.col(ColName.NUM_GAMES),
|
|
458
|
+
),
|
|
459
|
+
ColName.ALSA: agg_col(pl.col(ColName.LAST_SEEN) / pl.col(ColName.NUM_SEEN)),
|
|
460
|
+
ColName.ATA: agg_col(pl.col(ColName.TAKEN_AT) / pl.col(ColName.NUM_TAKEN)),
|
|
461
|
+
ColName.NUM_GP: agg_col(pl.col(ColName.DECK)),
|
|
462
|
+
ColName.PCT_GP: agg_col(
|
|
463
|
+
pl.col(ColName.DECK) / (pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD))
|
|
464
|
+
),
|
|
465
|
+
ColName.GP_WR: agg_col(pl.col(ColName.WON_DECK) / pl.col(ColName.DECK)),
|
|
466
|
+
ColName.NUM_OH: agg_col(pl.col(ColName.OPENING_HAND)),
|
|
467
|
+
ColName.OH_WR: agg_col(
|
|
468
|
+
pl.col(ColName.WON_OPENING_HAND) / pl.col(ColName.OPENING_HAND)
|
|
469
|
+
),
|
|
470
|
+
ColName.NUM_GIH: agg_col(pl.col(ColName.OPENING_HAND) + pl.col(ColName.DRAWN)),
|
|
471
|
+
ColName.NUM_GIH_WON: agg_col(
|
|
472
|
+
pl.col(ColName.WON_OPENING_HAND) + pl.col(ColName.WON_DRAWN)
|
|
473
|
+
),
|
|
474
|
+
ColName.GIH_WR: agg_col(pl.col(ColName.NUM_GIH_WON) / pl.col(ColName.NUM_GIH)),
|
|
475
|
+
ColName.GNS_WR: agg_col(pl.col(ColName.WON_NUM_GNS) / pl.col(ColName.NUM_GNS)),
|
|
476
|
+
ColName.IWD: agg_col(pl.col(ColName.GIH_WR) - pl.col(ColName.GNS_WR)),
|
|
477
|
+
ColName.NUM_IN_POOL: agg_col(pl.col(ColName.DECK) + pl.col(ColName.SIDEBOARD)),
|
|
478
|
+
ColName.NUM_IN_POOL_TOTAL: agg_col(pl.col(ColName.NUM_IN_POOL).sum()),
|
|
479
|
+
ColName.IN_POOL_WR: agg_col(
|
|
480
|
+
(pl.col(ColName.WON_DECK) + pl.col(ColName.WON_SIDEBOARD))
|
|
481
|
+
/ pl.col(ColName.NUM_IN_POOL)
|
|
482
|
+
),
|
|
483
|
+
ColName.DECK_TOTAL: agg_col(pl.col(ColName.DECK).sum()),
|
|
484
|
+
ColName.WON_DECK_TOTAL: agg_col(pl.col(ColName.WON_DECK).sum()),
|
|
485
|
+
ColName.GP_WR_MEAN: agg_col(pl.col(ColName.WON_DECK_TOTAL) / pl.col(ColName.DECK_TOTAL)),
|
|
486
|
+
ColName.GP_WR_EXCESS: agg_col(pl.col(ColName.GP_WR) - pl.col(ColName.GP_WR_MEAN)),
|
|
487
|
+
ColName.GP_WR_VAR: agg_col((pl.col(ColName.GP_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GP)).sum()
|
|
488
|
+
/ pl.col(ColName.DECK_TOTAL)
|
|
489
|
+
),
|
|
490
|
+
ColName.GP_WR_STDEV: agg_col(pl.col(ColName.GP_WR_VAR).sqrt()),
|
|
491
|
+
ColName.GP_WR_Z: agg_col(pl.col(ColName.GP_WR_EXCESS) / pl.col(ColName.GP_WR_STDEV)),
|
|
492
|
+
ColName.GIH_TOTAL: agg_col(pl.col(ColName.NUM_GIH).sum()),
|
|
493
|
+
ColName.WON_GIH_TOTAL: agg_col(pl.col(ColName.NUM_GIH_WON).sum()),
|
|
494
|
+
ColName.GIH_WR_MEAN: agg_col(pl.col(ColName.WON_GIH_TOTAL) / pl.col(ColName.GIH_TOTAL)),
|
|
495
|
+
ColName.GIH_WR_EXCESS: agg_col(pl.col(ColName.GIH_WR) - pl.col(ColName.GIH_WR_MEAN)),
|
|
496
|
+
ColName.GIH_WR_VAR: agg_col(
|
|
497
|
+
(pl.col(ColName.GIH_WR_EXCESS).pow(2) * pl.col(ColName.NUM_GIH)).sum()
|
|
498
|
+
/ pl.col(ColName.GIH_TOTAL)
|
|
499
|
+
),
|
|
500
|
+
ColName.GIH_WR_STDEV: agg_col(pl.col(ColName.GIH_WR_VAR).sqrt()),
|
|
501
|
+
ColName.GIH_WR_Z: agg_col(pl.col(ColName.GIH_WR_EXCESS) / pl.col(ColName.GIH_WR_STDEV)),
|
|
502
|
+
ColName.DECK_MANA_VALUE_AVG: agg_col(pl.col(ColName.DECK_MANA_VALUE) / pl.col(ColName.DECK_SPELLS)),
|
|
503
|
+
ColName.DECK_LANDS_AVG: agg_col(pl.col(ColName.DECK_LANDS) / pl.col(ColName.NUM_GAMES)),
|
|
504
|
+
ColName.DECK_SPELLS_AVG: agg_col(pl.col(ColName.DECK_SPELLS) / pl.col(ColName.NUM_GAMES)),
|
|
583
505
|
}
|
|
584
506
|
|
|
585
507
|
for item in ColName:
|
spells/config.py
CHANGED
spells/draft_data.py
CHANGED
|
@@ -6,6 +6,8 @@ Aggregate dataframes containing raw counts are cached in the local file system
|
|
|
6
6
|
for performance.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
import datetime
|
|
9
11
|
import functools
|
|
10
12
|
import hashlib
|
|
11
13
|
import re
|
|
@@ -19,13 +21,23 @@ from polars.exceptions import ColumnNotFoundError
|
|
|
19
21
|
|
|
20
22
|
from spells import cache
|
|
21
23
|
import spells.filter
|
|
22
|
-
import
|
|
24
|
+
from spells import manifest
|
|
23
25
|
from spells.columns import ColDef, ColSpec, get_specs
|
|
24
26
|
from spells.enums import View, ColName, ColType
|
|
25
27
|
from spells.log import make_verbose
|
|
28
|
+
from spells.card_data_files import base_ratings_df
|
|
26
29
|
|
|
27
30
|
DF = TypeVar("DF", pl.LazyFrame, pl.DataFrame)
|
|
28
31
|
|
|
32
|
+
@dataclass
|
|
33
|
+
class CardDataFileSpec():
|
|
34
|
+
set_code: str
|
|
35
|
+
format: str = "PremierDraft"
|
|
36
|
+
player_cohort: str = "all"
|
|
37
|
+
deck_colors: str | list[str] = "any"
|
|
38
|
+
start_date: datetime.date | None = None
|
|
39
|
+
end_date: datetime.date | None = None
|
|
40
|
+
|
|
29
41
|
|
|
30
42
|
def _cache_key(args) -> str:
|
|
31
43
|
"""
|
|
@@ -37,20 +49,25 @@ def _cache_key(args) -> str:
|
|
|
37
49
|
@functools.lru_cache(maxsize=None)
|
|
38
50
|
def get_names(set_code: str) -> list[str]:
|
|
39
51
|
card_fp = cache.data_file_path(set_code, View.CARD)
|
|
40
|
-
|
|
41
|
-
|
|
52
|
+
try:
|
|
53
|
+
card_view = pl.read_parquet(card_fp)
|
|
54
|
+
card_names_set = frozenset(card_view.get_column("name").to_list())
|
|
42
55
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
56
|
+
draft_fp = cache.data_file_path(set_code, View.DRAFT)
|
|
57
|
+
draft_view = pl.scan_parquet(draft_fp)
|
|
58
|
+
cols = draft_view.collect_schema().names()
|
|
46
59
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
60
|
+
prefix = "pack_card_"
|
|
61
|
+
names = [col[len(prefix) :] for col in cols if col.startswith(prefix)]
|
|
62
|
+
draft_names_set = frozenset(names)
|
|
63
|
+
|
|
64
|
+
assert (
|
|
65
|
+
draft_names_set == card_names_set
|
|
66
|
+
), "names mismatch between card and draft file"
|
|
67
|
+
except FileNotFoundError:
|
|
68
|
+
ratings_data = base_ratings_df(set_code)
|
|
69
|
+
names = list(ratings_data['name'])
|
|
50
70
|
|
|
51
|
-
assert (
|
|
52
|
-
draft_names_set == card_names_set
|
|
53
|
-
), "names mismatch between card and draft file"
|
|
54
71
|
return names
|
|
55
72
|
|
|
56
73
|
|
|
@@ -330,7 +347,10 @@ def _view_select(
|
|
|
330
347
|
cdefs = [col_def_map[c] for c in sorted(view_cols)]
|
|
331
348
|
select = []
|
|
332
349
|
for cdef in cdefs:
|
|
333
|
-
if
|
|
350
|
+
if isinstance(df, pl.DataFrame) and cdef.name in df.columns:
|
|
351
|
+
base_cols = base_cols.union(frozenset({cdef.name}))
|
|
352
|
+
select.append(cdef.name)
|
|
353
|
+
elif is_agg_view:
|
|
334
354
|
if cdef.col_type == ColType.AGG:
|
|
335
355
|
base_cols = base_cols.union(cdef.dependencies)
|
|
336
356
|
select.append(cdef.expr)
|
|
@@ -379,7 +399,7 @@ def _fetch_or_cache(
|
|
|
379
399
|
|
|
380
400
|
def _base_agg_df(
|
|
381
401
|
set_code: str,
|
|
382
|
-
m:
|
|
402
|
+
m: manifest.Manifest,
|
|
383
403
|
use_streaming: bool = True,
|
|
384
404
|
) -> pl.DataFrame:
|
|
385
405
|
join_dfs = []
|
|
@@ -454,7 +474,7 @@ def _base_agg_df(
|
|
|
454
474
|
|
|
455
475
|
if group_by:
|
|
456
476
|
joined_df = functools.reduce(
|
|
457
|
-
lambda prev, curr: prev.join(curr, on=group_by, how="
|
|
477
|
+
lambda prev, curr: prev.join(curr, on=group_by, how="full", coalesce=True),
|
|
458
478
|
join_dfs,
|
|
459
479
|
)
|
|
460
480
|
else:
|
|
@@ -476,6 +496,7 @@ def summon(
|
|
|
476
496
|
write_cache: bool = True,
|
|
477
497
|
card_context: pl.DataFrame | dict[str, Any] | None = None,
|
|
478
498
|
set_context: pl.DataFrame | dict[str, Any] | None = None,
|
|
499
|
+
cdfs: CardDataFileSpec | None = None,
|
|
479
500
|
) -> pl.DataFrame:
|
|
480
501
|
specs = get_specs()
|
|
481
502
|
|
|
@@ -515,33 +536,51 @@ def summon(
|
|
|
515
536
|
else:
|
|
516
537
|
this_set_context = None
|
|
517
538
|
|
|
518
|
-
col_def_map = _hydrate_col_defs(
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
code,
|
|
525
|
-
(
|
|
526
|
-
code,
|
|
527
|
-
sorted(m.view_cols.get(View.DRAFT, set())),
|
|
528
|
-
sorted(m.view_cols.get(View.GAME, set())),
|
|
529
|
-
sorted(c.signature or "" for c in m.col_def_map.values()),
|
|
530
|
-
sorted(m.base_view_group_by),
|
|
531
|
-
filter_spec,
|
|
532
|
-
),
|
|
533
|
-
read_cache=read_cache,
|
|
534
|
-
write_cache=write_cache,
|
|
539
|
+
col_def_map = _hydrate_col_defs(
|
|
540
|
+
code,
|
|
541
|
+
specs,
|
|
542
|
+
set_card_context,
|
|
543
|
+
this_set_context,
|
|
544
|
+
card_only=cdfs is not None,
|
|
535
545
|
)
|
|
546
|
+
m = manifest.create(col_def_map, columns, group_by, filter_spec)
|
|
536
547
|
|
|
537
|
-
if
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
548
|
+
if cdfs is None:
|
|
549
|
+
calc_fn = functools.partial(_base_agg_df, code, m, use_streaming=use_streaming)
|
|
550
|
+
agg_df = _fetch_or_cache(
|
|
551
|
+
calc_fn,
|
|
552
|
+
code,
|
|
553
|
+
(
|
|
554
|
+
code,
|
|
555
|
+
sorted(m.view_cols.get(View.DRAFT, set())),
|
|
556
|
+
sorted(m.view_cols.get(View.GAME, set())),
|
|
557
|
+
sorted(c.signature or "" for c in m.col_def_map.values()),
|
|
558
|
+
sorted(m.base_view_group_by),
|
|
559
|
+
filter_spec,
|
|
560
|
+
),
|
|
561
|
+
read_cache=read_cache,
|
|
562
|
+
write_cache=write_cache,
|
|
563
|
+
)
|
|
564
|
+
if View.CARD in m.view_cols:
|
|
565
|
+
card_cols = m.view_cols[View.CARD].union({ColName.NAME})
|
|
566
|
+
fp = cache.data_file_path(code, View.CARD)
|
|
567
|
+
card_df = pl.read_parquet(fp)
|
|
568
|
+
select_df = _view_select(
|
|
569
|
+
card_df, card_cols, m.col_def_map, is_agg_view=False
|
|
570
|
+
)
|
|
571
|
+
agg_df = agg_df.join(select_df, on="name", how="full", coalesce=True)
|
|
572
|
+
else:
|
|
573
|
+
assert len(codes) == 1, "Only one set supported for loading from card data file"
|
|
574
|
+
assert codes[0] == cdfs.set_code, "Wrong set file specified"
|
|
575
|
+
agg_df = base_ratings_df(
|
|
576
|
+
set_code=cdfs.set_code,
|
|
577
|
+
format=cdfs.format,
|
|
578
|
+
player_cohort=cdfs.player_cohort,
|
|
579
|
+
deck_colors=cdfs.deck_colors,
|
|
580
|
+
start_date=cdfs.start_date,
|
|
581
|
+
end_date=cdfs.end_date,
|
|
543
582
|
)
|
|
544
|
-
|
|
583
|
+
|
|
545
584
|
concat_dfs.append(agg_df)
|
|
546
585
|
|
|
547
586
|
full_agg_df = pl.concat(concat_dfs, how="vertical")
|
spells/enums.py
CHANGED
|
@@ -60,6 +60,7 @@ class ColName(StrEnum):
|
|
|
60
60
|
PACK_NUM = "pack_num" # pack_number plus 1
|
|
61
61
|
PICK_NUMBER = "pick_number"
|
|
62
62
|
PICK_NUM = "pick_num" # pick_number plus 1
|
|
63
|
+
PICK_INDEX = "pick_index" # 0 - 3 * picks_per_pack - 1
|
|
63
64
|
TAKEN_AT = "taken_at"
|
|
64
65
|
NUM_TAKEN = "num_taken"
|
|
65
66
|
NUM_DRAFTS = "num_drafts"
|
spells/extension.py
CHANGED
|
@@ -25,6 +25,7 @@ def seen_greatest_name_fn(attr: str) -> Callable:
|
|
|
25
25
|
.otherwise(expr)
|
|
26
26
|
)
|
|
27
27
|
return expr
|
|
28
|
+
|
|
28
29
|
return inner
|
|
29
30
|
|
|
30
31
|
|
|
@@ -49,7 +50,26 @@ def context_cols(attr, silent: bool = True) -> dict[str, ColSpec]:
|
|
|
49
50
|
else card_context[name][attr],
|
|
50
51
|
),
|
|
51
52
|
f"pick_{attr}": ColSpec(
|
|
52
|
-
col_type=ColType.
|
|
53
|
+
col_type=ColType.GROUP_BY, expr=pl.col(f"pick_{attr}_sum")
|
|
54
|
+
),
|
|
55
|
+
f"pool_{attr}": ColSpec(
|
|
56
|
+
col_type=ColType.NAME_SUM,
|
|
57
|
+
expr=(
|
|
58
|
+
lambda name, card_context: pl.lit(None)
|
|
59
|
+
if card_context[name].get(attr) is None
|
|
60
|
+
or math.isnan(card_context[name][attr])
|
|
61
|
+
else card_context[name][attr] * pl.col(f"pool_{name}")
|
|
62
|
+
),
|
|
63
|
+
),
|
|
64
|
+
f"pool_{attr}_sum": ColSpec(
|
|
65
|
+
col_type=ColType.PICK_SUM,
|
|
66
|
+
expr=lambda names: pl.sum_horizontal(
|
|
67
|
+
[pl.col(f"pool_{attr}_{name}") for name in names]
|
|
68
|
+
),
|
|
69
|
+
),
|
|
70
|
+
f"pool_pick_{attr}_sum": ColSpec(
|
|
71
|
+
col_type=ColType.PICK_SUM,
|
|
72
|
+
expr=pl.col(f"pick_{attr}_sum") + pl.col(f"pool_{attr}_sum"),
|
|
53
73
|
),
|
|
54
74
|
f"seen_{attr}_is_greatest": ColSpec(
|
|
55
75
|
col_type=ColType.NAME_SUM,
|
|
@@ -75,6 +95,16 @@ def context_cols(attr, silent: bool = True) -> dict[str, ColSpec]:
|
|
|
75
95
|
[pl.col(f"seen_{attr}_{name}") for name in names]
|
|
76
96
|
),
|
|
77
97
|
),
|
|
98
|
+
f"seen_{attr}_pack_sum": ColSpec(
|
|
99
|
+
col_type=ColType.PICK_SUM,
|
|
100
|
+
expr=lambda names: pl.sum_horizontal(
|
|
101
|
+
[pl.col(f"seen_{attr}_{name}") for name in names]
|
|
102
|
+
),
|
|
103
|
+
),
|
|
104
|
+
f"not_picked_{attr}_sum": ColSpec(
|
|
105
|
+
col_type=ColType.PICK_SUM,
|
|
106
|
+
expr=pl.col(f"seen_{attr}_pack_sum") - pl.col(f"pick_{attr}_sum")
|
|
107
|
+
),
|
|
78
108
|
f"least_{attr}_seen": ColSpec(
|
|
79
109
|
col_type=ColType.PICK_SUM,
|
|
80
110
|
expr=lambda names: pl.min_horizontal(
|
spells/external.py
CHANGED
|
@@ -30,7 +30,6 @@ RESOURCE_TEMPLATE = (
|
|
|
30
30
|
"https://17lands-public.s3.amazonaws.com/analysis_data/{dataset_type}_data/"
|
|
31
31
|
)
|
|
32
32
|
|
|
33
|
-
|
|
34
33
|
class FileFormat(StrEnum):
|
|
35
34
|
CSV = "csv"
|
|
36
35
|
PARQUET = "parquet"
|
|
@@ -121,7 +120,8 @@ def _context() -> int:
|
|
|
121
120
|
for code in all_sets:
|
|
122
121
|
write_card_file(code, force_download=True)
|
|
123
122
|
get_set_context(code, force_download=True)
|
|
124
|
-
return 0
|
|
123
|
+
return 0
|
|
124
|
+
|
|
125
125
|
|
|
126
126
|
def _refresh(set_code: str):
|
|
127
127
|
return _add(set_code, force_download=True)
|
|
@@ -345,11 +345,11 @@ def get_set_context(set_code: str, force_download=False) -> int:
|
|
|
345
345
|
|
|
346
346
|
df = summon(
|
|
347
347
|
set_code,
|
|
348
|
-
columns=[ColName.
|
|
348
|
+
columns=[ColName.NUM_TAKEN],
|
|
349
349
|
group_by=[ColName.DRAFT_DATE, ColName.PICK_NUM],
|
|
350
350
|
)
|
|
351
351
|
|
|
352
|
-
context_df = df.filter(pl.col(ColName.
|
|
352
|
+
context_df = df.filter(pl.col(ColName.NUM_TAKEN) > 1000).select(
|
|
353
353
|
[
|
|
354
354
|
pl.col(ColName.DRAFT_DATE).min().alias("release_date"),
|
|
355
355
|
pl.col(ColName.PICK_NUM).max().alias("picks_per_pack"),
|
|
@@ -359,4 +359,5 @@ def get_set_context(set_code: str, force_download=False) -> int:
|
|
|
359
359
|
context_df.write_parquet(context_fp)
|
|
360
360
|
|
|
361
361
|
cache.spells_print(mode, f"Wrote file {context_fp}")
|
|
362
|
+
|
|
362
363
|
return 0
|
spells/manifest.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
|
|
3
|
-
import spells.
|
|
4
|
-
import spells.filter
|
|
3
|
+
import spells.filter as spells_filter
|
|
5
4
|
from spells.enums import View, ColName, ColType
|
|
6
|
-
from spells.columns import ColDef
|
|
5
|
+
from spells.columns import ColDef, default_columns
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
@dataclass(frozen=True)
|
|
@@ -13,7 +12,7 @@ class Manifest:
|
|
|
13
12
|
base_view_group_by: frozenset[str]
|
|
14
13
|
view_cols: dict[View, frozenset[str]]
|
|
15
14
|
group_by: tuple[str, ...]
|
|
16
|
-
filter:
|
|
15
|
+
filter: spells_filter.Filter | None
|
|
17
16
|
|
|
18
17
|
def __post_init__(self):
|
|
19
18
|
# No name filter check
|
|
@@ -166,7 +165,7 @@ def create(
|
|
|
166
165
|
gbs = (ColName.NAME,) if group_by is None else tuple(group_by)
|
|
167
166
|
|
|
168
167
|
if columns is None:
|
|
169
|
-
cols = tuple(
|
|
168
|
+
cols = tuple(default_columns)
|
|
170
169
|
if ColName.NAME not in gbs:
|
|
171
170
|
cols = tuple(
|
|
172
171
|
col for col in cols if col not in (ColName.COLOR, ColName.RARITY)
|
|
@@ -174,7 +173,7 @@ def create(
|
|
|
174
173
|
else:
|
|
175
174
|
cols = tuple(columns)
|
|
176
175
|
|
|
177
|
-
m_filter =
|
|
176
|
+
m_filter = spells_filter.from_spec(filter_spec)
|
|
178
177
|
|
|
179
178
|
col_set = frozenset(cols)
|
|
180
179
|
col_set = col_set.union(frozenset(gbs) - {ColName.NAME})
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spells-mtg
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.11.16
|
|
4
4
|
Summary: analaysis of 17Lands.com public datasets
|
|
5
5
|
Author-Email: Joel Barnes <oelarnes@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Requires-Python: >=3.11
|
|
8
|
-
Requires-Dist: polars
|
|
8
|
+
Requires-Dist: polars==1.22.0
|
|
9
9
|
Requires-Dist: wget>=3.2
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
|
|
@@ -412,6 +412,7 @@ A table of all included columns. Columns can be referenced by enum or by string
|
|
|
412
412
|
| `PACK_NUM` | `"pack_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
|
|
413
413
|
| `PICK_NUMBER` | `"pick_number"` | `DRAFT` | `FILTER_ONLY` | Dataset Column | Int |
|
|
414
414
|
| `PICK_NUM` | `"pick_num"` | `DRAFT` | `GROUP_BY` | 1-indexed | Int |
|
|
415
|
+
| `PICK_INDEX` | `"pick_index"` | `DRAFT` | `GROUP_BY` | 0-indexed, through 39/42/45 depending on set | Int |
|
|
415
416
|
| `TAKEN_AT` | `"taken_at` | `DRAFT` | `PICK_SUM` | Summable alias of `PICK_NUM` | Int |
|
|
416
417
|
| `NUM_DRAFTS` | `"num_drafts"` | `DRAFT` | `PICK_SUM` | | Int |
|
|
417
418
|
| `NUM_TAKEN` | `"num_taken"` | `DRAFT` | `PICK_SUM` | Sum 1 over rows | Int |
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
spells/.ruff_cache/.gitignore,sha256=njpg8ebsSuYCFcEdVLFxOSdF7CXp3e1DPVvZITY68xY,35
|
|
2
|
+
spells/.ruff_cache/0.8.6/17785301476771359756,sha256=gPYLG8psuOSo37IefLzTu5wgRrbKLWEfhWP_yLC4VIY,159
|
|
3
|
+
spells/.ruff_cache/CACHEDIR.TAG,sha256=WVMVbX4MVkpCclExbq8m-IcOZIOuIZf5FrYw5Pk-Ma4,43
|
|
4
|
+
spells/__init__.py,sha256=0pnh2NLn9FrNKncE9-tBsighp3e8YAsN--_ovUpgWGs,333
|
|
5
|
+
spells/cache.py,sha256=6cl0q62erR3LCANPSfxG5-J7JQfLwNdWjzlBpfiL4IE,7174
|
|
6
|
+
spells/card_data_files.py,sha256=x_oiFXw-mH-xZrSLrnE6dV8f1C2mQr5iFp9skR_YGhY,7246
|
|
7
|
+
spells/cards.py,sha256=6stFPhJOzHqvQnkSv9cDeylRa_7L9Y8sOaowiZhzz6I,4174
|
|
8
|
+
spells/columns.py,sha256=HsmOSunZHs0RMyXoSo-gAVWMNZqMYs3IrO5v5-99BqM,16678
|
|
9
|
+
spells/config.py,sha256=Nym660bbYt4ijzC5scVJ6PVOh-4vBi4hfFusBIPxcZk,257
|
|
10
|
+
spells/draft_data.py,sha256=iYsfmVRPy67GS3Epv8xakYVZQZIr0JbrZMFG0_wpLm4,21380
|
|
11
|
+
spells/enums.py,sha256=gbwfon6tQCoKDb-m4hSaHWi9slj82yqaH3qhYMVrsck,4991
|
|
12
|
+
spells/extension.py,sha256=iTR_2a_elM-8q8qxeeZFV8145i97fW2c3PfE3rN-HFg,8625
|
|
13
|
+
spells/external.py,sha256=I-f_vMx-h2kvunUlZthsauaeDn42vcRk0wvTURfImzs,11848
|
|
14
|
+
spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
|
|
15
|
+
spells/log.py,sha256=3avmg65hru8K9npKLvPp1wWWxq-hoEYDUCbxqhPkKUw,2175
|
|
16
|
+
spells/manifest.py,sha256=ExWVk17BRw615UmvrV817xwz457yfTNdNMNE_M00aEg,8338
|
|
17
|
+
spells/schema.py,sha256=DbMvV8PIThJTp0Xzp_XIorlW6JhE1ud1kWRGf5SQ4_c,6406
|
|
18
|
+
spells/utils.py,sha256=IO3brrXVvZla0LRTEB5v6NgGqZb_rYA46XtKBURGMNk,1944
|
|
19
|
+
spells_mtg-0.11.16.dist-info/METADATA,sha256=G-rx2_1SCdYXRT15YWkgTNA7kKnUdqVDFMf6qvFQSy0,47370
|
|
20
|
+
spells_mtg-0.11.16.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
21
|
+
spells_mtg-0.11.16.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
|
|
22
|
+
spells_mtg-0.11.16.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
|
|
23
|
+
spells_mtg-0.11.16.dist-info/RECORD,,
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
spells/__init__.py,sha256=0pnh2NLn9FrNKncE9-tBsighp3e8YAsN--_ovUpgWGs,333
|
|
2
|
-
spells/cache.py,sha256=7XDcltmlpagK2nkhuCCcAQVyrFMYkw9Acr7iVsLs3JU,3799
|
|
3
|
-
spells/cards.py,sha256=6stFPhJOzHqvQnkSv9cDeylRa_7L9Y8sOaowiZhzz6I,4174
|
|
4
|
-
spells/columns.py,sha256=tQPhFpG4vOMIjV111p8DK4V-d_8634xoE-csBzou_sw,18008
|
|
5
|
-
spells/config.py,sha256=_t98dc_uY3Wp3d5tsU3CJgVcWOD3wVlwMo98-tN2KRY,213
|
|
6
|
-
spells/draft_data.py,sha256=xoL82NTgTJWxZK1p-6LjxnYzkeYOr-Dc9-_T0m-WZ1Y,19946
|
|
7
|
-
spells/enums.py,sha256=MwJ9694AVoaKE22WlFjzHPcRfrU0DKI2_mrWC6_YjhE,4931
|
|
8
|
-
spells/extension.py,sha256=B-vZ9mTnb3xJVUjyqvvv8UtNsRDjcdJRCTYu9J0sulk,7507
|
|
9
|
-
spells/external.py,sha256=XZpZ0mn8_BmiF76xEiu2KblZ-dwOxmbs6vrDftl2XbI,11846
|
|
10
|
-
spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
|
|
11
|
-
spells/log.py,sha256=3avmg65hru8K9npKLvPp1wWWxq-hoEYDUCbxqhPkKUw,2175
|
|
12
|
-
spells/manifest.py,sha256=iOmhxIVMHu_Av3nd23zGjmUwLvZ2hAM0oc1ErPzZ_oE,8341
|
|
13
|
-
spells/schema.py,sha256=DbMvV8PIThJTp0Xzp_XIorlW6JhE1ud1kWRGf5SQ4_c,6406
|
|
14
|
-
spells/utils.py,sha256=IO3brrXVvZla0LRTEB5v6NgGqZb_rYA46XtKBURGMNk,1944
|
|
15
|
-
spells_mtg-0.10.4.dist-info/METADATA,sha256=0F73Ji1D0LR9u2_Zo24CBaK3U4R1QJeB0hlGIKo4Poo,47227
|
|
16
|
-
spells_mtg-0.10.4.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
|
|
17
|
-
spells_mtg-0.10.4.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
|
|
18
|
-
spells_mtg-0.10.4.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
|
|
19
|
-
spells_mtg-0.10.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|