spells-mtg 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spells-mtg might be problematic. Click here for more details.

spells/columns.py CHANGED
@@ -25,8 +25,6 @@ class ColDef:
25
25
 
26
26
 
27
27
  default_columns = [
28
- ColName.COLOR,
29
- ColName.RARITY,
30
28
  ColName.NUM_SEEN,
31
29
  ColName.ALSA,
32
30
  ColName.NUM_TAKEN,
spells/draft_data.py CHANGED
@@ -139,6 +139,7 @@ def _determine_expression(
139
139
 
140
140
  elif spec.expr is not None:
141
141
  if isinstance(spec.expr, Callable):
142
+ assert not spec.col_type == ColType.AGG, f"AGG column {col} must be a pure spells expression"
142
143
  params = seed_params(spec.expr)
143
144
  if (
144
145
  spec.col_type == ColType.PICK_SUM
@@ -306,7 +307,7 @@ def _view_select(
306
307
  is_agg_view: bool,
307
308
  ) -> DF:
308
309
  base_cols = frozenset()
309
- cdefs = [col_def_map[c] for c in view_cols]
310
+ cdefs = [col_def_map[c] for c in sorted(view_cols)]
310
311
  select = []
311
312
  for cdef in cdefs:
312
313
  if is_agg_view:
@@ -390,8 +391,10 @@ def _base_agg_df(
390
391
  )
391
392
 
392
393
  sum_col_df = base_df.select(nonname_gb + name_col_tuple + sum_cols)
394
+
395
+ grouped = sum_col_df.group_by(group_by) if group_by else sum_col_df
393
396
  join_dfs.append(
394
- sum_col_df.group_by(group_by).sum().collect(streaming=use_streaming)
397
+ grouped.sum().collect(streaming=use_streaming)
395
398
  )
396
399
 
397
400
  name_sum_cols = tuple(
@@ -420,25 +423,26 @@ def _base_agg_df(
420
423
  )
421
424
 
422
425
  if not is_name_gb:
423
- df = (
424
- unpivoted.drop("name")
425
- .group_by(nonname_gb)
426
- .sum()
427
- .collect(streaming=use_streaming)
428
- )
426
+ grouped = unpivoted.drop("name").group_by(nonname_gb) if nonname_gb else unpivoted.drop("name")
427
+ df = grouped.sum() .collect(streaming=use_streaming)
429
428
  else:
430
429
  df = unpivoted.collect(streaming=use_streaming)
431
430
 
432
431
  join_dfs.append(df)
433
432
 
434
- return functools.reduce(
435
- lambda prev, curr: prev.join(curr, on=group_by, how="outer", coalesce=True),
436
- join_dfs,
437
- )
433
+ if group_by:
434
+ joined_df = functools.reduce(
435
+ lambda prev, curr: prev.join(curr, on=group_by, how="outer", coalesce=True),
436
+ join_dfs,
437
+ )
438
+ else:
439
+ joined_df = pl.concat(join_dfs, how='horizontal')
440
+
441
+ return joined_df
438
442
 
439
443
 
440
444
  def summon(
441
- set_code: str,
445
+ set_code: str | list[str],
442
446
  columns: list[str] | None = None,
443
447
  group_by: list[str] | None = None,
444
448
  filter_spec: dict | None = None,
@@ -446,7 +450,7 @@ def summon(
446
450
  use_streaming: bool = False,
447
451
  read_cache: bool = True,
448
452
  write_cache: bool = True,
449
- card_context: pl.DataFrame | dict[str, dict] | None = None,
453
+ card_context: pl.DataFrame | dict[str, Any] | None = None,
450
454
  set_context: pl.DataFrame | dict[str, Any] | None = None,
451
455
  ) -> pl.DataFrame:
452
456
  specs = get_specs()
@@ -457,38 +461,72 @@ def summon(
457
461
  for ext in extensions:
458
462
  specs.update(ext)
459
463
 
460
- col_def_map = _hydrate_col_defs(set_code, specs, card_context, set_context)
461
- m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
462
-
463
- calc_fn = functools.partial(_base_agg_df, set_code, m, use_streaming=use_streaming)
464
- agg_df = _fetch_or_cache(
465
- calc_fn,
466
- set_code,
467
- (
468
- set_code,
469
- sorted(m.view_cols.get(View.DRAFT, set())),
470
- sorted(m.view_cols.get(View.GAME, set())),
471
- sorted(c.signature or "" for c in m.col_def_map.values()),
472
- sorted(m.base_view_group_by),
473
- filter_spec,
474
- ),
475
- read_cache=read_cache,
476
- write_cache=write_cache,
477
- )
464
+ if isinstance(set_code, str):
465
+ card_context = {set_code: card_context}
466
+ set_context = {set_code: set_context}
467
+ codes = [set_code]
468
+ else:
469
+ codes = set_code
470
+
471
+ assert codes, "Please ask for at least one set"
472
+
473
+ m = None
478
474
 
479
- if View.CARD in m.view_cols:
480
- card_cols = m.view_cols[View.CARD].union({ColName.NAME})
481
- fp = data_file_path(set_code, View.CARD)
482
- card_df = pl.read_parquet(fp)
483
- select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
484
- agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
475
+ concat_dfs = []
476
+ for code in codes:
477
+ if isinstance(card_context, pl.DataFrame):
478
+ set_card_context = card_context.filter(pl.col('expansion') == code)
479
+ elif isinstance(card_context, dict):
480
+ set_card_context = card_context[code]
481
+ else:
482
+ set_card_context = None
483
+
484
+ if isinstance(set_context, pl.DataFrame):
485
+ this_set_context = set_context.filter(pl.col('expansion') == code)
486
+ elif isinstance(set_context, dict):
487
+ this_set_context = set_context[code]
488
+ else:
489
+ this_set_context = None
490
+
491
+ col_def_map = _hydrate_col_defs(code, specs, set_card_context, this_set_context)
492
+ m = spells.manifest.create(col_def_map, columns, group_by, filter_spec)
493
+
494
+ calc_fn = functools.partial(_base_agg_df, code, m, use_streaming=use_streaming)
495
+ agg_df = _fetch_or_cache(
496
+ calc_fn,
497
+ code,
498
+ (
499
+ code,
500
+ sorted(m.view_cols.get(View.DRAFT, set())),
501
+ sorted(m.view_cols.get(View.GAME, set())),
502
+ sorted(c.signature or "" for c in m.col_def_map.values()),
503
+ sorted(m.base_view_group_by),
504
+ filter_spec,
505
+ ),
506
+ read_cache=read_cache,
507
+ write_cache=write_cache,
508
+ )
509
+
510
+ if View.CARD in m.view_cols:
511
+ card_cols = m.view_cols[View.CARD].union({ColName.NAME})
512
+ fp = data_file_path(code, View.CARD)
513
+ card_df = pl.read_parquet(fp)
514
+ select_df = _view_select(card_df, card_cols, m.col_def_map, is_agg_view=False)
515
+ agg_df = agg_df.join(select_df, on="name", how="outer", coalesce=True)
516
+ concat_dfs.append(agg_df)
485
517
 
486
- if ColName.NAME not in m.group_by:
487
- agg_df = agg_df.group_by(m.group_by).sum()
518
+ full_agg_df = pl.concat(concat_dfs, how='vertical')
519
+
520
+ assert m is not None, "What happened? We mean to use one of the sets manifest, it shouldn't matter which."
521
+
522
+ if m.group_by:
523
+ full_agg_df = full_agg_df.group_by(m.group_by).sum()
524
+ else:
525
+ full_agg_df = full_agg_df.sum()
488
526
 
489
527
  ret_cols = m.group_by + m.columns
490
528
  ret_df = (
491
- _view_select(agg_df, frozenset(ret_cols), m.col_def_map, is_agg_view=True)
529
+ _view_select(full_agg_df, frozenset(ret_cols), m.col_def_map, is_agg_view=True)
492
530
  .select(ret_cols)
493
531
  .sort(m.group_by)
494
532
  )
spells/manifest.py CHANGED
@@ -162,13 +162,10 @@ def create(
162
162
  group_by: list[str] | None = None,
163
163
  filter_spec: dict | None = None,
164
164
  ):
165
- gbs = (ColName.NAME,) if group_by is None else tuple(group_by)
165
+ gbs = (ColName.NAME, ColName.COLOR, ColName.RARITY) if group_by is None else tuple(group_by)
166
+
166
167
  if columns is None:
167
168
  cols = tuple(spells.columns.default_columns)
168
- if ColName.NAME not in gbs:
169
- cols = tuple(
170
- c for c in cols if col_def_map[c].col_type != ColType.CARD_ATTR
171
- )
172
169
  else:
173
170
  cols = tuple(columns)
174
171
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spells-mtg
3
- Version: 0.6.1
3
+ Version: 0.7.0
4
4
  Summary: analysis of 17Lands.com public datasets
5
5
  Author-Email: Joel Barnes <oelarnes@gmail.com>
6
6
  License: MIT
@@ -156,9 +156,9 @@ Spells is not affiliated with 17Lands. Please review the [Usage Guidelines](http
156
156
  ... 'deq_base': ColSpec(
157
157
  ... col_type=ColType.AGG,
158
158
  ... expr=(pl.col('gp_wr_excess') + 0.03 * (1 - pl.col('ata')/14).pow(2)) * pl.col('pct_gp'),
159
- ... }
159
+ ... )
160
160
  ... }
161
- >>> spells.summon('DSK', columns=['deq_base', 'color', 'rarity'], filter_spec={'player_cohort': 'Top'}, extensions=ext)
161
+ >>> spells.summon('DSK', columns=['deq_base'], group_by=["name", "color", "rarity"], filter_spec={'player_cohort': 'Top'}, extensions=ext)
162
162
  ... .filter(pl.col('deq_base').is_finite())
163
163
  ... .filter(pl.col('rarity').is_in(['common', 'uncommon']))
164
164
  ... .sort('deq_base', descending=True)
@@ -304,25 +304,26 @@ summon(
304
304
 
305
305
  #### parameters
306
306
 
307
- - columns: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, `CARD_ATTR`, and `AGG`. Min/Max/Unique
307
+ - `columns`: a list of string or `ColName` values to select as non-grouped columns. Valid `ColTypes` are `PICK_SUM`, `NAME_SUM`, `GAME_SUM`, and `AGG`. Min/Max/Unique
308
308
  aggregations of non-numeric (or numeric) data types are not supported. If `None`, use a set of columns modeled on the commonly used values on 17Lands.com/card_data.
309
309
 
310
- - group_by: a list of string or `ColName` values to display as grouped columns. Valid `ColTypes` are `GROUP_BY` and `CARD_ATTR`. By default, group by "name" (card name).
310
+ - `group_by`: a list of string or `ColName` values to display as grouped columns. Valid `ColTypes` are `GROUP_BY` and `CARD_ATTR`. By default, group by "name" (card name). For contextual card attrs, include
311
+ in `group_by`, even when grouping by name.
311
312
 
312
- - filter_spec: a dictionary specifying a filter, using a small number of paradigms. Columns used must be in each base view ("draft" and "game") that the `columns` and `group_by` columns depend on, so
313
- `AGG` and `CARD_ATTR` columns are not valid. Functions of card attributes in the base views can be filtered on, see the documentation for `expr` for details. `NAME_SUM` columns are also not supported. Derived columns are supported. No filter is applied by default. Yes, I should rewrite it to use the mongo query language. The specification is best understood with examples:
313
+ - `filter_spec`: a dictionary specifying a filter, using a small number of paradigms. Columns used must be in each base view ("draft" and "game") that the `columns` and `group_by` columns depend on, so
314
+ `AGG` and `CARD_ATTR` columns are not valid. Functions of card attributes in the base views can be filtered on using `card_context`, see the documentation for `expr` for details. `NAME_SUM` columns are also not supported. Derived columns are supported. No filter is applied by default. Yes, I should rewrite it to use the mongo query language. The specification is best understood with examples:
314
315
 
315
316
  - `{'player_cohort': 'Top'}` "player_cohort" value equals "Top".
316
317
  - `{'lhs': 'player_cohort', 'op': 'in', 'rhs': ['Top', 'Middle']}` "player_cohort" value is either "Top" or "Middle". Supported values for `op` are `<`, `<=`, `>`, `>=`, `!=`, `=`, `in` and `nin`.
317
318
  - `{'$and': [{'lhs': 'draft_date', 'op': '>', 'rhs': datetime.date(2024, 10, 7)}, {'rank': 'Mythic'}]}` Drafts after October 7 by Mythic-ranked players. Supported values for query construction keys are `$and`, `$or`, and `$not`.
318
319
 
319
- - extensions: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the definitions built-in columns described below.
320
+ - `extensions`: a dict of `spells.columns.ColSpec` objects, keyed by name, which are appended to the definitions built-in columns described below.
320
321
 
321
- - card_context: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting cards metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
322
+ - `card_context`: Typically a Polars DataFrame containing a `"name"` column with one row for each card name in the set, such that any usages of `card_context[name][key]` in column specs reference the column `key`. Typically this will be the output of a call to `summon` requesting cards metrics like `GP_WR`. Can also be a dictionary having the necessary form for the same access pattern.
322
323
 
323
- - set_context: Typically, a dict of abitrary values to use in column definitions, for example, you could provide the quick draft release date and have a column that depended on that.
324
+ - `set_context`: Typically, a dict of arbitrary values to use in column definitions, for example, you could provide the quick draft release date and have a column that depended on that. You can also provide a one-row dataframe and access the column values.
324
325
 
325
- - read_cache/write_cache: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
326
+ - `read_cache`/`write_cache`: Use the local file system to cache and retrieve aggregations to minimize expensive reads of the large datasets. You shouldn't need to touch these arguments unless you are debugging.
326
327
 
327
328
  ### Enums
328
329
 
@@ -356,6 +357,7 @@ summing over groups, and can include polars Expression aggregations. Arbitrarily
356
357
  - For `NAME_SUM` columns, `expr` must be a function of `name` which will result in a list of expressions mapped over all card names.
357
358
  - `PICK_SUM` columns can also be functions on `name`, in which case the value will be a function of the value of the `PICK` field.
358
359
  - `AGG` columns that depend on `NAME_SUM` columns reference the prefix (`cdef.name`) only, since the unpivot has occurred prior to selection.
360
+ - `AGG` columns must not be functions, since they may be applied to the aggregation of several sets' data. (And they shouldn't need this anyway)
359
361
  - The possible arguments to `expr`, in addition to `name` when appropriate, are as follows:
360
362
  - `names`: An array of all card names in the canonical order.
361
363
  - `card_context`: A dictionary keyed by card name which contains card dict objects with all `CARD_ATTR` values, including custom extensions and metric columns passed by the `card_context` argument to `summon`. See example notebooks for more details.
@@ -1,16 +1,16 @@
1
1
  spells/__init__.py,sha256=iyX74sLFJIQ_5ShZ7KDOeBlbYLm21ibWxwyzCGnKdg8,169
2
2
  spells/cache.py,sha256=4v7h8D3TtaT0R_EdiRNhdcQrXzdH_CukezO6oAXvNEY,2956
3
3
  spells/cards.py,sha256=EOXAB_F2yedjf6KquCERCIHl0TSIJIoOe1jv8g4JzOc,3601
4
- spells/columns.py,sha256=SdNw51lA1ox5rD2M7F8M2yqo-m0Dal5sKzS4Rp4q9MQ,18092
5
- spells/draft_data.py,sha256=7b9SGfFtm7IBHeawgFAlhGSSPcMNJ1waeCAtSlZdhM8,15415
4
+ spells/columns.py,sha256=kWeDofGL0bYcxq4OOfUwkU4Ty3fm0xux6R9774v9HVQ,18053
5
+ spells/draft_data.py,sha256=evVzhvo_luKrYzw27QglpSJL6KvuU4ehtnRxyBa86FQ,16873
6
6
  spells/enums.py,sha256=DL7e1xDEvrsTMbA7vJB_Et1DaYkyO4rIEzvIQDz3MZk,4871
7
7
  spells/extension.py,sha256=CcKRdDfFrt2L-53-m2tQrurX-LbIkaRokOD8CuupzW0,6538
8
8
  spells/external.py,sha256=pRDrCrRq7W_vqUQdnp0Q63oggRgnLnmqvi7yM6HmSbo,11270
9
9
  spells/filter.py,sha256=J-YTOOAzOQpvIX29tviYL04RVoOUlfsbjBXoQBDCEdQ,3380
10
- spells/manifest.py,sha256=GZmWBg5Uw7EyCoLq7GZJV6l0GfMCp9Eorgaj9vWfBj0,8264
10
+ spells/manifest.py,sha256=4_jiPgRyjNzFQJzaJgVcClFxYkXK1uRrB42EOg0rfXQ,8139
11
11
  spells/schema.py,sha256=z8Qn2SiHG4T6YfPsz8xHLGMjU_Ofm76-Vrquh3b9B64,6422
12
- spells_mtg-0.6.1.dist-info/METADATA,sha256=FZQIxSln7kbg-yAjw2arTTvFT7wRD82dCtMKyF-2-W0,46314
13
- spells_mtg-0.6.1.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
14
- spells_mtg-0.6.1.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
15
- spells_mtg-0.6.1.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
16
- spells_mtg-0.6.1.dist-info/RECORD,,
12
+ spells_mtg-0.7.0.dist-info/METADATA,sha256=25hIjXGOZOSEJGkl2XA4Z0PzKbeelcRQuwDthvkTZRo,46656
13
+ spells_mtg-0.7.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
14
+ spells_mtg-0.7.0.dist-info/entry_points.txt,sha256=a9Y1omdl9MdnKuIj3aOodgrp-zZII6OCdvqwgP6BFvI,63
15
+ spells_mtg-0.7.0.dist-info/licenses/LICENSE,sha256=tS54XYbJSgmq5zuHhbsQGbNQLJPVgXqhF5nu2CSRMig,1068
16
+ spells_mtg-0.7.0.dist-info/RECORD,,