zipline_polygon_bundle 0.1.8__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/PKG-INFO +7 -5
  2. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/README.md +3 -0
  3. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/pyproject.toml +10 -7
  4. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/__init__.py +12 -11
  5. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/adjustments.py +27 -32
  6. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/bundle.py +172 -200
  7. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/compute_signals.py +261 -0
  8. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/concat_all_aggs.py +129 -44
  9. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/config.py +90 -32
  10. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/nyse_all_hours_calendar.py +25 -0
  11. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/tickers_and_names.py +4 -1
  12. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/trades.py +533 -0
  13. zipline_polygon_bundle-0.1.8/zipline_polygon_bundle/trades.py +0 -707
  14. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/LICENSE +0 -0
  15. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/concat_all_aggs_partitioned.py +0 -0
  16. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/polygon_file_reader.py +0 -0
  17. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/process_all_aggs.py +0 -0
  18. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/quotes.py +0 -0
  19. {zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/split_aggs_by_ticker.py +0 -0
{zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: zipline_polygon_bundle
- Version: 0.1.8
+ Version: 0.2.0
  Summary: A zipline-reloaded data provider bundle for Polygon.io
  License: GNU AFFERO GENERAL PUBLIC LICENSE
  Version 3, 19 November 2007
@@ -666,22 +666,21 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
  Keywords: zipline,data-bundle,finance
  Author: Jim White
  Author-email: jim@fovi.com
- Requires-Python: >=3.9,<4.0
+ Requires-Python: >=3.10,<4.0
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
  Classifier: Operating System :: OS Independent
  Requires-Dist: bcolz-zipline (>=1.2.11)
+ Requires-Dist: filelock (>=3.16.0)
  Requires-Dist: fsspec (>=2024.10)
  Requires-Dist: numpy (<2)
  Requires-Dist: pandas (>=2.2,<3)
- Requires-Dist: pandas-market-calendars (>=4.4.2)
- Requires-Dist: pandas_ta (>=0.3)
  Requires-Dist: polygon-api-client (>=1.14.2)
  Requires-Dist: pyarrow (>=18.1.0,<19)
  Requires-Dist: pytz (>=2018.5)
  Requires-Dist: requests (>=2.9.1)
  Requires-Dist: toolz (>=0.8.2)
- Requires-Dist: zipline-reloaded (>=3.1)
+ Requires-Dist: zipline-arrow (>=3.2)
  Project-URL: Repository, https://github.com/fovi-llc/zipline-polygon-bundle
  Description-Content-Type: text/markdown

@@ -745,6 +744,9 @@ register_polygon_equities_bundle(
  ## Install the Zipline Polygon.io Bundle PyPi package and check that it works.
  Listing bundles will show if everything is working correctly.
  ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
  pip install zipline_polygon_bundle
  zipline -e extension.py bundles
  ```
{zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/README.md
@@ -58,6 +58,9 @@ register_polygon_equities_bundle(
  ## Install the Zipline Polygon.io Bundle PyPi package and check that it works.
  Listing bundles will show if everything is working correctly.
  ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
  pip install zipline_polygon_bundle
  zipline -e extension.py bundles
  ```
{zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = 'zipline_polygon_bundle'
- version = '0.1.8'
+ version = '0.2.0'
  description = 'A zipline-reloaded data provider bundle for Polygon.io'
  authors = [
  { name = 'Jim White', email = 'jim@fovi.com' },
@@ -19,7 +19,7 @@ Repository = 'https://github.com/fovi-llc/zipline-polygon-bundle'

  [tool.poetry]
  name = 'zipline-polygon-bundle'
- version = '0.1.8'
+ version = '0.2.0'
  description = 'A zipline-reloaded data provider bundle for Polygon.io'
  authors = ['Jim White <jim@fovi.com>']
  license = 'AGPL-3.0'
@@ -33,25 +33,28 @@ classifiers = [

  [tool.poetry.dependencies]
  fsspec = ">=2024.10"
- python = ">=3.9,<4.0"
+ filelock = ">=3.16.0"
+ python = ">=3.10,<4.0"
  polygon-api-client = ">=1.14.2"
  pandas = ">=2.2,<3"
- pandas-market-calendars = ">=4.4.2"
- pandas_ta = ">=0.3"
+ # pandas-market-calendars = ">=4.4.2"
+ # pandas-ta install doesn't work with poetry for some reason.
+ # It is used in compute_signals.py which we're not using yet.
+ # pandas-ta = ">=0.3"
  pytz = ">=2018.5"
  requests = ">=2.9.1"
  bcolz-zipline = ">=1.2.11"
  pyarrow = ">=18.1.0,<19"
  numpy = "<2"
  toolz = ">=0.8.2"
- zipline-reloaded = ">=3.1"
+ zipline-arrow = { version = ">=3.2" }

  [tool.poetry.dev-dependencies]
  pytest = "*"

  [build-system]
  build-backend = "poetry.core.masonry.api"
- requires = ["poetry_core>=1.0.0"]
+ requires = ["poetry_core>=2.1.0"]

  [tool.pytest.ini_options]
  # https://docs.pytest.org/en/stable/how-to/capture-warnings.html#controlling-warnings
{zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/__init__.py
@@ -1,16 +1,17 @@
  from .bundle import (
  register_polygon_equities_bundle,
  symbol_to_upper,
- polygon_equities_bundle_day,
- polygon_equities_bundle_minute,
+ ingest_polygon_equities_bundle
  )

  from .config import PolygonConfig
+ from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
  from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
  from .adjustments import load_splits, load_dividends, load_conditions
- from .trades import trades_schema, trades_dataset, cast_trades, date_to_path
- from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_all_to_custom_aggs
- from .trades import get_custom_aggs_dates, generate_csv_trades_tables, compute_signals_for_all_custom_aggs
+ from .trades import trades_schema, trades_dataset, cast_trades
+ from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
+ from .trades import get_aggs_dates, generate_csv_trades_tables
+ # from .compute_signals import compute_signals_for_all_custom_aggs
  from .quotes import quotes_schema, quotes_dataset, cast_quotes
  # from .tickers_and_names import load_all_tickers, merge_tickers, ticker_names_from_merged_tickers, get_ticker_universe
  from .tickers_and_names import PolygonAssets, get_ticker_universe
@@ -18,9 +19,10 @@ from .tickers_and_names import PolygonAssets, get_ticker_universe

  __all__ = [
  "register_polygon_equities_bundle",
+ "register_nyse_all_hours_calendar",
+ "NYSE_ALL_HOURS",
  "symbol_to_upper",
- "polygon_equities_bundle_day",
- "polygon_equities_bundle_minute",
+ "ingest_polygon_equities_bundle",
  "PolygonConfig",
  "concat_all_aggs_from_csv",
  "generate_csv_agg_tables",
@@ -30,14 +32,13 @@ __all__ = [
  "trades_schema",
  "trades_dataset",
  "cast_trades",
- "date_to_path",
- "get_custom_aggs_dates",
+ "get_aggs_dates",
  "generate_csv_trades_tables",
  "custom_aggs_partitioning",
  "custom_aggs_schema",
  "trades_to_custom_aggs",
- "convert_all_to_custom_aggs",
- "compute_signals_for_all_custom_aggs",
+ "convert_trades_to_custom_aggs",
+ # "compute_signals_for_all_custom_aggs",
  "quotes_schema",
  "quotes_dataset",
  "cast_quotes",
{zipline_polygon_bundle-0.1.8 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/adjustments.py
@@ -10,19 +10,19 @@ from urllib3 import HTTPResponse


  def load_polygon_splits(
- config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+ config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
  ) -> pd.DataFrame:
  # N.B. If the schema changes then the filename should change. We're on v3 now.
  splits_path = config.api_cache_path(
- start_date=first_start_end, end_date=last_end_date, filename="list_splits"
+ first_day=first_day, last_day=last_day, filename="list_splits"
  )
- expected_split_count = (last_end_date - first_start_end).days * 3
+ expected_split_count = (last_day - first_day).days * 3
  if not os.path.exists(splits_path):
  client = polygon.RESTClient(api_key=config.api_key)
  splits = client.list_splits(
  limit=1000,
- execution_date_gte=first_start_end,
- execution_date_lt=last_end_date + datetime.timedelta(days=1),
+ execution_date_gte=first_day.date(),
+ execution_date_lt=last_day.date() + datetime.timedelta(days=1),
  )
  if splits is HTTPResponse:
  raise ValueError(f"Polygon.list_splits bad HTTPResponse: {splits}")
@@ -32,7 +32,7 @@ def load_polygon_splits(
  splits.to_parquet(splits_path)
  if len(splits) < expected_split_count:
  logging.warning(
- f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+ f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
  "This is probably fine if your historical range is short."
  )
  # We will always load from the file to avoid any chance of weird errors.
@@ -41,7 +41,7 @@ def load_polygon_splits(
  print(f"Loaded {len(splits)=} from {splits_path}")
  if len(splits) < expected_split_count:
  logging.warning(
- f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+ f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
  "This is probably fine if your historical range is short."
  )
  return splits
@@ -50,11 +50,11 @@ def load_polygon_splits(

  def load_splits(
  config: PolygonConfig,
- first_start_end: datetime.date,
- last_end_date: datetime.date,
+ first_day: pd.Timestamp,
+ last_day: pd.Timestamp,
  ticker_to_sid: dict[str, int],
  ) -> pd.DataFrame:
- splits = load_polygon_splits(config, first_start_end, last_end_date)
+ splits = load_polygon_splits(config, first_day=first_day, last_day=last_day)
  splits["sid"] = splits["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
  splits.dropna(inplace=True)
  splits["sid"] = splits["sid"].astype("int64")
@@ -70,18 +70,18 @@ def load_splits(


  def load_polygon_dividends(
- config: PolygonConfig, first_start_date: datetime.date, last_end_date: datetime.date
+ config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
  ) -> pd.DataFrame:
  # N.B. If the schema changes then the filename should change. We're on v3 now.
  dividends_path = config.api_cache_path(
- start_date=first_start_date, end_date=last_end_date, filename="list_dividends"
+ first_day=first_day, last_day=last_day, filename="list_dividends"
  )
  if not os.path.exists(dividends_path):
  client = polygon.RESTClient(api_key=config.api_key)
  dividends = client.list_dividends(
  limit=1000,
- record_date_gte=first_start_date,
- pay_date_lt=last_end_date + datetime.timedelta(days=1),
+ record_date_gte=first_day.date(),
+ pay_date_lt=last_day.date() + datetime.timedelta(days=1),
  )
  if dividends is HTTPResponse:
  raise ValueError(f"Polygon.list_dividends bad HTTPResponse: {dividends}")
@@ -104,35 +104,30 @@ def load_polygon_dividends(


  def load_chunked_polygon_dividends(
- config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+ config: PolygonConfig, first_day: pd.Timestamp,
+ last_day: pd.Timestamp
  ) -> pd.DataFrame:
  dividends_list = []
- next_start_end = first_start_end
- while next_start_end < last_end_date:
+ next_start_end = first_day
+ while next_start_end < last_day:
  # We want at most a month of dividends at a time. They should end on the last day of the month.
- # So the next_end_date is the day before the first day of the next month.
- first_of_next_month = datetime.date(
- next_start_end.year + (next_start_end.month // 12),
- (next_start_end.month % 12) + 1,
- 1,
- )
- next_end_date = first_of_next_month - datetime.timedelta(days=1)
- if next_end_date > last_end_date:
- next_end_date = last_end_date
+ next_end_date = next_start_end + pd.offsets.MonthEnd()
+ if next_end_date > last_day:
+ next_end_date = last_day
  dividends_list.append(
- load_polygon_dividends(config, next_start_end, next_end_date)
+ load_polygon_dividends(config, first_day=next_start_end, last_day=next_end_date)
  )
- next_start_end = next_end_date + datetime.timedelta(days=1)
+ next_start_end = next_end_date + pd.Timedelta(days=1)
  return pd.concat(dividends_list)


  def load_dividends(
  config: PolygonConfig,
- first_start_end: datetime.date,
- last_end_date: datetime.date,
+ first_day: pd.Timestamp,
+ last_day: pd.Timestamp,
  ticker_to_sid: dict[str, int],
  ) -> pd.DataFrame:
- dividends = load_chunked_polygon_dividends(config, first_start_end, last_end_date)
+ dividends = load_chunked_polygon_dividends(config, first_day=first_day, last_day=last_day)
  dividends["sid"] = dividends["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
  dividends.dropna(how="any", inplace=True)
  dividends["sid"] = dividends["sid"].astype("int64")
@@ -159,7 +154,7 @@ def load_conditions(config: PolygonConfig) -> pd.DataFrame:
  # The API doesn't use dates for the condition codes but this is a way to provide control over caching.
  # Main thing is to get the current conditions list but we don't want to call more than once a day.
  conditions_path = config.api_cache_path(
- start_date=config.start_timestamp.date(), end_date=config.end_timestamp.date(), filename="conditions"
+ first_day=config.start_timestamp, last_day=config.end_timestamp, filename="conditions"
  )
  expected_conditions_count = 100
  if not os.path.exists(conditions_path):