zipline_polygon_bundle 0.1.7.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/PKG-INFO +10 -5
  2. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/README.md +3 -0
  3. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/pyproject.toml +13 -7
  4. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/__init__.py +50 -0
  5. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/adjustments.py +60 -31
  6. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/bundle.py +202 -208
  7. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/compute_signals.py +261 -0
  8. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/concat_all_aggs.py +140 -70
  9. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/concat_all_aggs_partitioned.py +6 -6
  10. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/config.py +244 -0
  11. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/nyse_all_hours_calendar.py +25 -0
  12. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/polygon_file_reader.py +1 -1
  13. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/process_all_aggs.py +2 -2
  14. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/quotes.py +101 -0
  15. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/tickers_and_names.py +5 -38
  16. zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/trades.py +533 -0
  17. zipline_polygon_bundle-0.1.7/zipline_polygon_bundle/__init__.py +0 -22
  18. zipline_polygon_bundle-0.1.7/zipline_polygon_bundle/config.py +0 -113
  19. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/LICENSE +0 -0
  20. {zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/split_aggs_by_ticker.py +0 -0
{zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: zipline_polygon_bundle
- Version: 0.1.7
+ Version: 0.2.0
  Summary: A zipline-reloaded data provider bundle for Polygon.io
  License: GNU AFFERO GENERAL PUBLIC LICENSE
  Version 3, 19 November 2007
@@ -666,19 +666,21 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
  Keywords: zipline,data-bundle,finance
  Author: Jim White
  Author-email: jim@fovi.com
- Requires-Python: >=3.9,<4.0
+ Requires-Python: >=3.10,<4.0
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
  Classifier: Operating System :: OS Independent
  Requires-Dist: bcolz-zipline (>=1.2.11)
+ Requires-Dist: filelock (>=3.16.0)
+ Requires-Dist: fsspec (>=2024.10)
  Requires-Dist: numpy (<2)
  Requires-Dist: pandas (>=2.2,<3)
- Requires-Dist: polygon-api-client
- Requires-Dist: pyarrow
+ Requires-Dist: polygon-api-client (>=1.14.2)
+ Requires-Dist: pyarrow (>=18.1.0,<19)
  Requires-Dist: pytz (>=2018.5)
  Requires-Dist: requests (>=2.9.1)
  Requires-Dist: toolz (>=0.8.2)
- Requires-Dist: zipline-reloaded (>=3.1)
+ Requires-Dist: zipline-arrow (>=3.2)
  Project-URL: Repository, https://github.com/fovi-llc/zipline-polygon-bundle
  Description-Content-Type: text/markdown
 
@@ -742,6 +744,9 @@ register_polygon_equities_bundle(
  ## Install the Zipline Polygon.io Bundle PyPi package and check that it works.
  Listing bundles will show if everything is working correctly.
  ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
  pip install zipline_polygon_bundle
  zipline -e extension.py bundles
  ```
{zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/README.md
@@ -58,6 +58,9 @@ register_polygon_equities_bundle(
  ## Install the Zipline Polygon.io Bundle PyPi package and check that it works.
  Listing bundles will show if everything is working correctly.
  ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
  pip install zipline_polygon_bundle
  zipline -e extension.py bundles
  ```
{zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = 'zipline_polygon_bundle'
- version = '0.1.7'
+ version = '0.2.0'
  description = 'A zipline-reloaded data provider bundle for Polygon.io'
  authors = [
      { name = 'Jim White', email = 'jim@fovi.com' },
@@ -19,7 +19,7 @@ Repository = 'https://github.com/fovi-llc/zipline-polygon-bundle'
 
  [tool.poetry]
  name = 'zipline-polygon-bundle'
- version = '0.1.7'
+ version = '0.2.0'
  description = 'A zipline-reloaded data provider bundle for Polygon.io'
  authors = ['Jim White <jim@fovi.com>']
  license = 'AGPL-3.0'
@@ -32,23 +32,29 @@ classifiers = [
  ]
 
  [tool.poetry.dependencies]
- python = ">=3.9,<4.0"
- polygon-api-client = "*"
+ fsspec = ">=2024.10"
+ filelock = ">=3.16.0"
+ python = ">=3.10,<4.0"
+ polygon-api-client = ">=1.14.2"
  pandas = ">=2.2,<3"
+ # pandas-market-calendars = ">=4.4.2"
+ # pandas-ta install doesn't work with poetry for some reason.
+ # It is used in compute_signals.py which we're not using yet.
+ # pandas-ta = ">=0.3"
  pytz = ">=2018.5"
  requests = ">=2.9.1"
  bcolz-zipline = ">=1.2.11"
- pyarrow = "*"
+ pyarrow = ">=18.1.0,<19"
  numpy = "<2"
  toolz = ">=0.8.2"
- zipline-reloaded = ">=3.1"
+ zipline-arrow = { version = ">=3.2" }
 
  [tool.poetry.dev-dependencies]
  pytest = "*"
 
  [build-system]
  build-backend = "poetry.core.masonry.api"
- requires = ["poetry_core>=1.0.0"]
+ requires = ["poetry_core>=2.1.0"]
 
  [tool.pytest.ini_options]
  # https://docs.pytest.org/en/stable/how-to/capture-warnings.html#controlling-warnings
zipline_polygon_bundle-0.2.0/zipline_polygon_bundle/__init__.py
@@ -0,0 +1,50 @@
+ from .bundle import (
+     register_polygon_equities_bundle,
+     symbol_to_upper,
+     ingest_polygon_equities_bundle
+ )
+
+ from .config import PolygonConfig
+ from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
+ from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
+ from .adjustments import load_splits, load_dividends, load_conditions
+ from .trades import trades_schema, trades_dataset, cast_trades
+ from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
+ from .trades import get_aggs_dates, generate_csv_trades_tables
+ # from .compute_signals import compute_signals_for_all_custom_aggs
+ from .quotes import quotes_schema, quotes_dataset, cast_quotes
+ # from .tickers_and_names import load_all_tickers, merge_tickers, ticker_names_from_merged_tickers, get_ticker_universe
+ from .tickers_and_names import PolygonAssets, get_ticker_universe
+
+
+ __all__ = [
+     "register_polygon_equities_bundle",
+     "register_nyse_all_hours_calendar",
+     "NYSE_ALL_HOURS",
+     "symbol_to_upper",
+     "ingest_polygon_equities_bundle",
+     "PolygonConfig",
+     "concat_all_aggs_from_csv",
+     "generate_csv_agg_tables",
+     "load_splits",
+     "load_dividends",
+     "load_conditions",
+     "trades_schema",
+     "trades_dataset",
+     "cast_trades",
+     "get_aggs_dates",
+     "generate_csv_trades_tables",
+     "custom_aggs_partitioning",
+     "custom_aggs_schema",
+     "trades_to_custom_aggs",
+     "convert_trades_to_custom_aggs",
+     # "compute_signals_for_all_custom_aggs",
+     "quotes_schema",
+     "quotes_dataset",
+     "cast_quotes",
+     # "load_all_tickers",
+     # "merge_tickers",
+     # "ticker_names_from_merged_tickers",
+     "PolygonAssets",
+     "get_ticker_universe",
+ ]
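
The new top-level `__init__.py` above exports the extended-hours calendar helpers added in 0.2.0 alongside the bundle registration functions. As a rough orientation only (not taken from this diff), an `extension.py` might register the calendar before the bundle; the bundle name and any further arguments to `register_polygon_equities_bundle` are assumptions here and should follow the package README.

```python
# extension.py -- illustrative sketch only; the bundle name and any other
# arguments to register_polygon_equities_bundle are assumptions, not taken
# from this diff.
from zipline_polygon_bundle import (
    register_nyse_all_hours_calendar,  # new export in 0.2.0
    register_polygon_equities_bundle,
)

# Register the NYSE_ALL_HOURS calendar added in 0.2.0 before registering a
# bundle that may refer to it.
register_nyse_all_hours_calendar()

# Register the Polygon.io equities bundle under a name zipline can ingest.
register_polygon_equities_bundle("polygon")
```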
{zipline_polygon_bundle-0.1.7 → zipline_polygon_bundle-0.2.0}/zipline_polygon_bundle/adjustments.py
@@ -10,19 +10,19 @@ from urllib3 import HTTPResponse
 
 
  def load_polygon_splits(
-     config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+     config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
  ) -> pd.DataFrame:
      # N.B. If the schema changes then the filename should change. We're on v3 now.
      splits_path = config.api_cache_path(
-         start_date=first_start_end, end_date=last_end_date, filename="list_splits"
+         first_day=first_day, last_day=last_day, filename="list_splits"
      )
-     expected_split_count = (last_end_date - first_start_end).days * 3
+     expected_split_count = (last_day - first_day).days * 3
      if not os.path.exists(splits_path):
          client = polygon.RESTClient(api_key=config.api_key)
          splits = client.list_splits(
              limit=1000,
-             execution_date_gte=first_start_end,
-             execution_date_lt=last_end_date + datetime.timedelta(days=1),
+             execution_date_gte=first_day.date(),
+             execution_date_lt=last_day.date() + datetime.timedelta(days=1),
          )
          if splits is HTTPResponse:
              raise ValueError(f"Polygon.list_splits bad HTTPResponse: {splits}")
@@ -32,7 +32,7 @@ def load_polygon_splits(
          splits.to_parquet(splits_path)
          if len(splits) < expected_split_count:
              logging.warning(
-                 f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+                 f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
                  "This is probably fine if your historical range is short."
              )
      # We will always load from the file to avoid any chance of weird errors.
@@ -41,7 +41,7 @@ def load_polygon_splits(
          print(f"Loaded {len(splits)=} from {splits_path}")
          if len(splits) < expected_split_count:
              logging.warning(
-                 f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+                 f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
                  "This is probably fine if your historical range is short."
              )
          return splits
@@ -50,11 +50,11 @@ def load_polygon_splits(
 
  def load_splits(
      config: PolygonConfig,
-     first_start_end: datetime.date,
-     last_end_date: datetime.date,
+     first_day: pd.Timestamp,
+     last_day: pd.Timestamp,
      ticker_to_sid: dict[str, int],
  ) -> pd.DataFrame:
-     splits = load_polygon_splits(config, first_start_end, last_end_date)
+     splits = load_polygon_splits(config, first_day=first_day, last_day=last_day)
      splits["sid"] = splits["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
      splits.dropna(inplace=True)
      splits["sid"] = splits["sid"].astype("int64")
@@ -70,18 +70,18 @@ def load_splits(
 
 
  def load_polygon_dividends(
-     config: PolygonConfig, first_start_date: datetime.date, last_end_date: datetime.date
+     config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
  ) -> pd.DataFrame:
      # N.B. If the schema changes then the filename should change. We're on v3 now.
      dividends_path = config.api_cache_path(
-         start_date=first_start_date, end_date=last_end_date, filename="list_dividends"
+         first_day=first_day, last_day=last_day, filename="list_dividends"
      )
      if not os.path.exists(dividends_path):
          client = polygon.RESTClient(api_key=config.api_key)
          dividends = client.list_dividends(
              limit=1000,
-             record_date_gte=first_start_date,
-             pay_date_lt=last_end_date + datetime.timedelta(days=1),
+             record_date_gte=first_day.date(),
+             pay_date_lt=last_day.date() + datetime.timedelta(days=1),
          )
          if dividends is HTTPResponse:
              raise ValueError(f"Polygon.list_dividends bad HTTPResponse: {dividends}")
@@ -104,35 +104,30 @@ def load_polygon_dividends(
 
 
  def load_chunked_polygon_dividends(
-     config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+     config: PolygonConfig, first_day: pd.Timestamp,
+     last_day: pd.Timestamp
  ) -> pd.DataFrame:
      dividends_list = []
-     next_start_end = first_start_end
-     while next_start_end < last_end_date:
+     next_start_end = first_day
+     while next_start_end < last_day:
          # We want at most a month of dividends at a time. They should end on the last day of the month.
-         # So the next_end_date is the day before the first day of the next month.
-         first_of_next_month = datetime.date(
-             next_start_end.year + (next_start_end.month // 12),
-             (next_start_end.month % 12) + 1,
-             1,
-         )
-         next_end_date = first_of_next_month - datetime.timedelta(days=1)
-         if next_end_date > last_end_date:
-             next_end_date = last_end_date
+         next_end_date = next_start_end + pd.offsets.MonthEnd()
+         if next_end_date > last_day:
+             next_end_date = last_day
          dividends_list.append(
-             load_polygon_dividends(config, next_start_end, next_end_date)
+             load_polygon_dividends(config, first_day=next_start_end, last_day=next_end_date)
          )
-         next_start_end = next_end_date + datetime.timedelta(days=1)
+         next_start_end = next_end_date + pd.Timedelta(days=1)
      return pd.concat(dividends_list)
 
 
  def load_dividends(
      config: PolygonConfig,
-     first_start_end: datetime.date,
-     last_end_date: datetime.date,
+     first_day: pd.Timestamp,
+     last_day: pd.Timestamp,
      ticker_to_sid: dict[str, int],
  ) -> pd.DataFrame:
-     dividends = load_chunked_polygon_dividends(config, first_start_end, last_end_date)
+     dividends = load_chunked_polygon_dividends(config, first_day=first_day, last_day=last_day)
      dividends["sid"] = dividends["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
      dividends.dropna(how="any", inplace=True)
      dividends["sid"] = dividends["sid"].astype("int64")
@@ -153,3 +148,37 @@ def load_dividends(
      return dividends[
          ["sid", "ex_date", "declared_date", "record_date", "pay_date", "amount"]
      ]
+
+
+ def load_conditions(config: PolygonConfig) -> pd.DataFrame:
+     # The API doesn't use dates for the condition codes but this is a way to provide control over caching.
+     # Main thing is to get the current conditions list but we don't want to call more than once a day.
+     conditions_path = config.api_cache_path(
+         first_day=config.start_timestamp, last_day=config.end_timestamp, filename="conditions"
+     )
+     expected_conditions_count = 100
+     if not os.path.exists(conditions_path):
+         client = polygon.RESTClient(api_key=config.api_key)
+         conditions_response = client.list_conditions(
+             limit=1000,
+         )
+         if conditions_response is HTTPResponse:
+             raise ValueError(f"Polygon.list_splits bad HTTPResponse: {conditions_response}")
+         conditions = pd.DataFrame(conditions_response)
+         print(f"Got {len(conditions)=} from Polygon list_conditions.")
+         os.makedirs(os.path.dirname(conditions_path), exist_ok=True)
+         conditions.to_parquet(conditions_path)
+         if len(conditions) < expected_conditions_count:
+             logging.warning(
+                 f"Only got {len(conditions)=} from Polygon list_splits (expected {expected_conditions_count=}). "
+             )
+     # We will always load from the file to avoid any chance of weird errors.
+     if os.path.exists(conditions_path):
+         conditions = pd.read_parquet(conditions_path)
+         print(f"Loaded {len(conditions)=} from {conditions_path}")
+         if len(conditions) < expected_conditions_count:
+             logging.warning(
+                 f"Only got {len(conditions)=} from cached conditions (expected {expected_conditions_count=}). "
+             )
+         return conditions
+     raise ValueError(f"Failed to load splits from {conditions_path}")