zipline_polygon_bundle 0.2.0.dev1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/PKG-INFO +90 -8
  2. zipline_polygon_bundle-0.2.3/README.md +195 -0
  3. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/pyproject.toml +24 -31
  4. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/__init__.py +7 -9
  5. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/adjustments.py +27 -32
  6. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/bundle.py +157 -312
  7. zipline_polygon_bundle-0.2.3/zipline_polygon_bundle/compute_signals.py +261 -0
  8. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/concat_all_aggs.py +130 -25
  9. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/config.py +70 -45
  10. zipline_polygon_bundle-0.2.3/zipline_polygon_bundle/trades.py +535 -0
  11. zipline_polygon_bundle-0.2.0.dev1/README.md +0 -112
  12. zipline_polygon_bundle-0.2.0.dev1/zipline_polygon_bundle/trades.py +0 -944
  13. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/LICENSE +0 -0
  14. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/concat_all_aggs_partitioned.py +0 -0
  15. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/nyse_all_hours_calendar.py +0 -0
  16. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/polygon_file_reader.py +0 -0
  17. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/process_all_aggs.py +0 -0
  18. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/quotes.py +0 -0
  19. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/split_aggs_by_ticker.py +0 -0
  20. {zipline_polygon_bundle-0.2.0.dev1 → zipline_polygon_bundle-0.2.3}/zipline_polygon_bundle/tickers_and_names.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: zipline_polygon_bundle
- Version: 0.2.0.dev1
+ Version: 0.2.3
  Summary: A zipline-reloaded data provider bundle for Polygon.io
  License: GNU AFFERO GENERAL PUBLIC LICENSE
  Version 3, 19 November 2007
@@ -666,31 +666,36 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
  Keywords: zipline,data-bundle,finance
  Author: Jim White
  Author-email: jim@fovi.com
- Requires-Python: >=3.9,<4.0
+ Requires-Python: >= 3.10,<4.0
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
  Classifier: Operating System :: OS Independent
  Requires-Dist: bcolz-zipline (>=1.2.11)
+ Requires-Dist: filelock (>=3.16.0)
  Requires-Dist: fsspec (>=2024.10)
  Requires-Dist: numpy (<2)
  Requires-Dist: pandas (>=2.2,<3)
- Requires-Dist: pandas-market-calendars (>=4.4.2)
- Requires-Dist: pandas_ta (>=0.3)
  Requires-Dist: polygon-api-client (>=1.14.2)
  Requires-Dist: pyarrow (>=18.1.0,<19)
  Requires-Dist: pytz (>=2018.5)
  Requires-Dist: requests (>=2.9.1)
  Requires-Dist: toolz (>=0.8.2)
- Requires-Dist: zipline-reloaded (>=3.1)
+ Requires-Dist: zipline-arrow (>=3.2.2)
  Project-URL: Repository, https://github.com/fovi-llc/zipline-polygon-bundle
  Description-Content-Type: text/markdown

  # zipline-polygon-bundle
- `zipline-polygon-bundle` is a `zipline-reloaded` (https://github.com/stefan-jansen/zipline-reloaded) data ingestion bundle for [Polygon.io](https://polygon.io/).
+ `zipline-polygon-bundle` is a `zipline-arrow` (https://github.com/fovi-llc/zipline-arrow) data ingestion bundle for [Polygon.io](https://polygon.io/).
+
+ Zipline Arrow is a fork of Zipline Reloaded (`zipline-reloaded`, https://github.com/stefan-jansen/zipline-reloaded) and is only required if you want to use Polygon.io trades flatfiles. If you only need the Polygon daily or minute agg flatfiles, you may want to use `zipline-polygon-bundle<0.2`, which depends on `zipline-reloaded>=3.1`.

  ## GitHub
  https://github.com/fovi-llc/zipline-polygon-bundle

+ ## PyPI
+
+ https://pypi.org/project/zipline_polygon_bundle
+
  ## Resources

  Get a subscription to https://polygon.io/ for an API key and access to flat files.
@@ -707,7 +712,25 @@ Code from *Trading Evolved* with some small updates for convenience: https://git
  One of the modifications I've made to that code is so that some of the notebooks can be run on Colab with a minimum of fuss: https://github.com/fovi-llc/trading_evolved/blob/main/Chapter%207%20-%20Backtesting%20Trading%20Strategies/First%20Zipline%20Backtest.ipynb

- # Ingest data from Polygon.io into Zipline
+ # Zipline Reloaded (`zipline-reloaded`) or Zipline Arrow (`zipline-arrow`)?
+
+ This bundle supports Polygon daily and minute aggregates, and now trades too (quotes coming). The trades are converted to minute and daily aggregates covering all trading hours (premarket and after hours as well as the regular market). To support those extended hours I needed to change how Zipline handles `get_calendar` for Exchange Calendars (`exchange-calendars`) initialization, so I've forked `zipline-reloaded` as `zipline-arrow`. Versions of this package before 0.2 depend on `zipline-reloaded>=3.1` and only support daily and minute flatfiles. Versions >= 0.2 of `zipline-polygon-bundle` depend on `zipline-arrow` and work with daily and minute flatfiles as well as trades flatfiles.
+
+ # Ingest data from Polygon.io into Zipline using the `aws s3` CLI
+ Get the AWS S3 CLI in the usual way: https://docs.aws.amazon.com/cli/latest/reference/s3/
+
+ This will get everything, which is currently around 12TB:
+ ```bash
+ aws s3 sync s3://flatfiles/us_stocks_sip $POLYGON_DATA_DIR/flatfiles/us_stocks_sip --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+ ```
+
+ If you don't need quotes yet (this bundle doesn't use them yet), syncing one `{subdir}` at a time is faster (quotes are about twice as big as trades):
+ ```bash
+ aws s3 sync s3://flatfiles/us_stocks_sip/{subdir} $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/{subdir} --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+ ```
+
+ # Alternative: Ingest data using `rclone`
+ I've had problems with `rclone` on the larger files for trades and quotes, so I recommend the `aws s3` CLI instead.

  ## Set up your `rclone` (https://rclone.org/) configuration
  ```bash
@@ -742,9 +765,23 @@ register_polygon_equities_bundle(
  )
  ```

+ ## Cython build setup
+
+ ```bash
+ sudo apt-get update
+ sudo apt-get install python3-dev python3-poetry
+
+ CFLAGS=$(python3-config --includes) pip install git+https://github.com/fovi-llc/zipline-arrow.git
+ ```
+
  ## Install the Zipline Polygon.io Bundle PyPI package and check that it works
  Listing bundles will show if everything is working correctly.
  ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
  pip install zipline_polygon_bundle
  zipline -e extension.py bundles
  ```
@@ -759,7 +796,7 @@ quantopian-quandl <no ingestions>

  ## Ingest the Polygon.io data. The API key is needed for the split and dividend data.

- Note that ingest currently stores cached API data and shuffled agg data in the `POLYGON_DATA_DIR` directory (`flatfiles/us_stocks_sip/api_cache` and `flatfiles/us_stocks_sip/day_by_ticker_v1` respectively) so write access is needed at this stage. After ingestion the data in `POLYGON_DATA_DIR` is not accessed.
+ Note that ingest currently stores cached API data and shuffled agg ("by ticker") data in the `$CUSTOM_ASSET_FILES_DIR` directory, which is `$ZIPLINE_ROOT/data/polygon_custom_assets` by default.

  ```bash
  export POLYGON_API_KEY=<your API key here>
@@ -793,6 +830,51 @@ This ingestion for 10 years of minute bars took around 10 hours on my Mac using
  zipline ingest -b polygon-minute
  ```

+ ## Using trades flat files
+ The trades flatfiles take a lot of space (currently the 22 years of trades take around 4TB) and a fair bit of time to convert to minute aggregates. The benefit, though, is that the whole trading day is covered, from premarket open to after-hours close. Also, the current conversion logic ignores trade corrections, official close updates, and the TRF "dark pool" trades (because they are not reported when they occurred, nor were they offered on the exchanges). That is to make the aggregates as good a simulation of real-time as we can for algo training and backtesting. Details are in the `trades_to_custom_aggs` function in `zipline_polygon_bundle/trades.py`.
+
+ The conversion process creates `.csv.gz` files in the same format as Polygon flatfiles in the custom assets dir, which is `$ZIPLINE_ROOT/data/polygon_custom_assets` by default. So while `$ZIPLINE_ROOT` needs to be writable, the Polygon flatfiles (`$POLYGON_DATA_DIR`) can be read-only.
+
+ Get the AWS S3 CLI in the usual way: https://docs.aws.amazon.com/cli/latest/reference/s3/
+
+ ```bash
+ aws s3 sync s3://flatfiles/us_stocks_sip/trades_v1 $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/trades_v1 --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+ ```
+
+ ## `extension.py`
+
+ If you set the `ZIPLINE_ROOT` environment variable (recommended, and likely necessary because the default of `~/.zipline` is probably not what you'll want) and copy your `extension.py` config there, then you don't need to put `-e extension.py` on the `zipline` command line.
+
+ If you leave out the `start_date` and/or `end_date` args then `register_polygon_equities_bundle` will scan for the dates of the first and last trade files in `$POLYGON_DATA_DIR` and use those respectively.
+
+ The `NYSE_ALL_HOURS` calendar (defined in `zipline_polygon_bundle/nyse_all_hours_calendar.py`) uses open and close times for the entire trading day, from premarket open to after-hours close.
+
+ Right now `agg_time="1min"` is the only supported aggregate duration because Zipline can only deal with day or minute duration aggregates.
+
+ ```python
+ from zipline_polygon_bundle import register_polygon_equities_bundle, register_nyse_all_hours_calendar, NYSE_ALL_HOURS
+ from exchange_calendars.calendar_helpers import parse_date
+ # from zipline.utils.calendar_utils import get_calendar
+
+ # Register the NYSE_ALL_HOURS ExchangeCalendar.
+ register_nyse_all_hours_calendar()
+
+ register_polygon_equities_bundle(
+     "polygon-trades",
+     calendar_name=NYSE_ALL_HOURS,
+     # start_date=parse_date("2020-01-03", raise_oob=False),
+     # end_date=parse_date("2021-01-29", raise_oob=False),
+     agg_time="1min",
+     minutes_per_day=16 * 60,
+ )
+ ```
+
+ As with the daily and minute aggs, `POLYGON_API_KEY` is needed for the split and dividend data. Also coming is SID assignment across ticker changes using the Polygon tickers API data.
+
+ ```bash
+ zipline ingest -b polygon-trades
+ ```
+
  # License is Affero General Public License v3 (AGPL v3)
  The content of this project is Copyright (C) 2024 Fovi LLC and authored by James P. White (https://www.linkedin.com/in/jamespaulwhite/). It is distributed under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE (AGPL) Version 3 (See LICENSE file).

@@ -0,0 +1,195 @@
+ # zipline-polygon-bundle
+ `zipline-polygon-bundle` is a `zipline-arrow` (https://github.com/fovi-llc/zipline-arrow) data ingestion bundle for [Polygon.io](https://polygon.io/).
+
+ Zipline Arrow is a fork of Zipline Reloaded (`zipline-reloaded`, https://github.com/stefan-jansen/zipline-reloaded) and is only required if you want to use Polygon.io trades flatfiles. If you only need the Polygon daily or minute agg flatfiles, you may want to use `zipline-polygon-bundle<0.2`, which depends on `zipline-reloaded>=3.1`.
+
+ ## GitHub
+ https://github.com/fovi-llc/zipline-polygon-bundle
+
+ ## PyPI
+
+ https://pypi.org/project/zipline_polygon_bundle
+
+ ## Resources
+
+ Get a subscription to https://polygon.io/ for an API key and access to flat files.
+
+ https://polygon.io/knowledge-base/article/how-to-get-started-with-s3
+
+ Quantopian's Zipline backtester revived by Stefan Jansen: https://github.com/stefan-jansen/zipline-reloaded
+
+ Stefan's excellent book *Machine Learning for Algorithmic Trading*: https://ml4trading.io/
+
+ *Trading Evolved* by Andreas Clenow is a gentler introduction to Zipline Reloaded: https://www.followingthetrend.com/trading-evolved/
+
+ Code from *Trading Evolved* with some small updates for convenience: https://github.com/fovi-llc/trading_evolved
+
+ One of the modifications I've made to that code is so that some of the notebooks can be run on Colab with a minimum of fuss: https://github.com/fovi-llc/trading_evolved/blob/main/Chapter%207%20-%20Backtesting%20Trading%20Strategies/First%20Zipline%20Backtest.ipynb
+
+ # Zipline Reloaded (`zipline-reloaded`) or Zipline Arrow (`zipline-arrow`)?
+
+ This bundle supports Polygon daily and minute aggregates, and now trades too (quotes coming). The trades are converted to minute and daily aggregates covering all trading hours (premarket and after hours as well as the regular market). To support those extended hours I needed to change how Zipline handles `get_calendar` for Exchange Calendars (`exchange-calendars`) initialization, so I've forked `zipline-reloaded` as `zipline-arrow`. Versions of this package before 0.2 depend on `zipline-reloaded>=3.1` and only support daily and minute flatfiles. Versions >= 0.2 of `zipline-polygon-bundle` depend on `zipline-arrow` and work with daily and minute flatfiles as well as trades flatfiles.
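
For concreteness, a minimal sketch (not part of the packaged README) of the two install paths this paragraph describes, using only the version constraints stated above:

```bash
# Daily/minute agg flatfiles only: stay on the zipline-reloaded line.
pip install 'zipline-polygon-bundle<0.2' 'zipline-reloaded>=3.1'

# Trades flatfiles and extended hours: the 0.2+ line, which pulls in zipline-arrow.
pip install 'zipline-polygon-bundle>=0.2' 'zipline-arrow>=3.2.2'
```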
+
+ # Ingest data from Polygon.io into Zipline using the `aws s3` CLI
+ Get the AWS S3 CLI in the usual way: https://docs.aws.amazon.com/cli/latest/reference/s3/
+
+ This will get everything, which is currently around 12TB:
+ ```bash
+ aws s3 sync s3://flatfiles/us_stocks_sip $POLYGON_DATA_DIR/flatfiles/us_stocks_sip --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+ ```
+
+ If you don't need quotes yet (this bundle doesn't use them yet), syncing one `{subdir}` at a time is faster (quotes are about twice as big as trades):
+ ```bash
+ aws s3 sync s3://flatfiles/us_stocks_sip/{subdir} $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/{subdir} --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+ ```
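
A sketch of that per-`{subdir}` approach as a loop; the subdir names are taken from the sync commands elsewhere in this README, and skipping the quotes subdir is the assumption being illustrated:

```bash
# Sync only the aggregate and trades subdirs named in this README, leaving quotes out.
for subdir in day_aggs_v1 minute_aggs_v1 trades_v1; do
  aws s3 sync s3://flatfiles/us_stocks_sip/$subdir \
    $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/$subdir \
    --checksum-mode ENABLED --endpoint-url https://files.polygon.io
done
```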
+
+ # Alternative: Ingest data using `rclone`
+ I've had problems with `rclone` on the larger files for trades and quotes, so I recommend the `aws s3` CLI instead.
+
+ ## Set up your `rclone` (https://rclone.org/) configuration
+ ```bash
+ export POLYGON_FILE_ENDPOINT=https://files.polygon.io/
+ rclone config create s3polygon s3 env_auth=false endpoint=$POLYGON_FILE_ENDPOINT \
+     access_key_id=$POLYGON_S3_Access_ID secret_access_key=$POLYGON_Secret_Access_Key
+ ```
+
+ ## Get flat files (`*.csv.gz`) for US Stock daily aggregates
+ The default asset dir is `us_stocks_sip` but that can be overridden with the `POLYGON_ASSET_SUBDIR`
+ environment variable if/when Polygon.io adds other markets to flat files.
+
+ ```bash
+ export POLYGON_DATA_DIR=`pwd`/data/files.polygon.io
+ for year in 2024 2023 2022 2021; do \
+     rclone copy -P s3polygon:flatfiles/us_stocks_sip/day_aggs_v1/$year \
+         $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/day_aggs_v1/$year; \
+ done
+ ```
+
+ ## `extension.py`
+
+ ```python
+ from zipline_polygon_bundle import register_polygon_equities_bundle
+
+ # All tickers (>20K) are ingested. Filtering is TBD.
+ # `start_session` and `end_session` can be set to ingest a range of dates (which must be market days).
+ register_polygon_equities_bundle(
+     "polygon",
+     calendar_name="XNYS",
+     agg_time="day",
+ )
+ ```
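
The `polygon-minute` bundle used later in this README needs a registration too. A hypothetical sketch: the exact `agg_time` literal for minute flatfiles is an assumption here, not confirmed by this diff, so check `register_polygon_equities_bundle` for the accepted values:

```python
# Hypothetical companion registration for the minute-agg bundle.
# agg_time="minute" is an assumed value; verify against the bundle's code.
register_polygon_equities_bundle(
    "polygon-minute",
    calendar_name="XNYS",
    agg_time="minute",
)
```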
+
+ ## Cython build setup
+
+ ```bash
+ sudo apt-get update
+ sudo apt-get install python3-dev python3-poetry
+
+ CFLAGS=$(python3-config --includes) pip install git+https://github.com/fovi-llc/zipline-arrow.git
+ ```
+
+ ## Install the Zipline Polygon.io Bundle PyPI package and check that it works
+ Listing bundles will show if everything is working correctly.
+ ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
+ pip install zipline_polygon_bundle
+ zipline -e extension.py bundles
+ ```
+ stdout:
+ ```
+ csvdir <no ingestions>
+ polygon <no ingestions>
+ polygon-minute <no ingestions>
+ quandl <no ingestions>
+ quantopian-quandl <no ingestions>
+ ```
+
+ ## Ingest the Polygon.io data. The API key is needed for the split and dividend data.
+
+ Note that ingest currently stores cached API data and shuffled agg ("by ticker") data in the `$CUSTOM_ASSET_FILES_DIR` directory, which is `$ZIPLINE_ROOT/data/polygon_custom_assets` by default.
+
+ ```bash
+ export POLYGON_API_KEY=<your API key here>
+ zipline -e extension.py ingest -b polygon
+ ```
+
+ ### Cleaning up bad ingests
+ After a while you may wind up with old (or empty, because of an error during ingestion) bundles cluttering
+ up the list and wasting space (although old bundles may be useful for rerunning old backtests).
+ To remove all but the last ingest (say, after your first successful ingest following a number of false starts) you could use:
+ ```bash
+ zipline -e extension.py clean -b polygon --keep-last 1
+ ```
+
+ ## Using minute aggregate flat files
+ Minute aggs work too, but everything takes more space and a lot longer to do.
+
+ ```bash
+ export POLYGON_DATA_DIR=`pwd`/data/files.polygon.io
+ for year in 2024 2023 2022 2021; do \
+     rclone copy -P s3polygon:flatfiles/us_stocks_sip/minute_aggs_v1/$year \
+         $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/minute_aggs_v1/$year; \
+ done
+ ```
+
+ If you set the `ZIPLINE_ROOT` environment variable (recommended, and likely necessary because the default of `~/.zipline` is probably not what you'll want) and copy your `extension.py` config there, then you don't need to put `-e extension.py` on the `zipline` command line.
+
+ This ingestion for 10 years of minute bars took around 10 hours on my Mac using an external hard drive (not SSD). A big chunk of that was copying from the default tmp dir to the Zipline root (6.3 million files for 47GB actual, 63GB used). I plan to change that `shutil.copy2` to `shutil.move` and to use a `tmp` dir in the Zipline root for temporary files instead of the default, which should save an hour or two. Also, the ingestion process is single threaded and could be sped up with some concurrency.
+
+ ```bash
+ zipline ingest -b polygon-minute
+ ```
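
The copy-versus-move point above matters because `shutil.move` within one filesystem is a rename rather than a data copy. A sketch of the difference (not the bundle's actual ingest code; the `tmp` layout is hypothetical):

```python
import os
import shutil
import tempfile

# shutil.copy2 reads and rewrites every byte: slow for millions of small files.
# shutil.copy2(src, dst)

# If the tmp dir lives on the same filesystem as ZIPLINE_ROOT, shutil.move
# degenerates to os.rename: a metadata-only operation, no data copied.
zipline_root = os.environ.get("ZIPLINE_ROOT", os.path.expanduser("~/.zipline"))
tmp_dir = os.path.join(zipline_root, "tmp")  # hypothetical layout
os.makedirs(tmp_dir, exist_ok=True)
with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as f:
    f.write(b"bar data")
shutil.move(f.name, os.path.join(zipline_root, "ingested.bin"))  # rename, not copy
```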
+
+ ## Using trades flat files
+ The trades flatfiles take a lot of space (currently the 22 years of trades take around 4TB) and a fair bit of time to convert to minute aggregates. The benefit, though, is that the whole trading day is covered, from premarket open to after-hours close. Also, the current conversion logic ignores trade corrections, official close updates, and the TRF "dark pool" trades (because they are not reported when they occurred, nor were they offered on the exchanges). That is to make the aggregates as good a simulation of real-time as we can for algo training and backtesting. Details are in the `trades_to_custom_aggs` function in `zipline_polygon_bundle/trades.py`.
+
+ The conversion process creates `.csv.gz` files in the same format as Polygon flatfiles in the custom assets dir, which is `$ZIPLINE_ROOT/data/polygon_custom_assets` by default. So while `$ZIPLINE_ROOT` needs to be writable, the Polygon flatfiles (`$POLYGON_DATA_DIR`) can be read-only.
+
+ Get the AWS S3 CLI in the usual way: https://docs.aws.amazon.com/cli/latest/reference/s3/
+
+ ```bash
+ aws s3 sync s3://flatfiles/us_stocks_sip/trades_v1 $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/trades_v1 --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+ ```
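
A sketch of an environment setup consistent with the read-only/writable split described above; the paths are placeholders, not defaults from the bundle:

```bash
# Placeholders: flatfiles on a read-only mount, a writable Zipline root elsewhere.
export POLYGON_DATA_DIR=/mnt/flatfiles-readonly/files.polygon.io
export ZIPLINE_ROOT=$HOME/zipline-root
mkdir -p "$ZIPLINE_ROOT/data"
cp extension.py "$ZIPLINE_ROOT/"   # then plain `zipline ingest -b polygon-trades` works
```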
+
+ ## `extension.py`
+
+ If you set the `ZIPLINE_ROOT` environment variable (recommended, and likely necessary because the default of `~/.zipline` is probably not what you'll want) and copy your `extension.py` config there, then you don't need to put `-e extension.py` on the `zipline` command line.
+
+ If you leave out the `start_date` and/or `end_date` args then `register_polygon_equities_bundle` will scan for the dates of the first and last trade files in `$POLYGON_DATA_DIR` and use those respectively.
+
+ The `NYSE_ALL_HOURS` calendar (defined in `zipline_polygon_bundle/nyse_all_hours_calendar.py`) uses open and close times for the entire trading day, from premarket open to after-hours close.
+
+ Right now `agg_time="1min"` is the only supported aggregate duration because Zipline can only deal with day or minute duration aggregates.
+
+ ```python
+ from zipline_polygon_bundle import register_polygon_equities_bundle, register_nyse_all_hours_calendar, NYSE_ALL_HOURS
+ from exchange_calendars.calendar_helpers import parse_date
+ # from zipline.utils.calendar_utils import get_calendar
+
+ # Register the NYSE_ALL_HOURS ExchangeCalendar.
+ register_nyse_all_hours_calendar()
+
+ register_polygon_equities_bundle(
+     "polygon-trades",
+     calendar_name=NYSE_ALL_HOURS,
+     # start_date=parse_date("2020-01-03", raise_oob=False),
+     # end_date=parse_date("2021-01-29", raise_oob=False),
+     agg_time="1min",
+     minutes_per_day=16 * 60,
+ )
+ ```
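
`minutes_per_day=16 * 60` matches a 16-hour session (premarket open through after-hours close). As a sanity-check sketch, assuming the `get_calendar` import commented out above and the usual `exchange_calendars` session API:

```python
from zipline.utils.calendar_utils import get_calendar

cal = get_calendar(NYSE_ALL_HOURS)
first_session = cal.sessions_in_range(cal.first_session, cal.last_session)[0]
# Expect 960 minutes (16 hours) on a full session.
print(len(cal.session_minutes(first_session)))
```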
+
+ As with the daily and minute aggs, `POLYGON_API_KEY` is needed for the split and dividend data. Also coming is SID assignment across ticker changes using the Polygon tickers API data.
+
+ ```bash
+ zipline ingest -b polygon-trades
+ ```
+
+ # License is Affero General Public License v3 (AGPL v3)
+ The content of this project is Copyright (C) 2024 Fovi LLC and authored by James P. White (https://www.linkedin.com/in/jamespaulwhite/). It is distributed under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE (AGPL) Version 3 (See LICENSE file).
+
+ The AGPL doesn't put any restrictions on personal use, but people using this in a service for others have obligations. If you have commercial purposes and those distribution requirements don't work for you, feel free to contact me (mailto:jim@fovi.com) about other licensing terms.
@@ -1,6 +1,6 @@
  [project]
  name = 'zipline_polygon_bundle'
- version = '0.2.0dev1'
+ version = '0.2.3'
  description = 'A zipline-reloaded data provider bundle for Polygon.io'
  authors = [
      { name = 'Jim White', email = 'jim@fovi.com' },
@@ -14,44 +14,37 @@ classifiers = [
      'Operating System :: OS Independent',
  ]

- [project.urls]
- Repository = 'https://github.com/fovi-llc/zipline-polygon-bundle'
+ requires-python = ">= 3.10,<4.0"

- [tool.poetry]
- name = 'zipline-polygon-bundle'
- version = '0.2.0dev1'
- description = 'A zipline-reloaded data provider bundle for Polygon.io'
- authors = ['Jim White <jim@fovi.com>']
- license = 'AGPL-3.0'
- readme = 'README.md'
- keywords = ['zipline', 'data-bundle', 'finance']
- classifiers = [
-     'Programming Language :: Python :: 3',
-     'License :: OSI Approved :: GNU Affero General Public License v3',
-     'Operating System :: OS Independent',
+ dependencies = [
+     "fsspec>=2024.10",
+     "filelock>=3.16.0",
+     "polygon-api-client>=1.14.2",
+     "pandas>=2.2,<3",
+     # "pandas-market-calendars>=4.4.2",
+     # "pandas-ta>=0.3",  # pandas-ta install doesn't work with poetry for some reason.
+     # It is used in compute_signals.py which we're not using yet.
+     "pytz>=2018.5",
+     "requests>=2.9.1",
+     "bcolz-zipline>=1.2.11",
+     # There was an issue in PyArrow 19 which is probably fixed but don't remember how to test it.
+     "pyarrow>=18.1.0,<19",
+     "numpy<2",
+     "toolz>=0.8.2",
+     "zipline-arrow>=3.2.2",
+     # "zipline-arrow = { git = 'https://github.com/fovi-llc/zipline-arrow.git' }"
  ]

- [tool.poetry.dependencies]
- fsspec = ">=2024.10"
- python = ">=3.9,<4.0"
- polygon-api-client = ">=1.14.2"
- pandas = ">=2.2,<3"
- pandas-market-calendars = ">=4.4.2"
- pandas_ta = ">=0.3"
- pytz = ">=2018.5"
- requests = ">=2.9.1"
- bcolz-zipline = ">=1.2.11"
- pyarrow = ">=18.1.0,<19"
- numpy = "<2"
- toolz = ">=0.8.2"
- zipline-reloaded = ">=3.1"
+ [project.urls]
+ Repository = 'https://github.com/fovi-llc/zipline-polygon-bundle'
+

- [tool.poetry.dev-dependencies]
+ [poetry.group.dev.dependencies]
  pytest = "*"

  [build-system]
  build-backend = "poetry.core.masonry.api"
- requires = ["poetry_core>=1.0.0"]
+ requires = ["poetry_core>=2.1.0"]

  [tool.pytest.ini_options]
  # https://docs.pytest.org/en/stable/how-to/capture-warnings.html#controlling-warnings
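
With the metadata moved into PEP 621 `[project]` tables and `poetry-core` kept as the PEP 517 backend, any standard build frontend should work; a sketch:

```bash
# `build` is a generic PEP 517 frontend; it invokes the poetry-core backend
# declared in [build-system] to produce the sdist and wheel.
pip install build
python -m build
```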
@@ -1,17 +1,17 @@
  from .bundle import (
      register_polygon_equities_bundle,
      symbol_to_upper,
-     polygon_equities_bundle_day,
-     polygon_equities_bundle_minute,
+     ingest_polygon_equities_bundle
  )

  from .config import PolygonConfig
  from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
  from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
  from .adjustments import load_splits, load_dividends, load_conditions
- from .trades import trades_schema, trades_dataset, cast_trades, date_to_path
+ from .trades import trades_schema, trades_dataset, cast_trades
  from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
- from .trades import get_custom_aggs_dates, generate_csv_trades_tables, compute_signals_for_all_custom_aggs
+ from .trades import get_aggs_dates, generate_csv_trades_tables
+ # from .compute_signals import compute_signals_for_all_custom_aggs
  from .quotes import quotes_schema, quotes_dataset, cast_quotes
  # from .tickers_and_names import load_all_tickers, merge_tickers, ticker_names_from_merged_tickers, get_ticker_universe
  from .tickers_and_names import PolygonAssets, get_ticker_universe
@@ -22,8 +22,7 @@ __all__ = [
      "register_nyse_all_hours_calendar",
      "NYSE_ALL_HOURS",
      "symbol_to_upper",
-     "polygon_equities_bundle_day",
-     "polygon_equities_bundle_minute",
+     "ingest_polygon_equities_bundle",
      "PolygonConfig",
      "concat_all_aggs_from_csv",
      "generate_csv_agg_tables",
@@ -33,14 +32,13 @@ __all__ = [
      "trades_schema",
      "trades_dataset",
      "cast_trades",
-     "date_to_path",
-     "get_custom_aggs_dates",
+     "get_aggs_dates",
      "generate_csv_trades_tables",
      "custom_aggs_partitioning",
      "custom_aggs_schema",
      "trades_to_custom_aggs",
      "convert_trades_to_custom_aggs",
-     "compute_signals_for_all_custom_aggs",
+     # "compute_signals_for_all_custom_aggs",
      "quotes_schema",
      "quotes_dataset",
      "cast_quotes",
@@ -10,19 +10,19 @@ from urllib3 import HTTPResponse


  def load_polygon_splits(
-     config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+     config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
  ) -> pd.DataFrame:
      # N.B. If the schema changes then the filename should change. We're on v3 now.
      splits_path = config.api_cache_path(
-         start_date=first_start_end, end_date=last_end_date, filename="list_splits"
+         first_day=first_day, last_day=last_day, filename="list_splits"
      )
-     expected_split_count = (last_end_date - first_start_end).days * 3
+     expected_split_count = (last_day - first_day).days * 3
      if not os.path.exists(splits_path):
          client = polygon.RESTClient(api_key=config.api_key)
          splits = client.list_splits(
              limit=1000,
-             execution_date_gte=first_start_end,
-             execution_date_lt=last_end_date + datetime.timedelta(days=1),
+             execution_date_gte=first_day.date(),
+             execution_date_lt=last_day.date() + datetime.timedelta(days=1),
          )
          if splits is HTTPResponse:
              raise ValueError(f"Polygon.list_splits bad HTTPResponse: {splits}")
@@ -32,7 +32,7 @@ def load_polygon_splits(
          splits.to_parquet(splits_path)
          if len(splits) < expected_split_count:
              logging.warning(
-                 f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+                 f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
                  "This is probably fine if your historical range is short."
              )
      # We will always load from the file to avoid any chance of weird errors.
@@ -41,7 +41,7 @@ def load_polygon_splits(
      print(f"Loaded {len(splits)=} from {splits_path}")
      if len(splits) < expected_split_count:
          logging.warning(
-             f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+             f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
              "This is probably fine if your historical range is short."
          )
      return splits
@@ -50,11 +50,11 @@ def load_polygon_splits(


  def load_splits(
      config: PolygonConfig,
-     first_start_end: datetime.date,
-     last_end_date: datetime.date,
+     first_day: pd.Timestamp,
+     last_day: pd.Timestamp,
      ticker_to_sid: dict[str, int],
  ) -> pd.DataFrame:
-     splits = load_polygon_splits(config, first_start_end, last_end_date)
+     splits = load_polygon_splits(config, first_day=first_day, last_day=last_day)
      splits["sid"] = splits["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
      splits.dropna(inplace=True)
      splits["sid"] = splits["sid"].astype("int64")
@@ -70,18 +70,18 @@ def load_splits(


  def load_polygon_dividends(
-     config: PolygonConfig, first_start_date: datetime.date, last_end_date: datetime.date
+     config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
  ) -> pd.DataFrame:
      # N.B. If the schema changes then the filename should change. We're on v3 now.
      dividends_path = config.api_cache_path(
-         start_date=first_start_date, end_date=last_end_date, filename="list_dividends"
+         first_day=first_day, last_day=last_day, filename="list_dividends"
      )
      if not os.path.exists(dividends_path):
          client = polygon.RESTClient(api_key=config.api_key)
          dividends = client.list_dividends(
              limit=1000,
-             record_date_gte=first_start_date,
-             pay_date_lt=last_end_date + datetime.timedelta(days=1),
+             record_date_gte=first_day.date(),
+             pay_date_lt=last_day.date() + datetime.timedelta(days=1),
          )
          if dividends is HTTPResponse:
              raise ValueError(f"Polygon.list_dividends bad HTTPResponse: {dividends}")
@@ -104,35 +104,30 @@ def load_polygon_dividends(


  def load_chunked_polygon_dividends(
-     config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+     config: PolygonConfig, first_day: pd.Timestamp,
+     last_day: pd.Timestamp
  ) -> pd.DataFrame:
      dividends_list = []
-     next_start_end = first_start_end
-     while next_start_end < last_end_date:
+     next_start_end = first_day
+     while next_start_end < last_day:
          # We want at most a month of dividends at a time. They should end on the last day of the month.
-         # So the next_end_date is the day before the first day of the next month.
-         first_of_next_month = datetime.date(
-             next_start_end.year + (next_start_end.month // 12),
-             (next_start_end.month % 12) + 1,
-             1,
-         )
-         next_end_date = first_of_next_month - datetime.timedelta(days=1)
-         if next_end_date > last_end_date:
-             next_end_date = last_end_date
+         next_end_date = next_start_end + pd.offsets.MonthEnd()
+         if next_end_date > last_day:
+             next_end_date = last_day
          dividends_list.append(
-             load_polygon_dividends(config, next_start_end, next_end_date)
+             load_polygon_dividends(config, first_day=next_start_end, last_day=next_end_date)
          )
-         next_start_end = next_end_date + datetime.timedelta(days=1)
+         next_start_end = next_end_date + pd.Timedelta(days=1)
      return pd.concat(dividends_list)


  def load_dividends(
      config: PolygonConfig,
-     first_start_end: datetime.date,
-     last_end_date: datetime.date,
+     first_day: pd.Timestamp,
+     last_day: pd.Timestamp,
      ticker_to_sid: dict[str, int],
  ) -> pd.DataFrame:
-     dividends = load_chunked_polygon_dividends(config, first_day=first_day, last_day=last_day)
+     dividends = load_chunked_polygon_dividends(config, first_day=first_day, last_day=last_day)
      dividends["sid"] = dividends["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
      dividends.dropna(how="any", inplace=True)
      dividends["sid"] = dividends["sid"].astype("int64")
@@ -159,7 +154,7 @@ def load_conditions(config: PolygonConfig) -> pd.DataFrame:
      # The API doesn't use dates for the condition codes but this is a way to provide control over caching.
      # Main thing is to get the current conditions list but we don't want to call more than once a day.
      conditions_path = config.api_cache_path(
-         start_date=config.start_timestamp.date(), end_date=config.end_timestamp.date(), filename="conditions"
+         first_day=config.start_timestamp, last_day=config.end_timestamp, filename="conditions"
      )
      expected_conditions_count = 100
      if not os.path.exists(conditions_path):
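
The sid-mapping pattern repeated in `load_splits` and `load_dividends` above silently drops rows whose tickers have no sid; a standalone sketch of that behavior with a toy mapping (not data from the bundle):

```python
import pandas as pd

ticker_to_sid = {"AAPL": 1, "MSFT": 2}  # toy mapping for illustration
df = pd.DataFrame({"ticker": ["AAPL", "XXXX", "MSFT"], "ratio": [0.25, 0.5, 2.0]})
df["sid"] = df["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
df.dropna(inplace=True)                # the unknown-ticker row (XXXX) is dropped
df["sid"] = df["sid"].astype("int64")  # safe now that no NA values remain
print(df)  # only the AAPL and MSFT rows survive, with integer sids
```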