zipline_polygon_bundle 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zipline_polygon_bundle/__init__.py +2 -1
- zipline_polygon_bundle/bundle.py +10 -0
- zipline_polygon_bundle/config.py +13 -13
- zipline_polygon_bundle/trades.py +62 -29
- {zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/METADATA +86 -6
- {zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/RECORD +8 -8
- {zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/WHEEL +1 -1
- {zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/LICENSE +0 -0
zipline_polygon_bundle/__init__.py
CHANGED
@@ -8,7 +8,7 @@ from .config import PolygonConfig
 from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
 from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
 from .adjustments import load_splits, load_dividends, load_conditions
-from .trades import trades_schema, trades_dataset, cast_trades
+from .trades import trades_schema, trades_dataset, cast_trades, ordinary_trades_mask
 from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
 from .trades import get_aggs_dates, generate_csv_trades_tables
 # from .compute_signals import compute_signals_for_all_custom_aggs
@@ -32,6 +32,7 @@ __all__ = [
     "trades_schema",
     "trades_dataset",
     "cast_trades",
+    "ordinary_trades_mask",
     "get_aggs_dates",
     "generate_csv_trades_tables",
     "custom_aggs_partitioning",
zipline_polygon_bundle/bundle.py
CHANGED
@@ -203,6 +203,16 @@ def process_minute_table(
             metadata.loc[sid, "auto_close_date"] = auto_close_date
         else:
             # Add a row to the metadata DataFrame. Don't forget to add an exchange field.
+            # metadata = pd.DataFrame(
+            #     columns=(
+            #         "start_date",
+            #         "end_date",
+            #         "auto_close_date",
+            #         "symbol",
+            #         "exchange",
+            #         "asset_name",
+            #     )
+            # )
             metadata.loc[sid] = (
                 start_date,
                 end_date,
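The commented-out block documents the shape of the `metadata` frame that the assignment below it fills in. A standalone sketch of that row-assignment pattern (column names come from the comment; the sid and values are placeholders, not taken from the package):

```python
import pandas as pd

# Hypothetical metadata frame matching the commented-out column spec above.
metadata = pd.DataFrame(
    columns=("start_date", "end_date", "auto_close_date", "symbol", "exchange", "asset_name")
)

sid = 0  # placeholder security id
metadata.loc[sid] = (
    pd.Timestamp("2020-01-03"),   # start_date
    pd.Timestamp("2021-01-29"),   # end_date
    pd.Timestamp("2021-02-01"),   # auto_close_date
    "XYZ",                        # symbol (placeholder)
    "NYSE",                       # exchange (placeholder)
    "Example Corp",               # asset_name (placeholder)
)
```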
zipline_polygon_bundle/config.py
CHANGED
@@ -3,7 +3,7 @@ from zipline.utils.calendar_utils import get_calendar
 
 from .nyse_all_hours_calendar import NYSE_ALL_HOURS
 
-from typing import Iterator, Tuple
+from typing import Iterator, Mapping, Tuple
 
 import pandas as pd
 from pyarrow.fs import LocalFileSystem
@@ -38,7 +38,7 @@ def to_partition_key(s: str) -> str:
 class PolygonConfig:
     def __init__(
         self,
-        environ:
+        environ: Mapping[str, str],
         calendar_name: str,
         start_date: Date,
         end_date: Date,
@@ -71,17 +71,6 @@ class PolygonConfig:
         )
         self.market = environ.get("POLYGON_MARKET", "stocks")
         self.asset_subdir = environ.get("POLYGON_ASSET_SUBDIR", "us_stocks_sip")
-        self.tickers_dir = environ.get(
-            "POLYGON_TICKERS_DIR",
-            os.path.join(os.path.join(self.data_dir, "tickers"), self.asset_subdir),
-        )
-        self.tickers_csv_path = environ.get(
-            "POLYGON_TICKERS_CSV",
-            os.path.join(
-                self.tickers_dir,
-                f"tickers_{self.start_timestamp.date().isoformat()}_{self.end_timestamp.date().isoformat()}.csv",
-            ),
-        )
         self.flat_files_dir = environ.get(
             "POLYGON_FLAT_FILES_DIR", os.path.join(self.data_dir, "flatfiles")
         )
@@ -101,6 +90,17 @@ class PolygonConfig:
         self.custom_asset_files_dir = environ.get(
             "CUSTOM_ASSET_FILES_DIR", self.asset_files_dir
         )
+        self.tickers_dir = environ.get(
+            "POLYGON_TICKERS_DIR",
+            os.path.join(self.custom_asset_files_dir, "tickers"),
+        )
+        self.tickers_csv_path = environ.get(
+            "POLYGON_TICKERS_CSV",
+            os.path.join(
+                self.tickers_dir,
+                f"tickers_{self.start_timestamp.date().isoformat()}_{self.end_timestamp.date().isoformat()}.csv",
+            ),
+        )
 
         self.cache_dir = os.path.join(self.custom_asset_files_dir, "api_cache")
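The net effect of the config.py changes: `environ` is now explicitly a `Mapping[str, str]`, and the tickers paths default under `custom_asset_files_dir` instead of `data_dir`. A minimal sketch of how the new defaults would resolve, assuming no required constructor arguments beyond those visible in the hunks and using placeholder calendar and dates:

```python
import os
from zipline_polygon_bundle import PolygonConfig  # imported into the package namespace per __init__.py

# Hypothetical construction; calendar name and dates are placeholders.
config = PolygonConfig(
    environ=os.environ,       # any Mapping[str, str] now satisfies the annotation
    calendar_name="NYSE",
    start_date="2020-01-03",
    end_date="2021-01-29",
)

# With no POLYGON_TICKERS_DIR / POLYGON_TICKERS_CSV overrides set:
print(config.tickers_dir)       # <custom_asset_files_dir>/tickers
print(config.tickers_csv_path)  # .../tickers/tickers_2020-01-03_2021-01-29.csv
```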
|
zipline_polygon_bundle/trades.py
CHANGED
@@ -17,6 +17,29 @@ import numpy as np
 import pandas as pd
 
 
+# Polygon Trade Conditions codes that don't reflect a current market-priced trade.
+# https://polygon.io/docs/rest/stocks/market-operations/condition-codes
+# Odd lots are excluded because although their volume counts the prices don't.
+EXCLUDED_CONDITION_CODES = {
+    # 2, # Average Price
+    # 7, # Cash Sale
+    10,  # Derivatively Priced
+    # 12, # Form T / Extended Hours
+    13,  # Extended Hours (Sold Out Of Sequence)
+    # 15, # Official Close
+    # 16, # Official Open
+    20,  # Next Day
+    21,  # Price Variation
+    # 22, # Prior Reference
+    29,  # Seller
+    32,  # Sold (Out of Sequence)
+    # 33, # Sold + Stopped
+    41,  # Trade Thru Exempt
+    52,  # Contingent Trade
+    53  # Qualified Contingent Trade
+}
+
+
 def trades_schema(raw: bool = False) -> pa.Schema:
     # There is some problem reading the timestamps as timestamps so we have to read as integer then change the schema.
     # Polygon Aggregate flatfile timestamps are in nanoseconds (like trades), not milliseconds as the docs say.
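This set is consumed by the new `ordinary_trades_mask` below via a disjointness test: a trade is kept only if none of its condition codes appear in the set. A tiny illustration of that test (the codes here are invented for the example):

```python
# Illustrative only: mirrors the set.isdisjoint check used in ordinary_trades_mask.
trade_codes = {12, 41}  # hypothetical condition codes attached to one trade
is_ordinary = trade_codes.isdisjoint(EXCLUDED_CONDITION_CODES)
# False here, because 41 (Trade Thru Exempt) is in the excluded set; 12 (Form T) is not.
```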
@@ -89,9 +112,15 @@ def cast_strings_to_list(
     split_array = pa_compute.split_pattern(filled_column, pattern=separator)
 
     # Cast each element in the resulting lists to integers
-
+    return pa_compute.cast(split_array, pa.list_(value_type))
+
 
-
+def ordinary_trades_mask(table: pa.Table) -> pa.BooleanArray:
+    conditions_dict = table["conditions"].combine_chunks().dictionary_encode()
+    list_of_codes = cast_strings_to_list(conditions_dict.dictionary).to_pylist()
+    code_dictionary = pa.array(set(codes).isdisjoint(EXCLUDED_CONDITION_CODES) for codes in list_of_codes)
+    include_mask = pa.DictionaryArray.from_arrays(conditions_dict.indices, code_dictionary).dictionary_decode()
+    return pa_compute.and_(include_mask, pa_compute.equal(table["correction"], "0"))
 
 
 def cast_trades(trades) -> pa.Table:
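`ordinary_trades_mask` combines the condition-code check with a `correction == "0"` filter and returns one boolean per row of the trades table. A usage sketch, mirroring how `convert_trades_to_custom_aggs` applies it further down (the `trades_table` is assumed to come from `generate_csv_trades_tables`):

```python
import pyarrow.compute as pa_compute
from zipline_polygon_bundle import ordinary_trades_mask

mask = ordinary_trades_mask(trades_table)                # one boolean per trade row
ordinary = trades_table.filter(mask)                     # what trades_to_custom_aggs aggregates
excluded = trades_table.filter(pa_compute.invert(mask))  # corrections, excluded condition codes, etc.
```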
@@ -116,6 +145,9 @@ def custom_aggs_schema(raw: bool = False) -> pa.Schema:
             pa.field("low", price_type, nullable=False),
             pa.field("window_start", timestamp_type, nullable=False),
             pa.field("transactions", pa.int64(), nullable=False),
+            pa.field("vwap", price_type, nullable=False),
+            pa.field("traded_value", price_type, nullable=False),
+            pa.field("cumulative_traded_value", price_type, nullable=False),
             pa.field("date", pa.date32(), nullable=False),
             pa.field("year", pa.uint16(), nullable=False),
             pa.field("month", pa.uint8(), nullable=False),
@@ -189,17 +221,10 @@ def trades_to_custom_aggs(
     config: PolygonConfig,
     date: datetime.date,
     table: pa.Table,
-    include_trf: bool = False,
 ) -> pa.Table:
     print(f"{date=} {pa.default_memory_pool()=}")
-    # print(f"{datetime.datetime.now()=} {date=} {pa.default_memory_pool()=}")
-    # print(f"{resource.getrusage(resource.RUSAGE_SELF).ru_maxrss=}")
-    table = table.filter(pa_compute.greater(table["size"], 0))
-    table = table.filter(pa_compute.equal(table["correction"], "0"))
-    if not include_trf:
-        table = table.filter(pa_compute.not_equal(table["exchange"], 4))
     table = table.append_column(
-        "
+        "traded_value", pa_compute.multiply(table["price"], table["size"])
     )
     table = table.append_column(
         "window_start",
@@ -207,13 +232,14 @@ def trades_to_custom_aggs(
             table["sip_timestamp"], multiple=config.agg_timedelta.seconds, unit="second"
         ),
     )
+    table = table.sort_by([("ticker", "ascending"), ("sip_timestamp", "ascending")])
     table = table.group_by(["ticker", "window_start"], use_threads=False).aggregate(
         [
             ("price", "first"),
             ("price", "max"),
             ("price", "min"),
             ("price", "last"),
-            ("
+            ("traded_value", "sum"),
             ("size", "sum"),
             ([], "count_all"),
         ]
@@ -225,13 +251,21 @@ def trades_to_custom_aggs(
             "price_min": "low",
             "price_last": "close",
             "size_sum": "volume",
-            "
+            "traded_value_sum": "traded_value",
             "count_all": "transactions",
         }
     )
+    table = table.sort_by([("ticker", "ascending"), ("window_start", "ascending")])
     table = table.append_column(
-        "vwap", pa_compute.divide(table["
+        "vwap", pa_compute.divide(table["traded_value"], table["volume"])
     )
+    # Calculate cumulative traded value by ticker
+    traded_values_by_ticker = table.group_by("ticker").aggregate([("traded_value", "list")])
+    cumulative_sum_arrays = [
+        pa_compute.cumulative_sum(pa.array(values_list)) for values_list in traded_values_by_ticker["traded_value_list"].combine_chunks()
+    ]
+    table = table.append_column("cumulative_traded_value", pa.concat_arrays(cumulative_sum_arrays))
+
     # table.append_column('date', pa.array([date] * len(table), type=pa.date32()))
     # table.append_column('year', pa.array([date.year] * len(table), type=pa.uint16()))
     # table.append_column('month', pa.array([date.month] * len(table), type=pa.uint8()))
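The cumulative-sum step depends on the table already being sorted by ticker, so that concatenating the per-ticker cumulative arrays lines back up with the row order. A standalone sketch of the same technique on a toy table (values invented; like the code above, it assumes the group-by returns groups in first-seen order):

```python
import pyarrow as pa
import pyarrow.compute as pa_compute

table = pa.table({
    "ticker": ["A", "A", "B", "B", "B"],        # already sorted by ticker
    "traded_value": [10.0, 5.0, 1.0, 2.0, 3.0],
})

# One list of traded_value per ticker, then a cumulative sum within each list.
by_ticker = table.group_by("ticker").aggregate([("traded_value", "list")])
cumulative = [
    pa_compute.cumulative_sum(pa.array(values))
    for values in by_ticker["traded_value_list"].to_pylist()
]

# Concatenate back in ticker order so the column aligns with the sorted rows.
table = table.append_column("cumulative_traded_value", pa.concat_arrays(cumulative))
# cumulative_traded_value -> [10.0, 15.0, 1.0, 3.0, 6.0]
```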
@@ -248,7 +282,6 @@ def trades_to_custom_aggs(
             [to_partition_key(ticker) for ticker in table.column("ticker").to_pylist()]
         ),
     )
-    table = table.sort_by([("window_start", "ascending"), ("ticker", "ascending")])
     # print(f"aggs {date=} {table.to_pandas().head()=}")
     return table
 
@@ -278,22 +311,11 @@ def convert_trades_to_custom_aggs(
 
     # pa.set_memory_pool()
 
-    # pa_ds.write_dataset(
-    #     generate_custom_agg_batches_from_tables(config),
-    #     schema=custom_aggs_schema(),
-    #     filesystem=config.filesystem,
-    #     base_dir=config.aggs_dir,
-    #     partitioning=custom_aggs_partitioning(),
-    #     format="parquet",
-    #     existing_data_behavior="overwrite_or_ignore",
-    #     # max_open_files = MAX_FILES_OPEN,
-    #     # min_rows_per_group = MIN_ROWS_PER_GROUP,
-    # )
-
     for date, trades_table in generate_csv_trades_tables(config):
-        aggs_table = trades_to_custom_aggs(config, date, trades_table)
         pa_ds.write_dataset(
-
+            trades_to_custom_aggs(config,
+                                  date,
+                                  trades_table.filter(ordinary_trades_mask(trades_table))),
             filesystem=config.filesystem,
             base_dir=config.aggs_dir,
             partitioning=custom_aggs_partitioning(),
@@ -303,7 +325,17 @@ def convert_trades_to_custom_aggs(
             # max_open_files=10,
             # min_rows_per_group=MIN_ROWS_PER_GROUP,
         )
-
+        # pa_ds.write_dataset(
+        #     trades_to_custom_events(config,
+        #                             date,
+        #                             trades_table.filter(pa_compute.invert(ordinary_trades_mask(trades_table)))),
+        #     filesystem=config.filesystem,
+        #     base_dir=config.events_dir,
+        #     partitioning=custom_events_partitioning(),
+        #     format="parquet",
+        #     existing_data_behavior="overwrite_or_ignore",
+        #     file_visitor=file_visitor,
+        # )
         del trades_table
 
     # with ProcessPoolExecutor(max_workers=1) as executor:
@@ -366,6 +398,7 @@ def batches_for_date(aggs_ds: pa_ds.Dataset, date: pd.Timestamp):
     table = table.sort_by([("part", "ascending"), ("ticker", "ascending"), ("window_start", "ascending"), ])
     return table.to_batches()
 
+
 def generate_batches_for_schedule(config, aggs_ds):
     schedule = config.calendar.trading_index(
         start=config.start_timestamp, end=config.end_timestamp, period="1D"
{zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: zipline_polygon_bundle
-Version: 0.2.1
+Version: 0.2.4
 Summary: A zipline-reloaded data provider bundle for Polygon.io
 License: GNU AFFERO GENERAL PUBLIC LICENSE
          Version 3, 19 November 2007
@@ -666,7 +666,7 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
 Keywords: zipline,data-bundle,finance
 Author: Jim White
 Author-email: jim@fovi.com
-Requires-Python: >=3.10,<4.0
+Requires-Python: >= 3.10,<4.0
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU Affero General Public License v3
 Classifier: Operating System :: OS Independent
@@ -679,17 +679,23 @@ Requires-Dist: polygon-api-client (>=1.14.2)
 Requires-Dist: pyarrow (>=18.1.0,<19)
 Requires-Dist: pytz (>=2018.5)
 Requires-Dist: requests (>=2.9.1)
-Requires-Dist: toolz (>=
+Requires-Dist: toolz (>=0.8.2)
 Requires-Dist: zipline-arrow (>=3.2.2)
 Project-URL: Repository, https://github.com/fovi-llc/zipline-polygon-bundle
 Description-Content-Type: text/markdown
 
 # zipline-polygon-bundle
-`zipline-polygon-bundle` is a `zipline-
+`zipline-polygon-bundle` is a `zipline-arrow` (https://github.com/fovi-llc/zipline-arrow) data ingestion bundle for [Polygon.io](https://polygon.io/).
+
+Zipline Arrow is a fork of Zipline Reloaded `zipline-reloaded` (https://github.com/stefan-jansen/zipline-reloaded) which is only required if you want to use Polygon.io trades flatfiles. So if you only need to use Polygon daily or minute agg flatfiles then you may want to use `zipline-polygon-bundle<0.2` which depends on `zipline-reloaded>=3.1`.
 
 ## GitHub
 https://github.com/fovi-llc/zipline-polygon-bundle
 
+## PyPi
+
+https://pypi.org/project/zipline_polygon_bundle
+
 ## Resources
 
 Get a subscription to https://polygon.io/ for an API key and access to flat files.
@@ -706,7 +712,25 @@ Code from *Trading Evolved* with some small updates for convenience: https://git
 
 One of the modifications I've made to that code is so that some of the notebooks can be run on Colab with a minimum of fuss: https://github.com/fovi-llc/trading_evolved/blob/main/Chapter%207%20-%20Backtesting%20Trading%20Strategies/First%20Zipline%20Backtest.ipynb
 
-#
+# Zipline Reloaded (`zipline-reloaded`) or Zipline Arrow (`zipline-arrow`)?
+
+This bundle supports Polygon daily and minute aggregates and now trades too (quotes coming). The trades are converted to minute and daily aggregates for all trading hours (extended both pre and post, as well as regular market). But in order to support those extended hours I needed to change how Zipline handles `get_calendar` for Exchange Calendar (`exchange-calendar`) initialization. To make that work I've forked `zipline-reloaded` as `zipline-arrow`. The versions of this package before 0.2 depend on `zipline-reloaded>=3.1` and only support daily and minute flatfiles. Versions >= 0.2 of `zipline-polygon-bundle` depend on `zipline-arrow` and will work with daily and minute flatfiles as well as trades flatfiles.
+
+# Ingest data from Polygon.io into Zipline using `aws s3` CLI
+Get AWS S3 CLI in the usual way: https://docs.aws.amazon.com/cli/latest/reference/s3/
+
+This will get everything which is currently around 12TB.
+```bash
+aws s3 sync s3://flatfiles/us_stocks_sip $POLYGON_DATA_DIR/flatfiles/us_stocks_sip --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+```
+
+If you don't need quotes yet (and this bundle doesn't use them yet) then this will be faster (quotes about twice as big as trades):
+```bash
+aws s3 sync s3://flatfiles/us_stocks_sip/{subdir} $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/{subdir} --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+```
+
+# Alternative: Ingest data using `rclone`.
+I've had problems with `rclone` on the larger files for trades and quotes so I recommend using `aws s3` CLI instead.
 
 ## Set up your `rclone` (https://rclone.org/) configuration
 ```bash
@@ -741,9 +765,20 @@ register_polygon_equities_bundle(
 )
 ```
 
+## Cython build setup
+
+```bash
+sudo apt-get update
+sudo apt-get install python3-dev python3-poetry gcc-multilib
+
+CFLAGS=$(python3-config --includes) pip install git+https://github.com/fovi-llc/zipline-arrow.git
+```
+
+
 ## Install the Zipline Polygon.io Bundle PyPi package and check that it works.
 Listing bundles will show if everything is working correctly.
 ```bash
+
 pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
 pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
 
@@ -761,7 +796,7 @@ quantopian-quandl <no ingestions>
 
 ## Ingest the Polygon.io data. The API key is needed for the split and dividend data.
 
-Note that ingest currently stores cached API data and shuffled agg data in the `
+Note that ingest currently stores cached API data and shuffled agg ("by ticker") data in the `$CUSTOM_ASSET_FILES_DIR` directory which is `$ZIPLINE_ROOT/data/polygon_custom_assets` by default.
 
 ```bash
 export POLYGON_API_KEY=<your API key here>
@@ -795,6 +830,51 @@ This ingestion for 10 years of minute bars took around 10 hours on my Mac using
 zipline ingest -b polygon-minute
 ```
 
+## Using trades flat files.
+This takes a lot of space for the trades flatfiles (currently the 22 years of trades take around 4TB) and a fair bit of time to convert to minute aggregates. The benefit though is the whole trading day is covered from premarket open to after hours close. Also the current conversion logic ignores trade corrections, official close updates, and the TRF "dark pool" trades (because they are not reported when they occurred nor were they offered on the exchanges). That is to make the aggregates be as good of a simulation of real-time as we can do for algo training and backtesting. Details in the `trades_to_custom_aggs` function in `zipline_polygon_bundle/trades.py`.
+
+The conversion process creates `.csv.gz` files in the same format as Polygon flatfiles in the custom assets dir, which is `$ZIPLINE_ROOT/data/polygon_custom_assets` by default. So while `$ZIPLINE_ROOT` needs to be writable, the Polygon flatfiles (`$POLYGON_DATA_DIR`) can be read-only.
+
+Get AWS S3 CLI in the usual way: https://docs.aws.amazon.com/cli/latest/reference/s3/
+
+```bash
+aws s3 sync s3://flatfiles/us_stocks_sip/trades_v1 $POLYGON_DATA_DIR/flatfiles/us_stocks_sip/trades_v1 --checksum-mode ENABLED --endpoint-url https://files.polygon.io
+```
+
+## `extension.py`
+
+If you set the `ZIPLINE_ROOT` environment variable (recommended and likely necessary because the default of `~/.zipline` is probably not what you'll want) and copy your `extension.py` config there then you don't need to put `-e extension.py` on the `zipline` command line.
+
+If you leave out the `start_date` and/or `end_date` args then `register_polygon_equities_bundle` will scan for the dates of the first and last trade file in `$POLYGON_DATA_DIR` and use them respectively.
+
+The `NYSE_ALL_HOURS` calendar (defined in `zipline_polygon_bundle/nyse_all_hours_calendar.py`) uses open and close times for the entire trading day from premarket open to after hours close.
+
+Right now `agg_time="1min"` is the only supported aggregate duration because Zipline can only deal with day or minute duration aggregates.
+
+```python
+from zipline_polygon_bundle import register_polygon_equities_bundle, register_nyse_all_hours_calendar, NYSE_ALL_HOURS
+from exchange_calendars.calendar_helpers import parse_date
+# from zipline.utils.calendar_utils import get_calendar
+
+# Register the NYSE_ALL_HOURS ExchangeCalendar.
+register_nyse_all_hours_calendar()
+
+register_polygon_equities_bundle(
+    "polygon-trades",
+    calendar_name=NYSE_ALL_HOURS,
+    # start_date=parse_date("2020-01-03", raise_oob=False),
+    # end_date=parse_date("2021-01-29", raise_oob=False),
+    agg_time="1min",
+    minutes_per_day=16 * 60,
+)
+```
+
+As with the daily and minute aggs, the POLYGON_API_KEY is needed for the split and dividend data. Also coming is SID assignment across ticker changes using the Polygon tickers API data.
+
+```bash
+zipline ingest -b polygon-trades
+```
+
 # License is Affero General Public License v3 (AGPL v3)
 The content of this project is Copyright (C) 2024 Fovi LLC and authored by James P. White (https://www.linkedin.com/in/jamespaulwhite/). It is distributed under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE (AGPL) Version 3 (See LICENSE file).
 
{zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/RECORD
CHANGED
@@ -1,18 +1,18 @@
-zipline_polygon_bundle/__init__.py,sha256=
+zipline_polygon_bundle/__init__.py,sha256=QlH8njI5zj1WM5EFDqR3vXy80rXF4qeG3hnPPkaMqE0,1815
 zipline_polygon_bundle/adjustments.py,sha256=4garYK7RUrYyCIhCm0ZqHsk3y2bCt9vHUkWoHvVniTA,8233
-zipline_polygon_bundle/bundle.py,sha256=
+zipline_polygon_bundle/bundle.py,sha256=EPv3uTgLZFkv2Fi3o5d3Yun0NQ-iHXGVmajVTxUjln4,20161
 zipline_polygon_bundle/compute_signals.py,sha256=FxcMuwMmxuvyy45y1avdL_uFEn0B4_2ekcv_B4AyPo0,10115
 zipline_polygon_bundle/concat_all_aggs.py,sha256=Nuj0pytQAVoK8OK7qx5m3jWCV8uJIPsa0XHnmicgSmg,12066
 zipline_polygon_bundle/concat_all_aggs_partitioned.py,sha256=AQq4ai5u5GyclWzQq2C8zIvHl_zjvLiDtxarNejwCQ4,6325
-zipline_polygon_bundle/config.py,sha256=
+zipline_polygon_bundle/config.py,sha256=VdgwvnLKeb_WppQI6Rr97GqulEfufjDVww4ulkmlbdU,10474
 zipline_polygon_bundle/nyse_all_hours_calendar.py,sha256=QrwWHm3_sfwrtt1tN5u6rqjTQcwN3qxyhjNGeHdyqcI,698
 zipline_polygon_bundle/polygon_file_reader.py,sha256=TCq6hKlxixwtL57xLxs9GnvH3MMa6aWBI9mi1-PBNHw,3749
 zipline_polygon_bundle/process_all_aggs.py,sha256=MVhb8xn9-DngSNSrRIpMG4XAgHjMXktoqYrxuM9ph-c,3069
 zipline_polygon_bundle/quotes.py,sha256=yFjlPiQXPp0t6w2Bo96VLtYSqITP7WCLwMp5CH3zx1E,4260
 zipline_polygon_bundle/split_aggs_by_ticker.py,sha256=HI_3nuN6E_VCq7LfOj4Dib_qm8wYME-jdXXX4rt-9YI,2150
 zipline_polygon_bundle/tickers_and_names.py,sha256=BjYquIlSBQGd1yDW3m3cGuXKVvUfh_waYwdMR7eAhuM,15402
-zipline_polygon_bundle/trades.py,sha256=
-zipline_polygon_bundle-0.2.
-zipline_polygon_bundle-0.2.
-zipline_polygon_bundle-0.2.
-zipline_polygon_bundle-0.2.
+zipline_polygon_bundle/trades.py,sha256=OXRILPa7Hyx-cyEe0u1BVhoncpotOzG_dhh_TPHLCBI,21818
+zipline_polygon_bundle-0.2.4.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+zipline_polygon_bundle-0.2.4.dist-info/METADATA,sha256=bh8uyn9tcvitKwjf7lKo1uGeXclD3V62PiRfrr4NBh8,51921
+zipline_polygon_bundle-0.2.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+zipline_polygon_bundle-0.2.4.dist-info/RECORD,,
{zipline_polygon_bundle-0.2.1.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/LICENSE
File without changes