zipline_polygon_bundle 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zipline_polygon_bundle/__init__.py +2 -1
- zipline_polygon_bundle/bundle.py +10 -0
- zipline_polygon_bundle/trades.py +60 -29
- {zipline_polygon_bundle-0.2.3.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/METADATA +2 -2
- {zipline_polygon_bundle-0.2.3.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/RECORD +7 -7
- {zipline_polygon_bundle-0.2.3.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/WHEEL +1 -1
- {zipline_polygon_bundle-0.2.3.dist-info → zipline_polygon_bundle-0.2.4.dist-info}/LICENSE +0 -0
@@ -8,7 +8,7 @@ from .config import PolygonConfig
|
|
8
8
|
from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
|
9
9
|
from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
|
10
10
|
from .adjustments import load_splits, load_dividends, load_conditions
|
11
|
-
from .trades import trades_schema, trades_dataset, cast_trades
|
11
|
+
from .trades import trades_schema, trades_dataset, cast_trades, ordinary_trades_mask
|
12
12
|
from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
|
13
13
|
from .trades import get_aggs_dates, generate_csv_trades_tables
|
14
14
|
# from .compute_signals import compute_signals_for_all_custom_aggs
|
@@ -32,6 +32,7 @@ __all__ = [
|
|
32
32
|
"trades_schema",
|
33
33
|
"trades_dataset",
|
34
34
|
"cast_trades",
|
35
|
+
"ordinary_trades_mask",
|
35
36
|
"get_aggs_dates",
|
36
37
|
"generate_csv_trades_tables",
|
37
38
|
"custom_aggs_partitioning",
|
zipline_polygon_bundle/bundle.py
CHANGED
@@ -203,6 +203,16 @@ def process_minute_table(
|
|
203
203
|
metadata.loc[sid, "auto_close_date"] = auto_close_date
|
204
204
|
else:
|
205
205
|
# Add a row to the metadata DataFrame. Don't forget to add an exchange field.
|
206
|
+
# metadata = pd.DataFrame(
|
207
|
+
# columns=(
|
208
|
+
# "start_date",
|
209
|
+
# "end_date",
|
210
|
+
# "auto_close_date",
|
211
|
+
# "symbol",
|
212
|
+
# "exchange",
|
213
|
+
# "asset_name",
|
214
|
+
# )
|
215
|
+
# )
|
206
216
|
metadata.loc[sid] = (
|
207
217
|
start_date,
|
208
218
|
end_date,
|
zipline_polygon_bundle/trades.py
CHANGED
@@ -17,6 +17,29 @@ import numpy as np
|
|
17
17
|
import pandas as pd
|
18
18
|
|
19
19
|
|
20
|
+
# Polygon Trade Conditions codes that don't reflect a current market-priced trade.
|
21
|
+
# https://polygon.io/docs/rest/stocks/market-operations/condition-codes
|
22
|
+
# Odd lots are excluded because although their volume counts the prices don't.
|
23
|
+
EXCLUDED_CONDITION_CODES = {
|
24
|
+
# 2, # Average Price
|
25
|
+
# 7, # Cash Sale
|
26
|
+
10, # Derivatively Priced
|
27
|
+
# 12, # Form T / Extended Hours
|
28
|
+
13, # Extended Hours (Sold Out Of Sequence)
|
29
|
+
# 15, # Official Close
|
30
|
+
# 16, # Official Open
|
31
|
+
20, # Next Day
|
32
|
+
21, # Price Variation
|
33
|
+
# 22, # Prior Reference
|
34
|
+
29, # Seller
|
35
|
+
32, # Sold (Out of Sequence)
|
36
|
+
# 33, # Sold + Stopped
|
37
|
+
41, # Trade Thru Exempt
|
38
|
+
52, # Contingent Trade
|
39
|
+
53 # Qualified Contingent Trade
|
40
|
+
}
|
41
|
+
|
42
|
+
|
20
43
|
def trades_schema(raw: bool = False) -> pa.Schema:
|
21
44
|
# There is some problem reading the timestamps as timestamps so we have to read as integer then change the schema.
|
22
45
|
# Polygon Aggregate flatfile timestamps are in nanoseconds (like trades), not milliseconds as the docs say.
|
@@ -89,9 +112,15 @@ def cast_strings_to_list(
|
|
89
112
|
split_array = pa_compute.split_pattern(filled_column, pattern=separator)
|
90
113
|
|
91
114
|
# Cast each element in the resulting lists to integers
|
92
|
-
|
115
|
+
return pa_compute.cast(split_array, pa.list_(value_type))
|
93
116
|
|
94
|
-
|
117
|
+
|
118
|
+
def ordinary_trades_mask(table: pa.Table) -> pa.BooleanArray:
|
119
|
+
conditions_dict = table["conditions"].combine_chunks().dictionary_encode()
|
120
|
+
list_of_codes = cast_strings_to_list(conditions_dict.dictionary).to_pylist()
|
121
|
+
code_dictionary = pa.array(set(codes).isdisjoint(EXCLUDED_CONDITION_CODES) for codes in list_of_codes)
|
122
|
+
include_mask = pa.DictionaryArray.from_arrays(conditions_dict.indices, code_dictionary).dictionary_decode()
|
123
|
+
return pa_compute.and_(include_mask, pa_compute.equal(table["correction"], "0"))
|
95
124
|
|
96
125
|
|
97
126
|
def cast_trades(trades) -> pa.Table:
|
@@ -117,6 +146,8 @@ def custom_aggs_schema(raw: bool = False) -> pa.Schema:
|
|
117
146
|
pa.field("window_start", timestamp_type, nullable=False),
|
118
147
|
pa.field("transactions", pa.int64(), nullable=False),
|
119
148
|
pa.field("vwap", price_type, nullable=False),
|
149
|
+
pa.field("traded_value", price_type, nullable=False),
|
150
|
+
pa.field("cumulative_traded_value", price_type, nullable=False),
|
120
151
|
pa.field("date", pa.date32(), nullable=False),
|
121
152
|
pa.field("year", pa.uint16(), nullable=False),
|
122
153
|
pa.field("month", pa.uint8(), nullable=False),
|
@@ -190,17 +221,10 @@ def trades_to_custom_aggs(
|
|
190
221
|
config: PolygonConfig,
|
191
222
|
date: datetime.date,
|
192
223
|
table: pa.Table,
|
193
|
-
include_trf: bool = False,
|
194
224
|
) -> pa.Table:
|
195
225
|
print(f"{date=} {pa.default_memory_pool()=}")
|
196
|
-
# print(f"{datetime.datetime.now()=} {date=} {pa.default_memory_pool()=}")
|
197
|
-
# print(f"{resource.getrusage(resource.RUSAGE_SELF).ru_maxrss=}")
|
198
|
-
table = table.filter(pa_compute.greater(table["size"], 0))
|
199
|
-
table = table.filter(pa_compute.equal(table["correction"], "0"))
|
200
|
-
if not include_trf:
|
201
|
-
table = table.filter(pa_compute.not_equal(table["exchange"], 4))
|
202
226
|
table = table.append_column(
|
203
|
-
"
|
227
|
+
"traded_value", pa_compute.multiply(table["price"], table["size"])
|
204
228
|
)
|
205
229
|
table = table.append_column(
|
206
230
|
"window_start",
|
@@ -208,13 +232,14 @@ def trades_to_custom_aggs(
|
|
208
232
|
table["sip_timestamp"], multiple=config.agg_timedelta.seconds, unit="second"
|
209
233
|
),
|
210
234
|
)
|
235
|
+
table = table.sort_by([("ticker", "ascending"), ("sip_timestamp", "ascending")])
|
211
236
|
table = table.group_by(["ticker", "window_start"], use_threads=False).aggregate(
|
212
237
|
[
|
213
238
|
("price", "first"),
|
214
239
|
("price", "max"),
|
215
240
|
("price", "min"),
|
216
241
|
("price", "last"),
|
217
|
-
("
|
242
|
+
("traded_value", "sum"),
|
218
243
|
("size", "sum"),
|
219
244
|
([], "count_all"),
|
220
245
|
]
|
@@ -226,13 +251,21 @@ def trades_to_custom_aggs(
|
|
226
251
|
"price_min": "low",
|
227
252
|
"price_last": "close",
|
228
253
|
"size_sum": "volume",
|
229
|
-
"
|
254
|
+
"traded_value_sum": "traded_value",
|
230
255
|
"count_all": "transactions",
|
231
256
|
}
|
232
257
|
)
|
258
|
+
table = table.sort_by([("ticker", "ascending"), ("window_start", "ascending")])
|
233
259
|
table = table.append_column(
|
234
|
-
"vwap", pa_compute.divide(table["
|
260
|
+
"vwap", pa_compute.divide(table["traded_value"], table["volume"])
|
235
261
|
)
|
262
|
+
# Calculate cumulative traded value by ticker
|
263
|
+
traded_values_by_ticker = table.group_by("ticker").aggregate([("traded_value", "list")])
|
264
|
+
cumulative_sum_arrays = [
|
265
|
+
pa_compute.cumulative_sum(pa.array(values_list)) for values_list in traded_values_by_ticker["traded_value_list"].combine_chunks()
|
266
|
+
]
|
267
|
+
table = table.append_column("cumulative_traded_value", pa.concat_arrays(cumulative_sum_arrays))
|
268
|
+
|
236
269
|
# table.append_column('date', pa.array([date] * len(table), type=pa.date32()))
|
237
270
|
# table.append_column('year', pa.array([date.year] * len(table), type=pa.uint16()))
|
238
271
|
# table.append_column('month', pa.array([date.month] * len(table), type=pa.uint8()))
|
@@ -249,7 +282,6 @@ def trades_to_custom_aggs(
|
|
249
282
|
[to_partition_key(ticker) for ticker in table.column("ticker").to_pylist()]
|
250
283
|
),
|
251
284
|
)
|
252
|
-
table = table.sort_by([("window_start", "ascending"), ("ticker", "ascending")])
|
253
285
|
# print(f"aggs {date=} {table.to_pandas().head()=}")
|
254
286
|
return table
|
255
287
|
|
@@ -279,22 +311,11 @@ def convert_trades_to_custom_aggs(
|
|
279
311
|
|
280
312
|
# pa.set_memory_pool()
|
281
313
|
|
282
|
-
# pa_ds.write_dataset(
|
283
|
-
# generate_custom_agg_batches_from_tables(config),
|
284
|
-
# schema=custom_aggs_schema(),
|
285
|
-
# filesystem=config.filesystem,
|
286
|
-
# base_dir=config.aggs_dir,
|
287
|
-
# partitioning=custom_aggs_partitioning(),
|
288
|
-
# format="parquet",
|
289
|
-
# existing_data_behavior="overwrite_or_ignore",
|
290
|
-
# # max_open_files = MAX_FILES_OPEN,
|
291
|
-
# # min_rows_per_group = MIN_ROWS_PER_GROUP,
|
292
|
-
# )
|
293
|
-
|
294
314
|
for date, trades_table in generate_csv_trades_tables(config):
|
295
|
-
aggs_table = trades_to_custom_aggs(config, date, trades_table)
|
296
315
|
pa_ds.write_dataset(
|
297
|
-
|
316
|
+
trades_to_custom_aggs(config,
|
317
|
+
date,
|
318
|
+
trades_table.filter(ordinary_trades_mask(trades_table))),
|
298
319
|
filesystem=config.filesystem,
|
299
320
|
base_dir=config.aggs_dir,
|
300
321
|
partitioning=custom_aggs_partitioning(),
|
@@ -304,7 +325,17 @@ def convert_trades_to_custom_aggs(
|
|
304
325
|
# max_open_files=10,
|
305
326
|
# min_rows_per_group=MIN_ROWS_PER_GROUP,
|
306
327
|
)
|
307
|
-
|
328
|
+
# pa_ds.write_dataset(
|
329
|
+
# trades_to_custom_events(config,
|
330
|
+
# date,
|
331
|
+
# trades_table.filter(pa_compute.invert(ordinary_trades_mask(trades_table)))),
|
332
|
+
# filesystem=config.filesystem,
|
333
|
+
# base_dir=config.events_dir,
|
334
|
+
# partitioning=custom_events_partitioning(),
|
335
|
+
# format="parquet",
|
336
|
+
# existing_data_behavior="overwrite_or_ignore",
|
337
|
+
# file_visitor=file_visitor,
|
338
|
+
# )
|
308
339
|
del trades_table
|
309
340
|
|
310
341
|
# with ProcessPoolExecutor(max_workers=1) as executor:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: zipline_polygon_bundle
|
3
|
-
Version: 0.2.3
|
3
|
+
Version: 0.2.4
|
4
4
|
Summary: A zipline-reloaded data provider bundle for Polygon.io
|
5
5
|
License: GNU AFFERO GENERAL PUBLIC LICENSE
|
6
6
|
Version 3, 19 November 2007
|
@@ -769,7 +769,7 @@ register_polygon_equities_bundle(
|
|
769
769
|
|
770
770
|
```bash
|
771
771
|
sudo apt-get update
|
772
|
-
sudo apt-get install python3-dev python3-poetry
|
772
|
+
sudo apt-get install python3-dev python3-poetry gcc-multilib
|
773
773
|
|
774
774
|
CFLAGS=$(python3-config --includes) pip install git+https://github.com/fovi-llc/zipline-arrow.git
|
775
775
|
```
|
@@ -1,6 +1,6 @@
|
|
1
|
-
zipline_polygon_bundle/__init__.py,sha256=
|
1
|
+
zipline_polygon_bundle/__init__.py,sha256=QlH8njI5zj1WM5EFDqR3vXy80rXF4qeG3hnPPkaMqE0,1815
|
2
2
|
zipline_polygon_bundle/adjustments.py,sha256=4garYK7RUrYyCIhCm0ZqHsk3y2bCt9vHUkWoHvVniTA,8233
|
3
|
-
zipline_polygon_bundle/bundle.py,sha256=
|
3
|
+
zipline_polygon_bundle/bundle.py,sha256=EPv3uTgLZFkv2Fi3o5d3Yun0NQ-iHXGVmajVTxUjln4,20161
|
4
4
|
zipline_polygon_bundle/compute_signals.py,sha256=FxcMuwMmxuvyy45y1avdL_uFEn0B4_2ekcv_B4AyPo0,10115
|
5
5
|
zipline_polygon_bundle/concat_all_aggs.py,sha256=Nuj0pytQAVoK8OK7qx5m3jWCV8uJIPsa0XHnmicgSmg,12066
|
6
6
|
zipline_polygon_bundle/concat_all_aggs_partitioned.py,sha256=AQq4ai5u5GyclWzQq2C8zIvHl_zjvLiDtxarNejwCQ4,6325
|
@@ -11,8 +11,8 @@ zipline_polygon_bundle/process_all_aggs.py,sha256=MVhb8xn9-DngSNSrRIpMG4XAgHjMXk
|
|
11
11
|
zipline_polygon_bundle/quotes.py,sha256=yFjlPiQXPp0t6w2Bo96VLtYSqITP7WCLwMp5CH3zx1E,4260
|
12
12
|
zipline_polygon_bundle/split_aggs_by_ticker.py,sha256=HI_3nuN6E_VCq7LfOj4Dib_qm8wYME-jdXXX4rt-9YI,2150
|
13
13
|
zipline_polygon_bundle/tickers_and_names.py,sha256=BjYquIlSBQGd1yDW3m3cGuXKVvUfh_waYwdMR7eAhuM,15402
|
14
|
-
zipline_polygon_bundle/trades.py,sha256=
|
15
|
-
zipline_polygon_bundle-0.2.
|
16
|
-
zipline_polygon_bundle-0.2.
|
17
|
-
zipline_polygon_bundle-0.2.
|
18
|
-
zipline_polygon_bundle-0.2.
|
14
|
+
zipline_polygon_bundle/trades.py,sha256=OXRILPa7Hyx-cyEe0u1BVhoncpotOzG_dhh_TPHLCBI,21818
|
15
|
+
zipline_polygon_bundle-0.2.4.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
16
|
+
zipline_polygon_bundle-0.2.4.dist-info/METADATA,sha256=bh8uyn9tcvitKwjf7lKo1uGeXclD3V62PiRfrr4NBh8,51921
|
17
|
+
zipline_polygon_bundle-0.2.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
18
|
+
zipline_polygon_bundle-0.2.4.dist-info/RECORD,,
|
File without changes
|