zipline_polygon_bundle 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in its public registry. It is provided for informational purposes only.
@@ -8,7 +8,7 @@ from .config import PolygonConfig
  from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
  from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
  from .adjustments import load_splits, load_dividends, load_conditions
- from .trades import trades_schema, trades_dataset, cast_trades
+ from .trades import trades_schema, trades_dataset, cast_trades, ordinary_trades_mask
  from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
  from .trades import get_aggs_dates, generate_csv_trades_tables
  # from .compute_signals import compute_signals_for_all_custom_aggs
@@ -32,6 +32,7 @@ __all__ = [
  "trades_schema",
  "trades_dataset",
  "cast_trades",
+ "ordinary_trades_mask",
  "get_aggs_dates",
  "generate_csv_trades_tables",
  "custom_aggs_partitioning",
@@ -203,6 +203,16 @@ def process_minute_table(
  metadata.loc[sid, "auto_close_date"] = auto_close_date
  else:
  # Add a row to the metadata DataFrame. Don't forget to add an exchange field.
+ # metadata = pd.DataFrame(
+ # columns=(
+ # "start_date",
+ # "end_date",
+ # "auto_close_date",
+ # "symbol",
+ # "exchange",
+ # "asset_name",
+ # )
+ # )
  metadata.loc[sid] = (
  start_date,
  end_date,
@@ -17,6 +17,29 @@ import numpy as np
  import pandas as pd


+ # Polygon Trade Conditions codes that don't reflect a current market-priced trade.
+ # https://polygon.io/docs/rest/stocks/market-operations/condition-codes
+ # Odd lots are excluded because although their volume counts the prices don't.
+ EXCLUDED_CONDITION_CODES = {
+ # 2, # Average Price
+ # 7, # Cash Sale
+ 10, # Derivatively Priced
+ # 12, # Form T / Extended Hours
+ 13, # Extended Hours (Sold Out Of Sequence)
+ # 15, # Official Close
+ # 16, # Official Open
+ 20, # Next Day
+ 21, # Price Variation
+ # 22, # Prior Reference
+ 29, # Seller
+ 32, # Sold (Out of Sequence)
+ # 33, # Sold + Stopped
+ 41, # Trade Thru Exempt
+ 52, # Contingent Trade
+ 53 # Qualified Contingent Trade
+ }
+
+
  def trades_schema(raw: bool = False) -> pa.Schema:
  # There is some problem reading the timestamps as timestamps so we have to read as integer then change the schema.
  # Polygon Aggregate flatfile timestamps are in nanoseconds (like trades), not milliseconds as the docs say.
@@ -89,9 +112,15 @@ def cast_strings_to_list(
  split_array = pa_compute.split_pattern(filled_column, pattern=separator)

  # Cast each element in the resulting lists to integers
- int_list_array = pa_compute.cast(split_array, pa.list_(value_type))
+ return pa_compute.cast(split_array, pa.list_(value_type))

- return int_list_array
+
+ def ordinary_trades_mask(table: pa.Table) -> pa.BooleanArray:
+ conditions_dict = table["conditions"].combine_chunks().dictionary_encode()
+ list_of_codes = cast_strings_to_list(conditions_dict.dictionary).to_pylist()
+ code_dictionary = pa.array(set(codes).isdisjoint(EXCLUDED_CONDITION_CODES) for codes in list_of_codes)
+ include_mask = pa.DictionaryArray.from_arrays(conditions_dict.indices, code_dictionary).dictionary_decode()
+ return pa_compute.and_(include_mask, pa_compute.equal(table["correction"], "0"))


  def cast_trades(trades) -> pa.Table:
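The new `ordinary_trades_mask` dictionary-encodes the `conditions` column so the set-disjointness test against `EXCLUDED_CONDITION_CODES` runs once per distinct conditions string rather than once per trade, and it also requires `correction == "0"`. Below is a minimal standalone sketch of the same filtering idea; the space-separated `conditions` encoding and the `keep_ordinary_trades` helper are assumptions for illustration (the bundle itself parses conditions with `cast_strings_to_list` and reuses the dictionary encoding).

```python
# Sketch only: a plain restatement of the ordinary-trades filter.
# Assumes "conditions" holds space-separated condition codes (e.g. "12 37").
import pyarrow as pa
import pyarrow.compute as pa_compute

EXCLUDED_CONDITION_CODES = {10, 13, 20, 21, 29, 32, 41, 52, 53}

def keep_ordinary_trades(table: pa.Table) -> pa.Table:
    # True where the trade's condition codes are disjoint from the exclusion set.
    keep = pa.array(
        [
            set(int(c) for c in (conds or "").split()).isdisjoint(EXCLUDED_CONDITION_CODES)
            for conds in table["conditions"].to_pylist()
        ]
    )
    # Also require an uncorrected trade, mirroring the correction == "0" check.
    keep = pa_compute.and_(keep, pa_compute.equal(table["correction"], "0"))
    return table.filter(keep)

trades = pa.table({
    "conditions": ["12 37", "10", "", "13 41"],
    "correction": ["0", "0", "1", "0"],
    "price": [10.0, 10.5, 10.1, 9.9],
})
print(keep_ordinary_trades(trades))  # only the "12 37" row survives
```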
@@ -117,6 +146,8 @@ def custom_aggs_schema(raw: bool = False) -> pa.Schema:
  pa.field("window_start", timestamp_type, nullable=False),
  pa.field("transactions", pa.int64(), nullable=False),
  pa.field("vwap", price_type, nullable=False),
+ pa.field("traded_value", price_type, nullable=False),
+ pa.field("cumulative_traded_value", price_type, nullable=False),
  pa.field("date", pa.date32(), nullable=False),
  pa.field("year", pa.uint16(), nullable=False),
  pa.field("month", pa.uint8(), nullable=False),
@@ -190,17 +221,10 @@ def trades_to_custom_aggs(
  config: PolygonConfig,
  date: datetime.date,
  table: pa.Table,
- include_trf: bool = False,
  ) -> pa.Table:
  print(f"{date=} {pa.default_memory_pool()=}")
- # print(f"{datetime.datetime.now()=} {date=} {pa.default_memory_pool()=}")
- # print(f"{resource.getrusage(resource.RUSAGE_SELF).ru_maxrss=}")
- table = table.filter(pa_compute.greater(table["size"], 0))
- table = table.filter(pa_compute.equal(table["correction"], "0"))
- if not include_trf:
- table = table.filter(pa_compute.not_equal(table["exchange"], 4))
  table = table.append_column(
- "price_total", pa_compute.multiply(table["price"], table["size"])
+ "traded_value", pa_compute.multiply(table["price"], table["size"])
  )
  table = table.append_column(
  "window_start",
@@ -208,13 +232,14 @@ def trades_to_custom_aggs(
  table["sip_timestamp"], multiple=config.agg_timedelta.seconds, unit="second"
  ),
  )
+ table = table.sort_by([("ticker", "ascending"), ("sip_timestamp", "ascending")])
  table = table.group_by(["ticker", "window_start"], use_threads=False).aggregate(
  [
  ("price", "first"),
  ("price", "max"),
  ("price", "min"),
  ("price", "last"),
- ("price_total", "sum"),
+ ("traded_value", "sum"),
  ("size", "sum"),
  ([], "count_all"),
  ]
@@ -226,13 +251,21 @@ def trades_to_custom_aggs(
  "price_min": "low",
  "price_last": "close",
  "size_sum": "volume",
- "price_total_sum": "total",
+ "traded_value_sum": "traded_value",
  "count_all": "transactions",
  }
  )
+ table = table.sort_by([("ticker", "ascending"), ("window_start", "ascending")])
  table = table.append_column(
- "vwap", pa_compute.divide(table["total"], table["volume"])
+ "vwap", pa_compute.divide(table["traded_value"], table["volume"])
  )
+ # Calculate cumulative traded value by ticker
+ traded_values_by_ticker = table.group_by("ticker").aggregate([("traded_value", "list")])
+ cumulative_sum_arrays = [
+ pa_compute.cumulative_sum(pa.array(values_list)) for values_list in traded_values_by_ticker["traded_value_list"].combine_chunks()
+ ]
+ table = table.append_column("cumulative_traded_value", pa.concat_arrays(cumulative_sum_arrays))
+
  # table.append_column('date', pa.array([date] * len(table), type=pa.date32()))
  # table.append_column('year', pa.array([date.year] * len(table), type=pa.uint16()))
  # table.append_column('month', pa.array([date.month] * len(table), type=pa.uint8()))
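The new `cumulative_traded_value` column is built by list-aggregating `traded_value` per ticker, cumulative-summing each list, and concatenating the results back onto the table; this lines up with the original rows because the table was just sorted by ticker and window_start and the groups come back in first-appearance order. A small self-contained sketch of that pattern (toy data, not from the bundle):

```python
# Sketch of per-ticker running totals via pyarrow's "list" aggregation.
# Assumes rows are already contiguous and ordered per ticker (the sort_by
# above ensures this) and that group_by returns groups in first-appearance
# order, which is what the bundle's code relies on.
import pyarrow as pa
import pyarrow.compute as pa_compute

aggs = pa.table({
    "ticker": ["A", "A", "A", "B", "B"],
    "traded_value": [100.0, 50.0, 25.0, 10.0, 40.0],
})

per_ticker = aggs.group_by("ticker").aggregate([("traded_value", "list")])
cumulative_sum_arrays = [
    pa_compute.cumulative_sum(pa.array(values))
    for values in per_ticker["traded_value_list"].to_pylist()
]
aggs = aggs.append_column(
    "cumulative_traded_value", pa.concat_arrays(cumulative_sum_arrays)
)
print(aggs["cumulative_traded_value"])  # 100, 150, 175, 10, 50
```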
@@ -249,7 +282,6 @@ def trades_to_custom_aggs(
  [to_partition_key(ticker) for ticker in table.column("ticker").to_pylist()]
  ),
  )
- table = table.sort_by([("window_start", "ascending"), ("ticker", "ascending")])
  # print(f"aggs {date=} {table.to_pandas().head()=}")
  return table

@@ -279,22 +311,11 @@ def convert_trades_to_custom_aggs(

  # pa.set_memory_pool()

- # pa_ds.write_dataset(
- # generate_custom_agg_batches_from_tables(config),
- # schema=custom_aggs_schema(),
- # filesystem=config.filesystem,
- # base_dir=config.aggs_dir,
- # partitioning=custom_aggs_partitioning(),
- # format="parquet",
- # existing_data_behavior="overwrite_or_ignore",
- # # max_open_files = MAX_FILES_OPEN,
- # # min_rows_per_group = MIN_ROWS_PER_GROUP,
- # )
-
  for date, trades_table in generate_csv_trades_tables(config):
- aggs_table = trades_to_custom_aggs(config, date, trades_table)
  pa_ds.write_dataset(
- aggs_table,
+ trades_to_custom_aggs(config,
+ date,
+ trades_table.filter(ordinary_trades_mask(trades_table))),
  filesystem=config.filesystem,
  base_dir=config.aggs_dir,
  partitioning=custom_aggs_partitioning(),
@@ -304,7 +325,17 @@ def convert_trades_to_custom_aggs(
  # max_open_files=10,
  # min_rows_per_group=MIN_ROWS_PER_GROUP,
  )
- del aggs_table
+ # pa_ds.write_dataset(
+ # trades_to_custom_events(config,
+ # date,
+ # trades_table.filter(pa_compute.invert(ordinary_trades_mask(trades_table)))),
+ # filesystem=config.filesystem,
+ # base_dir=config.events_dir,
+ # partitioning=custom_events_partitioning(),
+ # format="parquet",
+ # existing_data_behavior="overwrite_or_ignore",
+ # file_visitor=file_visitor,
+ # )
  del trades_table

  # with ProcessPoolExecutor(max_workers=1) as executor:
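With the up-front filters removed from `trades_to_custom_aggs`, the filtering now happens at the call site: each day's raw trades table is masked with `ordinary_trades_mask` before aggregation and the result is written straight to the partitioned dataset. A rough sketch of that per-date flow (not verbatim from the package; the `write_daily_aggs` wrapper is hypothetical and only the `write_dataset` options visible in the hunks above are shown):

```python
# Sketch of the new filter -> aggregate -> write loop per trading date.
import pyarrow.dataset as pa_ds

from zipline_polygon_bundle import (
    custom_aggs_partitioning,
    generate_csv_trades_tables,
    ordinary_trades_mask,
    trades_to_custom_aggs,
)

def write_daily_aggs(config):
    for date, trades_table in generate_csv_trades_tables(config):
        # Keep only ordinary (market-priced, uncorrected) trades before aggregating.
        ordinary = trades_table.filter(ordinary_trades_mask(trades_table))
        pa_ds.write_dataset(
            trades_to_custom_aggs(config, date, ordinary),
            filesystem=config.filesystem,
            base_dir=config.aggs_dir,
            partitioning=custom_aggs_partitioning(),
            # ... remaining dataset options as in the package ...
        )
        del trades_table
```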
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: zipline_polygon_bundle
- Version: 0.2.3
+ Version: 0.2.4
  Summary: A zipline-reloaded data provider bundle for Polygon.io
  License: GNU AFFERO GENERAL PUBLIC LICENSE
  Version 3, 19 November 2007
@@ -769,7 +769,7 @@ register_polygon_equities_bundle(

  ```bash
  sudo apt-get update
- sudo apt-get install python3-dev python3-poetry
+ sudo apt-get install python3-dev python3-poetry gcc-multilib

  CFLAGS=$(python3-config --includes) pip install git+https://github.com/fovi-llc/zipline-arrow.git
  ```
@@ -1,6 +1,6 @@
- zipline_polygon_bundle/__init__.py,sha256=KGN5kBi021Eiz_GDtxVRTUdXgYWe6loG_C8XcrVNHrY,1765
+ zipline_polygon_bundle/__init__.py,sha256=QlH8njI5zj1WM5EFDqR3vXy80rXF4qeG3hnPPkaMqE0,1815
  zipline_polygon_bundle/adjustments.py,sha256=4garYK7RUrYyCIhCm0ZqHsk3y2bCt9vHUkWoHvVniTA,8233
- zipline_polygon_bundle/bundle.py,sha256=7f_rpVBhR1XyOJ1e7Lulq1Uh4DWJmHxFQKZNfz9OSgQ,19805
+ zipline_polygon_bundle/bundle.py,sha256=EPv3uTgLZFkv2Fi3o5d3Yun0NQ-iHXGVmajVTxUjln4,20161
  zipline_polygon_bundle/compute_signals.py,sha256=FxcMuwMmxuvyy45y1avdL_uFEn0B4_2ekcv_B4AyPo0,10115
  zipline_polygon_bundle/concat_all_aggs.py,sha256=Nuj0pytQAVoK8OK7qx5m3jWCV8uJIPsa0XHnmicgSmg,12066
  zipline_polygon_bundle/concat_all_aggs_partitioned.py,sha256=AQq4ai5u5GyclWzQq2C8zIvHl_zjvLiDtxarNejwCQ4,6325
@@ -11,8 +11,8 @@ zipline_polygon_bundle/process_all_aggs.py,sha256=MVhb8xn9-DngSNSrRIpMG4XAgHjMXk
  zipline_polygon_bundle/quotes.py,sha256=yFjlPiQXPp0t6w2Bo96VLtYSqITP7WCLwMp5CH3zx1E,4260
  zipline_polygon_bundle/split_aggs_by_ticker.py,sha256=HI_3nuN6E_VCq7LfOj4Dib_qm8wYME-jdXXX4rt-9YI,2150
  zipline_polygon_bundle/tickers_and_names.py,sha256=BjYquIlSBQGd1yDW3m3cGuXKVvUfh_waYwdMR7eAhuM,15402
- zipline_polygon_bundle/trades.py,sha256=XK2ed06ekByAVCimCDtJUIQ3HZaQbfKc0BXC9orHoJg,20192
- zipline_polygon_bundle-0.2.3.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
- zipline_polygon_bundle-0.2.3.dist-info/METADATA,sha256=a7tw9uwGWQ-cmRk3xzc5r8WYZ03276_3NYajuuxcQR4,51908
- zipline_polygon_bundle-0.2.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- zipline_polygon_bundle-0.2.3.dist-info/RECORD,,
+ zipline_polygon_bundle/trades.py,sha256=OXRILPa7Hyx-cyEe0u1BVhoncpotOzG_dhh_TPHLCBI,21818
+ zipline_polygon_bundle-0.2.4.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+ zipline_polygon_bundle-0.2.4.dist-info/METADATA,sha256=bh8uyn9tcvitKwjf7lKo1uGeXclD3V62PiRfrr4NBh8,51921
+ zipline_polygon_bundle-0.2.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ zipline_polygon_bundle-0.2.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.2
+ Generator: poetry-core 2.1.3
  Root-Is-Purelib: true
  Tag: py3-none-any