zipline_polygon_bundle-0.1.7-py3-none-any.whl → zipline_polygon_bundle-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
@@ -0,0 +1,533 @@
+ from .config import PolygonConfig, PARTITION_COLUMN_NAME, to_partition_key
+
+ from typing import Iterator, Tuple
+
+ import pyarrow as pa
+ import pyarrow.compute as pa_compute
+ import pyarrow.csv as pa_csv
+ import pyarrow.dataset as pa_ds
+ import pyarrow.fs as pa_fs
+
+ from fsspec.implementations.arrow import ArrowFSWrapper
+
+ import os
+ import datetime
+
+ import numpy as np
+ import pandas as pd
+
+
+ def trades_schema(raw: bool = False) -> pa.Schema:
+     # There is some problem reading the timestamps as timestamps so we have to read as integer then change the schema.
+     # Polygon Aggregate flatfile timestamps are in nanoseconds (like trades), not milliseconds as the docs say.
+     # I make the timestamp timezone-aware because that's how Unix timestamps work and it may help avoid mistakes.
+     # The timezone is America/New_York because that's the US exchanges timezone and the date is a trading day.
+     # timestamp_type = pa.timestamp("ns", tz="America/New_York")
+     # timestamp_type = pa.int64() if raw else pa.timestamp("ns", tz=tz)
+     timestamp_type = pa.int64() if raw else pa.timestamp("ns", tz="UTC")
+
+     # Polygon price scale is 4 decimal places (i.e. hundredths of a penny), but we'll use 10 because we have precision to spare.
+     # price_type = pa.decimal128(precision=38, scale=10)
+     # 64bit float a little overkill but avoids any plausible truncation error.
+     price_type = pa.float64()
+
+     return pa.schema(
+         [
+             pa.field("ticker", pa.string(), nullable=False),
+             pa.field("conditions", pa.string(), nullable=False),
+             pa.field("correction", pa.string(), nullable=False),
+             pa.field("exchange", pa.int8(), nullable=False),
+             pa.field("id", pa.string(), nullable=False),
+             pa.field("participant_timestamp", timestamp_type, nullable=False),
+             pa.field("price", price_type, nullable=False),
+             pa.field("sequence_number", pa.int64(), nullable=False),
+             pa.field("sip_timestamp", timestamp_type, nullable=False),
+             pa.field("size", pa.int64(), nullable=False),
+             pa.field("tape", pa.int8(), nullable=False),
+             pa.field("trf_id", pa.int64(), nullable=False),
+             pa.field("trf_timestamp", timestamp_type, nullable=False),
+         ]
+     )
+
+
+ def trades_dataset(config: PolygonConfig) -> pa_ds.Dataset:
+     """
+     Create a pyarrow dataset from the trades files.
+     """
+
+     # https://arrow.apache.org/docs/python/filesystems.html#using-arrow-filesystems-with-fsspec
+     # https://filesystem-spec.readthedocs.io/en/latest/_modules/fsspec/spec.html#AbstractFileSystem.glob.
+     fsspec = ArrowFSWrapper(config.filesystem)
+
+     # We sort by path because they have the year and month in the dir names and the date in the filename.
+     paths = sorted(
+         fsspec.glob(os.path.join(config.trades_dir, config.csv_paths_pattern))
+     )
+
+     return pa_ds.FileSystemDataset.from_paths(
+         paths,
+         format=pa_ds.CsvFileFormat(),
+         schema=trades_schema(raw=True),
+         filesystem=config.filesystem,
+     )
+
+
+ def cast_strings_to_list(
+     string_array, separator=",", default="0", value_type=pa.uint8()
+ ):
+     """Cast a PyArrow StringArray of comma-separated numbers to a ListArray of values."""
+
+     # Create a mask to identify empty strings
+     is_empty = pa_compute.equal(pa_compute.utf8_trim_whitespace(string_array), "")
+
+     # Use replace_with_mask to replace empty strings with the default ("0")
+     filled_column = pa_compute.replace_with_mask(
+         string_array, is_empty, pa.scalar(default)
+     )
+
+     # Split the strings by comma
+     split_array = pa_compute.split_pattern(filled_column, pattern=separator)
+
+     # Cast each element in the resulting lists to integers
+     int_list_array = pa_compute.cast(split_array, pa.list_(value_type))
+
+     return int_list_array
+
+
+ def cast_trades(trades) -> pa.Table:
+     trades = trades.cast(trades_schema())
+     condition_values = cast_strings_to_list(
+         trades.column("conditions").combine_chunks()
+     )
+     return trades.append_column("condition_values", condition_values)
+
+
+ def custom_aggs_schema(raw: bool = False) -> pa.Schema:
+     # timestamp_type = pa.int64() if raw else pa.timestamp("ns", tz=tz)
+     timestamp_type = pa.int64() if raw else pa.timestamp("ns", tz="UTC")
+     price_type = pa.float64()
+     return pa.schema(
+         [
+             pa.field("ticker", pa.string(), nullable=False),
+             pa.field("volume", pa.int64(), nullable=False),
+             pa.field("open", price_type, nullable=False),
+             pa.field("close", price_type, nullable=False),
+             pa.field("high", price_type, nullable=False),
+             pa.field("low", price_type, nullable=False),
+             pa.field("window_start", timestamp_type, nullable=False),
+             pa.field("transactions", pa.int64(), nullable=False),
+             pa.field("date", pa.date32(), nullable=False),
+             pa.field("year", pa.uint16(), nullable=False),
+             pa.field("month", pa.uint8(), nullable=False),
+             pa.field(PARTITION_COLUMN_NAME, pa.string(), nullable=False),
+         ]
+     )
+
+
+ def custom_aggs_partitioning() -> pa.Schema:
+     return pa_ds.partitioning(
+         pa.schema(
+             [("year", pa.uint16()), ("month", pa.uint8()), ("date", pa.date32())]
+         ),
+         flavor="hive",
+     )
+
+
+ def get_aggs_dates(config: PolygonConfig) -> set[datetime.date]:
+     file_info = config.filesystem.get_file_info(config.aggs_dir)
+     if file_info.type == pa_fs.FileType.NotFound:
+         return set()
+     aggs_ds = pa_ds.dataset(
+         config.aggs_dir,
+         format="parquet",
+         schema=custom_aggs_schema(),
+         partitioning=custom_aggs_partitioning(),
+     )
+     return set(
+         [
+             pa_ds.get_partition_keys(fragment.partition_expression).get("date")
+             for fragment in aggs_ds.get_fragments()
+         ]
+     )
+
+
+ def generate_csv_trades_tables(
+     config: PolygonConfig, overwrite: bool = False
+ ) -> Iterator[Tuple[datetime.date, pa.Table]]:
+     """Generator for trades tables from flatfile CSVs."""
+     existing_aggs_dates = set()
+     if not overwrite:
+         existing_aggs_dates = get_aggs_dates(config)
+     schedule = config.calendar.trading_index(
+         start=config.start_timestamp, end=config.end_timestamp, period="1D"
+     )
+     for timestamp in schedule:
+         date: datetime.date = timestamp.to_pydatetime().date()
+         if date in existing_aggs_dates:
+             continue
+         trades_csv_path = config.date_to_csv_file_path(date)
+         convert_options = pa_csv.ConvertOptions(column_types=trades_schema(raw=True))
+         trades = pa_csv.read_csv(trades_csv_path, convert_options=convert_options)
+         trades = trades.cast(trades_schema())
+         # min_timestamp = pa.compute.min(trades.column('sip_timestamp')).as_py()
+         # max_timestamp = pa.compute.max(trades.column('sip_timestamp')).as_py()
+         # start_session = session['pre']
+         # end_session = session['post']
+         # # print(f"{start_session=} {end_session=}")
+         # # print(f"{min_timestamp=} {max_timestamp=}")
+         # if min_timestamp < start_session:
+         #     print(f"ERROR: {min_timestamp=} < {start_session=}")
+         # # The end_session is supposed to be a limit but there are many with trades at that second.
+         # if max_timestamp >= (end_session + pd.Timedelta(seconds=1)):
+         #     # print(f"ERROR: {max_timestamp=} >= {end_session=}")
+         #     print(f"ERROR: {max_timestamp=} > {end_session+pd.Timedelta(seconds=1)=}")
+         yield date, trades
+         del trades
+
+
+ def trades_to_custom_aggs(
+     config: PolygonConfig,
+     date: datetime.date,
+     table: pa.Table,
+     include_trf: bool = False,
+ ) -> pa.Table:
+     print(f"{date=} {pa.default_memory_pool()=}")
+     # print(f"{datetime.datetime.now()=} {date=} {pa.default_memory_pool()=}")
+     # print(f"{resource.getrusage(resource.RUSAGE_SELF).ru_maxrss=}")
+     table = table.filter(pa_compute.greater(table["size"], 0))
+     table = table.filter(pa_compute.equal(table["correction"], "0"))
+     if not include_trf:
+         table = table.filter(pa_compute.not_equal(table["exchange"], 4))
+     table = table.append_column(
+         "price_total", pa_compute.multiply(table["price"], table["size"])
+     )
+     table = table.append_column(
+         "window_start",
+         pa_compute.floor_temporal(
+             table["sip_timestamp"], multiple=config.agg_timedelta.seconds, unit="second"
+         ),
+     )
+     table = table.group_by(["ticker", "window_start"], use_threads=False).aggregate(
+         [
+             ("price", "first"),
+             ("price", "max"),
+             ("price", "min"),
+             ("price", "last"),
+             ("price_total", "sum"),
+             ("size", "sum"),
+             ([], "count_all"),
+         ]
+     )
+     table = table.rename_columns(
+         {
+             "price_first": "open",
+             "price_max": "high",
+             "price_min": "low",
+             "price_last": "close",
+             "size_sum": "volume",
+             "price_total_sum": "total",
+             "count_all": "transactions",
+         }
+     )
+     table = table.append_column(
+         "vwap", pa_compute.divide(table["total"], table["volume"])
+     )
+     # table.append_column('date', pa.array([date] * len(table), type=pa.date32()))
+     # table.append_column('year', pa.array([date.year] * len(table), type=pa.uint16()))
+     # table.append_column('month', pa.array([date.month] * len(table), type=pa.uint8()))
+     table = table.append_column("date", pa.array(np.full(len(table), date)))
+     table = table.append_column(
+         "year", pa.array(np.full(len(table), date.year), type=pa.uint16())
+     )
+     table = table.append_column(
+         "month", pa.array(np.full(len(table), date.month), type=pa.uint8())
+     )
+     table = table.append_column(
+         PARTITION_COLUMN_NAME,
+         pa.array(
+             [to_partition_key(ticker) for ticker in table.column("ticker").to_pylist()]
+         ),
+     )
+     table = table.sort_by([("window_start", "ascending"), ("ticker", "ascending")])
+     # print(f"aggs {date=} {table.to_pandas().head()=}")
+     return table
+
+
+ # def generate_custom_agg_batches_from_tables(config: PolygonConfig):
+ # for date, trades_table in generate_csv_trades_tables(config):
+ # aggs_table = trades_to_custom_aggs(config, date, trades_table)
+ # yield aggs_table
+ # del aggs_table
+ # del trades_table
+
+
+ def file_visitor(written_file):
+     print(f"{written_file.path=}")
+
+
+ def convert_trades_to_custom_aggs(
+     config: PolygonConfig, overwrite: bool = False
+ ) -> str:
+     if overwrite:
+         print("WARNING: overwrite not implemented/ignored.")
+
+     # MAX_FILES_OPEN = 8
+     # MIN_ROWS_PER_GROUP = 100_000
+
+     print(f"{config.aggs_dir=}")
+
+     # pa.set_memory_pool()
+
+     # pa_ds.write_dataset(
+     # generate_custom_agg_batches_from_tables(config),
+     # schema=custom_aggs_schema(),
+     # filesystem=config.filesystem,
+     # base_dir=config.aggs_dir,
+     # partitioning=custom_aggs_partitioning(),
+     # format="parquet",
+     # existing_data_behavior="overwrite_or_ignore",
+     # # max_open_files = MAX_FILES_OPEN,
+     # # min_rows_per_group = MIN_ROWS_PER_GROUP,
+     # )
+
+     for date, trades_table in generate_csv_trades_tables(config):
+         aggs_table = trades_to_custom_aggs(config, date, trades_table)
+         pa_ds.write_dataset(
+             aggs_table,
+             filesystem=config.filesystem,
+             base_dir=config.aggs_dir,
+             partitioning=custom_aggs_partitioning(),
+             format="parquet",
+             existing_data_behavior="overwrite_or_ignore",
+             file_visitor=file_visitor,
+             # max_open_files=10,
+             # min_rows_per_group=MIN_ROWS_PER_GROUP,
+         )
+         del aggs_table
+         del trades_table
+
+     # with ProcessPoolExecutor(max_workers=1) as executor:
+     # executor.map(
+     # configure_write_custom_aggs_to_dataset(config),
+     # generate_csv_trades_tables(config),
+     # )
+
+     print(f"Generated aggregates to {config.aggs_dir=}")
+     return config.aggs_dir
+
+
+ # https://github.com/twopirllc/pandas-ta/issues/731#issuecomment-1766786952
+
+ # def calculate_mfi(high, low, close, volume, period):
+ # typical_price = (high + low + close) / 3
+ # money_flow = typical_price * volume
+ # mf_sign = np.where(typical_price > np.roll(typical_price, shift=1), 1, -1)
+ # signed_mf = money_flow * mf_sign
+
+ # # Calculate gain and loss using vectorized operations
+ # positive_mf = np.maximum(signed_mf, 0)
+ # negative_mf = np.maximum(-signed_mf, 0)
+
+ # mf_avg_gain = np.convolve(positive_mf, np.ones(period), mode='full')[:len(positive_mf)] / period
+ # mf_avg_loss = np.convolve(negative_mf, np.ones(period), mode='full')[:len(negative_mf)] / period
+
+ # epsilon = 1e-10 # Small epsilon value to avoid division by zero
+ # mfi = 100 - 100 / (1 + mf_avg_gain / (mf_avg_loss + epsilon))
+ # return mfi
+
+
+ def get_by_ticker_aggs_dates(config: PolygonConfig) -> set[datetime.date]:
+     file_info = config.filesystem.get_file_info(config.by_ticker_aggs_arrow_dir)
+     if file_info.type == pa_fs.FileType.NotFound:
+         return set()
+     by_ticker_aggs_ds = pa_ds.dataset(
+         config.by_ticker_aggs_arrow_dir,
+         format="parquet",
+         schema=custom_aggs_schema(),
+         partitioning=custom_aggs_partitioning(),
+     )
+     return set(
+         [
+             pa_ds.get_partition_keys(fragment.partition_expression).get("date")
+             for fragment in by_ticker_aggs_ds.get_fragments()
+         ]
+     )
+
+
+ def batches_for_date(aggs_ds: pa_ds.Dataset, date: pd.Timestamp):
+     date_filter_expr = (
+         (pa_compute.field("year") == date.year)
+         & (pa_compute.field("month") == date.month)
+         & (pa_compute.field("date") == date.date())
+     )
+     print(f"table for {date=}")
+     # return aggs_ds.scanner(filter=date_filter_expr).to_batches()
+     table = aggs_ds.scanner(filter=date_filter_expr).to_table()
+     table = table.sort_by([("part", "ascending"), ("ticker", "ascending"), ("window_start", "ascending"), ])
+     return table.to_batches()
+
+ def generate_batches_for_schedule(config, aggs_ds):
+     schedule = config.calendar.trading_index(
+         start=config.start_timestamp, end=config.end_timestamp, period="1D"
+     )
+     for timestamp in schedule:
+         # print(f"{timestamp=}")
+         yield from batches_for_date(aggs_ds=aggs_ds, date=timestamp)
+
+
+ # def scatter_custom_aggs_to_by_ticker(
+ # config: PolygonConfig,
+ # overwrite: bool = False,
+ # ) -> str:
+ # lock = FileLock(config.lock_file_path, blocking=False)
+ # with lock:
+ # if not lock.is_locked:
+ # raise IOError("Failed to acquire lock for updating custom assets.")
+ # with open(config.by_ticker_dates_path, "a") as f:
+ # f.write("I have a bad feeling about this.")
+ # by_ticker_aggs_arrow_dir = scatter_custom_aggs_to_by_ticker_(config, overwrite)
+
+ # print(f"Scattered custom aggregates by ticker to {by_ticker_aggs_arrow_dir=}")
+ # return by_ticker_aggs_arrow_dir
+
+
+ def filter_by_date(config: PolygonConfig) -> pa_compute.Expression:
+     start_date = config.start_timestamp.tz_localize(config.calendar.tz.key).date()
+     limit_date = (
+         (config.end_timestamp + pd.Timedelta(days=1))
+         .tz_localize(config.calendar.tz.key)
+         .date()
+     )
+     return (pa_compute.field("date") >= start_date) & (
+         pa_compute.field("date") <= limit_date
+     )
+
+
+ # def generate_batches_with_partition(
+ # config: PolygonConfig,
+ # aggs_ds: pa_ds.Dataset,
+ # ) -> Iterator[pa.Table]:
+ # for fragment in aggs_ds.sort_by("date").get_fragments(
+ # filter=filter_by_date(config),
+ # ):
+ # for batch in fragment.to_batches():
+ # # batch = batch.append_column(
+ # # PARTITION_COLUMN_NAME,
+ # # pa.array(
+ # # [
+ # # to_partition_key(ticker)
+ # # for ticker in batch.column("ticker").to_pylist()
+ # # ]
+ # # ),
+ # # )
+ # yield batch.sort_by(
+ # [("ticker", "ascending"), ("window_start", "ascending")]
+ # )
+ # del batch
+ # del fragment
+
+
+ def generate_batches_with_partition(
+     config: PolygonConfig,
+     aggs_ds: pa_ds.Dataset,
+ ) -> Iterator[pa.Table]:
+     for fragment in (
+         aggs_ds.filter(filter_by_date(config))
+         .sort_by([(PARTITION_COLUMN_NAME, "ascending"), ("date", "ascending")])
+         .get_fragments()
+     ):
+         for batch in fragment.to_batches():
+             yield batch.sort_by(
+                 [("ticker", "ascending"), ("window_start", "ascending")]
+             )
+             del batch
+         del fragment
+
+
+ def scatter_custom_aggs_to_by_ticker(config, overwrite=False) -> str:
+     aggs_ds = pa_ds.dataset(
+         config.aggs_dir,
+         format="parquet",
+         schema=custom_aggs_schema(),
+         partitioning=custom_aggs_partitioning(),
+     )
+     by_ticker_schema = aggs_ds.schema
+     partitioning = pa_ds.partitioning(
+         pa.schema([(PARTITION_COLUMN_NAME, pa.string())]),
+         flavor="hive",
+     )
+     by_ticker_aggs_arrow_dir = config.by_ticker_aggs_arrow_dir
+     print(f"Scattering custom aggregates by ticker to {by_ticker_aggs_arrow_dir=}")
+     pa_ds.write_dataset(
+         # generate_batches_with_partition(config=config, aggs_ds=aggs_ds),
+         generate_batches_for_schedule(config=config, aggs_ds=aggs_ds),
+         schema=by_ticker_schema,
+         base_dir=by_ticker_aggs_arrow_dir,
+         partitioning=partitioning,
+         format="parquet",
+         existing_data_behavior="overwrite_or_ignore",
+     )
+     print(f"Scattered aggregates by ticker to {by_ticker_aggs_arrow_dir=}")
+     return by_ticker_aggs_arrow_dir
+
+
+ # def scatter_custom_aggs_to_by_ticker(config, overwrite=False) -> str:
+ # file_info = config.filesystem.get_file_info(config.aggs_dir)
+ # if file_info.type == pa_fs.FileType.NotFound:
+ # raise FileNotFoundError(f"{config.aggs_dir=} not found.")
+
+ # by_ticker_aggs_arrow_dir = config.by_ticker_aggs_arrow_dir
+ # if os.path.exists(by_ticker_aggs_arrow_dir):
+ # if overwrite:
+ # print(f"Removing {by_ticker_aggs_arrow_dir=}")
+ # shutil.rmtree(by_ticker_aggs_arrow_dir)
+
+ # schedule = config.calendar.trading_index(
+ # start=config.start_timestamp, end=config.end_timestamp, period="1D"
+ # )
+ # assert type(schedule) is pd.DatetimeIndex
+
+ # print(f"Scattering custom aggregates by ticker to {by_ticker_aggs_arrow_dir=}")
+ # aggs_ds = pa_ds.dataset(
+ # config.aggs_dir,
+ # format="parquet",
+ # schema=custom_aggs_schema(),
+ # partitioning=custom_aggs_partitioning(),
+ # )
+ # by_ticker_partitioning = pa_ds.partitioning(
+ # pa.schema([(PARTITION_COLUMN_NAME, pa.string())]),
+ # # pa.schema(
+ # # [
+ # # (PARTITION_COLUMN_NAME, pa.string()),
+ # # ("year", pa.uint16()),
+ # # ("month", pa.uint8()),
+ # # ("date", pa.date32()),
+ # # ]
+ # # ),
+ # flavor="hive",
+ # )
+ # by_ticker_schema = custom_aggs_schema()
+ # by_ticker_schema = by_ticker_schema.append(
+ # pa.field(PARTITION_COLUMN_NAME, pa.string(), nullable=False),
+ # )
+
+ # # TODO: Collect the dates we've scattered and write a special partition key with them.
+ # pa_ds.write_dataset(
+ # generate_batches_for_schedule(schedule, aggs_ds),
+ # schema=by_ticker_schema,
+ # base_dir=by_ticker_aggs_arrow_dir,
+ # partitioning=by_ticker_partitioning,
+ # format="parquet",
+ # existing_data_behavior="overwrite_or_ignore",
+ # # max_open_files=250,
+ # # file_visitor=file_visitor,
+ # )
+
+ # return by_ticker_aggs_arrow_dir
+
+
+ # def generate_tables_from_custom_aggs_ds(
+ # aggs_ds: pa_ds.Dataset, schedule: pd.DatetimeIndex
+ # ):
+ # for timestamp in schedule:
+ # yield table_for_date(aggs_ds=aggs_ds, date=timestamp.to_pydatetime().date())
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: zipline_polygon_bundle
- Version: 0.1.7
+ Version: 0.2.0
  Summary: A zipline-reloaded data provider bundle for Polygon.io
  License: GNU AFFERO GENERAL PUBLIC LICENSE
  Version 3, 19 November 2007
@@ -666,19 +666,21 @@ License: GNU AFFERO GENERAL PUBLIC LICENSE
  Keywords: zipline,data-bundle,finance
  Author: Jim White
  Author-email: jim@fovi.com
- Requires-Python: >=3.9,<4.0
+ Requires-Python: >=3.10,<4.0
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
  Classifier: Operating System :: OS Independent
  Requires-Dist: bcolz-zipline (>=1.2.11)
+ Requires-Dist: filelock (>=3.16.0)
+ Requires-Dist: fsspec (>=2024.10)
  Requires-Dist: numpy (<2)
  Requires-Dist: pandas (>=2.2,<3)
- Requires-Dist: polygon-api-client
- Requires-Dist: pyarrow
+ Requires-Dist: polygon-api-client (>=1.14.2)
+ Requires-Dist: pyarrow (>=18.1.0,<19)
  Requires-Dist: pytz (>=2018.5)
  Requires-Dist: requests (>=2.9.1)
  Requires-Dist: toolz (>=0.8.2)
- Requires-Dist: zipline-reloaded (>=3.1)
+ Requires-Dist: zipline-arrow (>=3.2)
  Project-URL: Repository, https://github.com/fovi-llc/zipline-polygon-bundle
  Description-Content-Type: text/markdown
 
@@ -742,6 +744,9 @@ register_polygon_equities_bundle(
  ## Install the Zipline Polygon.io Bundle PyPi package and check that it works.
  Listing bundles will show if everything is working correctly.
  ```bash
+ pip install -U git+https://github.com/fovi-llc/zipline-reloaded.git@calendar
+ pip install -U git+https://github.com/fovi-llc/zipline-polygon-bundle.git
+
  pip install zipline_polygon_bundle
  zipline -e extension.py bundles
  ```
@@ -0,0 +1,18 @@
+ zipline_polygon_bundle/__init__.py,sha256=KGN5kBi021Eiz_GDtxVRTUdXgYWe6loG_C8XcrVNHrY,1765
+ zipline_polygon_bundle/adjustments.py,sha256=4garYK7RUrYyCIhCm0ZqHsk3y2bCt9vHUkWoHvVniTA,8233
+ zipline_polygon_bundle/bundle.py,sha256=7f_rpVBhR1XyOJ1e7Lulq1Uh4DWJmHxFQKZNfz9OSgQ,19805
+ zipline_polygon_bundle/compute_signals.py,sha256=FxcMuwMmxuvyy45y1avdL_uFEn0B4_2ekcv_B4AyPo0,10115
+ zipline_polygon_bundle/concat_all_aggs.py,sha256=Nuj0pytQAVoK8OK7qx5m3jWCV8uJIPsa0XHnmicgSmg,12066
+ zipline_polygon_bundle/concat_all_aggs_partitioned.py,sha256=AQq4ai5u5GyclWzQq2C8zIvHl_zjvLiDtxarNejwCQ4,6325
+ zipline_polygon_bundle/config.py,sha256=_-BlT57ff4byeOJU54tkQ7OdtFmoaA9xHAQDMdGnkb4,10471
+ zipline_polygon_bundle/nyse_all_hours_calendar.py,sha256=QrwWHm3_sfwrtt1tN5u6rqjTQcwN3qxyhjNGeHdyqcI,698
+ zipline_polygon_bundle/polygon_file_reader.py,sha256=TCq6hKlxixwtL57xLxs9GnvH3MMa6aWBI9mi1-PBNHw,3749
+ zipline_polygon_bundle/process_all_aggs.py,sha256=MVhb8xn9-DngSNSrRIpMG4XAgHjMXktoqYrxuM9ph-c,3069
+ zipline_polygon_bundle/quotes.py,sha256=yFjlPiQXPp0t6w2Bo96VLtYSqITP7WCLwMp5CH3zx1E,4260
+ zipline_polygon_bundle/split_aggs_by_ticker.py,sha256=HI_3nuN6E_VCq7LfOj4Dib_qm8wYME-jdXXX4rt-9YI,2150
+ zipline_polygon_bundle/tickers_and_names.py,sha256=BjYquIlSBQGd1yDW3m3cGuXKVvUfh_waYwdMR7eAhuM,15402
+ zipline_polygon_bundle/trades.py,sha256=5EXD8FUKPUB4ROTXJsl29_U7wzBPWTGbOkKZMbPWZUU,20133
+ zipline_polygon_bundle-0.2.0.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+ zipline_polygon_bundle-0.2.0.dist-info/METADATA,sha256=0PIiUhmj7kTVZeo0iNIjlZmYHBCKIaGVCY7zSbmOvqY,46912
+ zipline_polygon_bundle-0.2.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+ zipline_polygon_bundle-0.2.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
- zipline_polygon_bundle/__init__.py,sha256=kFkI4ZEej7yeuSig2r59AWLGzqSVh0dON4wdreCeizA,595
- zipline_polygon_bundle/adjustments.py,sha256=k8Ykc4zv49Z8m1veFnJNeoPcw1FMN2dAxqV6xWmUfLw,6814
- zipline_polygon_bundle/bundle.py,sha256=De1IHUjAxoZRaE6fVXY4qa6E7t43q_ELXAmcnJOtJEc,19260
- zipline_polygon_bundle/concat_all_aggs.py,sha256=vv0MDxbSJjgZzstUP1K084FRy3W7w6Tt7FymghwcfMU,9021
- zipline_polygon_bundle/concat_all_aggs_partitioned.py,sha256=b-yvwlQMyv2JO8KeeNUFD0EL0giNxWkS9ukDczgIJ20,6349
- zipline_polygon_bundle/config.py,sha256=s1z4SGCcZH671NT8wjZZMQeBL4ef5SKzn8c8FXbTvlI,4755
- zipline_polygon_bundle/polygon_file_reader.py,sha256=a-MTMc_FnecmB2Q1o_LE03IeqDYvbPYCvFtawxt0INw,3755
- zipline_polygon_bundle/process_all_aggs.py,sha256=QLgH2HpS27JvkMqG1dsG-D0FIUDdXw1IR_UaMIJfdeA,3075
- zipline_polygon_bundle/split_aggs_by_ticker.py,sha256=HI_3nuN6E_VCq7LfOj4Dib_qm8wYME-jdXXX4rt-9YI,2150
- zipline_polygon_bundle/tickers_and_names.py,sha256=VVtI2FD_Gr0YOpCXhUlU0Agg1_-Ul1XW374kVwjMJck,16506
- zipline_polygon_bundle-0.1.7.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
- zipline_polygon_bundle-0.1.7.dist-info/METADATA,sha256=4XfjLKiVXX30yq2xGI-4kuA2b8fzB5J-8AXRzt65HkQ,46667
- zipline_polygon_bundle-0.1.7.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
- zipline_polygon_bundle-0.1.7.dist-info/RECORD,,