zipline_polygon_bundle 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
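For orientation: 0.2.0 collapses the separate polygon_equities_bundle_day / polygon_equities_bundle_minute ingest functions into a single ingest_polygon_equities_bundle parameterized by agg_time, and register_polygon_equities_bundle now takes start_date/end_date (resolved from the aggs files when omitted) plus minutes_per_day and environ. A minimal registration sketch against the new signature follows; the bundle name, agg_time value, and top-level import path are illustrative assumptions, not taken from this diff:

    # Hypothetical extension.py snippet; assumes register_polygon_equities_bundle
    # is importable from the package top level and that the environment variables
    # PolygonConfig reads (Polygon data directory, API key) are already set.
    from zipline_polygon_bundle import register_polygon_equities_bundle

    register_polygon_equities_bundle(
        "polygon",              # example bundle name
        calendar_name="XNYS",
        agg_time="day",         # "day", "minute" (aggs), or "1minute" (trades)
        # start_date/end_date omitted: 0.2.0 infers them from the first/last aggs files
    )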
@@ -1,17 +1,23 @@
  from zipline.data.bundles import register
  from zipline.data.resample import minute_frame_to_session_frame
 
- from .config import PolygonConfig
+ from exchange_calendars.calendar_helpers import parse_date
+
  from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
  from .adjustments import load_splits, load_dividends
+ from .config import PolygonConfig, AGG_TIME_DAY, AGG_TIME_MINUTE, AGG_TIME_TRADES
+ from .nyse_all_hours_calendar import register_nyse_all_hours_calendar
+ from .trades import convert_trades_to_custom_aggs, scatter_custom_aggs_to_by_ticker
 
  import pyarrow
  import pyarrow.compute
+ import pyarrow.dataset
 
  import pandas as pd
- import logging
 
- import concurrent.futures
+ import os
+ from filelock import FileLock
+ import logging
 
 
  # TODO: Change warnings to be relative to number of days in the range.
@@ -26,67 +32,40 @@ def symbol_to_upper(s: str) -> str:
  def generate_all_agg_tables_from_csv(
      config: PolygonConfig,
  ):
-     paths, schema, tables = generate_csv_agg_tables(config)
+     schema, tables = generate_csv_agg_tables(config)
      for table in tables:
          table = table.sort_by([("ticker", "ascending"), ("window_start", "ascending")])
          yield table
 
 
- # def remove_duplicated_index(df: pd.DataFrame) -> pd.DataFrame:
- #     duplicated_index = df.index.duplicated(keep=False)
- #     if not duplicated_index.any():
- #         return df
- #     # Find duplicate index values (date) with zero volume or transactions
- #     duplicated_index_with_zero_activity = duplicated_index & (
- #         df["volume"] == 0) | (df["transactions"] == 0)
- #     if duplicated_index_with_zero_activity.any():
- #         print(
- #             f" WARNING: Got dupes with zero activity {df[duplicated_index_with_zero_activity]=}"
- #         )
- #         df = df[~duplicated_index_with_zero_activity]
- #         duplicated_index = df.index.duplicated(keep=False)
- #     if not duplicated_index.any():
- #         return df
- #     print(f" WARNING: Dropping dupes {df[duplicated_index]=}")
- #     df = df[df.index.duplicated(keep="first")]
- #     return df
-
-
- def aggregate_multiple_aggs_per_date(df: pd.DataFrame) -> pd.DataFrame:
-     duplicated_index = df.index.duplicated(keep=False)
-     if not duplicated_index.any():
-         return df
-     duplicates = df[duplicated_index]
-     duplicate_index_values = duplicates.index.values
-     print()
-     if duplicates["symbol"].nunique() != 1:
-         logging.error(f"{duplicates['symbol'].unique()=} {duplicate_index_values=}")
-     logging.warning(
-         f"Aggregating dupes df[df.index.duplicated(keep=False)]=\n{duplicates}"
-     )
-     df = df.groupby(df.index).agg(
-         {
-             "symbol": "first",
-             "volume": "sum",
-             "open": "first",
-             "close": "last",
-             "high": "max",
-             "low": "min",
-             "transactions": "sum",
-         }
+ def rename_polygon_to_zipline(table: pyarrow.Table, time_name: str) -> pyarrow.Table:
+     table = table.rename_columns(
+         [
+             (
+                 "symbol"
+                 if name == "ticker"
+                 else time_name if name == "window_start" else name
+             )
+             for name in table.column_names
+         ]
      )
-     print(f"WARNING: Aggregated dupes df=\n{df[df.index.isin(duplicate_index_values)]}")
-     return df
+     return table
 
 
- def process_day_aggregates(
+ def process_day_table(
      table,
      sessions,
+     minutes,
      metadata,
      calendar,
      symbol_to_sid: dict[str, int],
-     dates_with_data: set,
+     dates_with_data: set[pd.Timestamp],
+     agg_time: str,
  ):
+     table = rename_polygon_to_zipline(table, "day")
+     symbols = table.column("symbol").unique().to_pylist()
+     for sid, symbol in enumerate(symbols):
+         symbol_to_sid[symbol] = sid
      for symbol, sid in symbol_to_sid.items():
          df = table.filter(
              pyarrow.compute.field("symbol") == pyarrow.scalar(symbol)
@@ -94,24 +73,25 @@ def process_day_aggregates(
          # The SQL schema zipline uses for symbols ignores case
          sql_symbol = symbol_to_upper(symbol)
          df["symbol"] = sql_symbol
-         df["day"] = pd.to_datetime(df["day"].dt.date)
+         df["day"] = pd.to_datetime(df["day"].dt.tz_convert(calendar.tz.key).dt.date)
          df = df.set_index("day")
          if not df.index.is_monotonic_increasing:
-             print(f" INFO: {symbol=} {sid=} not monotonic increasing")
+             print(f" INFO: {symbol=} {sid=} not monotonic increasing: {df.index.min()=} {df.index.max()=}")
              df.sort_index(inplace=True)
          # Remove duplicates
          df = df[~df.index.duplicated(keep="first")]
          # Take days as per calendar
          df = df[df.index.isin(sessions)]
          # 2019-08-13 has a bunch of tickers with multiple day aggs per date
-         df = aggregate_multiple_aggs_per_date(df)
+         # TODO: Actually they're for different days so if the filtering doesn't work then do something about it.
+         # df = aggregate_multiple_aggs_per_date(df)
          if len(df) < 1:
              continue
          # Check first and last date.
          start_date = df.index[0]
-         dates_with_data.add(start_date.date())
+         dates_with_data.add(start_date)
          end_date = df.index[-1]
-         dates_with_data.add(end_date.date())
+         dates_with_data.add(end_date)
          try:
              duplicated_index = df.index.duplicated(keep=False)
              df_with_duplicates = df[duplicated_index]
@@ -154,109 +134,22 @@ def process_day_aggregates(
      return
 
 
- def rename_polygon_to_zipline(table: pyarrow.Table, time_name: str) -> pyarrow.Table:
-     table = table.rename_columns(
-         [
-             (
-                 "symbol"
-                 if name == "ticker"
-                 else time_name if name == "window_start" else name
-             )
-             for name in table.column_names
-         ]
-     )
-     return table
-
-
- def polygon_equities_bundle_day(
-     environ,
-     asset_db_writer,
-     minute_bar_writer,
-     daily_bar_writer,
-     adjustment_writer,
-     calendar,
-     start_session,
-     end_session,
-     cache,
-     show_progress,
-     output_dir,
- ):
-     config = PolygonConfig(
-         environ=environ,
-         calendar_name=calendar.name,
-         start_session=start_session,
-         end_session=end_session,
-         agg_time="day",
-     )
-
-     by_ticker_aggs_arrow_dir = concat_all_aggs_from_csv(config)
-     aggregates = pyarrow.dataset.dataset(by_ticker_aggs_arrow_dir)
-
-     # Zipline uses case-insensitive symbols, so we need to convert them to uppercase with a ^ prefix when lowercase.
-     # This is because the SQL schema zipline uses for symbols ignores case.
-     # We put the original symbol in the asset_name field.
-     metadata = pd.DataFrame(
-         columns=(
-             "start_date",
-             "end_date",
-             "auto_close_date",
-             "symbol",
-             "exchange",
-             "asset_name",
-         )
-     )
-
-     table = aggregates.to_table()
-     table = rename_polygon_to_zipline(table, "day")
-     # Get all the symbols in the table by using value_counts to tabulate the unique values.
-     # pyarrow.Table.column returns a pyarrow.ChunkedArray.
-     # https://arrow.apache.org/docs/python/generated/pyarrow.ChunkedArray.html#pyarrow.ChunkedArray.value_counts
-     symbols = sorted(table.column("symbol").value_counts().field(0).to_pylist())
-     symbol_to_sid = {symbol: sid for sid, symbol in enumerate(symbols)}
-     dates_with_data = set()
-
-     # Get data for all stocks and write to Zipline
-     daily_bar_writer.write(
-         process_day_aggregates(
-             table=table,
-             sessions=calendar.sessions_in_range(start_session, end_session),
-             metadata=metadata,
-             calendar=calendar,
-             symbol_to_sid=symbol_to_sid,
-             dates_with_data=dates_with_data,
-         ),
-         show_progress=show_progress,
-     )
-
-     # Write the metadata
-     asset_db_writer.write(equities=metadata)
-
-     # Load splits and dividends
-     first_start_end = min(dates_with_data)
-     last_end_date = max(dates_with_data)
-     splits = load_splits(config, first_start_end, last_end_date, symbol_to_sid)
-     dividends = load_dividends(config, first_start_end, last_end_date, symbol_to_sid)
-
-     # Write splits and dividends
-     adjustment_writer.write(splits=splits, dividends=dividends)
-
-
- def process_minute_fragment(
-     fragment,
+ def process_minute_table(
+     table,
      sessions,
      minutes,
      metadata,
      calendar,
      symbol_to_sid: dict[str, int],
-     dates_with_data: set,
+     dates_with_data: set[pd.Timestamp],
      agg_time: str,
  ):
-     table = fragment.to_table()
-     print(f" {table.num_rows=}")
      table = rename_polygon_to_zipline(table, "timestamp")
-     table = table.sort_by([("symbol", "ascending"), ("timestamp", "ascending")])
+     # print(f"{minutes[:5]=}\n{minutes[-5:]=}")
      table = table.filter(pyarrow.compute.field("timestamp").isin(minutes))
+     # print(f"filtered {table.num_rows=}")
      table_df = table.to_pandas()
+     # print(f"{table_df.head()=}")
      for symbol, df in table_df.groupby("symbol"):
          # print(f"\n{symbol=} {len(df)=} {df['timestamp'].min()} {df['timestamp'].max()}")
          if symbol not in symbol_to_sid:
@@ -266,29 +159,35 @@ def process_minute_fragment(
          sql_symbol = symbol_to_upper(symbol)
          df["symbol"] = sql_symbol
          df = df.set_index("timestamp")
-         if agg_time == "day":
+         # Shouldn't need to do this because the table is sorted.
+         if not df.index.is_monotonic_increasing:
+             print(f" INFO: {symbol=} {sid=} not monotonic increasing")
+             df.sort_index(inplace=True)
+         if agg_time == AGG_TIME_DAY:
              df.drop(columns=["symbol", "transactions"], inplace=True)
-             # Check first and last date.
-             start_date = df.index[0].date()
-             start_timestamp = df.index[0]
+             # Remember first and last date.
+             start_date = df.index[0].tz_convert(calendar.tz.key).normalize()
              dates_with_data.add(start_date)
-             end_date = df.index[-1].date()
-             end_timestamp = df.index[-1]
+             end_date = df.index[-1].tz_convert(calendar.tz.key).normalize()
              dates_with_data.add(end_date)
              df = df[df.index.isin(minutes)]
              len_before = len(df)
+             # print(f"{start_date=} {end_date=} {dates_with_data=}")
+             # print(f"day pre {df.head()=}")
              if len(df) < 1:
                  # TODO: Move sid assignment until after this check for no data.
                  print(
-                     f" WARNING: No data for {symbol=} {sid=} {len_before=} {start_timestamp=} {end_timestamp=}"
+                     f" WARNING: No data for {symbol=} {sid=} {len_before=} {start_date=} {end_date=}"
                  )
                  continue
              df = minute_frame_to_session_frame(df, calendar)
-             df["symbol"] = sql_symbol
+             # print(f"day sess {df.head()=}")
+             # df["symbol"] = sql_symbol
              df = df[df.index.isin(sessions)]
 
              # The auto_close date is the day after the last trade.
-             auto_close_date = end_date + pd.Timedelta(days=1)
+             # auto_close_date = end_date + pd.Timedelta(days=1)
+             auto_close_date = None
 
              # If metadata already has this sid, just extend the end_date and ac_date.
              if sid in metadata.index:
@@ -308,12 +207,12 @@ def process_minute_fragment(
                      start_date,
                      end_date,
                      auto_close_date,
-                     symbol_to_upper(symbol),
+                     sql_symbol,
                      calendar.name,
                      symbol,
                  )
-             df = df.reindex(sessions.tz_localize(None))
-             # df = df.reindex(sessions)
+             # df = df.reindex(sessions.tz_localize(None))
+             df = df.reindex(sessions)
              # Missing volume and transactions are zero
              df["volume"] = df["volume"].fillna(0)
              # df["transactions"] = df["transactions"].fillna(0)
@@ -321,13 +220,14 @@ def process_minute_fragment(
              # TODO: These fills should have the same price for OHLC (open for backfill, close for forward fill)
              df.ffill(inplace=True)
              # Back fill missing data (maybe necessary for before the first day bar)
+             # TODO: Don't want to backfill future values. What's better here?
              df.bfill(inplace=True)
              if len(df) > 0:
                  # print(f"\n{symbol=} {sid=} {len_before=} {start_timestamp=} {end_date=} {end_timestamp=} {len(df)=}")
                  yield sid, df
              else:
                  print(
-                     f" WARNING: No day bars for {symbol=} {sid=} {len_before=} {start_date=} {start_timestamp=} {end_date=} {end_timestamp=}"
+                     f" WARNING: No day bars for {symbol=} {sid=} {len_before=} {start_date=} {start_date=} {end_date=} {end_date=}"
                  )
          else:
              len_before = len(df)
@@ -349,20 +249,35 @@ def process_minute_fragment(
      return
 
 
- def process_minute_aggregates(
+ def process_aggregates(
+     process_table_func,
      fragments,
      sessions,
      minutes,
      metadata,
      calendar,
      symbol_to_sid: dict[str, int],
-     dates_with_data: set,
+     dates_with_data: set[pd.Timestamp],
      agg_time: str,
  ):
-     # We want to do this by Hive partition at a time because each ticker will be complete.
+     # We do this by Hive partition at a time because each ticker will be complete.
      for fragment in fragments:
-         yield from process_minute_fragment(
-             fragment=fragment,
+         # Only get the columns Zipline allows.
+         table = fragment.to_table(
+             columns=[
+                 "ticker",
+                 "window_start",
+                 "open",
+                 "high",
+                 "low",
+                 "close",
+                 "volume",
+                 "transactions",
+             ]
+         )
+         table = table.sort_by([("ticker", "ascending"), ("window_start", "ascending")])
+         yield from process_table_func(
+             table=table,
              sessions=sessions,
              minutes=minutes,
             metadata=metadata,
@@ -371,6 +286,7 @@ def process_minute_aggregates(
              dates_with_data=dates_with_data,
              agg_time=agg_time,
          )
+         del table
 
      # This doesn't seem to be hardly any faster than the above, something with the GIL?
      # Also to use this we'd need to make sure the symbol_to_sid and dates_with_data are thread safe.
@@ -393,15 +309,16 @@ def process_minute_aggregates(
      #         yield from future.result()
 
 
- def polygon_equities_bundle_minute(
+ def ingest_polygon_equities_bundle(
+     agg_time: str,
      environ,
      asset_db_writer,
      minute_bar_writer,
      daily_bar_writer,
      adjustment_writer,
      calendar,
-     start_session,
-     end_session,
+     start_date,
+     end_date,
      cache,
      show_progress,
      output_dir,
@@ -409,12 +326,22 @@ def polygon_equities_bundle_minute(
      config = PolygonConfig(
          environ=environ,
          calendar_name=calendar.name,
-         start_session=start_session,
-         end_session=end_session,
-         agg_time="minute",
+         start_date=start_date,
+         end_date=end_date,
+         agg_time=agg_time,
      )
 
-     by_ticker_aggs_arrow_dir = concat_all_aggs_from_csv(config)
+     print(f"{calendar.name=} {start_date=} {end_date=}")
+     print(f"{calendar.sessions_in_range(start_date, end_date)[:4]}")
+     print(f"{calendar.sessions_in_range(start_date, end_date)[-4:]}")
+     print(f"{calendar.sessions_minutes(start_date, end_date)[:4]}")
+     print(f"{calendar.sessions_minutes(start_date, end_date)[-4:]}")
+
+     if agg_time in [AGG_TIME_TRADES, "1min", "1minute"]:
+         convert_trades_to_custom_aggs(config, overwrite=False)
+         by_ticker_aggs_arrow_dir = scatter_custom_aggs_to_by_ticker(config)
+     else:
+         by_ticker_aggs_arrow_dir = concat_all_aggs_from_csv(config)
      aggregates = pyarrow.dataset.dataset(by_ticker_aggs_arrow_dir)
      # print(f"{aggregates.schema=}")
      # 3.5 billion rows for 10 years of minute data.
@@ -438,37 +365,40 @@ def polygon_equities_bundle_minute(
      )
 
      symbol_to_sid = {}
+     # Keep track of earliest and latest dates with data across all symbols.
      dates_with_data = set()
 
      # Get data for all stocks and write to Zipline
      daily_bar_writer.write(
-         process_minute_aggregates(
+         process_aggregates(
+             process_day_table if config.agg_time == AGG_TIME_DAY else process_minute_table,
              fragments=aggregates.get_fragments(),
-             sessions=calendar.sessions_in_range(start_session, end_session),
-             minutes=calendar.sessions_minutes(start_session, end_session),
+             sessions=calendar.sessions_in_range(start_date, end_date),
+             minutes=calendar.sessions_minutes(start_date, end_date),
              metadata=metadata,
              calendar=calendar,
              symbol_to_sid=symbol_to_sid,
              dates_with_data=dates_with_data,
-             agg_time="day",
+             agg_time=AGG_TIME_DAY,
          ),
          show_progress=show_progress,
      )
 
-     # Get data for all stocks and write to Zipline
-     minute_bar_writer.write(
-         process_minute_aggregates(
-             fragments=aggregates.get_fragments(),
-             sessions=calendar.sessions_in_range(start_session, end_session),
-             minutes=calendar.sessions_minutes(start_session, end_session),
-             metadata=metadata,
-             calendar=calendar,
-             symbol_to_sid=symbol_to_sid,
-             dates_with_data=dates_with_data,
-             agg_time="minute",
-         ),
-         show_progress=show_progress,
-     )
+     if config.agg_time != AGG_TIME_DAY:
+         minute_bar_writer.write(
+             process_aggregates(
+                 process_minute_table,
+                 fragments=aggregates.get_fragments(),
+                 sessions=calendar.sessions_in_range(start_date, end_date),
+                 minutes=calendar.sessions_minutes(start_date, end_date),
+                 metadata=metadata,
+                 calendar=calendar,
+                 symbol_to_sid=symbol_to_sid,
+                 dates_with_data=dates_with_data,
+                 agg_time=AGG_TIME_MINUTE,
+             ),
+             show_progress=show_progress,
+         )
 
      # Write the metadata
      asset_db_writer.write(equities=metadata)
@@ -483,29 +413,93 @@ def polygon_equities_bundle_minute(
      adjustment_writer.write(splits=splits, dividends=dividends)
 
 
+ def ingest_polygon_equities_bundle_for_agg_time(agg_time: str):
+     def ingest_polygon_equities_bundle_inner(
+         environ,
+         asset_db_writer,
+         minute_bar_writer,
+         daily_bar_writer,
+         adjustment_writer,
+         calendar,
+         start_date,
+         end_date,
+         cache,
+         show_progress,
+         output_dir,
+     ):
+         return ingest_polygon_equities_bundle(
+             agg_time=agg_time,
+             environ=environ,
+             asset_db_writer=asset_db_writer,
+             minute_bar_writer=minute_bar_writer,
+             daily_bar_writer=daily_bar_writer,
+             adjustment_writer=adjustment_writer,
+             calendar=calendar,
+             start_date=start_date,
+             end_date=end_date,
+             cache=cache,
+             show_progress=show_progress,
+             output_dir=output_dir,
+         )
+
+     return ingest_polygon_equities_bundle_inner
+
+
  def register_polygon_equities_bundle(
      bundlename,
-     start_session=None,
-     end_session=None,
+     start_date=None,
+     end_date=None,
      calendar_name="XNYS",
-     agg_time="day",
+     agg_time=AGG_TIME_DAY,
+     minutes_per_day=390,
+     environ=os.environ,
      # ticker_list=None,
      # watchlists=None,
      # include_asset_types=None,
  ):
-     if agg_time not in ["day", "minute"]:
-         raise ValueError(f"agg_time must be 'day' or 'minute', not '{agg_time}'")
+     register_nyse_all_hours_calendar()
+
+     # pd.set_option("display.max_columns", None)
+     # pd.set_option("display.width", 500)
+
+     # Note that "minute" is the Polygon minute aggs and "1minute" is the trades.
+     if agg_time not in [AGG_TIME_DAY, AGG_TIME_MINUTE, AGG_TIME_TRADES, "1min", "1minute"]:
+         raise ValueError(
+             f"agg_time must be 'day', 'minute' (aggs), or '1minute' (trades), not '{agg_time}'"
+         )
+
+     # We need to know the start and end dates of the session before the bundle is
+     # registered because even though we only need it for ingest, the metadata in
+     # the writer is initialized and written before our ingest function is called.
+     if start_date is None or end_date is None:
+         config = PolygonConfig(
+             environ=environ,
+             calendar_name=calendar_name,
+             start_date=start_date,
+             end_date=end_date,
+             agg_time=agg_time,
+         )
+         first_aggs_date, last_aggs_date = config.find_first_and_last_aggs(
+             config.aggs_dir if agg_time in [AGG_TIME_DAY, AGG_TIME_MINUTE] else config.trades_dir,
+             config.csv_paths_pattern,
+         )
+         # print(f"{bundlename=} {first_aggs_date=} {last_aggs_date=}")
+         if start_date is None:
+             start_date = first_aggs_date
+         if end_date is None:
+             end_date = last_aggs_date
+
+     start_session = parse_date(start_date, raise_oob=False) if start_date else None
+     end_session = parse_date(end_date, raise_oob=False) if end_date else None
+     # print(f"Registered {bundlename=} {agg_time=} {start_session=} {end_session=}")
+
      register(
          bundlename,
-         (
-             polygon_equities_bundle_minute
-             if agg_time == "minute"
-             else polygon_equities_bundle_day
-         ),
+         ingest_polygon_equities_bundle_for_agg_time(agg_time),
          start_session=start_session,
          end_session=end_session,
          calendar_name=calendar_name,
-         # minutes_per_day=390,
+         minutes_per_day=minutes_per_day,
          # create_writers=True,
      )
 
@@ -517,12 +511,12 @@ def register_polygon_equities_bundle(
  # config = PolygonConfig(
  #     environ=os.environ,
  #     calendar_name="XNYS",
- #     # start_session="2003-10-01",
- #     # start_session="2018-01-01",
- #     start_session="2023-01-01",
- #     # end_session="2023-01-12",
- #     end_session="2023-12-31",
- #     # end_session="2024-06-30",
+ #     # start_date="2003-10-01",
+ #     # start_date="2018-01-01",
+ #     start_date="2023-01-01",
+ #     # end_date="2023-01-12",
+ #     end_date="2023-12-31",
+ #     # end_date="2024-06-30",
  # )
  # splits = load_polygon_splits(config)
  # splits.info()