zipline_polygon_bundle 0.2.0.dev1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
@@ -1,17 +1,17 @@
1
1
  from .bundle import (
2
2
  register_polygon_equities_bundle,
3
3
  symbol_to_upper,
4
- polygon_equities_bundle_day,
5
- polygon_equities_bundle_minute,
4
+ ingest_polygon_equities_bundle
6
5
  )
7
6
 
8
7
  from .config import PolygonConfig
9
8
  from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
10
9
  from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
11
10
  from .adjustments import load_splits, load_dividends, load_conditions
12
- from .trades import trades_schema, trades_dataset, cast_trades, date_to_path
11
+ from .trades import trades_schema, trades_dataset, cast_trades
13
12
  from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
14
- from .trades import get_custom_aggs_dates, generate_csv_trades_tables, compute_signals_for_all_custom_aggs
13
+ from .trades import get_aggs_dates, generate_csv_trades_tables
14
+ # from .compute_signals import compute_signals_for_all_custom_aggs
15
15
  from .quotes import quotes_schema, quotes_dataset, cast_quotes
16
16
  # from .tickers_and_names import load_all_tickers, merge_tickers, ticker_names_from_merged_tickers, get_ticker_universe
17
17
  from .tickers_and_names import PolygonAssets, get_ticker_universe
@@ -22,8 +22,7 @@ __all__ = [
22
22
  "register_nyse_all_hours_calendar",
23
23
  "NYSE_ALL_HOURS",
24
24
  "symbol_to_upper",
25
- "polygon_equities_bundle_day",
26
- "polygon_equities_bundle_minute",
25
+ "ingest_polygon_equities_bundle",
27
26
  "PolygonConfig",
28
27
  "concat_all_aggs_from_csv",
29
28
  "generate_csv_agg_tables",
@@ -33,14 +32,13 @@ __all__ = [
33
32
  "trades_schema",
34
33
  "trades_dataset",
35
34
  "cast_trades",
36
- "date_to_path",
37
- "get_custom_aggs_dates",
35
+ "get_aggs_dates",
38
36
  "generate_csv_trades_tables",
39
37
  "custom_aggs_partitioning",
40
38
  "custom_aggs_schema",
41
39
  "trades_to_custom_aggs",
42
40
  "convert_trades_to_custom_aggs",
43
- "compute_signals_for_all_custom_aggs",
41
+ # "compute_signals_for_all_custom_aggs",
44
42
  "quotes_schema",
45
43
  "quotes_dataset",
46
44
  "cast_quotes",
@@ -10,19 +10,19 @@ from urllib3 import HTTPResponse
10
10
 
11
11
 
12
12
  def load_polygon_splits(
13
- config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
13
+ config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
14
14
  ) -> pd.DataFrame:
15
15
  # N.B. If the schema changes then the filename should change. We're on v3 now.
16
16
  splits_path = config.api_cache_path(
17
- start_date=first_start_end, end_date=last_end_date, filename="list_splits"
17
+ first_day=first_day, last_day=last_day, filename="list_splits"
18
18
  )
19
- expected_split_count = (last_end_date - first_start_end).days * 3
19
+ expected_split_count = (last_day - first_day).days * 3
20
20
  if not os.path.exists(splits_path):
21
21
  client = polygon.RESTClient(api_key=config.api_key)
22
22
  splits = client.list_splits(
23
23
  limit=1000,
24
- execution_date_gte=first_start_end,
25
- execution_date_lt=last_end_date + datetime.timedelta(days=1),
24
+ execution_date_gte=first_day.date(),
25
+ execution_date_lt=last_day.date() + datetime.timedelta(days=1),
26
26
  )
27
27
  if splits is HTTPResponse:
28
28
  raise ValueError(f"Polygon.list_splits bad HTTPResponse: {splits}")
@@ -32,7 +32,7 @@ def load_polygon_splits(
32
32
  splits.to_parquet(splits_path)
33
33
  if len(splits) < expected_split_count:
34
34
  logging.warning(
35
- f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
35
+ f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
36
36
  "This is probably fine if your historical range is short."
37
37
  )
38
38
  # We will always load from the file to avoid any chance of weird errors.
@@ -41,7 +41,7 @@ def load_polygon_splits(
41
41
  print(f"Loaded {len(splits)=} from {splits_path}")
42
42
  if len(splits) < expected_split_count:
43
43
  logging.warning(
44
- f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
44
+ f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
45
45
  "This is probably fine if your historical range is short."
46
46
  )
47
47
  return splits
@@ -50,11 +50,11 @@ def load_polygon_splits(
50
50
 
51
51
  def load_splits(
52
52
  config: PolygonConfig,
53
- first_start_end: datetime.date,
54
- last_end_date: datetime.date,
53
+ first_day: pd.Timestamp,
54
+ last_day: pd.Timestamp,
55
55
  ticker_to_sid: dict[str, int],
56
56
  ) -> pd.DataFrame:
57
- splits = load_polygon_splits(config, first_start_end, last_end_date)
57
+ splits = load_polygon_splits(config, first_day=first_day, last_day=last_day)
58
58
  splits["sid"] = splits["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
59
59
  splits.dropna(inplace=True)
60
60
  splits["sid"] = splits["sid"].astype("int64")
@@ -70,18 +70,18 @@ def load_splits(
70
70
 
71
71
 
72
72
  def load_polygon_dividends(
73
- config: PolygonConfig, first_start_date: datetime.date, last_end_date: datetime.date
73
+ config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
74
74
  ) -> pd.DataFrame:
75
75
  # N.B. If the schema changes then the filename should change. We're on v3 now.
76
76
  dividends_path = config.api_cache_path(
77
- start_date=first_start_date, end_date=last_end_date, filename="list_dividends"
77
+ first_day=first_day, last_day=last_day, filename="list_dividends"
78
78
  )
79
79
  if not os.path.exists(dividends_path):
80
80
  client = polygon.RESTClient(api_key=config.api_key)
81
81
  dividends = client.list_dividends(
82
82
  limit=1000,
83
- record_date_gte=first_start_date,
84
- pay_date_lt=last_end_date + datetime.timedelta(days=1),
83
+ record_date_gte=first_day.date(),
84
+ pay_date_lt=last_day.date() + datetime.timedelta(days=1),
85
85
  )
86
86
  if dividends is HTTPResponse:
87
87
  raise ValueError(f"Polygon.list_dividends bad HTTPResponse: {dividends}")
@@ -104,35 +104,30 @@ def load_polygon_dividends(
104
104
 
105
105
 
106
106
  def load_chunked_polygon_dividends(
107
- config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
107
+ config: PolygonConfig, first_day: pd.Timestamp,
108
+ last_day: pd.Timestamp
108
109
  ) -> pd.DataFrame:
109
110
  dividends_list = []
110
- next_start_end = first_start_end
111
- while next_start_end < last_end_date:
111
+ next_start_end = first_day
112
+ while next_start_end < last_day:
112
113
  # We want at most a month of dividends at a time. They should end on the last day of the month.
113
- # So the next_end_date is the day before the first day of the next month.
114
- first_of_next_month = datetime.date(
115
- next_start_end.year + (next_start_end.month // 12),
116
- (next_start_end.month % 12) + 1,
117
- 1,
118
- )
119
- next_end_date = first_of_next_month - datetime.timedelta(days=1)
120
- if next_end_date > last_end_date:
121
- next_end_date = last_end_date
114
+ next_end_date = next_start_end + pd.offsets.MonthEnd()
115
+ if next_end_date > last_day:
116
+ next_end_date = last_day
122
117
  dividends_list.append(
123
- load_polygon_dividends(config, next_start_end, next_end_date)
118
+ load_polygon_dividends(config, first_day=next_start_end, last_day=next_end_date)
124
119
  )
125
- next_start_end = next_end_date + datetime.timedelta(days=1)
120
+ next_start_end = next_end_date + pd.Timedelta(days=1)
126
121
  return pd.concat(dividends_list)
127
122
 
128
123
 
129
124
  def load_dividends(
130
125
  config: PolygonConfig,
131
- first_start_end: datetime.date,
132
- last_end_date: datetime.date,
126
+ first_day: pd.Timestamp,
127
+ last_day: pd.Timestamp,
133
128
  ticker_to_sid: dict[str, int],
134
129
  ) -> pd.DataFrame:
135
- dividends = load_chunked_polygon_dividends(config, first_start_end, last_end_date)
130
+ dividends = load_chunked_polygon_dividends(config, first_day=first_day, last_day=last_day)
136
131
  dividends["sid"] = dividends["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
137
132
  dividends.dropna(how="any", inplace=True)
138
133
  dividends["sid"] = dividends["sid"].astype("int64")
@@ -159,7 +154,7 @@ def load_conditions(config: PolygonConfig) -> pd.DataFrame:
159
154
  # The API doesn't use dates for the condition codes but this is a way to provide control over caching.
160
155
  # Main thing is to get the current conditions list but we don't want to call more than once a day.
161
156
  conditions_path = config.api_cache_path(
162
- start_date=config.start_timestamp.date(), end_date=config.end_timestamp.date(), filename="conditions"
157
+ first_day=config.start_timestamp, last_day=config.end_timestamp, filename="conditions"
163
158
  )
164
159
  expected_conditions_count = 100
165
160
  if not os.path.exists(conditions_path):