zipline_polygon_bundle-0.1.7-py3-none-any.whl → zipline_polygon_bundle-0.2.0-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
--- zipline_polygon_bundle/__init__.py
+++ zipline_polygon_bundle/__init__.py
@@ -1,22 +1,50 @@
 from .bundle import (
     register_polygon_equities_bundle,
     symbol_to_upper,
-    polygon_equities_bundle_day,
-    polygon_equities_bundle_minute,
+    ingest_polygon_equities_bundle
 )
 
 from .config import PolygonConfig
+from .nyse_all_hours_calendar import NYSE_ALL_HOURS, register_nyse_all_hours_calendar
 from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
-from .adjustments import load_splits, load_dividends
+from .adjustments import load_splits, load_dividends, load_conditions
+from .trades import trades_schema, trades_dataset, cast_trades
+from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_trades_to_custom_aggs
+from .trades import get_aggs_dates, generate_csv_trades_tables
+# from .compute_signals import compute_signals_for_all_custom_aggs
+from .quotes import quotes_schema, quotes_dataset, cast_quotes
+# from .tickers_and_names import load_all_tickers, merge_tickers, ticker_names_from_merged_tickers, get_ticker_universe
+from .tickers_and_names import PolygonAssets, get_ticker_universe
+
 
 __all__ = [
     "register_polygon_equities_bundle",
+    "register_nyse_all_hours_calendar",
+    "NYSE_ALL_HOURS",
     "symbol_to_upper",
-    "polygon_equities_bundle_day",
-    "polygon_equities_bundle_minute",
+    "ingest_polygon_equities_bundle",
     "PolygonConfig",
     "concat_all_aggs_from_csv",
     "generate_csv_agg_tables",
     "load_splits",
     "load_dividends",
+    "load_conditions",
+    "trades_schema",
+    "trades_dataset",
+    "cast_trades",
+    "get_aggs_dates",
+    "generate_csv_trades_tables",
+    "custom_aggs_partitioning",
+    "custom_aggs_schema",
+    "trades_to_custom_aggs",
+    "convert_trades_to_custom_aggs",
+    # "compute_signals_for_all_custom_aggs",
+    "quotes_schema",
+    "quotes_dataset",
+    "cast_quotes",
+    # "load_all_tickers",
+    # "merge_tickers",
+    # "ticker_names_from_merged_tickers",
+    "PolygonAssets",
+    "get_ticker_universe",
 ]
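
In 0.2.0 the package exposes a single `ingest_polygon_equities_bundle` entry point plus a 24-hour `NYSE_ALL_HOURS` calendar in place of the separate day/minute bundle functions. A minimal usage sketch of the new exports; the arguments passed to `register_polygon_equities_bundle` are illustrative assumptions, not confirmed by this diff:

```python
# Illustrative sketch only: the arguments to register_polygon_equities_bundle
# are assumptions, not part of this diff.
from zipline_polygon_bundle import (
    NYSE_ALL_HOURS,
    register_nyse_all_hours_calendar,
    register_polygon_equities_bundle,
)

# The extended-hours calendar must be registered before any bundle
# that names it is ingested.
register_nyse_all_hours_calendar()

# Register a bundle keyed to the new calendar; "polygon" is a
# hypothetical bundle name for `zipline ingest -b polygon`.
register_polygon_equities_bundle(
    "polygon",
    calendar_name=NYSE_ALL_HOURS,
)
```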
--- zipline_polygon_bundle/adjustments.py
+++ zipline_polygon_bundle/adjustments.py
@@ -10,19 +10,19 @@ from urllib3 import HTTPResponse
 
 
 def load_polygon_splits(
-    config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+    config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
 ) -> pd.DataFrame:
     # N.B. If the schema changes then the filename should change. We're on v3 now.
     splits_path = config.api_cache_path(
-        start_date=first_start_end, end_date=last_end_date, filename="list_splits"
+        first_day=first_day, last_day=last_day, filename="list_splits"
     )
-    expected_split_count = (last_end_date - first_start_end).days * 3
+    expected_split_count = (last_day - first_day).days * 3
     if not os.path.exists(splits_path):
         client = polygon.RESTClient(api_key=config.api_key)
         splits = client.list_splits(
             limit=1000,
-            execution_date_gte=first_start_end,
-            execution_date_lt=last_end_date + datetime.timedelta(days=1),
+            execution_date_gte=first_day.date(),
+            execution_date_lt=last_day.date() + datetime.timedelta(days=1),
         )
         if splits is HTTPResponse:
             raise ValueError(f"Polygon.list_splits bad HTTPResponse: {splits}")
@@ -32,7 +32,7 @@ def load_polygon_splits(
         splits.to_parquet(splits_path)
         if len(splits) < expected_split_count:
             logging.warning(
-                f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+                f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
                 "This is probably fine if your historical range is short."
             )
     # We will always load from the file to avoid any chance of weird errors.
@@ -41,7 +41,7 @@ def load_polygon_splits(
         print(f"Loaded {len(splits)=} from {splits_path}")
         if len(splits) < expected_split_count:
             logging.warning(
-                f"Only got {len(splits)=} from Polygon list_splits (expected {expected_split_count=}). "
+                f"Only got {len(splits)=} from Polygon list_splits ({expected_split_count=}). "
                 "This is probably fine if your historical range is short."
             )
         return splits
@@ -50,11 +50,11 @@ def load_polygon_splits(
 
 def load_splits(
     config: PolygonConfig,
-    first_start_end: datetime.date,
-    last_end_date: datetime.date,
+    first_day: pd.Timestamp,
+    last_day: pd.Timestamp,
     ticker_to_sid: dict[str, int],
 ) -> pd.DataFrame:
-    splits = load_polygon_splits(config, first_start_end, last_end_date)
+    splits = load_polygon_splits(config, first_day=first_day, last_day=last_day)
     splits["sid"] = splits["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
     splits.dropna(inplace=True)
     splits["sid"] = splits["sid"].astype("int64")
@@ -70,18 +70,18 @@ def load_splits(
 
 
 def load_polygon_dividends(
-    config: PolygonConfig, first_start_date: datetime.date, last_end_date: datetime.date
+    config: PolygonConfig, first_day: pd.Timestamp, last_day: pd.Timestamp
 ) -> pd.DataFrame:
     # N.B. If the schema changes then the filename should change. We're on v3 now.
     dividends_path = config.api_cache_path(
-        start_date=first_start_date, end_date=last_end_date, filename="list_dividends"
+        first_day=first_day, last_day=last_day, filename="list_dividends"
     )
     if not os.path.exists(dividends_path):
         client = polygon.RESTClient(api_key=config.api_key)
         dividends = client.list_dividends(
             limit=1000,
-            record_date_gte=first_start_date,
-            pay_date_lt=last_end_date + datetime.timedelta(days=1),
+            record_date_gte=first_day.date(),
+            pay_date_lt=last_day.date() + datetime.timedelta(days=1),
         )
         if dividends is HTTPResponse:
             raise ValueError(f"Polygon.list_dividends bad HTTPResponse: {dividends}")
@@ -104,35 +104,30 @@ def load_polygon_dividends(
 
 
 def load_chunked_polygon_dividends(
-    config: PolygonConfig, first_start_end: datetime.date, last_end_date: datetime.date
+    config: PolygonConfig, first_day: pd.Timestamp,
+    last_day: pd.Timestamp
 ) -> pd.DataFrame:
     dividends_list = []
-    next_start_end = first_start_end
-    while next_start_end < last_end_date:
+    next_start_end = first_day
+    while next_start_end < last_day:
         # We want at most a month of dividends at a time. They should end on the last day of the month.
-        # So the next_end_date is the day before the first day of the next month.
-        first_of_next_month = datetime.date(
-            next_start_end.year + (next_start_end.month // 12),
-            (next_start_end.month % 12) + 1,
-            1,
-        )
-        next_end_date = first_of_next_month - datetime.timedelta(days=1)
-        if next_end_date > last_end_date:
-            next_end_date = last_end_date
+        next_end_date = next_start_end + pd.offsets.MonthEnd()
+        if next_end_date > last_day:
+            next_end_date = last_day
         dividends_list.append(
-            load_polygon_dividends(config, next_start_end, next_end_date)
+            load_polygon_dividends(config, first_day=next_start_end, last_day=next_end_date)
         )
-        next_start_end = next_end_date + datetime.timedelta(days=1)
+        next_start_end = next_end_date + pd.Timedelta(days=1)
     return pd.concat(dividends_list)
 
 
 def load_dividends(
     config: PolygonConfig,
-    first_start_end: datetime.date,
-    last_end_date: datetime.date,
+    first_day: pd.Timestamp,
+    last_day: pd.Timestamp,
     ticker_to_sid: dict[str, int],
 ) -> pd.DataFrame:
-    dividends = load_chunked_polygon_dividends(config, first_start_end, last_end_date)
+    dividends = load_chunked_polygon_dividends(config, first_day=first_day, last_day=last_day)
     dividends["sid"] = dividends["ticker"].apply(lambda t: ticker_to_sid.get(t, pd.NA))
     dividends.dropna(how="any", inplace=True)
     dividends["sid"] = dividends["sid"].astype("int64")
@@ -153,3 +148,37 @@ def load_dividends(
     return dividends[
         ["sid", "ex_date", "declared_date", "record_date", "pay_date", "amount"]
     ]
+
+
+def load_conditions(config: PolygonConfig) -> pd.DataFrame:
+    # The API doesn't use dates for the condition codes but this is a way to provide control over caching.
+    # The main thing is to get the current conditions list, but we don't want to call more than once a day.
+    conditions_path = config.api_cache_path(
+        first_day=config.start_timestamp, last_day=config.end_timestamp, filename="conditions"
+    )
+    expected_conditions_count = 100
+    if not os.path.exists(conditions_path):
+        client = polygon.RESTClient(api_key=config.api_key)
+        conditions_response = client.list_conditions(
+            limit=1000,
+        )
+        if conditions_response is HTTPResponse:
+            raise ValueError(f"Polygon.list_conditions bad HTTPResponse: {conditions_response}")
+        conditions = pd.DataFrame(conditions_response)
+        print(f"Got {len(conditions)=} from Polygon list_conditions.")
+        os.makedirs(os.path.dirname(conditions_path), exist_ok=True)
+        conditions.to_parquet(conditions_path)
+        if len(conditions) < expected_conditions_count:
+            logging.warning(
+                f"Only got {len(conditions)=} from Polygon list_conditions (expected {expected_conditions_count=}). "
+            )
+    # We will always load from the file to avoid any chance of weird errors.
+    if os.path.exists(conditions_path):
+        conditions = pd.read_parquet(conditions_path)
+        print(f"Loaded {len(conditions)=} from {conditions_path}")
+        if len(conditions) < expected_conditions_count:
+            logging.warning(
+                f"Only got {len(conditions)=} from cached conditions (expected {expected_conditions_count=}). "
+            )
+        return conditions
+    raise ValueError(f"Failed to load conditions from {conditions_path}")
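
The new `load_conditions` caches Polygon's condition-code table as parquet under the config's full date range, so repeated runs read the file instead of hitting the API. A hedged usage sketch, assuming `config` is a `PolygonConfig` that already carries `api_key`, `start_timestamp`, and `end_timestamp` as used above:

```python
# Sketch only: PolygonConfig construction is not shown in this diff.
conditions = load_conditions(config)
# Each row describes one condition code, which downstream trade/quote
# processing can use to filter or interpret ticks.
print(f"{len(conditions)} condition codes available")
```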