zipline_polygon_bundle 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/PKG-INFO +6 -3
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/pyproject.toml +7 -4
- zipline_polygon_bundle-0.1.8/zipline_polygon_bundle/__init__.py +49 -0
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/adjustments.py +47 -9
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/bundle.py +47 -25
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/concat_all_aggs.py +17 -32
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/concat_all_aggs_partitioned.py +6 -6
- zipline_polygon_bundle-0.1.8/zipline_polygon_bundle/config.py +186 -0
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/polygon_file_reader.py +1 -1
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/process_all_aggs.py +2 -2
- zipline_polygon_bundle-0.1.8/zipline_polygon_bundle/quotes.py +101 -0
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/tickers_and_names.py +1 -37
- zipline_polygon_bundle-0.1.8/zipline_polygon_bundle/trades.py +707 -0
- zipline_polygon_bundle-0.1.6/zipline_polygon_bundle/__init__.py +0 -22
- zipline_polygon_bundle-0.1.6/zipline_polygon_bundle/config.py +0 -113
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/LICENSE +0 -0
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/README.md +0 -0
- {zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/split_aggs_by_ticker.py +0 -0
{zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/PKG-INFO RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: zipline_polygon_bundle
-Version: 0.1.6
+Version: 0.1.8
 Summary: A zipline-reloaded data provider bundle for Polygon.io
 License: GNU AFFERO GENERAL PUBLIC LICENSE
          Version 3, 19 November 2007
@@ -671,10 +671,13 @@ Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU Affero General Public License v3
 Classifier: Operating System :: OS Independent
 Requires-Dist: bcolz-zipline (>=1.2.11)
+Requires-Dist: fsspec (>=2024.10)
 Requires-Dist: numpy (<2)
 Requires-Dist: pandas (>=2.2,<3)
-Requires-Dist: polygon-api-client (…)
-Requires-Dist: pyarrow (…)
+Requires-Dist: pandas-market-calendars (>=4.4.2)
+Requires-Dist: pandas_ta (>=0.3)
+Requires-Dist: polygon-api-client (>=1.14.2)
+Requires-Dist: pyarrow (>=18.1.0,<19)
 Requires-Dist: pytz (>=2018.5)
 Requires-Dist: requests (>=2.9.1)
 Requires-Dist: toolz (>=0.8.2)
{zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/pyproject.toml RENAMED
@@ -1,6 +1,6 @@
 [project]
 name = 'zipline_polygon_bundle'
-version = '0.1.6'
+version = '0.1.8'
 description = 'A zipline-reloaded data provider bundle for Polygon.io'
 authors = [
     { name = 'Jim White', email = 'jim@fovi.com' },
@@ -19,7 +19,7 @@ Repository = 'https://github.com/fovi-llc/zipline-polygon-bundle'
 
 [tool.poetry]
 name = 'zipline-polygon-bundle'
-version = '0.1.6'
+version = '0.1.8'
 description = 'A zipline-reloaded data provider bundle for Polygon.io'
 authors = ['Jim White <jim@fovi.com>']
 license = 'AGPL-3.0'
@@ -32,13 +32,16 @@ classifiers = [
 ]
 
 [tool.poetry.dependencies]
+fsspec = ">=2024.10"
 python = ">=3.9,<4.0"
-polygon-api-client = "…"
+polygon-api-client = ">=1.14.2"
 pandas = ">=2.2,<3"
+pandas-market-calendars = ">=4.4.2"
+pandas_ta = ">=0.3"
 pytz = ">=2018.5"
 requests = ">=2.9.1"
 bcolz-zipline = ">=1.2.11"
-pyarrow = "…"
+pyarrow = ">=18.1.0,<19"
 numpy = "<2"
 toolz = ">=0.8.2"
 zipline-reloaded = ">=3.1"
zipline_polygon_bundle-0.1.8/zipline_polygon_bundle/__init__.py ADDED
@@ -0,0 +1,49 @@
+from .bundle import (
+    register_polygon_equities_bundle,
+    symbol_to_upper,
+    polygon_equities_bundle_day,
+    polygon_equities_bundle_minute,
+)
+
+from .config import PolygonConfig
+from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
+from .adjustments import load_splits, load_dividends, load_conditions
+from .trades import trades_schema, trades_dataset, cast_trades, date_to_path
+from .trades import custom_aggs_partitioning, custom_aggs_schema, trades_to_custom_aggs, convert_all_to_custom_aggs
+from .trades import get_custom_aggs_dates, generate_csv_trades_tables, compute_signals_for_all_custom_aggs
+from .quotes import quotes_schema, quotes_dataset, cast_quotes
+# from .tickers_and_names import load_all_tickers, merge_tickers, ticker_names_from_merged_tickers, get_ticker_universe
+from .tickers_and_names import PolygonAssets, get_ticker_universe
+
+
+__all__ = [
+    "register_polygon_equities_bundle",
+    "symbol_to_upper",
+    "polygon_equities_bundle_day",
+    "polygon_equities_bundle_minute",
+    "PolygonConfig",
+    "concat_all_aggs_from_csv",
+    "generate_csv_agg_tables",
+    "load_splits",
+    "load_dividends",
+    "load_conditions",
+    "trades_schema",
+    "trades_dataset",
+    "cast_trades",
+    "date_to_path",
+    "get_custom_aggs_dates",
+    "generate_csv_trades_tables",
+    "custom_aggs_partitioning",
+    "custom_aggs_schema",
+    "trades_to_custom_aggs",
+    "convert_all_to_custom_aggs",
+    "compute_signals_for_all_custom_aggs",
+    "quotes_schema",
+    "quotes_dataset",
+    "cast_quotes",
+    # "load_all_tickers",
+    # "merge_tickers",
+    # "ticker_names_from_merged_tickers",
+    "PolygonAssets",
+    "get_ticker_universe",
+]
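
Note: this 49-line __init__.py replaces the 22-line 0.1.6 module (removed in this release) and re-exports the new trades, quotes, and custom-aggs helpers from the package root. A minimal sketch of importing against the widened surface (names taken from the __all__ list above):

    from zipline_polygon_bundle import (
        PolygonConfig,
        register_polygon_equities_bundle,
        load_conditions,
        trades_schema,
        quotes_schema,
    )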
{zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/adjustments.py RENAMED
@@ -64,8 +64,9 @@ def load_splits(
     splits["split_from"] = splits["split_from"].astype(float)
     splits["split_to"] = splits["split_to"].astype(float)
     splits["ratio"] = splits["split_from"] / splits["split_to"]
-…
-…
+    # Only return columns Zipline wants.
+    # Polygon may add more columns in the future (as they did with `id`).
+    return splits[["sid", "effective_date", "ratio"]]
 
 
 def load_polygon_dividends(
@@ -87,7 +88,9 @@ def load_polygon_dividends(
     dividends = pd.DataFrame(dividends)
     os.makedirs(os.path.dirname(dividends_path), exist_ok=True)
     dividends.to_parquet(dividends_path)
-    print(f"Wrote {len(dividends)=} from Polygon list_dividends to {dividends_path=}")
+    print(
+        f"Wrote {len(dividends)=} from Polygon list_dividends to {dividends_path=}"
+    )
     # if len(dividends) < 10000:
     #     logging.error(f"Only got {len(dividends)=} from Polygon list_dividends.")
     # We will always load from the file to avoid any chance of weird errors.
@@ -116,9 +119,9 @@ def load_chunked_polygon_dividends(
         next_end_date = first_of_next_month - datetime.timedelta(days=1)
         if next_end_date > last_end_date:
             next_end_date = last_end_date
-        dividends_list.append(
-            config, next_start_end, next_end_date
-        )
+        dividends_list.append(
+            load_polygon_dividends(config, next_start_end, next_end_date)
+        )
         next_start_end = next_end_date + datetime.timedelta(days=1)
     return pd.concat(dividends_list)
 
@@ -145,7 +148,42 @@ def load_dividends(
         },
         inplace=True,
     )
-…
-…
+    # Only return columns Zipline wants.
+    # Polygon may add more columns in the future (as they did with `id`).
+    return dividends[
+        ["sid", "ex_date", "declared_date", "record_date", "pay_date", "amount"]
+    ]
+
+
+def load_conditions(config: PolygonConfig) -> pd.DataFrame:
+    # The API doesn't use dates for the condition codes but this is a way to provide control over caching.
+    # Main thing is to get the current conditions list but we don't want to call more than once a day.
+    conditions_path = config.api_cache_path(
+        start_date=config.start_timestamp.date(), end_date=config.end_timestamp.date(), filename="conditions"
     )
-…
+    expected_conditions_count = 100
+    if not os.path.exists(conditions_path):
+        client = polygon.RESTClient(api_key=config.api_key)
+        conditions_response = client.list_conditions(
+            limit=1000,
+        )
+        if conditions_response is HTTPResponse:
+            raise ValueError(f"Polygon.list_splits bad HTTPResponse: {conditions_response}")
+        conditions = pd.DataFrame(conditions_response)
+        print(f"Got {len(conditions)=} from Polygon list_conditions.")
+        os.makedirs(os.path.dirname(conditions_path), exist_ok=True)
+        conditions.to_parquet(conditions_path)
+        if len(conditions) < expected_conditions_count:
+            logging.warning(
+                f"Only got {len(conditions)=} from Polygon list_splits (expected {expected_conditions_count=}). "
+            )
+    # We will always load from the file to avoid any chance of weird errors.
+    if os.path.exists(conditions_path):
+        conditions = pd.read_parquet(conditions_path)
+        print(f"Loaded {len(conditions)=} from {conditions_path}")
+        if len(conditions) < expected_conditions_count:
+            logging.warning(
+                f"Only got {len(conditions)=} from cached conditions (expected {expected_conditions_count=}). "
+            )
+        return conditions
+    raise ValueError(f"Failed to load splits from {conditions_path}")
{zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/bundle.py RENAMED
@@ -1,18 +1,21 @@
+import os
 from zipline.data.bundles import register
 from zipline.data.resample import minute_frame_to_session_frame
 
+from exchange_calendars.calendar_helpers import parse_date
+from zipline.utils.calendar_utils import get_calendar
+
 from .config import PolygonConfig
 from .concat_all_aggs import concat_all_aggs_from_csv, generate_csv_agg_tables
 from .adjustments import load_splits, load_dividends
 
 import pyarrow
 import pyarrow.compute
+import pyarrow.dataset
 
 import pandas as pd
 import logging
 
-import concurrent.futures
-
 
 # TODO: Change warnings to be relative to number of days in the range.
 
@@ -175,8 +178,8 @@ def polygon_equities_bundle_day(
     daily_bar_writer,
     adjustment_writer,
     calendar,
-…
-…
+    start_date,
+    end_date,
     cache,
     show_progress,
     output_dir,
@@ -184,8 +187,8 @@ def polygon_equities_bundle_day(
     config = PolygonConfig(
         environ=environ,
         calendar_name=calendar.name,
-…
-…
+        start_date=start_date,
+        end_date=end_date,
         agg_time="day",
     )
 
@@ -219,7 +222,7 @@ def polygon_equities_bundle_day(
     daily_bar_writer.write(
         process_day_aggregates(
             table=table,
-            sessions=calendar.sessions_in_range(…
+            sessions=calendar.sessions_in_range(start_date, end_date),
             metadata=metadata,
             calendar=calendar,
             symbol_to_sid=symbol_to_sid,
@@ -400,8 +403,8 @@ def polygon_equities_bundle_minute(
     daily_bar_writer,
     adjustment_writer,
     calendar,
-…
-…
+    start_date,
+    end_date,
     cache,
     show_progress,
     output_dir,
@@ -409,8 +412,8 @@ def polygon_equities_bundle_minute(
     config = PolygonConfig(
         environ=environ,
         calendar_name=calendar.name,
-…
-…
+        start_date=start_date,
+        end_date=end_date,
         agg_time="minute",
     )
 
@@ -444,8 +447,8 @@ def polygon_equities_bundle_minute(
     daily_bar_writer.write(
         process_minute_aggregates(
             fragments=aggregates.get_fragments(),
-            sessions=calendar.sessions_in_range(…
-            minutes=calendar.sessions_minutes(…
+            sessions=calendar.sessions_in_range(start_date, end_date),
+            minutes=calendar.sessions_minutes(start_date, end_date),
             metadata=metadata,
             calendar=calendar,
             symbol_to_sid=symbol_to_sid,
@@ -459,8 +462,8 @@ def polygon_equities_bundle_minute(
     minute_bar_writer.write(
         process_minute_aggregates(
             fragments=aggregates.get_fragments(),
-            sessions=calendar.sessions_in_range(…
-            minutes=calendar.sessions_minutes(…
+            sessions=calendar.sessions_in_range(start_date, end_date),
+            minutes=calendar.sessions_minutes(start_date, end_date),
             metadata=metadata,
             calendar=calendar,
             symbol_to_sid=symbol_to_sid,
@@ -485,8 +488,8 @@ def polygon_equities_bundle_minute(
 
 def register_polygon_equities_bundle(
     bundlename,
-…
-…
+    start_date=None,
+    end_date=None,
     calendar_name="XNYS",
     agg_time="day",
     # ticker_list=None,
@@ -495,6 +498,25 @@ def register_polygon_equities_bundle(
 ):
     if agg_time not in ["day", "minute"]:
         raise ValueError(f"agg_time must be 'day' or 'minute', not '{agg_time}'")
+    # We need to know the start and end dates of the session before the bundle is
+    # registered because even though we only need it for ingest, the metadata in
+    # the writer is initialized and written before our ingest function is called.
+    if start_date is None or end_date is None:
+        config = PolygonConfig(
+            environ=os.environ,
+            calendar_name=calendar_name,
+            start_date=start_date,
+            end_date=end_date,
+            agg_time=agg_time,
+        )
+        first_aggs_date, last_aggs_date = config.find_first_and_last_aggs()
+        if start_date is None:
+            start_date = first_aggs_date
+        if end_date is None:
+            end_date = last_aggs_date
+
+    calendar = get_calendar(calendar_name)
+
     register(
         bundlename,
         (
@@ -502,8 +524,8 @@ def register_polygon_equities_bundle(
             if agg_time == "minute"
             else polygon_equities_bundle_day
         ),
-        start_session=…
-        end_session=…
+        start_session=parse_date(start_date, calendar=calendar),
+        end_session=parse_date(end_date, calendar=calendar),
         calendar_name=calendar_name,
         # minutes_per_day=390,
         # create_writers=True,
@@ -517,12 +539,12 @@ def register_polygon_equities_bundle(
 # config = PolygonConfig(
 #     environ=os.environ,
 #     calendar_name="XNYS",
-#     # …
-#     # …
-#     …
-#     # …
-#     …
-#     # …
+#     # start_date="2003-10-01",
+#     # start_date="2018-01-01",
+#     start_date="2023-01-01",
+#     # end_date="2023-01-12",
+#     end_date="2023-12-31",
+#     # end_date="2024-06-30",
 # )
 # splits = load_polygon_splits(config)
 # splits.info()
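
Usage note: the net effect of the bundle.py changes is that start_date and end_date are now optional at registration. When either is omitted, 0.1.8 builds a PolygonConfig and calls config.find_first_and_last_aggs() to infer the session range from the aggregate flat files before calling zipline's register. A sketch of the new call (the bundle name is illustrative):

    from zipline_polygon_bundle import register_polygon_equities_bundle

    # With no dates given, the session range is inferred from the first and
    # last aggregate flat files found under the configured data directory.
    register_polygon_equities_bundle(
        "polygon",
        calendar_name="XNYS",
        agg_time="day",
    )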
{zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/concat_all_aggs.py RENAMED
@@ -1,7 +1,7 @@
 from .config import PolygonConfig
 
 import shutil
-from typing import Iterator, Tuple
+from typing import Iterator, Tuple, List, Union
 
 import argparse
 import glob
@@ -10,6 +10,7 @@ import os
 import pyarrow as pa
 from pyarrow import dataset as pa_ds
 from pyarrow import csv as pa_csv
+from pyarrow import compute as pa_compute
 
 import pandas as pd
 
@@ -34,7 +35,7 @@ def to_partition_key(s: str) -> str:
 
 
 def generate_tables_from_csv_files(
-    paths: …
+    paths: Iterator[Union[str, os.PathLike]],
     schema: pa.Schema,
     start_timestamp: pd.Timestamp,
     limit_timestamp: pd.Timestamp,
@@ -57,7 +58,7 @@ def generate_tables_from_csv_files(
             quoted_strings_can_be_null=False,
         )
 
-        table = …
+        table = pa_csv.read_csv(path, convert_options=convert_options)
         tables_read_count += 1
         table = table.set_column(
             table.column_names.index("window_start"),
@@ -75,10 +76,10 @@ def generate_tables_from_csv_files(
             ),
         )
         expr = (
-            …
+            pa_compute.field("window_start")
             >= pa.scalar(start_timestamp, type=schema.field("window_start").type)
         ) & (
-            …
+            pa_compute.field("window_start")
             < pa.scalar(
                 limit_timestamp,
                 type=schema.field("window_start").type,
@@ -101,22 +102,8 @@ def generate_tables_from_csv_files(
 
 def generate_csv_agg_tables(
     config: PolygonConfig,
-) -> Tuple[…
+) -> Tuple[pa.Schema, Iterator[pa.Table]]:
     """zipline does bundle ingestion one ticker at a time."""
-    # We sort by path because they have the year and month in the dir names and the date in the filename.
-    paths = sorted(
-        list(
-            glob.glob(
-                os.path.join(config.aggs_dir, config.csv_paths_pattern),
-                recursive="**" in config.csv_paths_pattern,
-            )
-        )
-    )
-
-    print(f"{len(paths)=}")
-    if len(paths) > 0:
-        print(f"{paths[0]=}")
-        print(f"{paths[-1]=}")
 
     # Polygon Aggregate flatfile timestamps are in nanoseconds (like trades), not milliseconds as the docs say.
     # I make the timestamp timezone-aware because that's how Unix timestamps work and it may help avoid mistakes.
@@ -154,11 +141,11 @@ def generate_csv_agg_tables(
         pa.field(PARTITION_COLUMN_NAME, pa.string(), nullable=False)
     )
 
+    # TODO: Use generator like os.walk for paths.
     return (
-        paths,
         polygon_aggs_schema,
         generate_tables_from_csv_files(
-            paths=…
+            paths=config.csv_paths(),
             schema=polygon_aggs_schema,
             start_timestamp=config.start_timestamp,
            limit_timestamp=config.end_timestamp + pd.to_timedelta(1, unit="day"),
@@ -176,11 +163,9 @@ def concat_all_aggs_from_csv(
     config: PolygonConfig,
     overwrite: bool = False,
 ) -> str:
-    …
+    schema, tables = generate_csv_agg_tables(config)
 
-    …
-        raise ValueError(f"No Polygon CSV flat files found in {config.aggs_dir=}")
-    by_ticker_aggs_arrow_dir = config.by_ticker_aggs_arrow_dir(paths[0], paths[-1])
+    by_ticker_aggs_arrow_dir = config.by_ticker_aggs_arrow_dir
     if os.path.exists(by_ticker_aggs_arrow_dir):
         if overwrite:
             print(f"Removing {by_ticker_aggs_arrow_dir=}")
@@ -212,10 +197,10 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--calendar_name", default="XNYS")
 
-    parser.add_argument("--…
-    parser.add_argument("--…
-    # parser.add_argument("--…
-    # parser.add_argument("--…
+    parser.add_argument("--start_date", default="2014-06-16")
+    parser.add_argument("--end_date", default="2024-09-06")
+    # parser.add_argument("--start_date", default="2020-01-01")
+    # parser.add_argument("--end_date", default="2020-12-31")
 
     parser.add_argument("--agg_time", default="day")
 
@@ -235,8 +220,8 @@ if __name__ == "__main__":
     config = PolygonConfig(
         environ=os.environ,
         calendar_name=args.calendar_name,
-…
-…
+        start_date=args.start_date,
+        end_date=args.end_date,
         agg_time=args.agg_time,
     )
 
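
Usage note: generate_csv_agg_tables now returns (schema, tables) rather than the old three-tuple that led with the sorted paths list (per the removed "paths," line above); path discovery moved into config.csv_paths(). A sketch of consuming the new shape:

    # 0.1.6 callers unpacked paths as well; under 0.1.8 it is just:
    schema, tables = generate_csv_agg_tables(config)
    for table in tables:
        print(table.num_rows)  # one pyarrow Table per CSV flat file read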
{zipline_polygon_bundle-0.1.6 → zipline_polygon_bundle-0.1.8}/zipline_polygon_bundle/concat_all_aggs_partitioned.py RENAMED
@@ -138,10 +138,10 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--calendar_name", default="XNYS")
 
-    parser.add_argument("--…
-    parser.add_argument("--…
-    # parser.add_argument("--…
-    # parser.add_argument("--…
+    parser.add_argument("--start_date", default="2014-06-16")
+    parser.add_argument("--end_date", default="2024-09-06")
+    # parser.add_argument("--start_date", default="2020-10-07")
+    # parser.add_argument("--end_date", default="2020-10-15")
     # parser.add_argument("--aggs_pattern", default="2020/10/**/*.csv.gz")
     parser.add_argument("--aggs_pattern", default="**/*.csv.gz")
 
@@ -163,8 +163,8 @@ if __name__ == "__main__":
     config = PolygonConfig(
         environ=os.environ,
         calendar_name=args.calendar_name,
-        …
-        …
+        start_date=args.start_date,
+        end_date=args.end_date,
     )
 
     concat_all_aggs_from_csv(
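
For completeness, the programmatic equivalent of the __main__ blocks above under 0.1.8 (dates taken from the commented argparse defaults; environment-based settings such as the data directory come from os.environ):

    import os
    from zipline_polygon_bundle import PolygonConfig, concat_all_aggs_from_csv

    config = PolygonConfig(
        environ=os.environ,
        calendar_name="XNYS",
        start_date="2020-01-01",
        end_date="2020-12-31",
        agg_time="day",
    )
    concat_all_aggs_from_csv(config, overwrite=False)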
|