vizflow 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vizflow/__init__.py +1 -1
- vizflow/config.py +28 -5
- vizflow/io.py +23 -25
- vizflow/ops.py +11 -3
- {vizflow-0.5.0.dist-info → vizflow-0.5.1.dist-info}/METADATA +1 -1
- vizflow-0.5.1.dist-info/RECORD +10 -0
- vizflow-0.5.0.dist-info/RECORD +0 -10
- {vizflow-0.5.0.dist-info → vizflow-0.5.1.dist-info}/WHEEL +0 -0
vizflow/__init__.py
CHANGED
vizflow/config.py
CHANGED
|
@@ -10,6 +10,21 @@ from typing import Any
|
|
|
10
10
|
_global_config: Config | None = None
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
def _validate_date(date: str) -> None:
|
|
14
|
+
"""Validate date string format to prevent path traversal.
|
|
15
|
+
|
|
16
|
+
Args:
|
|
17
|
+
date: Date string to validate
|
|
18
|
+
|
|
19
|
+
Raises:
|
|
20
|
+
ValueError: If date is not exactly 8 digits (YYYYMMDD format)
|
|
21
|
+
"""
|
|
22
|
+
if not (len(date) == 8 and date.isdigit()):
|
|
23
|
+
raise ValueError(
|
|
24
|
+
f"Invalid date format: {date!r}. Expected YYYYMMDD (8 digits)."
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
13
28
|
@dataclass
|
|
14
29
|
class ColumnSchema:
|
|
15
30
|
"""Schema for a column with type casting.
|
|
@@ -83,7 +98,11 @@ class Config:
|
|
|
83
98
|
time_cutoff: int | None = None
|
|
84
99
|
|
|
85
100
|
def __post_init__(self):
|
|
86
|
-
"""Convert paths to Path objects
|
|
101
|
+
"""Convert string paths to Path objects.
|
|
102
|
+
|
|
103
|
+
Note: String values for path fields (alpha_dir, trade_dir, calendar_path,
|
|
104
|
+
replay_dir, aggregate_dir) are automatically converted to Path objects.
|
|
105
|
+
"""
|
|
87
106
|
if isinstance(self.alpha_dir, str):
|
|
88
107
|
self.alpha_dir = Path(self.alpha_dir)
|
|
89
108
|
if isinstance(self.trade_dir, str):
|
|
@@ -119,8 +138,9 @@ class Config:
|
|
|
119
138
|
Full path to alpha file
|
|
120
139
|
|
|
121
140
|
Raises:
|
|
122
|
-
ValueError: If alpha_dir is not set
|
|
141
|
+
ValueError: If alpha_dir is not set or date format is invalid
|
|
123
142
|
"""
|
|
143
|
+
_validate_date(date)
|
|
124
144
|
if self.alpha_dir is None:
|
|
125
145
|
raise ValueError("alpha_dir is not set in Config")
|
|
126
146
|
return self.alpha_dir / self.alpha_pattern.format(date=date)
|
|
@@ -135,8 +155,9 @@ class Config:
|
|
|
135
155
|
Full path to trade file
|
|
136
156
|
|
|
137
157
|
Raises:
|
|
138
|
-
ValueError: If trade_dir is not set
|
|
158
|
+
ValueError: If trade_dir is not set or date format is invalid
|
|
139
159
|
"""
|
|
160
|
+
_validate_date(date)
|
|
140
161
|
if self.trade_dir is None:
|
|
141
162
|
raise ValueError("trade_dir is not set in Config")
|
|
142
163
|
return self.trade_dir / self.trade_pattern.format(date=date)
|
|
@@ -152,8 +173,9 @@ class Config:
|
|
|
152
173
|
Full path to replay output file
|
|
153
174
|
|
|
154
175
|
Raises:
|
|
155
|
-
ValueError: If replay_dir is not set
|
|
176
|
+
ValueError: If replay_dir is not set or date format is invalid
|
|
156
177
|
"""
|
|
178
|
+
_validate_date(date)
|
|
157
179
|
if self.replay_dir is None:
|
|
158
180
|
raise ValueError("replay_dir is not set in Config")
|
|
159
181
|
return self.replay_dir / f"{date}{suffix}"
|
|
@@ -169,8 +191,9 @@ class Config:
|
|
|
169
191
|
Full path to aggregate output file
|
|
170
192
|
|
|
171
193
|
Raises:
|
|
172
|
-
ValueError: If aggregate_dir is not set
|
|
194
|
+
ValueError: If aggregate_dir is not set or date format is invalid
|
|
173
195
|
"""
|
|
196
|
+
_validate_date(date)
|
|
174
197
|
if self.aggregate_dir is None:
|
|
175
198
|
raise ValueError("aggregate_dir is not set in Config")
|
|
176
199
|
return self.aggregate_dir / f"{date}{suffix}"
|
vizflow/io.py
CHANGED
|
@@ -2,14 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import TYPE_CHECKING
|
|
6
|
-
|
|
7
5
|
import polars as pl
|
|
8
6
|
|
|
9
|
-
from .config import Config, get_config
|
|
7
|
+
from .config import ColumnSchema, Config, get_config
|
|
8
|
+
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
def _apply_schema(df: pl.LazyFrame, schema: dict[str, ColumnSchema]) -> pl.LazyFrame:
|
|
11
|
+
"""Apply type casting from schema definition.
|
|
12
|
+
|
|
13
|
+
Args:
|
|
14
|
+
df: LazyFrame to apply schema to
|
|
15
|
+
schema: Mapping from column names to ColumnSchema
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
LazyFrame with type casts applied
|
|
19
|
+
"""
|
|
20
|
+
for col_name, col_schema in schema.items():
|
|
21
|
+
df = df.with_columns(pl.col(col_name).cast(col_schema.cast_to))
|
|
22
|
+
return df
|
|
13
23
|
|
|
14
24
|
|
|
15
25
|
def load_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
@@ -34,12 +44,7 @@ def load_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
|
34
44
|
config = config or get_config()
|
|
35
45
|
path = config.get_alpha_path(date)
|
|
36
46
|
df = pl.scan_ipc(path)
|
|
37
|
-
|
|
38
|
-
# Apply schema evolution (type casting)
|
|
39
|
-
for col_name, schema in config.alpha_schema.items():
|
|
40
|
-
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
41
|
-
|
|
42
|
-
return df
|
|
47
|
+
return _apply_schema(df, config.alpha_schema)
|
|
43
48
|
|
|
44
49
|
|
|
45
50
|
def load_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
@@ -64,12 +69,7 @@ def load_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
|
64
69
|
config = config or get_config()
|
|
65
70
|
path = config.get_trade_path(date)
|
|
66
71
|
df = pl.scan_ipc(path)
|
|
67
|
-
|
|
68
|
-
# Apply schema evolution (type casting)
|
|
69
|
-
for col_name, schema in config.trade_schema.items():
|
|
70
|
-
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
71
|
-
|
|
72
|
-
return df
|
|
72
|
+
return _apply_schema(df, config.trade_schema)
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
def load_calendar(config: Config | None = None) -> pl.DataFrame:
|
|
@@ -121,8 +121,10 @@ def _scan_file(path) -> pl.LazyFrame:
|
|
|
121
121
|
elif suffix == "parquet":
|
|
122
122
|
return pl.scan_parquet(path)
|
|
123
123
|
else:
|
|
124
|
-
|
|
125
|
-
|
|
124
|
+
raise ValueError(
|
|
125
|
+
f"Unsupported file format: .{suffix}. "
|
|
126
|
+
"Supported: .feather, .ipc, .arrow, .csv, .meords, .parquet"
|
|
127
|
+
)
|
|
126
128
|
|
|
127
129
|
|
|
128
130
|
def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
@@ -191,17 +193,13 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
|
|
|
191
193
|
def _apply_trade_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
|
|
192
194
|
"""Apply column rename + schema evolution for trade data."""
|
|
193
195
|
df = _apply_rename(df, config.trade_preset)
|
|
194
|
-
|
|
195
|
-
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
196
|
-
return df
|
|
196
|
+
return _apply_schema(df, config.trade_schema)
|
|
197
197
|
|
|
198
198
|
|
|
199
199
|
def _apply_alpha_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
|
|
200
200
|
"""Apply column rename + schema evolution for alpha data."""
|
|
201
201
|
df = _apply_rename(df, config.alpha_preset)
|
|
202
|
-
|
|
203
|
-
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
204
|
-
return df
|
|
202
|
+
return _apply_schema(df, config.alpha_schema)
|
|
205
203
|
|
|
206
204
|
|
|
207
205
|
def _apply_rename(df: pl.LazyFrame, preset: str | None) -> pl.LazyFrame:
|
vizflow/ops.py
CHANGED
|
@@ -157,6 +157,7 @@ def forward_return(
|
|
|
157
157
|
alpha_time_col: str = "elapsed_ticktime",
|
|
158
158
|
price_col: str = "mid",
|
|
159
159
|
symbol_col: str = "ukey",
|
|
160
|
+
tolerance_ms: int = 5000,
|
|
160
161
|
) -> pl.LazyFrame:
|
|
161
162
|
"""Merge alpha's future price to trade and calculate forward returns.
|
|
162
163
|
|
|
@@ -177,6 +178,7 @@ def forward_return(
|
|
|
177
178
|
alpha_time_col: Time column in alpha df (default: "elapsed_ticktime")
|
|
178
179
|
price_col: Column name for price in both dfs (default: "mid")
|
|
179
180
|
symbol_col: Symbol column for grouping (default: "ukey")
|
|
181
|
+
tolerance_ms: Max time difference in ms for asof join (default: 5000)
|
|
180
182
|
|
|
181
183
|
Returns:
|
|
182
184
|
Trade LazyFrame with forward_* and y_* columns added
|
|
@@ -210,6 +212,9 @@ def forward_return(
|
|
|
210
212
|
(pl.col(trade_time_col) + horizon_ms).alias("_forward_time")
|
|
211
213
|
)
|
|
212
214
|
|
|
215
|
+
# Sort by join columns (required for asof join)
|
|
216
|
+
trade = trade.sort([symbol_col, "_forward_time"])
|
|
217
|
+
|
|
213
218
|
# Asof join: find alpha price at forward_time
|
|
214
219
|
joined = trade.join_asof(
|
|
215
220
|
alpha_lookup.rename({alpha_time_col: "_alpha_time", price_col: "_forward_price"}),
|
|
@@ -217,13 +222,16 @@ def forward_return(
|
|
|
217
222
|
right_on="_alpha_time",
|
|
218
223
|
by=symbol_col,
|
|
219
224
|
strategy="nearest",
|
|
220
|
-
tolerance=
|
|
225
|
+
tolerance=tolerance_ms,
|
|
221
226
|
)
|
|
222
227
|
|
|
223
|
-
# Add forward price and calculate return
|
|
228
|
+
# Add forward price and calculate return (guard against zero price)
|
|
224
229
|
trade = joined.with_columns([
|
|
225
230
|
pl.col("_forward_price").alias(forward_col),
|
|
226
|
-
|
|
231
|
+
pl.when(pl.col(price_col) != 0)
|
|
232
|
+
.then((pl.col("_forward_price") - pl.col(price_col)) / pl.col(price_col))
|
|
233
|
+
.otherwise(pl.lit(None))
|
|
234
|
+
.alias(return_col),
|
|
227
235
|
]).drop(["_forward_time", "_alpha_time", "_forward_price"])
|
|
228
236
|
|
|
229
237
|
return trade.lazy()
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
vizflow/__init__.py,sha256=EJ8qF4o2grf4aSochaasaaf0unyXV5yhrMs6rAhyp7k,496
|
|
2
|
+
vizflow/config.py,sha256=JNW5-TshQ1v-Ft3-VV0JYJ5PdC3Yhgy4fW0AV0RWzkE,7322
|
|
3
|
+
vizflow/io.py,sha256=ypiEuuPoHFKSt6VnhXcEI7u7dyVjKORunjqGpkFauXM,8877
|
|
4
|
+
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
+
vizflow/ops.py,sha256=4UwxOTPhvZ1_4PI3pcxbXfLAYsn1Ecj6nyBtBBr7KS8,7761
|
|
6
|
+
vizflow/presets.py,sha256=h91NZoOH4YAx0bbsaNigECf9WOcWh1QZavguunWkaLE,2452
|
|
7
|
+
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
vizflow-0.5.1.dist-info/METADATA,sha256=DHzwPBvYuj7Rc4BawcXD2juS5iR5UD1FnAxt3cgvpo4,388
|
|
9
|
+
vizflow-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
vizflow-0.5.1.dist-info/RECORD,,
|
vizflow-0.5.0.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
vizflow/__init__.py,sha256=ZIMYQ-Yzh3eEAd7MSqNA00SlSuj45bEE6NsXM9Qc6O0,496
|
|
2
|
-
vizflow/config.py,sha256=zSZnhdHzgXSqhDenHcHKm4CDGrMpKAdkNNRoUYYF1uc,6530
|
|
3
|
-
vizflow/io.py,sha256=zmN0fFQOTmSBEBKangMExr0Q5mC2gajZM6GgdAyWkw4,8824
|
|
4
|
-
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
-
vizflow/ops.py,sha256=6hKOjJowFrw1b6z4y8Liea9KTp8Fgy6kCGPZ6t15PVk,7426
|
|
6
|
-
vizflow/presets.py,sha256=h91NZoOH4YAx0bbsaNigECf9WOcWh1QZavguunWkaLE,2452
|
|
7
|
-
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
vizflow-0.5.0.dist-info/METADATA,sha256=cIIvBPZo2U6Sp46Wxgyu_tCVPWF4DGsgvapbavEBGl8,388
|
|
9
|
-
vizflow-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
-
vizflow-0.5.0.dist-info/RECORD,,
|
|
File without changes
|