vizflow 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vizflow/__init__.py +4 -2
- vizflow/config.py +25 -2
- vizflow/io.py +121 -12
- vizflow/ops.py +99 -0
- {vizflow-0.5.3.dist-info → vizflow-0.5.5.dist-info}/METADATA +1 -1
- vizflow-0.5.5.dist-info/RECORD +10 -0
- vizflow-0.5.3.dist-info/RECORD +0 -10
- {vizflow-0.5.3.dist-info → vizflow-0.5.5.dist-info}/WHEEL +0 -0
vizflow/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ Usage:
|
|
|
5
5
|
import vizflow as vf
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.5.3"
|
|
8
|
+
__version__ = "0.5.5"
|
|
9
9
|
|
|
10
10
|
from .config import Config, get_config, set_config
|
|
11
11
|
from .io import (
|
|
@@ -14,9 +14,11 @@ from .io import (
|
|
|
14
14
|
scan_alphas,
|
|
15
15
|
scan_trade,
|
|
16
16
|
scan_trades,
|
|
17
|
+
scan_univ,
|
|
18
|
+
scan_univs,
|
|
17
19
|
)
|
|
18
20
|
from .market import CN, CRYPTO, Market, Session
|
|
19
|
-
from .ops import aggregate, bin, forward_return, parse_time
|
|
21
|
+
from .ops import aggregate, bin, forward_return, mark_to_close, parse_time, sign_by_side
|
|
20
22
|
from .schema_evolution import (
|
|
21
23
|
JYAO_V20251114,
|
|
22
24
|
SCHEMAS,
|
vizflow/config.py
CHANGED
|
@@ -62,6 +62,8 @@ class Config:
|
|
|
62
62
|
alpha_pattern: str = "alpha_{date}.feather"
|
|
63
63
|
trade_dir: Path | None = None
|
|
64
64
|
trade_pattern: str = "trade_{date}.feather"
|
|
65
|
+
univ_dir: Path | None = None
|
|
66
|
+
univ_pattern: str = "{date}.csv"
|
|
65
67
|
calendar_path: Path | None = None
|
|
66
68
|
|
|
67
69
|
# === Output Paths ===
|
|
@@ -75,6 +77,7 @@ class Config:
|
|
|
75
77
|
# Can be a string (schema name) or SchemaEvolution instance
|
|
76
78
|
trade_schema: str | SchemaEvolution | None = None
|
|
77
79
|
alpha_schema: str | SchemaEvolution | None = None
|
|
80
|
+
univ_schema: str | SchemaEvolution | None = None
|
|
78
81
|
|
|
79
82
|
# === Aggregation ===
|
|
80
83
|
binwidths: dict[str, float] = field(default_factory=dict)
|
|
@@ -87,13 +90,16 @@ class Config:
|
|
|
87
90
|
def __post_init__(self):
|
|
88
91
|
"""Convert string paths to Path objects.
|
|
89
92
|
|
|
90
|
-
Note: String values for path fields (alpha_dir, trade_dir,
|
|
91
|
-
replay_dir, aggregate_dir) are automatically converted
|
|
93
|
+
Note: String values for path fields (alpha_dir, trade_dir, univ_dir,
|
|
94
|
+
calendar_path, replay_dir, aggregate_dir) are automatically converted
|
|
95
|
+
to Path objects.
|
|
92
96
|
"""
|
|
93
97
|
if isinstance(self.alpha_dir, str):
|
|
94
98
|
self.alpha_dir = Path(self.alpha_dir)
|
|
95
99
|
if isinstance(self.trade_dir, str):
|
|
96
100
|
self.trade_dir = Path(self.trade_dir)
|
|
101
|
+
if isinstance(self.univ_dir, str):
|
|
102
|
+
self.univ_dir = Path(self.univ_dir)
|
|
97
103
|
if isinstance(self.calendar_path, str):
|
|
98
104
|
self.calendar_path = Path(self.calendar_path)
|
|
99
105
|
if isinstance(self.replay_dir, str):
|
|
@@ -135,6 +141,23 @@ class Config:
|
|
|
135
141
|
raise ValueError("trade_dir is not set in Config")
|
|
136
142
|
return self.trade_dir / self.trade_pattern.format(date=date)
|
|
137
143
|
|
|
144
|
+
def get_univ_path(self, date: str) -> Path:
    """Build the path of the universe file for a single date.

    Args:
        date: Date string, e.g. "20241001". It is passed to
            ``_validate_date`` before anything else happens.

    Returns:
        ``univ_dir`` joined with ``univ_pattern`` once its ``{date}``
        placeholder has been filled in with ``date``.

    Raises:
        ValueError: If univ_dir is not set or date format is invalid
    """
    _validate_date(date)

    base_dir = self.univ_dir
    if base_dir is None:
        raise ValueError("univ_dir is not set in Config")

    file_name = self.univ_pattern.format(date=date)
    return base_dir / file_name
|
|
160
|
+
|
|
138
161
|
def get_replay_path(self, date: str, suffix: str = ".parquet") -> Path:
|
|
139
162
|
"""Get replay output file path for a date (FIFO results).
|
|
140
163
|
|
vizflow/io.py
CHANGED
|
@@ -2,12 +2,42 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
5
8
|
import polars as pl
|
|
6
9
|
|
|
7
10
|
from .config import Config, get_config
|
|
8
11
|
from .schema_evolution import SchemaEvolution, get_schema
|
|
9
12
|
|
|
10
13
|
|
|
14
|
+
def _extract_date_from_path(path: Path, pattern: str) -> str | None:
|
|
15
|
+
"""Extract date from filename using pattern.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
path: File path.
|
|
19
|
+
pattern: Pattern with {date} placeholder, e.g., "{date}.meords"
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
Extracted date string, or None if no match or no {date} in pattern.
|
|
23
|
+
|
|
24
|
+
Example:
|
|
25
|
+
>>> _extract_date_from_path(Path("data/11110101.meords"), "{date}.meords")
|
|
26
|
+
"11110101"
|
|
27
|
+
"""
|
|
28
|
+
# If pattern has no {date} placeholder, return None
|
|
29
|
+
if "{date}" not in pattern:
|
|
30
|
+
return None
|
|
31
|
+
|
|
32
|
+
# Convert pattern to regex: "{date}" -> "(?P<date>.+)"
|
|
33
|
+
# Escape other special chars first
|
|
34
|
+
regex_pattern = re.escape(pattern).replace(r"\{date\}", r"(?P<date>[^/]+)")
|
|
35
|
+
match = re.search(regex_pattern, path.name)
|
|
36
|
+
if match:
|
|
37
|
+
return match.group("date")
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
|
|
11
41
|
def _resolve_schema(
|
|
12
42
|
schema_ref: str | SchemaEvolution | None,
|
|
13
43
|
) -> SchemaEvolution | None:
|
|
@@ -141,13 +171,15 @@ def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
|
141
171
|
|
|
142
172
|
|
|
143
173
|
def scan_trades(config: Config | None = None) -> pl.LazyFrame:
|
|
144
|
-
"""Scan all trade files with schema evolution.
|
|
174
|
+
"""Scan all trade files with schema evolution and data_date column.
|
|
175
|
+
|
|
176
|
+
Extracts date from each filename using the pattern and adds a "data_date" column.
|
|
145
177
|
|
|
146
178
|
Args:
|
|
147
179
|
config: Config to use, or get_config() if None
|
|
148
180
|
|
|
149
181
|
Returns:
|
|
150
|
-
LazyFrame with schema evolution applied
|
|
182
|
+
LazyFrame with schema evolution applied and data_date column added
|
|
151
183
|
|
|
152
184
|
Raises:
|
|
153
185
|
ValueError: If trade_dir is not set or no files found
|
|
@@ -159,7 +191,7 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
|
|
|
159
191
|
... trade_schema="ylin_v20251204",
|
|
160
192
|
... )
|
|
161
193
|
>>> vf.set_config(config)
|
|
162
|
-
>>> df = vf.scan_trades()
|
|
194
|
+
>>> df = vf.scan_trades() # Has "data_date" column
|
|
163
195
|
"""
|
|
164
196
|
config = config or get_config()
|
|
165
197
|
if config.trade_dir is None:
|
|
@@ -172,14 +204,19 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
|
|
|
172
204
|
|
|
173
205
|
schema = _resolve_schema(config.trade_schema)
|
|
174
206
|
|
|
175
|
-
#
|
|
176
|
-
|
|
177
|
-
|
|
207
|
+
# Scan each file, apply schema evolution, and add data_date column
|
|
208
|
+
# Schema evolution must be applied per-file BEFORE concat to ensure matching schemas
|
|
209
|
+
dfs = []
|
|
210
|
+
for f in files:
|
|
211
|
+
df = _scan_file(f, schema=schema)
|
|
212
|
+
if schema:
|
|
213
|
+
df = _apply_schema_evolution(df, schema)
|
|
214
|
+
date = _extract_date_from_path(f, config.trade_pattern)
|
|
215
|
+
if date:
|
|
216
|
+
df = df.with_columns(pl.lit(date).alias("data_date"))
|
|
217
|
+
dfs.append(df)
|
|
178
218
|
|
|
179
|
-
|
|
180
|
-
df = _apply_schema_evolution(df, schema)
|
|
181
|
-
|
|
182
|
-
return df
|
|
219
|
+
return pl.concat(dfs)
|
|
183
220
|
|
|
184
221
|
|
|
185
222
|
def scan_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
@@ -235,15 +272,87 @@ def scan_alphas(config: Config | None = None) -> pl.LazyFrame:
|
|
|
235
272
|
|
|
236
273
|
schema = _resolve_schema(config.alpha_schema)
|
|
237
274
|
|
|
238
|
-
|
|
239
|
-
|
|
275
|
+
# Apply schema evolution per-file BEFORE concat to ensure matching schemas
|
|
276
|
+
# Note: Alpha files already have data_date column, no need to add it
|
|
277
|
+
dfs = []
|
|
278
|
+
for f in files:
|
|
279
|
+
df = _scan_file(f, schema=schema)
|
|
280
|
+
if schema:
|
|
281
|
+
df = _apply_schema_evolution(df, schema)
|
|
282
|
+
dfs.append(df)
|
|
283
|
+
|
|
284
|
+
return pl.concat(dfs)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def scan_univ(date: str, config: Config | None = None) -> pl.LazyFrame:
    """Scan the universe file of one date, applying schema evolution.

    Args:
        date: Date string, e.g. "20241001"
        config: Config to use; when it is None (or falsy) the result of
            get_config() is used instead.

    Returns:
        The LazyFrame produced by ``_scan_file`` for that date's universe
        file, passed through ``_apply_schema_evolution`` when
        ``config.univ_schema`` resolves to a schema.

    Example:
        >>> config = vf.Config(
        ...     univ_dir=Path("/data/jyao/univ"),
        ...     univ_pattern="{date}.csv",
        ... )
        >>> vf.set_config(config)
        >>> df = vf.scan_univ("20241001")
    """
    cfg = config or get_config()

    # Resolve the file location first so path problems surface before any
    # schema lookup happens.
    univ_file = cfg.get_univ_path(date)
    evolution = _resolve_schema(cfg.univ_schema)

    frame = _scan_file(univ_file, schema=evolution)
    if not evolution:
        return frame
    return _apply_schema_evolution(frame, evolution)
|
|
245
314
|
|
|
246
315
|
|
|
316
|
+
def scan_univs(config: Config | None = None) -> pl.LazyFrame:
    """Scan every universe file in ``univ_dir`` and concatenate the results.

    Files are found by globbing ``univ_dir`` with ``univ_pattern`` after its
    ``{date}`` placeholder is replaced by ``*``, and are processed in sorted
    order. Each file is scanned and schema-evolved on its own before the
    concat. When a date can be extracted from the filename, it is attached
    as a literal "data_date" column.

    Args:
        config: Config to use; when it is None (or falsy) the result of
            get_config() is used instead.

    Returns:
        Concatenation of the per-file LazyFrames.

    Raises:
        ValueError: If univ_dir is not set or no files found
    """
    config = config or get_config()
    univ_dir = config.univ_dir
    if univ_dir is None:
        raise ValueError("univ_dir is not set in Config")

    pattern = config.univ_pattern.replace("{date}", "*")
    files = sorted(univ_dir.glob(pattern))
    if not files:
        raise ValueError(f"No files found matching {pattern} in {config.univ_dir}")

    schema = _resolve_schema(config.univ_schema)

    def _load(univ_file):
        # Schema evolution runs per file, before concat, so that all frames
        # share one schema by the time they are combined.
        frame = _scan_file(univ_file, schema=schema)
        if schema:
            frame = _apply_schema_evolution(frame, schema)
        file_date = _extract_date_from_path(univ_file, config.univ_pattern)
        if not file_date:
            return frame
        return frame.with_columns(pl.lit(file_date).alias("data_date"))

    return pl.concat([_load(univ_file) for univ_file in files])
|
|
354
|
+
|
|
355
|
+
|
|
247
356
|
def load_calendar(config: Config | None = None) -> pl.DataFrame:
|
|
248
357
|
"""Load trading calendar.
|
|
249
358
|
|
vizflow/ops.py
CHANGED
|
@@ -235,3 +235,102 @@ def forward_return(
|
|
|
235
235
|
]).drop(["_forward_time", "_alpha_time", "_forward_price"])
|
|
236
236
|
|
|
237
237
|
return trade.lazy()
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def mark_to_close(
    df_trade: pl.LazyFrame,
    df_univ: pl.LazyFrame,
    mid_col: str = "mid",
    close_col: str = "close",
    symbol_col: str = "ukey",
) -> pl.LazyFrame:
    """Add a mark-to-close return column named "y_close".

    The trade frame is left-joined with the universe frame to bring in the
    close price, then:
        y_close = (close - mid) / mid
    Rows whose mid is zero get a null y_close instead of a division.

    The join key is ``symbol_col`` alone, unless both frames have a
    "data_date" column, in which case it is (``symbol_col``, "data_date").
    Only the key column(s) and ``close_col`` are taken from the universe
    frame; ``close_col`` stays in the result next to y_close.

    Args:
        df_trade: Trade LazyFrame with mid_col and symbol_col
        df_univ: Universe LazyFrame with close_col and symbol_col
        mid_col: Column name for mid price in trade df (default: "mid")
        close_col: Column name for close price in univ df (default: "close")
        symbol_col: Symbol column for joining (default: "ukey")

    Returns:
        Trade LazyFrame with y_close column added

    Example:
        >>> df_trade = vf.scan_trade(date)
        >>> df_univ = vf.scan_univ(date)
        >>> df = vf.mark_to_close(df_trade, df_univ)
        >>> # Creates: y_close
    """
    trade_names = df_trade.collect_schema().names()
    univ_names = df_univ.collect_schema().names()

    # Multi-day inputs carry data_date on both sides; joining on it keeps a
    # trade from picking up another day's close.
    multi_day = "data_date" in trade_names and "data_date" in univ_names
    if multi_day:
        join_cols = [symbol_col, "data_date"]
        univ_cols = [symbol_col, close_col, "data_date"]
    else:
        join_cols = [symbol_col]
        univ_cols = [symbol_col, close_col]

    joined = df_trade.join(df_univ.select(univ_cols), on=join_cols, how="left")

    mid = pl.col(mid_col)
    close = pl.col(close_col)
    y_close = (
        pl.when(mid != 0)
        .then((close - mid) / mid)
        .otherwise(pl.lit(None))
        .alias("y_close")
    )
    return joined.with_columns(y_close)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def sign_by_side(
    df: pl.LazyFrame,
    cols: list[str],
    side_col: str = "order_side",
) -> pl.LazyFrame:
    """Flip the sign of return columns on sell rows.

    Rows whose ``side_col`` equals "Sell" get each listed column negated.
    Every other row keeps its values unchanged. The intended reading is
    that a positive value then means a favorable price move for that
    order side (up for a Buy, down for a Sell).

    Args:
        df: LazyFrame with return columns
        cols: List of column names to sign (e.g., ["y_10m", "y_30m", "y_close"])
        side_col: Column containing order side (default: "order_side")
            Expected values: "Buy" or "Sell"

    Returns:
        LazyFrame in which each column of ``cols`` is replaced, under the
        same name, by its signed version.

    Example:
        >>> df = vf.sign_by_side(df, cols=["y_10m", "y_30m", "y_close"])
        >>> # All y_* columns now have positive = favorable for that side
    """
    is_sell = pl.col(side_col) == "Sell"
    return df.with_columns(
        [
            pl.when(is_sell)
            .then(-pl.col(name))
            .otherwise(pl.col(name))
            .alias(name)
            for name in cols
        ]
    )
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
vizflow/__init__.py,sha256=_jP6bp5U2iBFkPGTSLNwvOQay9XU_opNdrylF22iR9s,589
|
|
2
|
+
vizflow/config.py,sha256=nPZPXlqQbaY8u_FAdtPShvb0mdx3e2TRaQ2CILzliAU,7192
|
|
3
|
+
vizflow/io.py,sha256=qaILo7JYHRILZv7RgPdwltOkCew5ac3nmwOTn1p2ha4,11351
|
|
4
|
+
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
+
vizflow/ops.py,sha256=oR44HYKrfaXLh0SmbfXefl714UESSIC5lTNJBrR1kto,10858
|
|
6
|
+
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
|
|
8
|
+
vizflow-0.5.5.dist-info/METADATA,sha256=m6Zj_2AjsyBr1Tvq7uf20dzH9X9fNrFwpgM6wrDpcVg,388
|
|
9
|
+
vizflow-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
vizflow-0.5.5.dist-info/RECORD,,
|
vizflow-0.5.3.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
vizflow/__init__.py,sha256=pO3pTaKj7LNyuQlTrCHau9nBymhNGdmiImgJXH5lxt4,529
|
|
2
|
-
vizflow/config.py,sha256=y4vRvjVTa1H5AdQf0q_XhYr-3EBDJst8BJq52ODN3uk,6456
|
|
3
|
-
vizflow/io.py,sha256=eheqyLHGiSh69erxMk98FK-GYycbSheqkrIYRYGFy3A,7687
|
|
4
|
-
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
-
vizflow/ops.py,sha256=4UwxOTPhvZ1_4PI3pcxbXfLAYsn1Ecj6nyBtBBr7KS8,7761
|
|
6
|
-
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
|
|
8
|
-
vizflow-0.5.3.dist-info/METADATA,sha256=e8LTSGVMiHLNcTjMaAO11Lj5xYa6etsoG4yBuxI8E64,388
|
|
9
|
-
vizflow-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
-
vizflow-0.5.3.dist-info/RECORD,,
|
|
File without changes
|