vizflow 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vizflow/__init__.py CHANGED
@@ -5,7 +5,7 @@ Usage:
5
5
  import vizflow as vf
6
6
  """
7
7
 
8
- __version__ = "0.5.3"
8
+ __version__ = "0.5.5"
9
9
 
10
10
  from .config import Config, get_config, set_config
11
11
  from .io import (
@@ -14,9 +14,11 @@ from .io import (
14
14
  scan_alphas,
15
15
  scan_trade,
16
16
  scan_trades,
17
+ scan_univ,
18
+ scan_univs,
17
19
  )
18
20
  from .market import CN, CRYPTO, Market, Session
19
- from .ops import aggregate, bin, forward_return, parse_time
21
+ from .ops import aggregate, bin, forward_return, mark_to_close, parse_time, sign_by_side
20
22
  from .schema_evolution import (
21
23
  JYAO_V20251114,
22
24
  SCHEMAS,
vizflow/config.py CHANGED
@@ -62,6 +62,8 @@ class Config:
62
62
  alpha_pattern: str = "alpha_{date}.feather"
63
63
  trade_dir: Path | None = None
64
64
  trade_pattern: str = "trade_{date}.feather"
65
+ univ_dir: Path | None = None
66
+ univ_pattern: str = "{date}.csv"
65
67
  calendar_path: Path | None = None
66
68
 
67
69
  # === Output Paths ===
@@ -75,6 +77,7 @@ class Config:
75
77
  # Can be a string (schema name) or SchemaEvolution instance
76
78
  trade_schema: str | SchemaEvolution | None = None
77
79
  alpha_schema: str | SchemaEvolution | None = None
80
+ univ_schema: str | SchemaEvolution | None = None
78
81
 
79
82
  # === Aggregation ===
80
83
  binwidths: dict[str, float] = field(default_factory=dict)
@@ -87,13 +90,16 @@ class Config:
87
90
  def __post_init__(self):
88
91
  """Convert string paths to Path objects.
89
92
 
90
- Note: String values for path fields (alpha_dir, trade_dir, calendar_path,
91
- replay_dir, aggregate_dir) are automatically converted to Path objects.
93
+ Note: String values for path fields (alpha_dir, trade_dir, univ_dir,
94
+ calendar_path, replay_dir, aggregate_dir) are automatically converted
95
+ to Path objects.
92
96
  """
93
97
  if isinstance(self.alpha_dir, str):
94
98
  self.alpha_dir = Path(self.alpha_dir)
95
99
  if isinstance(self.trade_dir, str):
96
100
  self.trade_dir = Path(self.trade_dir)
101
+ if isinstance(self.univ_dir, str):
102
+ self.univ_dir = Path(self.univ_dir)
97
103
  if isinstance(self.calendar_path, str):
98
104
  self.calendar_path = Path(self.calendar_path)
99
105
  if isinstance(self.replay_dir, str):
@@ -135,6 +141,23 @@ class Config:
135
141
  raise ValueError("trade_dir is not set in Config")
136
142
  return self.trade_dir / self.trade_pattern.format(date=date)
137
143
 
144
def get_univ_path(self, date: str) -> Path:
    """Build the path of the universe file for a single date.

    The file name is produced by substituting ``date`` into
    ``self.univ_pattern`` and is resolved relative to ``self.univ_dir``.

    Args:
        date: Date string, e.g. "20241001"

    Returns:
        Full path to the universe file

    Raises:
        ValueError: If univ_dir is not set. ``_validate_date`` (defined
            elsewhere in this module) runs first and is expected to
            reject a malformed date.
    """
    _validate_date(date)

    base_dir = self.univ_dir
    if base_dir is None:
        raise ValueError("univ_dir is not set in Config")

    filename = self.univ_pattern.format(date=date)
    return base_dir / filename
160
+
138
161
  def get_replay_path(self, date: str, suffix: str = ".parquet") -> Path:
139
162
  """Get replay output file path for a date (FIFO results).
140
163
 
vizflow/io.py CHANGED
@@ -2,12 +2,42 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import re
6
+ from pathlib import Path
7
+
5
8
  import polars as pl
6
9
 
7
10
  from .config import Config, get_config
8
11
  from .schema_evolution import SchemaEvolution, get_schema
9
12
 
10
13
 
14
+ def _extract_date_from_path(path: Path, pattern: str) -> str | None:
15
+ """Extract date from filename using pattern.
16
+
17
+ Args:
18
+ path: File path.
19
+ pattern: Pattern with {date} placeholder, e.g., "{date}.meords"
20
+
21
+ Returns:
22
+ Extracted date string, or None if no match or no {date} in pattern.
23
+
24
+ Example:
25
+ >>> _extract_date_from_path(Path("data/11110101.meords"), "{date}.meords")
26
+ "11110101"
27
+ """
28
+ # If pattern has no {date} placeholder, return None
29
+ if "{date}" not in pattern:
30
+ return None
31
+
32
+ # Convert pattern to regex: "{date}" -> "(?P<date>.+)"
33
+ # Escape other special chars first
34
+ regex_pattern = re.escape(pattern).replace(r"\{date\}", r"(?P<date>[^/]+)")
35
+ match = re.search(regex_pattern, path.name)
36
+ if match:
37
+ return match.group("date")
38
+ return None
39
+
40
+
11
41
  def _resolve_schema(
12
42
  schema_ref: str | SchemaEvolution | None,
13
43
  ) -> SchemaEvolution | None:
@@ -141,13 +171,15 @@ def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
141
171
 
142
172
 
143
173
  def scan_trades(config: Config | None = None) -> pl.LazyFrame:
144
- """Scan all trade files with schema evolution.
174
+ """Scan all trade files with schema evolution and data_date column.
175
+
176
+ Extracts date from each filename using the pattern and adds a "data_date" column.
145
177
 
146
178
  Args:
147
179
  config: Config to use, or get_config() if None
148
180
 
149
181
  Returns:
150
- LazyFrame with schema evolution applied
182
+ LazyFrame with schema evolution applied and data_date column added
151
183
 
152
184
  Raises:
153
185
  ValueError: If trade_dir is not set or no files found
@@ -159,7 +191,7 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
159
191
  ... trade_schema="ylin_v20251204",
160
192
  ... )
161
193
  >>> vf.set_config(config)
162
- >>> df = vf.scan_trades()
194
+ >>> df = vf.scan_trades() # Has "data_date" column
163
195
  """
164
196
  config = config or get_config()
165
197
  if config.trade_dir is None:
@@ -172,14 +204,19 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
172
204
 
173
205
  schema = _resolve_schema(config.trade_schema)
174
206
 
175
- # Concatenate all files using lazy scanning
176
- dfs = [_scan_file(f, schema=schema) for f in files]
177
- df = pl.concat(dfs)
207
+ # Scan each file, apply schema evolution, and add data_date column
208
+ # Schema evolution must be applied per-file BEFORE concat to ensure matching schemas
209
+ dfs = []
210
+ for f in files:
211
+ df = _scan_file(f, schema=schema)
212
+ if schema:
213
+ df = _apply_schema_evolution(df, schema)
214
+ date = _extract_date_from_path(f, config.trade_pattern)
215
+ if date:
216
+ df = df.with_columns(pl.lit(date).alias("data_date"))
217
+ dfs.append(df)
178
218
 
179
- if schema:
180
- df = _apply_schema_evolution(df, schema)
181
-
182
- return df
219
+ return pl.concat(dfs)
183
220
 
184
221
 
185
222
  def scan_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
@@ -235,15 +272,87 @@ def scan_alphas(config: Config | None = None) -> pl.LazyFrame:
235
272
 
236
273
  schema = _resolve_schema(config.alpha_schema)
237
274
 
238
- dfs = [_scan_file(f, schema=schema) for f in files]
239
- df = pl.concat(dfs)
275
+ # Apply schema evolution per-file BEFORE concat to ensure matching schemas
276
+ # Note: Alpha files already have data_date column, no need to add it
277
+ dfs = []
278
+ for f in files:
279
+ df = _scan_file(f, schema=schema)
280
+ if schema:
281
+ df = _apply_schema_evolution(df, schema)
282
+ dfs.append(df)
283
+
284
+ return pl.concat(dfs)
285
+
286
+
287
def scan_univ(date: str, config: Config | None = None) -> pl.LazyFrame:
    """Lazily scan the universe file of one date.

    The file path comes from ``config.get_univ_path(date)``. When
    ``config.univ_schema`` resolves to a schema, schema evolution is
    applied to the scanned frame.

    Args:
        date: Date string, e.g. "20241001"
        config: Config to use, or get_config() if None

    Returns:
        LazyFrame with schema evolution applied (when a schema is set)

    Example:
        >>> config = vf.Config(
        ...     univ_dir=Path("/data/jyao/univ"),
        ...     univ_pattern="{date}.csv",
        ... )
        >>> vf.set_config(config)
        >>> df = vf.scan_univ("20241001")
    """
    cfg = config or get_config()
    univ_path = cfg.get_univ_path(date)
    evolution = _resolve_schema(cfg.univ_schema)

    frame = _scan_file(univ_path, schema=evolution)
    if not evolution:
        return frame
    return _apply_schema_evolution(frame, evolution)
245
314
 
246
315
 
316
def scan_univs(config: Config | None = None) -> pl.LazyFrame:
    """Lazily scan every universe file and tag rows with their file date.

    Files are found by globbing ``univ_dir`` with the ``{date}``
    placeholder of ``univ_pattern`` replaced by ``*``. For each file the
    date is extracted from its name and, when found, added as a
    "data_date" column.

    Args:
        config: Config to use, or get_config() if None

    Returns:
        LazyFrame concatenating all files, with schema evolution applied
        and the data_date column added

    Raises:
        ValueError: If univ_dir is not set or no files found
    """
    cfg = config or get_config()
    univ_dir = cfg.univ_dir
    if univ_dir is None:
        raise ValueError("univ_dir is not set in Config")

    pattern = cfg.univ_pattern.replace("{date}", "*")
    files = sorted(univ_dir.glob(pattern))
    if not files:
        raise ValueError(f"No files found matching {pattern} in {univ_dir}")

    schema = _resolve_schema(cfg.univ_schema)

    def _load(path):
        # Schema evolution is applied per file, before concatenation,
        # so that all frames share a matching schema.
        frame = _scan_file(path, schema=schema)
        if schema:
            frame = _apply_schema_evolution(frame, schema)
        file_date = _extract_date_from_path(path, cfg.univ_pattern)
        if file_date:
            frame = frame.with_columns(pl.lit(file_date).alias("data_date"))
        return frame

    return pl.concat([_load(path) for path in files])
354
+
355
+
247
356
  def load_calendar(config: Config | None = None) -> pl.DataFrame:
248
357
  """Load trading calendar.
249
358
 
vizflow/ops.py CHANGED
@@ -235,3 +235,102 @@ def forward_return(
235
235
  ]).drop(["_forward_time", "_alpha_time", "_forward_price"])
236
236
 
237
237
  return trade.lazy()
238
+
239
+
240
def _lazy_column_names(frame: pl.LazyFrame) -> set[str]:
    """Return the column names of a LazyFrame across polars versions."""
    collect_schema = getattr(frame, "collect_schema", None)
    if collect_schema is not None:
        return set(collect_schema().names())
    # Older polars releases (the package metadata allows >=0.20.0) do not
    # provide LazyFrame.collect_schema(); fall back to the columns property.
    return set(frame.columns)


def mark_to_close(
    df_trade: pl.LazyFrame,
    df_univ: pl.LazyFrame,
    mid_col: str = "mid",
    close_col: str = "close",
    symbol_col: str = "ukey",
) -> pl.LazyFrame:
    """Add mark-to-close return column.

    Left-joins trade with universe data to get the close price, then
    calculates:
        y_close = (close - mid) / mid

    When both frames have a "data_date" column the join uses
    (symbol_col, "data_date"); otherwise it uses symbol_col only. Rows
    whose mid is 0 get a null y_close instead of a division by zero.

    Args:
        df_trade: Trade LazyFrame with mid_col and symbol_col
        df_univ: Universe LazyFrame with close_col and symbol_col
        mid_col: Column name for mid price in trade df (default: "mid")
        close_col: Column name for close price in univ df (default: "close")
        symbol_col: Symbol column for joining (default: "ukey")

    Returns:
        Trade LazyFrame with the close column and y_close column added

    Example:
        >>> df_trade = vf.scan_trade(date)
        >>> df_univ = vf.scan_univ(date)
        >>> df = vf.mark_to_close(df_trade, df_univ)
        >>> # Creates: y_close
    """
    # Multi-day case only when data_date is present on both sides.
    multi_day = "data_date" in _lazy_column_names(
        df_trade
    ) and "data_date" in _lazy_column_names(df_univ)

    join_cols = [symbol_col, "data_date"] if multi_day else [symbol_col]

    # Select only the needed universe columns to avoid column conflicts.
    univ_cols = [symbol_col, close_col]
    if multi_day:
        univ_cols.append("data_date")

    # Left join keeps every trade row even without a universe match.
    df = df_trade.join(
        df_univ.select(univ_cols),
        on=join_cols,
        how="left",
    )

    # Calculate return (guard against zero mid).
    mid = pl.col(mid_col)
    return df.with_columns(
        pl.when(mid != 0)
        .then((pl.col(close_col) - mid) / mid)
        .otherwise(pl.lit(None))
        .alias("y_close")
    )
298
+
299
+
300
def sign_by_side(
    df: pl.LazyFrame,
    cols: list[str],
    side_col: str = "order_side",
) -> pl.LazyFrame:
    """Flip the sign of return columns on sell rows.

    Rows whose ``side_col`` equals "Sell" have each listed column
    negated; every other row is left unchanged. The intent is a
    consistent reading of returns:
        Buy:  price going up = positive = good
        Sell: price going down = positive = good

    Args:
        df: LazyFrame with return columns
        cols: List of column names to sign (e.g., ["y_10m", "y_30m", "y_close"])
        side_col: Column containing order side (default: "order_side")
            Expected values: "Buy" or "Sell"

    Returns:
        LazyFrame with the listed columns replaced by their signed values

    Example:
        >>> df = vf.sign_by_side(df, cols=["y_10m", "y_30m", "y_close"])
        >>> # All y_* columns now have positive = favorable for that side
    """
    is_sell = pl.col(side_col) == "Sell"
    return df.with_columns(
        [
            pl.when(is_sell).then(-pl.col(name)).otherwise(pl.col(name)).alias(name)
            for name in cols
        ]
    )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vizflow
3
- Version: 0.5.3
3
+ Version: 0.5.5
4
4
  Requires-Python: >=3.9
5
5
  Requires-Dist: polars>=0.20.0
6
6
  Provides-Extra: dev
@@ -0,0 +1,10 @@
1
+ vizflow/__init__.py,sha256=_jP6bp5U2iBFkPGTSLNwvOQay9XU_opNdrylF22iR9s,589
2
+ vizflow/config.py,sha256=nPZPXlqQbaY8u_FAdtPShvb0mdx3e2TRaQ2CILzliAU,7192
3
+ vizflow/io.py,sha256=qaILo7JYHRILZv7RgPdwltOkCew5ac3nmwOTn1p2ha4,11351
4
+ vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
5
+ vizflow/ops.py,sha256=oR44HYKrfaXLh0SmbfXefl714UESSIC5lTNJBrR1kto,10858
6
+ vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
8
+ vizflow-0.5.5.dist-info/METADATA,sha256=m6Zj_2AjsyBr1Tvq7uf20dzH9X9fNrFwpgM6wrDpcVg,388
9
+ vizflow-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ vizflow-0.5.5.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- vizflow/__init__.py,sha256=pO3pTaKj7LNyuQlTrCHau9nBymhNGdmiImgJXH5lxt4,529
2
- vizflow/config.py,sha256=y4vRvjVTa1H5AdQf0q_XhYr-3EBDJst8BJq52ODN3uk,6456
3
- vizflow/io.py,sha256=eheqyLHGiSh69erxMk98FK-GYycbSheqkrIYRYGFy3A,7687
4
- vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
5
- vizflow/ops.py,sha256=4UwxOTPhvZ1_4PI3pcxbXfLAYsn1Ecj6nyBtBBr7KS8,7761
6
- vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
8
- vizflow-0.5.3.dist-info/METADATA,sha256=e8LTSGVMiHLNcTjMaAO11Lj5xYa6etsoG4yBuxI8E64,388
9
- vizflow-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- vizflow-0.5.3.dist-info/RECORD,,