vizflow-0.4.4-py3-none-any.whl → vizflow-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vizflow/__init__.py CHANGED
@@ -5,7 +5,7 @@ Usage:
5
5
  import vizflow as vf
6
6
  """
7
7
 
8
- __version__ = "0.4.4"
8
+ __version__ = "0.5.1"
9
9
 
10
10
  from .config import ColumnSchema, Config, get_config, set_config
11
11
  from .io import (
@@ -18,5 +18,5 @@ from .io import (
18
18
  scan_trades,
19
19
  )
20
20
  from .market import CN, CRYPTO, Market, Session
21
- from .ops import aggregate, bin, parse_time
21
+ from .ops import aggregate, bin, forward_return, parse_time
22
22
  from .presets import JYAO_V20251114, PRESETS, YLIN_V20251204
vizflow/config.py CHANGED
@@ -10,6 +10,21 @@ from typing import Any
10
10
  _global_config: Config | None = None
11
11
 
12
12
 
13
+ def _validate_date(date: str) -> None:
14
+ """Validate date string format to prevent path traversal.
15
+
16
+ Args:
17
+ date: Date string to validate
18
+
19
+ Raises:
20
+ ValueError: If date is not exactly 8 digits (YYYYMMDD format)
21
+ """
22
+ if not (len(date) == 8 and date.isdigit()):
23
+ raise ValueError(
24
+ f"Invalid date format: {date!r}. Expected YYYYMMDD (8 digits)."
25
+ )
26
+
27
+
13
28
  @dataclass
14
29
  class ColumnSchema:
15
30
  """Schema for a column with type casting.
@@ -83,7 +98,11 @@ class Config:
83
98
  time_cutoff: int | None = None
84
99
 
85
100
  def __post_init__(self):
86
- """Convert paths to Path objects if needed."""
101
+ """Convert string paths to Path objects.
102
+
103
+ Note: String values for path fields (alpha_dir, trade_dir, calendar_path,
104
+ replay_dir, aggregate_dir) are automatically converted to Path objects.
105
+ """
87
106
  if isinstance(self.alpha_dir, str):
88
107
  self.alpha_dir = Path(self.alpha_dir)
89
108
  if isinstance(self.trade_dir, str):
@@ -119,8 +138,9 @@ class Config:
119
138
  Full path to alpha file
120
139
 
121
140
  Raises:
122
- ValueError: If alpha_dir is not set
141
+ ValueError: If alpha_dir is not set or date format is invalid
123
142
  """
143
+ _validate_date(date)
124
144
  if self.alpha_dir is None:
125
145
  raise ValueError("alpha_dir is not set in Config")
126
146
  return self.alpha_dir / self.alpha_pattern.format(date=date)
@@ -135,8 +155,9 @@ class Config:
135
155
  Full path to trade file
136
156
 
137
157
  Raises:
138
- ValueError: If trade_dir is not set
158
+ ValueError: If trade_dir is not set or date format is invalid
139
159
  """
160
+ _validate_date(date)
140
161
  if self.trade_dir is None:
141
162
  raise ValueError("trade_dir is not set in Config")
142
163
  return self.trade_dir / self.trade_pattern.format(date=date)
@@ -152,8 +173,9 @@ class Config:
152
173
  Full path to replay output file
153
174
 
154
175
  Raises:
155
- ValueError: If replay_dir is not set
176
+ ValueError: If replay_dir is not set or date format is invalid
156
177
  """
178
+ _validate_date(date)
157
179
  if self.replay_dir is None:
158
180
  raise ValueError("replay_dir is not set in Config")
159
181
  return self.replay_dir / f"{date}{suffix}"
@@ -169,8 +191,9 @@ class Config:
169
191
  Full path to aggregate output file
170
192
 
171
193
  Raises:
172
- ValueError: If aggregate_dir is not set
194
+ ValueError: If aggregate_dir is not set or date format is invalid
173
195
  """
196
+ _validate_date(date)
174
197
  if self.aggregate_dir is None:
175
198
  raise ValueError("aggregate_dir is not set in Config")
176
199
  return self.aggregate_dir / f"{date}{suffix}"
vizflow/io.py CHANGED
@@ -2,14 +2,24 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import TYPE_CHECKING
6
-
7
5
  import polars as pl
8
6
 
9
- from .config import Config, get_config
7
+ from .config import ColumnSchema, Config, get_config
8
+
10
9
 
11
- if TYPE_CHECKING:
12
- pass
10
+ def _apply_schema(df: pl.LazyFrame, schema: dict[str, ColumnSchema]) -> pl.LazyFrame:
11
+ """Apply type casting from schema definition.
12
+
13
+ Args:
14
+ df: LazyFrame to apply schema to
15
+ schema: Mapping from column names to ColumnSchema
16
+
17
+ Returns:
18
+ LazyFrame with type casts applied
19
+ """
20
+ for col_name, col_schema in schema.items():
21
+ df = df.with_columns(pl.col(col_name).cast(col_schema.cast_to))
22
+ return df
13
23
 
14
24
 
15
25
  def load_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
@@ -34,12 +44,7 @@ def load_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
34
44
  config = config or get_config()
35
45
  path = config.get_alpha_path(date)
36
46
  df = pl.scan_ipc(path)
37
-
38
- # Apply schema evolution (type casting)
39
- for col_name, schema in config.alpha_schema.items():
40
- df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
41
-
42
- return df
47
+ return _apply_schema(df, config.alpha_schema)
43
48
 
44
49
 
45
50
  def load_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
@@ -64,12 +69,7 @@ def load_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
64
69
  config = config or get_config()
65
70
  path = config.get_trade_path(date)
66
71
  df = pl.scan_ipc(path)
67
-
68
- # Apply schema evolution (type casting)
69
- for col_name, schema in config.trade_schema.items():
70
- df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
71
-
72
- return df
72
+ return _apply_schema(df, config.trade_schema)
73
73
 
74
74
 
75
75
  def load_calendar(config: Config | None = None) -> pl.DataFrame:
@@ -121,8 +121,10 @@ def _scan_file(path) -> pl.LazyFrame:
121
121
  elif suffix == "parquet":
122
122
  return pl.scan_parquet(path)
123
123
  else:
124
- # Default to IPC
125
- return pl.scan_ipc(path)
124
+ raise ValueError(
125
+ f"Unsupported file format: .{suffix}. "
126
+ "Supported: .feather, .ipc, .arrow, .csv, .meords, .parquet"
127
+ )
126
128
 
127
129
 
128
130
  def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
@@ -191,17 +193,13 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
191
193
  def _apply_trade_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
192
194
  """Apply column rename + schema evolution for trade data."""
193
195
  df = _apply_rename(df, config.trade_preset)
194
- for col_name, schema in config.trade_schema.items():
195
- df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
196
- return df
196
+ return _apply_schema(df, config.trade_schema)
197
197
 
198
198
 
199
199
  def _apply_alpha_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
200
200
  """Apply column rename + schema evolution for alpha data."""
201
201
  df = _apply_rename(df, config.alpha_preset)
202
- for col_name, schema in config.alpha_schema.items():
203
- df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
204
- return df
202
+ return _apply_schema(df, config.alpha_schema)
205
203
 
206
204
 
207
205
  def _apply_rename(df: pl.LazyFrame, preset: str | None) -> pl.LazyFrame:
vizflow/ops.py CHANGED
@@ -135,3 +135,103 @@ def aggregate(
135
135
  """
136
136
  agg_exprs = [expr.alias(name) for name, expr in metrics.items()]
137
137
  return df.group_by(group_by).agg(agg_exprs)
138
+
139
+
140
+ def _horizon_to_suffix(horizon_seconds: int) -> str:
141
+ """Convert horizon in seconds to column name suffix.
142
+
143
+ Rule: ≤60s → use seconds (60s), >60s → use whole minutes, rounded down (3m, 30m)
144
+ """
145
+ if horizon_seconds <= 60:
146
+ return f"{horizon_seconds}s"
147
+ else:
148
+ minutes = horizon_seconds // 60
149
+ return f"{minutes}m"
150
+
151
+
152
+ def forward_return(
153
+ df_trade: pl.LazyFrame,
154
+ df_alpha: pl.LazyFrame,
155
+ horizons: list[int],
156
+ trade_time_col: str = "elapsed_alpha_ts",
157
+ alpha_time_col: str = "elapsed_ticktime",
158
+ price_col: str = "mid",
159
+ symbol_col: str = "ukey",
160
+ tolerance_ms: int = 5000,
161
+ ) -> pl.LazyFrame:
162
+ """Merge alpha's future price to trade and calculate forward returns.
163
+
164
+ For each trade row:
165
+ 1. Look up alpha price at trade_time + horizon
166
+ 2. Add forward_{price_col}_{horizon} column (the future price)
167
+ 3. Calculate y_{horizon} = (forward_price - current_price) / current_price
168
+
169
+ Output column names follow the convention:
170
+ - ≤60s → forward_mid_60s, y_60s
171
+ - >60s → forward_mid_3m, y_3m
172
+
173
+ Args:
174
+ df_trade: Trade LazyFrame with trade_time_col and price_col
175
+ df_alpha: Alpha LazyFrame with alpha_time_col and price_col
176
+ horizons: List of horizons in seconds, e.g., [60, 180, 1800]
177
+ trade_time_col: Time column in trade df (default: "elapsed_alpha_ts")
178
+ alpha_time_col: Time column in alpha df (default: "elapsed_ticktime")
179
+ price_col: Column name for price in both dfs (default: "mid")
180
+ symbol_col: Symbol column for grouping (default: "ukey")
181
+ tolerance_ms: Max time difference in ms for asof join (default: 5000)
182
+
183
+ Returns:
184
+ Trade LazyFrame with forward_* and y_* columns added
185
+
186
+ Example:
187
+ >>> df_trade = vf.parse_time(vf.scan_trade(date), "alpha_ts")
188
+ >>> df_alpha = vf.parse_time(vf.scan_alpha(date), "ticktime")
189
+ >>> df = vf.forward_return(df_trade, df_alpha, horizons=[60, 180, 1800])
190
+ >>> # Creates: forward_mid_60s, forward_mid_3m, forward_mid_30m
191
+ >>> # y_60s, y_3m, y_30m
192
+ """
193
+ # Materialize both lazy frames; the asof join below runs on the collected DataFrames
194
+ trade = df_trade.collect()
195
+ alpha = df_alpha.collect()
196
+
197
+ # Prepare alpha lookup table: (symbol, time) -> price
198
+ alpha_lookup = alpha.select([
199
+ pl.col(symbol_col),
200
+ pl.col(alpha_time_col),
201
+ pl.col(price_col),
202
+ ]).sort([symbol_col, alpha_time_col])
203
+
204
+ for horizon in horizons:
205
+ suffix = _horizon_to_suffix(horizon)
206
+ horizon_ms = horizon * 1000
207
+ forward_col = f"forward_{price_col}_{suffix}"
208
+ return_col = f"y_{suffix}"
209
+
210
+ # Add target time column for this horizon
211
+ trade = trade.with_columns(
212
+ (pl.col(trade_time_col) + horizon_ms).alias("_forward_time")
213
+ )
214
+
215
+ # Sort by join columns (required for asof join)
216
+ trade = trade.sort([symbol_col, "_forward_time"])
217
+
218
+ # Asof join: find alpha price at forward_time
219
+ joined = trade.join_asof(
220
+ alpha_lookup.rename({alpha_time_col: "_alpha_time", price_col: "_forward_price"}),
221
+ left_on="_forward_time",
222
+ right_on="_alpha_time",
223
+ by=symbol_col,
224
+ strategy="nearest",
225
+ tolerance=tolerance_ms,
226
+ )
227
+
228
+ # Add forward price and calculate return (guard against zero price)
229
+ trade = joined.with_columns([
230
+ pl.col("_forward_price").alias(forward_col),
231
+ pl.when(pl.col(price_col) != 0)
232
+ .then((pl.col("_forward_price") - pl.col(price_col)) / pl.col(price_col))
233
+ .otherwise(pl.lit(None))
234
+ .alias(return_col),
235
+ ]).drop(["_forward_time", "_alpha_time", "_forward_price"])
236
+
237
+ return trade.lazy()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vizflow
3
- Version: 0.4.4
3
+ Version: 0.5.1
4
4
  Requires-Python: >=3.9
5
5
  Requires-Dist: polars>=0.20.0
6
6
  Provides-Extra: dev
@@ -0,0 +1,10 @@
1
+ vizflow/__init__.py,sha256=EJ8qF4o2grf4aSochaasaaf0unyXV5yhrMs6rAhyp7k,496
2
+ vizflow/config.py,sha256=JNW5-TshQ1v-Ft3-VV0JYJ5PdC3Yhgy4fW0AV0RWzkE,7322
3
+ vizflow/io.py,sha256=ypiEuuPoHFKSt6VnhXcEI7u7dyVjKORunjqGpkFauXM,8877
4
+ vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
5
+ vizflow/ops.py,sha256=4UwxOTPhvZ1_4PI3pcxbXfLAYsn1Ecj6nyBtBBr7KS8,7761
6
+ vizflow/presets.py,sha256=h91NZoOH4YAx0bbsaNigECf9WOcWh1QZavguunWkaLE,2452
7
+ vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ vizflow-0.5.1.dist-info/METADATA,sha256=DHzwPBvYuj7Rc4BawcXD2juS5iR5UD1FnAxt3cgvpo4,388
9
+ vizflow-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ vizflow-0.5.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- vizflow/__init__.py,sha256=vYhGrFOQoONxBA6TJ08OQwWFd_UzltTsoZNQryNwnko,480
2
- vizflow/config.py,sha256=zSZnhdHzgXSqhDenHcHKm4CDGrMpKAdkNNRoUYYF1uc,6530
3
- vizflow/io.py,sha256=zmN0fFQOTmSBEBKangMExr0Q5mC2gajZM6GgdAyWkw4,8824
4
- vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
5
- vizflow/ops.py,sha256=NL-Gtv-m_O1hv-0RUb9Wt43916HsQ5tYK_0e_uKR90w,4062
6
- vizflow/presets.py,sha256=h91NZoOH4YAx0bbsaNigECf9WOcWh1QZavguunWkaLE,2452
7
- vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- vizflow-0.4.4.dist-info/METADATA,sha256=NxMcTJ5fQKbB6GPak9dHARYTp9h0WwcDwxQAhrHcxRU,388
9
- vizflow-0.4.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- vizflow-0.4.4.dist-info/RECORD,,