vizflow 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vizflow/__init__.py +10 -5
- vizflow/config.py +17 -44
- vizflow/io.py +131 -156
- vizflow/schema_evolution.py +395 -0
- {vizflow-0.5.1.dist-info → vizflow-0.5.3.dist-info}/METADATA +1 -1
- vizflow-0.5.3.dist-info/RECORD +10 -0
- vizflow/presets.py +0 -87
- vizflow-0.5.1.dist-info/RECORD +0 -10
- {vizflow-0.5.1.dist-info → vizflow-0.5.3.dist-info}/WHEEL +0 -0
vizflow/__init__.py
CHANGED

@@ -5,13 +5,11 @@ Usage:
     import vizflow as vf
 """
 
-__version__ = "0.5.1"
+__version__ = "0.5.3"
 
-from .config import
+from .config import Config, get_config, set_config
 from .io import (
-    load_alpha,
     load_calendar,
-    load_trade,
     scan_alpha,
     scan_alphas,
     scan_trade,
@@ -19,4 +17,11 @@ from .io import (
 )
 from .market import CN, CRYPTO, Market, Session
 from .ops import aggregate, bin, forward_return, parse_time
-from .
+from .schema_evolution import (
+    JYAO_V20251114,
+    SCHEMAS,
+    YLIN_V20251204,
+    ColumnSpec,
+    SchemaEvolution,
+    get_schema,
+)
vizflow/config.py
CHANGED

@@ -4,7 +4,10 @@ from __future__ import annotations
 
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from .schema_evolution import SchemaEvolution
 
 # Global config instance
 _global_config: Config | None = None
@@ -25,21 +28,6 @@ def _validate_date(date: str) -> None:
     )
 
 
-@dataclass
-class ColumnSchema:
-    """Schema for a column with type casting.
-
-    Attributes:
-        cast_to: Target type after casting (e.g. pl.Int64)
-
-    Example:
-        # Handle float precision errors: 1.00000002 → 1
-        ColumnSchema(cast_to=pl.Int64)
-    """
-
-    cast_to: Any  # pl.DataType, but avoid import for now
-
-
 @dataclass
 class Config:
     """Central configuration for a pipeline run.
@@ -53,14 +41,20 @@ class Config:
         replay_dir: Directory for FIFO replay output (materialization 1)
         aggregate_dir: Directory for aggregation output (materialization 2)
         market: Market identifier, e.g. "CN"
-
-
-        alpha_schema: Schema evolution for alpha columns
-        trade_schema: Schema evolution for trade columns
+        trade_schema: Schema evolution for trade data (name or SchemaEvolution)
+        alpha_schema: Schema evolution for alpha data (name or SchemaEvolution)
         binwidths: Mapping from column names to bin widths
         group_by: Columns to group by in aggregation
         horizons: List of forward return horizons in seconds
         time_cutoff: Optional time cutoff (e.g. 143000000 for 14:30:00)
+
+    Example:
+        >>> config = vf.Config(
+        ...     trade_dir=Path("data/ylin/trade"),
+        ...     trade_pattern="{date}.meords",
+        ...     trade_schema="ylin_v20251204",  # Use registered schema by name
+        ...     market="CN",
+        ... )
     """
 
     # === Input Paths ===
@@ -77,17 +71,10 @@ class Config:
     # === Market ===
     market: str = "CN"
 
-    # === Column Mapping ===
-    alpha_columns: dict[str, str] = field(default_factory=dict)
-    trade_columns: dict[str, str] = field(default_factory=dict)
-
     # === Schema Evolution ===
-
-    trade_schema:
-
-    # === Column Mapping Presets ===
-    trade_preset: str | None = None  # "ylin" or None
-    alpha_preset: str | None = None  # "jyao_v20251114" or None
+    # Can be a string (schema name) or SchemaEvolution instance
+    trade_schema: str | SchemaEvolution | None = None
+    alpha_schema: str | SchemaEvolution | None = None
 
     # === Aggregation ===
     binwidths: dict[str, float] = field(default_factory=dict)
@@ -114,20 +101,6 @@ class Config:
         if isinstance(self.aggregate_dir, str):
             self.aggregate_dir = Path(self.aggregate_dir)
 
-    def col(self, semantic: str, source: str = "trade") -> str:
-        """Get actual column name from semantic name.
-
-        Args:
-            semantic: Semantic column name (e.g. "timestamp", "price")
-            source: "alpha" or "trade"
-
-        Returns:
-            Actual column name, or the semantic name if no mapping exists
-        """
-        if source == "alpha":
-            return self.alpha_columns.get(semantic, semantic)
-        return self.trade_columns.get(semantic, semantic)
-
     def get_alpha_path(self, date: str) -> Path:
         """Get alpha file path for a date.
 
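A hedged sketch of what the widened field type permits (editor's note; the inline schema below is illustrative, not one of the registered versions, and assumes Config's other fields have defaults as the new docstring example suggests):

    from pathlib import Path
    import polars as pl
    import vizflow as vf

    # Option 1: reference a registered schema by name (resolved at scan time).
    cfg_by_name = vf.Config(trade_dir=Path("data/ylin/trade"),
                            trade_pattern="{date}.meords",
                            trade_schema="ylin_v20251204")

    # Option 2: pass a SchemaEvolution instance directly (hypothetical columns).
    cfg_inline = vf.Config(trade_dir=Path("data/other/trade"),
                           trade_pattern="{date}.csv",
                           trade_schema=vf.SchemaEvolution(
                               columns={"fillQty": vf.ColumnSpec(
                                   rename_to="order_filled_qty",
                                   parse_dtype=pl.Float64,
                                   cast_dtype=pl.Int64)},
                               drop=["#HFTORD"]))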
vizflow/io.py
CHANGED

@@ -4,108 +4,40 @@ from __future__ import annotations
 
 import polars as pl
 
-from .config import
+from .config import Config, get_config
+from .schema_evolution import SchemaEvolution, get_schema
 
 
-def
-
+def _resolve_schema(
+    schema_ref: str | SchemaEvolution | None,
+) -> SchemaEvolution | None:
+    """Resolve schema reference to SchemaEvolution instance.
 
     Args:
-
-        schema: Mapping from column names to ColumnSchema
+        schema_ref: Schema name string, SchemaEvolution instance, or None.
 
     Returns:
-
+        SchemaEvolution instance or None.
     """
-
-
-
-
-
-def load_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
-    """Load alpha data for a date with automatic schema evolution.
-
-    Args:
-        date: Date string, e.g. "20241001"
-        config: Config to use, or get_config() if None
-
-    Returns:
-        LazyFrame with schema evolution applied
-
-    Example:
-        >>> config = vf.Config(
-        ...     output_dir=Path("/data/output"),
-        ...     alpha_dir=Path("/data/alpha"),
-        ...     alpha_schema={"qty": vf.ColumnSchema(cast_to=pl.Int64)}
-        ... )
-        >>> vf.set_config(config)
-        >>> alpha = vf.load_alpha("20241001")
-    """
-    config = config or get_config()
-    path = config.get_alpha_path(date)
-    df = pl.scan_ipc(path)
-    return _apply_schema(df, config.alpha_schema)
+    if schema_ref is None:
+        return None
+    if isinstance(schema_ref, SchemaEvolution):
+        return schema_ref
+    return get_schema(schema_ref)
 
 
-def
-
+def _scan_file(
+    path,
+    schema: SchemaEvolution | None = None,
+) -> pl.LazyFrame:
+    """Scan a file based on its extension with optional schema.
 
     Args:
-
-
+        path: Path to file.
+        schema: Optional SchemaEvolution for CSV parsing options.
 
     Returns:
-        LazyFrame
-
-    Example:
-        >>> config = vf.Config(
-        ...     output_dir=Path("/data/output"),
-        ...     trade_dir=Path("/data/trade"),
-        ...     trade_schema={"qty": vf.ColumnSchema(cast_to=pl.Int64)}
-        ... )
-        >>> vf.set_config(config)
-        >>> trade = vf.load_trade("20241001")  # qty: 1.00000002 → 1
-    """
-    config = config or get_config()
-    path = config.get_trade_path(date)
-    df = pl.scan_ipc(path)
-    return _apply_schema(df, config.trade_schema)
-
-
-def load_calendar(config: Config | None = None) -> pl.DataFrame:
-    """Load trading calendar.
-
-    Args:
-        config: Config to use, or get_config() if None
-
-    Returns:
-        DataFrame with date, prev_date, next_date columns
-
-    Raises:
-        ValueError: If calendar_path is not set in config
-
-    Example:
-        >>> config = vf.Config(
-        ...     output_dir=Path("/data/output"),
-        ...     calendar_path=Path("/data/calendar.parquet")
-        ... )
-        >>> vf.set_config(config)
-        >>> calendar = vf.load_calendar()
-    """
-    config = config or get_config()
-    if config.calendar_path is None:
-        raise ValueError("calendar_path is not set in Config")
-    return pl.read_parquet(config.calendar_path)
-
-
-def _scan_file(path) -> pl.LazyFrame:
-    """Scan a file based on its extension.
-
-    Args:
-        path: Path to file
-
-    Returns:
-        LazyFrame from the file
+        LazyFrame from the file.
 
     Supported formats:
     - .feather, .ipc, .arrow: IPC format (pl.scan_ipc)
@@ -117,7 +49,15 @@ def _scan_file(path) -> pl.LazyFrame:
     if suffix in ("feather", "ipc", "arrow"):
         return pl.scan_ipc(path)
     elif suffix in ("csv", "meords"):
-
+        csv_kwargs = {}
+        if schema:
+            schema_overrides = schema.get_schema_overrides()
+            if schema_overrides:
+                csv_kwargs["schema_overrides"] = schema_overrides
+            null_values = schema.get_null_values()
+            if null_values:
+                csv_kwargs["null_values"] = null_values
+        return pl.scan_csv(path, **csv_kwargs)
     elif suffix == "parquet":
         return pl.scan_parquet(path)
     else:
@@ -127,50 +67,96 @@ def _scan_file(path) -> pl.LazyFrame:
         )
 
 
+def _apply_schema_evolution(
+    df: pl.LazyFrame,
+    schema: SchemaEvolution,
+) -> pl.LazyFrame:
+    """Apply full schema evolution: drop, rename, cast.
+
+    Args:
+        df: LazyFrame to transform.
+        schema: SchemaEvolution with transformation rules.
+
+    Returns:
+        Transformed LazyFrame.
+    """
+    existing = set(df.collect_schema().names())
+
+    # Step 1: Drop excluded columns
+    drop_cols = schema.get_drop_columns()
+    to_drop = [c for c in drop_cols if c in existing]
+    if to_drop:
+        df = df.drop(to_drop)
+        existing -= set(to_drop)
+
+    # Step 2: Rename columns
+    rename_map = schema.get_rename_map()
+    to_rename = {k: v for k, v in rename_map.items() if k in existing}
+    if to_rename:
+        df = df.rename(to_rename)
+        # Update existing names after rename
+        for old, new in to_rename.items():
+            existing.discard(old)
+            existing.add(new)
+
+    # Step 3: Cast columns (using FINAL names after rename)
+    cast_map = schema.get_cast_map()
+    for col_name, dtype in cast_map.items():
+        if col_name in existing:
+            df = df.with_columns(pl.col(col_name).cast(dtype))
+
+    return df
+
+
 def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
-    """Scan single date trade file with
+    """Scan single date trade file with schema evolution.
 
-    Supports
+    Supports IPC/feather, CSV (including .meords), and Parquet formats.
 
     Args:
         date: Date string, e.g. "20241001"
         config: Config to use, or get_config() if None
 
     Returns:
-        LazyFrame with
+        LazyFrame with schema evolution applied
 
     Example:
         >>> config = vf.Config(
-        ...     trade_dir=Path("/data/
+        ...     trade_dir=Path("/data/ylin/trade"),
         ...     trade_pattern="{date}.meords",
-        ...
+        ...     trade_schema="ylin_v20251204",
         ... )
         >>> vf.set_config(config)
         >>> df = vf.scan_trade("20241001")
     """
     config = config or get_config()
     path = config.get_trade_path(date)
-
-
+    schema = _resolve_schema(config.trade_schema)
+
+    df = _scan_file(path, schema=schema)
+    if schema:
+        df = _apply_schema_evolution(df, schema)
+
+    return df
 
 
 def scan_trades(config: Config | None = None) -> pl.LazyFrame:
-    """Scan all trade files with
+    """Scan all trade files with schema evolution.
 
     Args:
         config: Config to use, or get_config() if None
 
     Returns:
-        LazyFrame with
+        LazyFrame with schema evolution applied
 
     Raises:
         ValueError: If trade_dir is not set or no files found
 
     Example:
         >>> config = vf.Config(
-        ...     trade_dir=Path("/data/
-        ...     trade_pattern="{date}.
-        ...
+        ...     trade_dir=Path("/data/ylin/trade"),
+        ...     trade_pattern="{date}.meords",
+        ...     trade_schema="ylin_v20251204",
         ... )
         >>> vf.set_config(config)
         >>> df = vf.scan_trades()
@@ -184,82 +170,56 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
     if not files:
         raise ValueError(f"No files found matching {pattern} in {config.trade_dir}")
 
+    schema = _resolve_schema(config.trade_schema)
+
     # Concatenate all files using lazy scanning
-    dfs = [_scan_file(f) for f in files]
+    dfs = [_scan_file(f, schema=schema) for f in files]
     df = pl.concat(dfs)
-    return _apply_trade_mapping(df, config)
-
-
-def _apply_trade_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
-    """Apply column rename + schema evolution for trade data."""
-    df = _apply_rename(df, config.trade_preset)
-    return _apply_schema(df, config.trade_schema)
-
-
-def _apply_alpha_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
-    """Apply column rename + schema evolution for alpha data."""
-    df = _apply_rename(df, config.alpha_preset)
-    return _apply_schema(df, config.alpha_schema)
-
-
-def _apply_rename(df: pl.LazyFrame, preset: str | None) -> pl.LazyFrame:
-    """Apply column rename from preset name.
-
-    Args:
-        df: LazyFrame to rename columns
-        preset: Preset name (e.g., "ylin", "jyao_v20251114") or None
-    """
-    # Drop record type prefix column if present (from CSV files)
-    existing = set(df.collect_schema().names())
-    if "#HFTORD" in existing:
-        df = df.drop("#HFTORD")
-        existing.remove("#HFTORD")
 
-
-
-
-    if rename_map:
-        existing = set(df.collect_schema().names())
-        to_rename = {k: v for k, v in rename_map.items() if k in existing}
-        if to_rename:
-            df = df.rename(to_rename)
+    if schema:
+        df = _apply_schema_evolution(df, schema)
 
     return df
 
 
 def scan_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
-    """Scan single date alpha file with
+    """Scan single date alpha file with schema evolution.
 
     Args:
         date: Date string, e.g. "20241001"
         config: Config to use, or get_config() if None
 
     Returns:
-        LazyFrame with
+        LazyFrame with schema evolution applied
 
     Example:
         >>> config = vf.Config(
         ...     alpha_dir=Path("/data/jyao/alpha"),
         ...     alpha_pattern="alpha_{date}.feather",
-        ...
+        ...     alpha_schema="jyao_v20251114",
         ... )
         >>> vf.set_config(config)
        >>> df = vf.scan_alpha("20251114")
     """
     config = config or get_config()
     path = config.get_alpha_path(date)
-
-
+    schema = _resolve_schema(config.alpha_schema)
+
+    df = _scan_file(path, schema=schema)
+    if schema:
+        df = _apply_schema_evolution(df, schema)
+
+    return df
 
 
 def scan_alphas(config: Config | None = None) -> pl.LazyFrame:
-    """Scan all alpha files with
+    """Scan all alpha files with schema evolution.
 
     Args:
         config: Config to use, or get_config() if None
 
     Returns:
-        LazyFrame with
+        LazyFrame with schema evolution applied
 
     Raises:
         ValueError: If alpha_dir is not set or no files found
@@ -273,22 +233,37 @@ def scan_alphas(config: Config | None = None) -> pl.LazyFrame:
     if not files:
         raise ValueError(f"No files found matching {pattern} in {config.alpha_dir}")
 
-
+    schema = _resolve_schema(config.alpha_schema)
+
+    dfs = [_scan_file(f, schema=schema) for f in files]
     df = pl.concat(dfs)
-    return _apply_alpha_mapping(df, config)
 
+    if schema:
+        df = _apply_schema_evolution(df, schema)
 
-
-
+    return df
+
+
+def load_calendar(config: Config | None = None) -> pl.DataFrame:
+    """Load trading calendar.
 
     Args:
-
+        config: Config to use, or get_config() if None
 
     Returns:
-
-    """
-    if not preset:
-        return {}
-    from .presets import PRESETS
+        DataFrame with date, prev_date, next_date columns
 
-
+    Raises:
+        ValueError: If calendar_path is not set in config
+
+    Example:
+        >>> config = vf.Config(
+        ...     calendar_path=Path("/data/calendar.parquet")
+        ... )
+        >>> vf.set_config(config)
+        >>> calendar = vf.load_calendar()
+    """
+    config = config or get_config()
+    if config.calendar_path is None:
+        raise ValueError("calendar_path is not set in Config")
+    return pl.read_parquet(config.calendar_path)
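To make the drop → rename → cast ordering above concrete (editor's note): a toy run of the private helper, with invented column values; illustration only.

    import polars as pl
    from vizflow.io import _apply_schema_evolution  # private helper, shown above
    from vizflow.schema_evolution import ColumnSpec, SchemaEvolution

    schema = SchemaEvolution(
        columns={"fillQty": ColumnSpec(rename_to="order_filled_qty",
                                       cast_dtype=pl.Int64)},
        drop=["#HFTORD"],
    )
    lf = pl.LazyFrame({"#HFTORD": ["ORD"], "fillQty": [1.00000002]})
    out = _apply_schema_evolution(lf, schema).collect()

    # "#HFTORD" is dropped first, "fillQty" is renamed, and the cast targets
    # the FINAL name, so float noise collapses: 1.00000002 -> 1
    assert out.columns == ["order_filled_qty"]
    assert out["order_filled_qty"].dtype == pl.Int64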
vizflow/schema_evolution.py
ADDED

@@ -0,0 +1,395 @@
+"""Schema Evolution for VizFlow.
+
+This module defines how raw data evolves into standard format through:
+- Column renaming (raw names → standard names)
+- Parse-time type specification (for CSV parsing)
+- Post-load type casting (e.g., Float64 → Int64)
+- Null value handling
+- Column exclusion
+
+Example:
+    >>> schema = SchemaEvolution(
+    ...     columns={
+    ...         "fillQty": ColumnSpec(
+    ...             rename_to="order_filled_qty",
+    ...             parse_dtype=pl.Float64,  # Parse as float (catch decimals)
+    ...             cast_dtype=pl.Int64,  # Then cast to int
+    ...         ),
+    ...     },
+    ...     null_values=["", "NA"],
+    ...     drop=["#HFTORD"],
+    ... )
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+import polars as pl
+
+
+@dataclass
+class ColumnSpec:
+    """Specification for a single column's parsing and transformation.
+
+    Attributes:
+        rename_to: Standard column name after rename. None keeps original name.
+        parse_dtype: Type to use when parsing CSV. None uses Polars inference.
+        cast_dtype: Final type after post-load casting. None keeps parse type.
+
+    Examples:
+        # Rename only (most common)
+        ColumnSpec(rename_to="ukey")
+
+        # Parse as Float64, cast to Int64 (handle decimal errors in qty)
+        ColumnSpec(rename_to="order_filled_qty",
+                   parse_dtype=pl.Float64,
+                   cast_dtype=pl.Int64)
+
+        # Parse as specific type, no cast (trusted integer)
+        ColumnSpec(rename_to="timestamp", parse_dtype=pl.Int64)
+    """
+
+    rename_to: str | None = None
+    parse_dtype: Any = None  # pl.DataType
+    cast_dtype: Any = None  # pl.DataType
+
+
+@dataclass
+class SchemaEvolution:
+    """Defines how raw data evolves into standard format.
+
+    Combines column renaming, parse-time types, post-load casting,
+    null value handling, and column exclusion into a single structure.
+
+    Attributes:
+        columns: Mapping from original column name to ColumnSpec.
+        null_values: Strings to treat as null at parse time.
+        drop: Column names to exclude from output.
+        parent: Optional parent schema for version inheritance.
+
+    Example:
+        >>> YLIN_V20251204 = SchemaEvolution(
+        ...     columns={
+        ...         "symbol": ColumnSpec(rename_to="ukey", parse_dtype=pl.Int64),
+        ...         "fillQty": ColumnSpec(
+        ...             rename_to="order_filled_qty",
+        ...             parse_dtype=pl.Float64,
+        ...             cast_dtype=pl.Int64,
+        ...         ),
+        ...     },
+        ...     null_values=["", "NA", "null"],
+        ...     drop=["#HFTORD"],
+        ... )
+    """
+
+    columns: dict[str, ColumnSpec] = field(default_factory=dict)
+    null_values: list[str] = field(default_factory=lambda: ["", "NA", "null"])
+    drop: list[str] = field(default_factory=list)
+    parent: SchemaEvolution | None = None
+
+    def get_schema_overrides(self) -> dict[str, Any]:
+        """Get schema_overrides dict for pl.scan_csv().
+
+        Returns:
+            Mapping from original column name to Polars dtype.
+        """
+        result = {}
+        if self.parent:
+            result.update(self.parent.get_schema_overrides())
+        for col_name, spec in self.columns.items():
+            if spec.parse_dtype is not None:
+                result[col_name] = spec.parse_dtype
+        return result
+
+    def get_rename_map(self) -> dict[str, str]:
+        """Get rename mapping dict for df.rename().
+
+        Returns:
+            Mapping from original column name to new name.
+        """
+        result = {}
+        if self.parent:
+            result.update(self.parent.get_rename_map())
+        for col_name, spec in self.columns.items():
+            if spec.rename_to is not None:
+                result[col_name] = spec.rename_to
+        return result
+
+    def get_cast_map(self) -> dict[str, Any]:
+        """Get post-load cast mapping dict.
+
+        Returns:
+            Mapping from FINAL column name (after rename) to cast dtype.
+        """
+        result = {}
+        if self.parent:
+            result.update(self.parent.get_cast_map())
+        for col_name, spec in self.columns.items():
+            if spec.cast_dtype is not None:
+                final_name = spec.rename_to or col_name
+                result[final_name] = spec.cast_dtype
+        return result
+
+    def get_drop_columns(self) -> set[str]:
+        """Get set of columns to drop.
+
+        Returns:
+            Set of original column names to exclude.
+        """
+        result = set()
+        if self.parent:
+            result.update(self.parent.get_drop_columns())
+        result.update(self.drop)
+        return result
+
+    def get_null_values(self) -> list[str]:
+        """Get list of null value strings.
+
+        Returns:
+            List of strings to treat as null at parse time.
+        """
+        return self.null_values
+
+    def validate(self) -> list[str]:
+        """Validate schema configuration.
+
+        Returns:
+            List of warnings about potential issues.
+        """
+        warnings = []
+        for col_name, spec in self.columns.items():
+            if spec.cast_dtype is not None and spec.parse_dtype is None:
+                warnings.append(
+                    f"{col_name}: cast_dtype without parse_dtype may fail "
+                    "if Polars infers wrong type"
+                )
+        return warnings
+
+
+# =============================================================================
+# YLIN Trade Format (v2025-12-04)
+# =============================================================================
+
+YLIN_V20251204 = SchemaEvolution(
+    columns={
+        # === Order columns (18) ===
+        "symbol": ColumnSpec(rename_to="ukey", parse_dtype=pl.Int64),
+        "orderId": ColumnSpec(rename_to="order_id", parse_dtype=pl.Int64),
+        "orderSide": ColumnSpec(rename_to="order_side", parse_dtype=pl.String),
+        "orderQty": ColumnSpec(
+            rename_to="order_qty",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "orderPrice": ColumnSpec(rename_to="order_price", parse_dtype=pl.Float64),
+        "priceType": ColumnSpec(rename_to="order_price_type", parse_dtype=pl.String),
+        "fillQty": ColumnSpec(
+            rename_to="order_filled_qty",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "fillPrice": ColumnSpec(rename_to="fill_price", parse_dtype=pl.Float64),
+        "lastExchangeTs": ColumnSpec(rename_to="update_exchange_ts", parse_dtype=pl.Int64),
+        "createdTs": ColumnSpec(rename_to="create_exchange_ts", parse_dtype=pl.Int64),
+        "localTs": ColumnSpec(rename_to="create_local_ts", parse_dtype=pl.Int64),
+        "qtyAhead": ColumnSpec(
+            rename_to="qty_ahead",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "qtyBehind": ColumnSpec(
+            rename_to="qty_behind",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "orderStatus": ColumnSpec(rename_to="order_curr_state", parse_dtype=pl.String),
+        "orderTposType": ColumnSpec(rename_to="order_tpos_type", parse_dtype=pl.String),
+        "alphaTs": ColumnSpec(rename_to="alpha_ts", parse_dtype=pl.Int64),
+        "event": ColumnSpec(rename_to="event_type", parse_dtype=pl.String),
+        "cumFilledNotional": ColumnSpec(
+            rename_to="order_filled_notional",
+            parse_dtype=pl.Float64,
+        ),
+        # === Quote columns (20) ===
+        "bid": ColumnSpec(rename_to="bid_px0", parse_dtype=pl.Float64),
+        "bid2": ColumnSpec(rename_to="bid_px1", parse_dtype=pl.Float64),
+        "bid3": ColumnSpec(rename_to="bid_px2", parse_dtype=pl.Float64),
+        "bid4": ColumnSpec(rename_to="bid_px3", parse_dtype=pl.Float64),
+        "bid5": ColumnSpec(rename_to="bid_px4", parse_dtype=pl.Float64),
+        "ask": ColumnSpec(rename_to="ask_px0", parse_dtype=pl.Float64),
+        "ask2": ColumnSpec(rename_to="ask_px1", parse_dtype=pl.Float64),
+        "ask3": ColumnSpec(rename_to="ask_px2", parse_dtype=pl.Float64),
+        "ask4": ColumnSpec(rename_to="ask_px3", parse_dtype=pl.Float64),
+        "ask5": ColumnSpec(rename_to="ask_px4", parse_dtype=pl.Float64),
+        "bsize": ColumnSpec(
+            rename_to="bid_size0",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "bsize2": ColumnSpec(
+            rename_to="bid_size1",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "bsize3": ColumnSpec(
+            rename_to="bid_size2",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "bsize4": ColumnSpec(
+            rename_to="bid_size3",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "bsize5": ColumnSpec(
+            rename_to="bid_size4",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "asize": ColumnSpec(
+            rename_to="ask_size0",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "asize2": ColumnSpec(
+            rename_to="ask_size1",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "asize3": ColumnSpec(
+            rename_to="ask_size2",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "asize4": ColumnSpec(
+            rename_to="ask_size3",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "asize5": ColumnSpec(
+            rename_to="ask_size4",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "isRebasedQuote": ColumnSpec(rename_to="is_rebased", parse_dtype=pl.String),
+        "quoteSeqNum": ColumnSpec(rename_to="seq_num", parse_dtype=pl.Int64),
+        "quoteTs": ColumnSpec(rename_to="timestamp", parse_dtype=pl.Int64),
+        # === Position columns (11) ===
+        "startPos": ColumnSpec(
+            rename_to="init_net_pos",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "pos": ColumnSpec(
+            rename_to="current_net_pos",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "realizedPos": ColumnSpec(
+            rename_to="current_realized_net_pos",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "openBuyPos": ColumnSpec(
+            rename_to="open_buy",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "openSellPos": ColumnSpec(
+            rename_to="open_sell",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "cumBuy": ColumnSpec(
+            rename_to="cum_buy",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "cumSell": ColumnSpec(
+            rename_to="cum_sell",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "cashFlow": ColumnSpec(rename_to="cash_flow", parse_dtype=pl.Float64),
+        "frozenCash": ColumnSpec(rename_to="frozen_cash", parse_dtype=pl.Float64),
+        "globalCumBuyNotional": ColumnSpec(
+            rename_to="cum_buy_filled_notional",
+            parse_dtype=pl.Float64,
+        ),
+        "globalCumSellNotional": ColumnSpec(
+            rename_to="cum_sell_filled_notional",
+            parse_dtype=pl.Float64,
+        ),
+    },
+    null_values=["", "NA", "null", "NULL"],
+    drop=["#HFTORD"],
+)
+
+
+# =============================================================================
+# JYAO Alpha Format (v2025-11-14)
+# =============================================================================
+
+JYAO_V20251114 = SchemaEvolution(
+    columns={
+        # Symbol column - parse_dtype for CSV, cast_dtype for feather/IPC
+        # (feather files have embedded types, so cast is needed post-load)
+        "ukey": ColumnSpec(parse_dtype=pl.Int64, cast_dtype=pl.Int64),
+        # Quote columns
+        "BidPrice1": ColumnSpec(rename_to="bid_px0", parse_dtype=pl.Float64),
+        "AskPrice1": ColumnSpec(rename_to="ask_px0", parse_dtype=pl.Float64),
+        "BidVolume1": ColumnSpec(
+            rename_to="bid_size0",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        "AskVolume1": ColumnSpec(
+            rename_to="ask_size0",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        # Time columns
+        "TimeStamp": ColumnSpec(rename_to="timestamp", parse_dtype=pl.Int64),
+        "GlobalExTime": ColumnSpec(rename_to="global_exchange_ts", parse_dtype=pl.Int64),
+        "DataDate": ColumnSpec(rename_to="data_date", parse_dtype=pl.String),
+        # Volume
+        "Volume": ColumnSpec(
+            rename_to="volume",
+            parse_dtype=pl.Float64,
+            cast_dtype=pl.Int64,
+        ),
+        # Predictor columns (x_* = alpha predictions)
+        # Rule: ≤60s → s, >60s → m
+        "x10s": ColumnSpec(rename_to="x_10s", parse_dtype=pl.Float64),
+        "x60s": ColumnSpec(rename_to="x_60s", parse_dtype=pl.Float64),
+        "alpha1": ColumnSpec(rename_to="x_3m", parse_dtype=pl.Float64),
+        "alpha2": ColumnSpec(rename_to="x_30m", parse_dtype=pl.Float64),
+    },
+    null_values=["", "NA"],
+)
+
+
+# =============================================================================
+# Schema Registry
+# =============================================================================
+
+SCHEMAS: dict[str, SchemaEvolution] = {
+    "ylin_v20251204": YLIN_V20251204,
+    "jyao_v20251114": JYAO_V20251114,
+}
+
+
+def get_schema(name: str | None) -> SchemaEvolution | None:
+    """Get SchemaEvolution by name.
+
+    Args:
+        name: Schema name (e.g., "ylin_v20251204") or None.
+
+    Returns:
+        SchemaEvolution or None if name is None or not found.
+    """
+    if not name:
+        return None
+    return SCHEMAS.get(name.lower())
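The parent field supports version inheritance, which the module defines but never demonstrates; a sketch with a hypothetical successor schema (editor's note, names invented):

    import polars as pl
    from vizflow.schema_evolution import ColumnSpec, SchemaEvolution, YLIN_V20251204

    # Hypothetical next version: inherits every YLIN_V20251204 rule, adds one column.
    YLIN_NEXT = SchemaEvolution(
        parent=YLIN_V20251204,
        columns={"newField": ColumnSpec(rename_to="new_field", parse_dtype=pl.Float64)},
    )
    assert YLIN_NEXT.get_rename_map()["symbol"] == "ukey"         # inherited
    assert YLIN_NEXT.get_rename_map()["newField"] == "new_field"  # added
    assert "#HFTORD" in YLIN_NEXT.get_drop_columns()              # drops accumulate
    # Note: get_null_values() returns only the child's null_values; the
    # parent's null_values are not merged.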
vizflow-0.5.3.dist-info/RECORD
ADDED

@@ -0,0 +1,10 @@
+vizflow/__init__.py,sha256=pO3pTaKj7LNyuQlTrCHau9nBymhNGdmiImgJXH5lxt4,529
+vizflow/config.py,sha256=y4vRvjVTa1H5AdQf0q_XhYr-3EBDJst8BJq52ODN3uk,6456
+vizflow/io.py,sha256=eheqyLHGiSh69erxMk98FK-GYycbSheqkrIYRYGFy3A,7687
+vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
+vizflow/ops.py,sha256=4UwxOTPhvZ1_4PI3pcxbXfLAYsn1Ecj6nyBtBBr7KS8,7761
+vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
+vizflow-0.5.3.dist-info/METADATA,sha256=e8LTSGVMiHLNcTjMaAO11Lj5xYa6etsoG4yBuxI8E64,388
+vizflow-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+vizflow-0.5.3.dist-info/RECORD,,
vizflow/presets.py
DELETED

@@ -1,87 +0,0 @@
-"""Column mapping presets for VizFlow."""
-
-# ylin's trade format (v2025-12-04)
-YLIN_V20251204 = {
-    # Order columns (18)
-    "symbol": "ukey",
-    "orderId": "order_id",
-    "orderSide": "order_side",
-    "orderQty": "order_qty",
-    "orderPrice": "order_price",
-    "priceType": "order_price_type",
-    "fillQty": "order_filled_qty",
-    "fillPrice": "fill_price",
-    "lastExchangeTs": "update_exchange_ts",
-    "createdTs": "create_exchange_ts",
-    "localTs": "create_local_ts",
-    "qtyAhead": "qty_ahead",
-    "qtyBehind": "qty_behind",
-    "orderStatus": "order_curr_state",
-    "orderTposType": "order_tpos_type",
-    "alphaTs": "alpha_ts",
-    "event": "event_type",
-    "cumFilledNotional": "order_filled_notional",
-    # Quote columns (15)
-    "bid": "bid_px0",
-    "bid2": "bid_px1",
-    "bid3": "bid_px2",
-    "bid4": "bid_px3",
-    "bid5": "bid_px4",
-    "ask": "ask_px0",
-    "ask2": "ask_px1",
-    "ask3": "ask_px2",
-    "ask4": "ask_px3",
-    "ask5": "ask_px4",
-    "bsize": "bid_size0",
-    "bsize2": "bid_size1",
-    "bsize3": "bid_size2",
-    "bsize4": "bid_size3",
-    "bsize5": "bid_size4",
-    "asize": "ask_size0",
-    "asize2": "ask_size1",
-    "asize3": "ask_size2",
-    "asize4": "ask_size3",
-    "asize5": "ask_size4",
-    "isRebasedQuote": "is_rebased",
-    "quoteSeqNum": "seq_num",
-    "quoteTs": "timestamp",
-    # Position columns (11)
-    "startPos": "init_net_pos",
-    "pos": "current_net_pos",
-    "realizedPos": "current_realized_net_pos",
-    "openBuyPos": "open_buy",
-    "openSellPos": "open_sell",
-    "cumBuy": "cum_buy",
-    "cumSell": "cum_sell",
-    "cashFlow": "cash_flow",
-    "frozenCash": "frozen_cash",
-    "globalCumBuyNotional": "cum_buy_filled_notional",
-    "globalCumSellNotional": "cum_sell_filled_notional",
-}
-
-# jyao's alpha format (v2025-11-14)
-JYAO_V20251114 = {
-    # Quote columns
-    "BidPrice1": "bid_px0",
-    "AskPrice1": "ask_px0",
-    "BidVolume1": "bid_size0",
-    "AskVolume1": "ask_size0",
-    # Time columns
-    "TimeStamp": "timestamp",
-    "GlobalExTime": "global_exchange_ts",
-    "DataDate": "data_date",
-    # Volume
-    "Volume": "volume",
-    # Predictor columns (x_* = alpha predictions)
-    # Rule: ≤60s → s, >60s → m
-    "x10s": "x_10s",
-    "x60s": "x_60s",
-    "alpha1": "x_3m",
-    "alpha2": "x_30m",
-}
-
-# Preset registry for dynamic lookup
-PRESETS: dict[str, dict[str, str]] = {
-    "ylin_v20251204": YLIN_V20251204,
-    "jyao_v20251114": JYAO_V20251114,
-}
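Migration note (an inference from this diff, not from upstream docs): code that pointed trade_preset / alpha_preset at presets.py should move to the schema registry; the same version strings now key SCHEMAS and carry types and drops as well as renames. A rough sketch:

    import vizflow as vf

    # 0.5.1 (removed):  vf.Config(trade_preset="ylin_v20251204",
    #                             alpha_preset="jyao_v20251114", ...)
    # 0.5.3 equivalent: the same names now key the SCHEMAS registry.
    config = vf.Config(
        trade_schema="ylin_v20251204",   # rename + parse/cast + drop in one place
        alpha_schema="jyao_v20251114",
    )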
vizflow-0.5.1.dist-info/RECORD
DELETED

@@ -1,10 +0,0 @@
-vizflow/__init__.py,sha256=EJ8qF4o2grf4aSochaasaaf0unyXV5yhrMs6rAhyp7k,496
-vizflow/config.py,sha256=JNW5-TshQ1v-Ft3-VV0JYJ5PdC3Yhgy4fW0AV0RWzkE,7322
-vizflow/io.py,sha256=ypiEuuPoHFKSt6VnhXcEI7u7dyVjKORunjqGpkFauXM,8877
-vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
-vizflow/ops.py,sha256=4UwxOTPhvZ1_4PI3pcxbXfLAYsn1Ecj6nyBtBBr7KS8,7761
-vizflow/presets.py,sha256=h91NZoOH4YAx0bbsaNigECf9WOcWh1QZavguunWkaLE,2452
-vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vizflow-0.5.1.dist-info/METADATA,sha256=DHzwPBvYuj7Rc4BawcXD2juS5iR5UD1FnAxt3cgvpo4,388
-vizflow-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-vizflow-0.5.1.dist-info/RECORD,,

{vizflow-0.5.1.dist-info → vizflow-0.5.3.dist-info}/WHEEL
File without changes