vizflow 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vizflow/__init__.py +4 -2
- vizflow/config.py +153 -24
- vizflow/io.py +239 -0
- vizflow/ops.py +66 -37
- vizflow/presets.py +63 -0
- {vizflow-0.3.0.dist-info → vizflow-0.4.1.dist-info}/METADATA +1 -1
- vizflow-0.4.1.dist-info/RECORD +10 -0
- vizflow-0.3.0.dist-info/RECORD +0 -8
- {vizflow-0.3.0.dist-info → vizflow-0.4.1.dist-info}/WHEEL +0 -0
vizflow/__init__.py
CHANGED
|
@@ -5,8 +5,10 @@ Usage:
|
|
|
5
5
|
import vizflow as vf
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.3.0"
|
|
8
|
+
__version__ = "0.4.1"
|
|
9
9
|
|
|
10
|
-
from .config import Config
|
|
10
|
+
from .config import ColumnSchema, Config, get_config, set_config
|
|
11
|
+
from .io import load_alpha, load_calendar, load_trade, scan_trade, scan_trades
|
|
11
12
|
from .market import CN, CRYPTO, Market, Session
|
|
12
13
|
from .ops import aggregate, bin, parse_time
|
|
14
|
+
from .presets import YLIN
|
vizflow/config.py
CHANGED
|
@@ -4,6 +4,25 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
6
|
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
# Global config instance
|
|
10
|
+
_global_config: Config | None = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class ColumnSchema:
|
|
15
|
+
"""Schema for a column with type casting.
|
|
16
|
+
|
|
17
|
+
Attributes:
|
|
18
|
+
cast_to: Target type after casting (e.g. pl.Int64)
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
# Handle float precision errors: 1.00000002 → 1
|
|
22
|
+
ColumnSchema(cast_to=pl.Int64)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
cast_to: Any # pl.DataType, but avoid import for now
|
|
7
26
|
|
|
8
27
|
|
|
9
28
|
@dataclass
|
|
@@ -11,63 +30,173 @@ class Config:
|
|
|
11
30
|
"""Central configuration for a pipeline run.
|
|
12
31
|
|
|
13
32
|
Attributes:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
33
|
+
alpha_dir: Directory containing alpha files
|
|
34
|
+
alpha_pattern: Pattern for alpha files, e.g. "alpha_{date}.feather"
|
|
35
|
+
trade_dir: Directory containing trade files
|
|
36
|
+
trade_pattern: Pattern for trade files, e.g. "trade_{date}.feather"
|
|
37
|
+
calendar_path: Path to calendar parquet file
|
|
38
|
+
replay_dir: Directory for FIFO replay output (materialization 1)
|
|
39
|
+
aggregate_dir: Directory for aggregation output (materialization 2)
|
|
40
|
+
market: Market identifier, e.g. "CN"
|
|
41
|
+
alpha_columns: Mapping from semantic names to alpha column names
|
|
42
|
+
trade_columns: Mapping from semantic names to trade column names
|
|
43
|
+
alpha_schema: Schema evolution for alpha columns
|
|
44
|
+
trade_schema: Schema evolution for trade columns
|
|
19
45
|
binwidths: Mapping from column names to bin widths
|
|
46
|
+
group_by: Columns to group by in aggregation
|
|
20
47
|
horizons: List of forward return horizons in seconds
|
|
21
48
|
time_cutoff: Optional time cutoff (e.g. 143000000 for 14:30:00)
|
|
22
49
|
"""
|
|
23
50
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
51
|
+
# === Input Paths ===
|
|
52
|
+
alpha_dir: Path | None = None
|
|
53
|
+
alpha_pattern: str = "alpha_{date}.feather"
|
|
54
|
+
trade_dir: Path | None = None
|
|
55
|
+
trade_pattern: str = "trade_{date}.feather"
|
|
56
|
+
calendar_path: Path | None = None
|
|
57
|
+
|
|
58
|
+
# === Output Paths ===
|
|
59
|
+
replay_dir: Path | None = None # FIFO output (materialization 1)
|
|
60
|
+
aggregate_dir: Path | None = None # Aggregation output (materialization 2)
|
|
61
|
+
|
|
62
|
+
# === Market ===
|
|
27
63
|
market: str = "CN"
|
|
28
|
-
|
|
64
|
+
|
|
65
|
+
# === Column Mapping ===
|
|
66
|
+
alpha_columns: dict[str, str] = field(default_factory=dict)
|
|
67
|
+
trade_columns: dict[str, str] = field(default_factory=dict)
|
|
68
|
+
|
|
69
|
+
# === Schema Evolution ===
|
|
70
|
+
alpha_schema: dict[str, ColumnSchema] = field(default_factory=dict)
|
|
71
|
+
trade_schema: dict[str, ColumnSchema] = field(default_factory=dict)
|
|
72
|
+
|
|
73
|
+
# === Column Rename (preset or custom map) ===
|
|
74
|
+
column_preset: str | None = None # "ylin" or None
|
|
75
|
+
column_rename: dict[str, str] = field(default_factory=dict) # Custom rename map
|
|
76
|
+
|
|
77
|
+
# === Aggregation ===
|
|
29
78
|
binwidths: dict[str, float] = field(default_factory=dict)
|
|
30
79
|
group_by: list[str] = field(default_factory=list)
|
|
80
|
+
|
|
81
|
+
# === Analysis ===
|
|
31
82
|
horizons: list[int] = field(default_factory=list)
|
|
32
83
|
time_cutoff: int | None = None
|
|
33
84
|
|
|
34
85
|
def __post_init__(self):
|
|
35
86
|
"""Convert paths to Path objects if needed."""
|
|
36
|
-
if isinstance(self.
|
|
37
|
-
self.
|
|
38
|
-
if isinstance(self.
|
|
39
|
-
self.
|
|
40
|
-
|
|
41
|
-
|
|
87
|
+
if isinstance(self.alpha_dir, str):
|
|
88
|
+
self.alpha_dir = Path(self.alpha_dir)
|
|
89
|
+
if isinstance(self.trade_dir, str):
|
|
90
|
+
self.trade_dir = Path(self.trade_dir)
|
|
91
|
+
if isinstance(self.calendar_path, str):
|
|
92
|
+
self.calendar_path = Path(self.calendar_path)
|
|
93
|
+
if isinstance(self.replay_dir, str):
|
|
94
|
+
self.replay_dir = Path(self.replay_dir)
|
|
95
|
+
if isinstance(self.aggregate_dir, str):
|
|
96
|
+
self.aggregate_dir = Path(self.aggregate_dir)
|
|
97
|
+
|
|
98
|
+
def col(self, semantic: str, source: str = "trade") -> str:
|
|
42
99
|
"""Get actual column name from semantic name.
|
|
43
100
|
|
|
44
101
|
Args:
|
|
45
102
|
semantic: Semantic column name (e.g. "timestamp", "price")
|
|
103
|
+
source: "alpha" or "trade"
|
|
46
104
|
|
|
47
105
|
Returns:
|
|
48
106
|
Actual column name, or the semantic name if no mapping exists
|
|
49
107
|
"""
|
|
50
|
-
|
|
108
|
+
if source == "alpha":
|
|
109
|
+
return self.alpha_columns.get(semantic, semantic)
|
|
110
|
+
return self.trade_columns.get(semantic, semantic)
|
|
51
111
|
|
|
52
|
-
def
|
|
53
|
-
"""Get
|
|
112
|
+
def get_alpha_path(self, date: str) -> Path:
|
|
113
|
+
"""Get alpha file path for a date.
|
|
54
114
|
|
|
55
115
|
Args:
|
|
56
116
|
date: Date string, e.g. "20241001"
|
|
57
117
|
|
|
58
118
|
Returns:
|
|
59
|
-
Full path to
|
|
119
|
+
Full path to alpha file
|
|
120
|
+
|
|
121
|
+
Raises:
|
|
122
|
+
ValueError: If alpha_dir is not set
|
|
60
123
|
"""
|
|
61
|
-
|
|
124
|
+
if self.alpha_dir is None:
|
|
125
|
+
raise ValueError("alpha_dir is not set in Config")
|
|
126
|
+
return self.alpha_dir / self.alpha_pattern.format(date=date)
|
|
62
127
|
|
|
63
|
-
def
|
|
64
|
-
"""Get
|
|
128
|
+
def get_trade_path(self, date: str) -> Path:
|
|
129
|
+
"""Get trade file path for a date.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
date: Date string, e.g. "20241001"
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
Full path to trade file
|
|
136
|
+
|
|
137
|
+
Raises:
|
|
138
|
+
ValueError: If trade_dir is not set
|
|
139
|
+
"""
|
|
140
|
+
if self.trade_dir is None:
|
|
141
|
+
raise ValueError("trade_dir is not set in Config")
|
|
142
|
+
return self.trade_dir / self.trade_pattern.format(date=date)
|
|
143
|
+
|
|
144
|
+
def get_replay_path(self, date: str, suffix: str = ".parquet") -> Path:
|
|
145
|
+
"""Get replay output file path for a date (FIFO results).
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
date: Date string, e.g. "20241001"
|
|
149
|
+
suffix: File suffix, default ".parquet"
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
Full path to replay output file
|
|
153
|
+
|
|
154
|
+
Raises:
|
|
155
|
+
ValueError: If replay_dir is not set
|
|
156
|
+
"""
|
|
157
|
+
if self.replay_dir is None:
|
|
158
|
+
raise ValueError("replay_dir is not set in Config")
|
|
159
|
+
return self.replay_dir / f"{date}{suffix}"
|
|
160
|
+
|
|
161
|
+
def get_aggregate_path(self, date: str, suffix: str = ".parquet") -> Path:
|
|
162
|
+
"""Get aggregate output file path for a date (partial results).
|
|
65
163
|
|
|
66
164
|
Args:
|
|
67
165
|
date: Date string, e.g. "20241001"
|
|
68
166
|
suffix: File suffix, default ".parquet"
|
|
69
167
|
|
|
70
168
|
Returns:
|
|
71
|
-
Full path to output file
|
|
169
|
+
Full path to aggregate output file
|
|
170
|
+
|
|
171
|
+
Raises:
|
|
172
|
+
ValueError: If aggregate_dir is not set
|
|
72
173
|
"""
|
|
73
|
-
|
|
174
|
+
if self.aggregate_dir is None:
|
|
175
|
+
raise ValueError("aggregate_dir is not set in Config")
|
|
176
|
+
return self.aggregate_dir / f"{date}{suffix}"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def set_config(config: Config) -> None:
|
|
180
|
+
"""Set the global config.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
config: Config instance to set as global
|
|
184
|
+
"""
|
|
185
|
+
global _global_config
|
|
186
|
+
_global_config = config
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def get_config() -> Config:
|
|
190
|
+
"""Get the global config.
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
The global Config instance
|
|
194
|
+
|
|
195
|
+
Raises:
|
|
196
|
+
RuntimeError: If config has not been set via set_config()
|
|
197
|
+
"""
|
|
198
|
+
if _global_config is None:
|
|
199
|
+
raise RuntimeError(
|
|
200
|
+
"Config not set. Call vf.set_config(config) first."
|
|
201
|
+
)
|
|
202
|
+
return _global_config
|
vizflow/io.py
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""I/O functions for VizFlow with automatic schema evolution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from .config import Config, get_config
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
16
|
+
"""Load alpha data for a date with automatic schema evolution.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
date: Date string, e.g. "20241001"
|
|
20
|
+
config: Config to use, or get_config() if None
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
LazyFrame with schema evolution applied
|
|
24
|
+
|
|
25
|
+
Example:
|
|
26
|
+
>>> config = vf.Config(
|
|
27
|
+
... aggregate_dir=Path("/data/output"),
|
|
28
|
+
... alpha_dir=Path("/data/alpha"),
|
|
29
|
+
... alpha_schema={"qty": vf.ColumnSchema(cast_to=pl.Int64)}
|
|
30
|
+
... )
|
|
31
|
+
>>> vf.set_config(config)
|
|
32
|
+
>>> alpha = vf.load_alpha("20241001")
|
|
33
|
+
"""
|
|
34
|
+
config = config or get_config()
|
|
35
|
+
path = config.get_alpha_path(date)
|
|
36
|
+
df = pl.scan_ipc(path)
|
|
37
|
+
|
|
38
|
+
# Apply schema evolution (type casting)
|
|
39
|
+
for col_name, schema in config.alpha_schema.items():
|
|
40
|
+
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
41
|
+
|
|
42
|
+
return df
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
46
|
+
"""Load trade data for a date with automatic schema evolution.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
date: Date string, e.g. "20241001"
|
|
50
|
+
config: Config to use, or get_config() if None
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
LazyFrame with schema evolution applied
|
|
54
|
+
|
|
55
|
+
Example:
|
|
56
|
+
>>> config = vf.Config(
|
|
57
|
+
... aggregate_dir=Path("/data/output"),
|
|
58
|
+
... trade_dir=Path("/data/trade"),
|
|
59
|
+
... trade_schema={"qty": vf.ColumnSchema(cast_to=pl.Int64)}
|
|
60
|
+
... )
|
|
61
|
+
>>> vf.set_config(config)
|
|
62
|
+
>>> trade = vf.load_trade("20241001") # qty: 1.00000002 → 1
|
|
63
|
+
"""
|
|
64
|
+
config = config or get_config()
|
|
65
|
+
path = config.get_trade_path(date)
|
|
66
|
+
df = pl.scan_ipc(path)
|
|
67
|
+
|
|
68
|
+
# Apply schema evolution (type casting)
|
|
69
|
+
for col_name, schema in config.trade_schema.items():
|
|
70
|
+
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
71
|
+
|
|
72
|
+
return df
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def load_calendar(config: Config | None = None) -> pl.DataFrame:
|
|
76
|
+
"""Load trading calendar.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
config: Config to use, or get_config() if None
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
DataFrame with date, prev_date, next_date columns
|
|
83
|
+
|
|
84
|
+
Raises:
|
|
85
|
+
ValueError: If calendar_path is not set in config
|
|
86
|
+
|
|
87
|
+
Example:
|
|
88
|
+
>>> config = vf.Config(
|
|
89
|
+
... aggregate_dir=Path("/data/output"),
|
|
90
|
+
... calendar_path=Path("/data/calendar.parquet")
|
|
91
|
+
... )
|
|
92
|
+
>>> vf.set_config(config)
|
|
93
|
+
>>> calendar = vf.load_calendar()
|
|
94
|
+
"""
|
|
95
|
+
config = config or get_config()
|
|
96
|
+
if config.calendar_path is None:
|
|
97
|
+
raise ValueError("calendar_path is not set in Config")
|
|
98
|
+
return pl.read_parquet(config.calendar_path)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _scan_file(path) -> pl.LazyFrame:
|
|
102
|
+
"""Scan a file based on its extension.
|
|
103
|
+
|
|
104
|
+
Args:
|
|
105
|
+
path: Path to file
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
LazyFrame from the file
|
|
109
|
+
|
|
110
|
+
Supported formats:
|
|
111
|
+
- .feather, .ipc, .arrow: IPC format (pl.scan_ipc)
|
|
112
|
+
- .csv, .meords: CSV format (pl.scan_csv)
|
|
113
|
+
- .parquet: Parquet format (pl.scan_parquet)
|
|
114
|
+
"""
|
|
115
|
+
suffix = str(path).lower().split(".")[-1]
|
|
116
|
+
|
|
117
|
+
if suffix in ("feather", "ipc", "arrow"):
|
|
118
|
+
return pl.scan_ipc(path)
|
|
119
|
+
elif suffix in ("csv", "meords"):
|
|
120
|
+
return pl.scan_csv(path)
|
|
121
|
+
elif suffix == "parquet":
|
|
122
|
+
return pl.scan_parquet(path)
|
|
123
|
+
else:
|
|
124
|
+
# Default to IPC
|
|
125
|
+
return pl.scan_ipc(path)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
129
|
+
"""Scan single date trade file with column mapping.
|
|
130
|
+
|
|
131
|
+
Supports IPC/feather, CSV (including .meords files), and Parquet formats, selected by file extension.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
date: Date string, e.g. "20241001"
|
|
135
|
+
config: Config to use, or get_config() if None
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
LazyFrame with column mapping and schema evolution applied
|
|
139
|
+
|
|
140
|
+
Example:
|
|
141
|
+
>>> config = vf.Config(
|
|
142
|
+
... trade_dir=Path("/data/yuanzhao/"),
|
|
143
|
+
... trade_pattern="{date}.meords",
|
|
144
|
+
... column_preset="ylin",
|
|
145
|
+
... )
|
|
146
|
+
>>> vf.set_config(config)
|
|
147
|
+
>>> df = vf.scan_trade("20241001")
|
|
148
|
+
"""
|
|
149
|
+
config = config or get_config()
|
|
150
|
+
path = config.get_trade_path(date)
|
|
151
|
+
df = _scan_file(path)
|
|
152
|
+
return _apply_mapping(df, config)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def scan_trades(config: Config | None = None) -> pl.LazyFrame:
|
|
156
|
+
"""Scan all trade files with column mapping.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
config: Config to use, or get_config() if None
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
LazyFrame with column mapping and schema evolution applied
|
|
163
|
+
|
|
164
|
+
Raises:
|
|
165
|
+
ValueError: If trade_dir is not set or no files found
|
|
166
|
+
|
|
167
|
+
Example:
|
|
168
|
+
>>> config = vf.Config(
|
|
169
|
+
... trade_dir=Path("/data/yuanzhao/"),
|
|
170
|
+
... trade_pattern="{date}.feather",
|
|
171
|
+
... column_preset="ylin",
|
|
172
|
+
... )
|
|
173
|
+
>>> vf.set_config(config)
|
|
174
|
+
>>> df = vf.scan_trades()
|
|
175
|
+
"""
|
|
176
|
+
config = config or get_config()
|
|
177
|
+
if config.trade_dir is None:
|
|
178
|
+
raise ValueError("trade_dir is not set in Config")
|
|
179
|
+
|
|
180
|
+
pattern = config.trade_pattern.replace("{date}", "*")
|
|
181
|
+
files = sorted(config.trade_dir.glob(pattern))
|
|
182
|
+
if not files:
|
|
183
|
+
raise ValueError(f"No files found matching {pattern} in {config.trade_dir}")
|
|
184
|
+
|
|
185
|
+
# Concatenate all files using lazy scanning
|
|
186
|
+
dfs = [_scan_file(f) for f in files]
|
|
187
|
+
df = pl.concat(dfs)
|
|
188
|
+
return _apply_mapping(df, config)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _apply_mapping(df: pl.LazyFrame, config: Config) -> pl.LazyFrame:
|
|
192
|
+
"""Apply column rename + schema evolution.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
df: Input LazyFrame
|
|
196
|
+
config: Config with mapping settings
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
LazyFrame with mapping applied
|
|
200
|
+
"""
|
|
201
|
+
# Drop record type prefix column if present (from CSV files)
|
|
202
|
+
existing = set(df.collect_schema().names())
|
|
203
|
+
if "#HFTORD" in existing:
|
|
204
|
+
df = df.drop("#HFTORD")
|
|
205
|
+
existing.remove("#HFTORD")
|
|
206
|
+
|
|
207
|
+
# Get rename map from preset or custom
|
|
208
|
+
rename_map = _get_rename_map(config)
|
|
209
|
+
|
|
210
|
+
if rename_map:
|
|
211
|
+
existing = set(df.collect_schema().names())
|
|
212
|
+
to_rename = {k: v for k, v in rename_map.items() if k in existing}
|
|
213
|
+
if to_rename:
|
|
214
|
+
df = df.rename(to_rename)
|
|
215
|
+
|
|
216
|
+
# Schema evolution (type casting) - use renamed column names
|
|
217
|
+
for col_name, schema in config.trade_schema.items():
|
|
218
|
+
df = df.with_columns(pl.col(col_name).cast(schema.cast_to))
|
|
219
|
+
|
|
220
|
+
return df
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _get_rename_map(config: Config) -> dict[str, str]:
|
|
224
|
+
"""Get rename map from preset name or custom dict.
|
|
225
|
+
|
|
226
|
+
Args:
|
|
227
|
+
config: Config with column_preset or column_rename
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
Dict mapping old column names to new names
|
|
231
|
+
"""
|
|
232
|
+
if config.column_rename:
|
|
233
|
+
return config.column_rename
|
|
234
|
+
if config.column_preset:
|
|
235
|
+
from .presets import YLIN
|
|
236
|
+
|
|
237
|
+
presets = {"ylin": YLIN}
|
|
238
|
+
return presets.get(config.column_preset.lower(), {})
|
|
239
|
+
return {}
|
vizflow/ops.py
CHANGED
|
@@ -4,61 +4,90 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import polars as pl
|
|
6
6
|
|
|
7
|
-
from .
|
|
7
|
+
from .config import get_config
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def parse_time(
|
|
11
11
|
df: pl.LazyFrame,
|
|
12
|
-
|
|
13
|
-
timestamp_col: str = "timestamp",
|
|
12
|
+
timestamp_col: str = "ticktime",
|
|
14
13
|
) -> pl.LazyFrame:
|
|
15
|
-
"""
|
|
14
|
+
"""Parse HHMMSSMMM timestamp to time-of-day and elapsed milliseconds.
|
|
15
|
+
|
|
16
|
+
Adds two columns:
|
|
17
|
+
- tod_{timestamp_col}: pl.Time (time-of-day HH:MM:SS.mmm) - good for plotting
|
|
18
|
+
- elapsed_{timestamp_col}: pl.Int64 (milliseconds since market open)
|
|
16
19
|
|
|
17
20
|
Args:
|
|
18
21
|
df: Input LazyFrame
|
|
19
|
-
|
|
20
|
-
timestamp_col: Column with integer timestamp (H/HHMMSSMMM format)
|
|
22
|
+
timestamp_col: Column with integer HHMMSSMMM format timestamps
|
|
21
23
|
e.g., 93012145 = 09:30:12.145, 142058425 = 14:20:58.425
|
|
22
24
|
|
|
23
25
|
Returns:
|
|
24
|
-
LazyFrame with
|
|
25
|
-
e.g., 09:30:12.145 → 12.145 (12 seconds + 145ms into trading)
|
|
26
|
+
LazyFrame with tod and elapsed columns added
|
|
26
27
|
|
|
27
28
|
Raises:
|
|
29
|
+
RuntimeError: If config not set via set_config()
|
|
28
30
|
NotImplementedError: If market is not CN
|
|
31
|
+
|
|
32
|
+
Example:
|
|
33
|
+
>>> config = vf.Config(market="CN")
|
|
34
|
+
>>> vf.set_config(config)
|
|
35
|
+
>>> df = vf.parse_time(df, "ticktime")
|
|
36
|
+
>>> # Creates: tod_ticktime (pl.Time), elapsed_ticktime (pl.Int64)
|
|
29
37
|
"""
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# Parse
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
.
|
|
47
|
-
.
|
|
48
|
-
.
|
|
49
|
-
.
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
38
|
+
config = get_config()
|
|
39
|
+
|
|
40
|
+
if config.market != "CN":
|
|
41
|
+
raise NotImplementedError(f"Market {config.market} not supported yet")
|
|
42
|
+
|
|
43
|
+
# Parse HHMMSSMMM to components
|
|
44
|
+
df = df.with_columns([
|
|
45
|
+
(pl.col(timestamp_col) // 10000000).alias("_hour"),
|
|
46
|
+
(pl.col(timestamp_col) // 100000 % 100).alias("_minute"),
|
|
47
|
+
(pl.col(timestamp_col) // 1000 % 100).alias("_second"),
|
|
48
|
+
(pl.col(timestamp_col) % 1000).alias("_ms"),
|
|
49
|
+
])
|
|
50
|
+
|
|
51
|
+
# Add time-of-day column (pl.Time)
|
|
52
|
+
# Convert to nanoseconds since midnight
|
|
53
|
+
tod_ns = (
|
|
54
|
+
pl.col("_hour") * 3_600_000_000_000
|
|
55
|
+
+ pl.col("_minute") * 60_000_000_000
|
|
56
|
+
+ pl.col("_second") * 1_000_000_000
|
|
57
|
+
+ pl.col("_ms") * 1_000_000
|
|
58
|
+
)
|
|
59
|
+
df = df.with_columns(tod_ns.cast(pl.Time).alias(f"tod_{timestamp_col}"))
|
|
60
|
+
|
|
61
|
+
# Add elapsed milliseconds (int)
|
|
62
|
+
# CN market: 09:30-11:30 (morning), 13:00-15:00 (afternoon)
|
|
63
|
+
# Session start times below are hard-coded for the CN market
|
|
64
|
+
elapsed_ms = (
|
|
65
|
+
pl.when(pl.col("_hour") < 12)
|
|
66
|
+
.then(
|
|
67
|
+
# Morning session: from 09:30:00.000
|
|
68
|
+
(
|
|
69
|
+
(pl.col("_hour") - 9) * 3600
|
|
70
|
+
+ (pl.col("_minute") - 30) * 60
|
|
71
|
+
+ pl.col("_second")
|
|
72
|
+
)
|
|
73
|
+
* 1000
|
|
74
|
+
+ pl.col("_ms")
|
|
75
|
+
)
|
|
76
|
+
.otherwise(
|
|
77
|
+
# Afternoon session: 2 hours of morning + time since 13:00
|
|
78
|
+
(
|
|
79
|
+
2 * 3600 + (pl.col("_hour") - 13) * 3600 + pl.col("_minute") * 60 + pl.col("_second")
|
|
80
|
+
)
|
|
81
|
+
* 1000
|
|
82
|
+
+ pl.col("_ms")
|
|
83
|
+
)
|
|
56
84
|
)
|
|
85
|
+
df = df.with_columns(elapsed_ms.cast(pl.Int64).alias(f"elapsed_{timestamp_col}"))
|
|
57
86
|
|
|
58
|
-
#
|
|
59
|
-
|
|
87
|
+
# Drop temporary columns
|
|
88
|
+
df = df.drop(["_hour", "_minute", "_second", "_ms"])
|
|
60
89
|
|
|
61
|
-
return df
|
|
90
|
+
return df
|
|
62
91
|
|
|
63
92
|
|
|
64
93
|
def bin(df: pl.LazyFrame, widths: dict[str, float]) -> pl.LazyFrame:
|
vizflow/presets.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Column mapping presets for VizFlow."""
|
|
2
|
+
|
|
3
|
+
# Yuanzhao's log format -> VizFlow standard
|
|
4
|
+
YUANZHAO = {
|
|
5
|
+
# Order columns (18)
|
|
6
|
+
"symbol": "ukey",
|
|
7
|
+
"orderId": "order_id",
|
|
8
|
+
"orderSide": "order_side",
|
|
9
|
+
"orderQty": "order_qty",
|
|
10
|
+
"orderPrice": "order_price",
|
|
11
|
+
"priceType": "order_price_type",
|
|
12
|
+
"fillQty": "order_filled_qty",
|
|
13
|
+
"fillPrice": "fill_price",
|
|
14
|
+
"lastExchangeTs": "update_exchange_ts",
|
|
15
|
+
"createdTs": "create_exchange_ts",
|
|
16
|
+
"localTs": "create_local_ts",
|
|
17
|
+
"qtyAhead": "qty_ahead",
|
|
18
|
+
"qtyBehind": "qty_behind",
|
|
19
|
+
"orderStatus": "order_curr_state",
|
|
20
|
+
"orderTposType": "order_tpos_type",
|
|
21
|
+
"alphaTs": "alpha_ts",
|
|
22
|
+
"event": "event_type",
|
|
23
|
+
"cumFilledNotional": "order_filled_notional",
|
|
24
|
+
# Quote columns (15)
|
|
25
|
+
"bid": "bid_px0",
|
|
26
|
+
"bid2": "bid_px1",
|
|
27
|
+
"bid3": "bid_px2",
|
|
28
|
+
"bid4": "bid_px3",
|
|
29
|
+
"bid5": "bid_px4",
|
|
30
|
+
"ask": "ask_px0",
|
|
31
|
+
"ask2": "ask_px1",
|
|
32
|
+
"ask3": "ask_px2",
|
|
33
|
+
"ask4": "ask_px3",
|
|
34
|
+
"ask5": "ask_px4",
|
|
35
|
+
"bsize": "bid_size0",
|
|
36
|
+
"bsize2": "bid_size1",
|
|
37
|
+
"bsize3": "bid_size2",
|
|
38
|
+
"bsize4": "bid_size3",
|
|
39
|
+
"bsize5": "bid_size4",
|
|
40
|
+
"asize": "ask_size0",
|
|
41
|
+
"asize2": "ask_size1",
|
|
42
|
+
"asize3": "ask_size2",
|
|
43
|
+
"asize4": "ask_size3",
|
|
44
|
+
"asize5": "ask_size4",
|
|
45
|
+
"isRebasedQuote": "is_rebased",
|
|
46
|
+
"quoteSeqNum": "seq_num",
|
|
47
|
+
"quoteTs": "timestamp",
|
|
48
|
+
# Position columns (11)
|
|
49
|
+
"startPos": "init_net_pos",
|
|
50
|
+
"pos": "current_net_pos",
|
|
51
|
+
"realizedPos": "current_realized_net_pos",
|
|
52
|
+
"openBuyPos": "open_buy",
|
|
53
|
+
"openSellPos": "open_sell",
|
|
54
|
+
"cumBuy": "cum_buy",
|
|
55
|
+
"cumSell": "cum_sell",
|
|
56
|
+
"cashFlow": "cash_flow",
|
|
57
|
+
"frozenCash": "frozen_cash",
|
|
58
|
+
"globalCumBuyNotional": "cum_buy_filled_notional",
|
|
59
|
+
"globalCumSellNotional": "cum_sell_filled_notional",
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
# Alias: ylin (Yuanzhao's username)
|
|
63
|
+
YLIN = YUANZHAO
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
vizflow/__init__.py,sha256=s03IltxCkl4odJo8ywVERpOD8Dl0GqOaKhFyELjGwM4,387
|
|
2
|
+
vizflow/config.py,sha256=IiVau-4WyO_7NOWlR7Tw58RpDeGFYBbgkNzH7xuQIUg,6544
|
|
3
|
+
vizflow/io.py,sha256=Z2W-jU8nrX52DjzzXCxfHkKSWl3AGnIUC3cFDg4dCTk,6906
|
|
4
|
+
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
+
vizflow/ops.py,sha256=NL-Gtv-m_O1hv-0RUb9Wt43916HsQ5tYK_0e_uKR90w,4062
|
|
6
|
+
vizflow/presets.py,sha256=RdxlFouysHBH8IftQx3v5e9Mq82DznxZFbMxv4w1vnA,1825
|
|
7
|
+
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
vizflow-0.4.1.dist-info/METADATA,sha256=qRa3pRup8YXzTvR9kxBFmhNZR_BBqsR1CkYrOa3t-As,388
|
|
9
|
+
vizflow-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
vizflow-0.4.1.dist-info/RECORD,,
|
vizflow-0.3.0.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
vizflow/__init__.py,sha256=_2uMkH3E9eiPvkbE8-2Rmq4J5zjXxZcgqcSa3wnDI5E,244
|
|
2
|
-
vizflow/config.py,sha256=1bc4maiFzeTVVMAcHyeJku4f5KcU-LlWGFpcHhjXylg,2356
|
|
3
|
-
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
4
|
-
vizflow/ops.py,sha256=-C-e_WYJCdfl8DHkSvufyE_tMkruq6AI2MBoCwt2Hqo,3304
|
|
5
|
-
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
vizflow-0.3.0.dist-info/METADATA,sha256=hixuCfcNqckSK81T-vosviHJoCPS60Ju6brL-tHWwdg,388
|
|
7
|
-
vizflow-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
-
vizflow-0.3.0.dist-info/RECORD,,
|
|
File without changes
|