vizflow 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vizflow/__init__.py +5 -1
- vizflow/config.py +73 -0
- vizflow/market.py +94 -0
- vizflow/ops.py +108 -0
- {vizflow-0.1.0.dist-info → vizflow-0.3.0.dist-info}/METADATA +1 -4
- vizflow-0.3.0.dist-info/RECORD +8 -0
- vizflow-0.1.0.dist-info/RECORD +0 -5
- {vizflow-0.1.0.dist-info → vizflow-0.3.0.dist-info}/WHEEL +0 -0
vizflow/__init__.py
CHANGED
vizflow/config.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Configuration classes for VizFlow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Config:
    """Central configuration for a pipeline run.

    Attributes:
        input_dir: Directory containing input files
        output_dir: Directory for output files
        input_pattern: Pattern for input files, e.g. "{date}.feather"
        market: Market identifier, e.g. "CN", "crypto"
        columns: Mapping from semantic names to actual column names
        binwidths: Mapping from column names to bin widths
        group_by: Column names to group by
        horizons: List of forward return horizons in seconds
        time_cutoff: Optional time cutoff (e.g. 143000000 for 14:30:00)
    """

    input_dir: Path
    output_dir: Path
    input_pattern: str = "{date}.feather"
    market: str = "CN"
    columns: dict[str, str] = field(default_factory=dict)
    binwidths: dict[str, float] = field(default_factory=dict)
    group_by: list[str] = field(default_factory=list)
    horizons: list[int] = field(default_factory=list)
    time_cutoff: int | None = None

    def __post_init__(self) -> None:
        """Convert ``input_dir`` and ``output_dir`` to ``Path`` objects if needed.

        Strings and any other path-like object (anything exposing
        ``__fspath__``) are converted. Values of any other type are left
        untouched, exactly as before.
        """
        for attr in ("input_dir", "output_dir"):
            value = getattr(self, attr)
            # Duck-type os.PathLike via __fspath__ so no extra import is needed.
            if isinstance(value, str) or hasattr(value, "__fspath__"):
                setattr(self, attr, Path(value))

    def col(self, semantic: str) -> str:
        """Get actual column name from semantic name.

        Args:
            semantic: Semantic column name (e.g. "timestamp", "price")

        Returns:
            Actual column name, or the semantic name if no mapping exists
        """
        return self.columns.get(semantic, semantic)

    def get_input_path(self, date: str) -> Path:
        """Get input file path for a date.

        Args:
            date: Date string, e.g. "20241001"; substituted for the
                ``{date}`` placeholder of ``input_pattern``

        Returns:
            Full path to input file
        """
        return self.input_dir / self.input_pattern.format(date=date)

    def get_output_path(self, date: str, suffix: str = ".parquet") -> Path:
        """Get output file path for a date.

        Args:
            date: Date string, e.g. "20241001"
            suffix: File suffix, default ".parquet"

        Returns:
            Full path to output file
        """
        return self.output_dir / f"{date}{suffix}"
|
vizflow/market.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Market session definitions and time handling."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Session:
    """One contiguous trading window within a day.

    Both bounds are wall-clock strings written as "HH:MM".

    Attributes:
        start: Opening time of the window
        end: Closing time of the window
    """

    # Opening time, "HH:MM"
    start: str
    # Closing time, "HH:MM"
    end: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class Market:
    """Market definition with trading sessions.

    Attributes:
        name: Market identifier (e.g. "CN", "crypto")
        sessions: List of trading sessions
    """

    name: str
    sessions: list[Session]

    def elapsed_seconds(self, time: datetime) -> int:
        """Convert wall-clock time to continuous trading seconds.

        The trading windows are selected by ``name`` and are hard-coded
        here; ``sessions`` is not consulted. Only the hour, minute and
        second of ``time`` are used (sub-second precision is ignored).

        For CN market (lunch break removed):
            Morning:   09:30:00 <= t <  11:30:00 -> seconds since 09:30
            Afternoon: 13:00:00 <= t <= 15:00:00 -> 7200 + seconds since 13:00

        For crypto: seconds since midnight.

        For KR: 09:00:00 <= t <= 15:30:00 -> seconds since 09:00

        Examples (CN):
            09:30:00 → 0
            11:29:59 → 7199
            13:00:00 → 7200
            15:00:00 → 14400

        Args:
            time: datetime object

        Returns:
            Elapsed trading seconds from market open

        Raises:
            ValueError: If the time is outside the trading hours of this
                market, or the market name is not one of the known ones
        """
        since_midnight = time.hour * 3600 + time.minute * 60 + time.second

        if self.name == "CN":
            morning_open = 9 * 3600 + 30 * 60
            morning_close = 11 * 3600 + 30 * 60
            afternoon_open = 13 * 3600
            afternoon_close = 15 * 3600
            # Morning close is exclusive: 11:30:00 itself is not mapped.
            if morning_open <= since_midnight < morning_close:
                return since_midnight - morning_open
            # Afternoon close is inclusive: 15:00:00 maps to 14400.
            if afternoon_open <= since_midnight <= afternoon_close:
                return (morning_close - morning_open) + since_midnight - afternoon_open

        elif self.name == "crypto":
            # 24/7: simple seconds since midnight
            return since_midnight

        elif self.name == "KR":
            kr_open = 9 * 3600
            kr_close = 15 * 3600 + 30 * 60
            # Close is inclusive to the second, so 15:30:01 onwards is
            # rejected (a minute-only check would accept up to 15:30:59).
            if kr_open <= since_midnight <= kr_close:
                return since_midnight - kr_open

        raise ValueError(f"Time {time} is outside trading hours for market {self.name}")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# === Presets ===
|
|
79
|
+
|
|
80
|
+
# Mainland China: two 2-hour sessions, 4 hours = 14,400 seconds in total.
CN = Market(
    "CN",
    [
        Session("09:30", "11:30"),  # morning
        Session("13:00", "15:00"),  # afternoon
    ],
)

# Crypto: one session covering the whole day.
CRYPTO = Market("crypto", [Session("00:00", "24:00")])
|
vizflow/ops.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Core operations for data transformation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from .market import Market
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_time(
    df: pl.LazyFrame,
    market: Market,
    timestamp_col: str = "timestamp",
) -> pl.LazyFrame:
    """Append an ``elapsed_seconds`` column of CN trading time.

    The integer timestamp is split into hour / minute / second /
    millisecond fields, and the clock time is mapped onto a continuous
    trading clock with the lunch break removed.

    Args:
        df: Input LazyFrame
        market: Market definition; only a market named "CN" is accepted
        timestamp_col: Name of the integer timestamp column in
            H/HHMMSSMMM form, e.g. 93012145 = 09:30:12.145,
            142058425 = 14:20:58.425

    Returns:
        The LazyFrame with an added Float64 ``elapsed_seconds`` column
        whose fractional part carries the milliseconds, e.g.
        09:30:12.145 → 12.145. Rows outside the trading windows get a
        null whole-second part, so the result is presumably null there.

    Raises:
        NotImplementedError: If market is not CN
    """
    if market.name != "CN":
        raise NotImplementedError(f"Market {market.name} not supported yet")

    raw = pl.col(timestamp_col)

    # Decompose H/HHMMSSMMM, e.g. 93012145 -> 9, 30, 12, 145
    hour = raw // 10_000_000
    minute = (raw // 100_000) % 100
    second = (raw // 1_000) % 100
    millis = raw % 1_000

    # Trading windows: morning 09:30-11:30 and afternoon 13:00-15:00,
    # each 2 hours = 7200 seconds.
    opening_half_hour = (hour == 9) & (minute >= 30)  # 09:30-09:59
    ten_oclock_hour = hour == 10  # 10:00-10:59
    before_lunch = (hour == 11) & (minute < 30)  # 11:00-11:29
    afternoon = (hour >= 13) & (hour < 15)  # 13:00-14:59
    # The close is matched on hour/minute/second only; the milliseconds
    # are still added below, so 15:00:00.500 becomes 14400.5.
    closing_tick = (hour == 15) & (minute == 0) & (second == 0)

    whole_seconds = (
        pl.when(opening_half_hour)
        .then((minute - 30) * 60 + second)
        .when(ten_oclock_hour)
        .then(30 * 60 + minute * 60 + second)
        .when(before_lunch)
        .then(90 * 60 + minute * 60 + second)
        .when(afternoon)
        .then(7200 + (hour - 13) * 3600 + minute * 60 + second)
        .when(closing_tick)
        .then(14400)
        .otherwise(None)  # outside trading hours
    )

    # Milliseconds become the fractional part of the result.
    fraction = millis.cast(pl.Float64) / 1000.0
    elapsed = whole_seconds.cast(pl.Float64) + fraction

    return df.with_columns(elapsed.alias("elapsed_seconds"))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def bin(df: pl.LazyFrame, widths: dict[str, float]) -> pl.LazyFrame:
    """Add bin columns for specified columns.

    Note that this function shadows the builtin ``bin`` inside this
    module; the name is kept so existing callers continue to work.

    Args:
        df: Input LazyFrame
        widths: Column name to bin width mapping; every width must be
            non-zero

    Returns:
        LazyFrame with {col}_bin columns (Int64) added

    Raises:
        ValueError: If any bin width is zero

    Formula:
        bin_value = round(raw_value / binwidth)
        actual_value = bin_value * binwidth  # To recover
    """
    # Fail early: a zero width divides by zero, which would otherwise
    # presumably surface only later as an opaque cast error at collect time.
    zero_width = [col for col, width in widths.items() if width == 0]
    if zero_width:
        raise ValueError(f"Bin width must be non-zero for column(s): {zero_width}")

    exprs = [
        (pl.col(col) / width).round().cast(pl.Int64).alias(f"{col}_bin")
        for col, width in widths.items()
    ]
    return df.with_columns(exprs)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def aggregate(
    df: pl.LazyFrame,
    group_by: list[str],
    metrics: dict[str, pl.Expr],
) -> pl.LazyFrame:
    """Group the frame and compute named metrics per group.

    Args:
        df: Input LazyFrame
        group_by: Columns to group by
        metrics: Mapping of output column name to the Polars expression
            that computes it; each expression is aliased to its key

    Returns:
        Aggregated LazyFrame

    Example:
        metrics = {
            "count": pl.len(),
            "total_qty": pl.col("quantity").sum(),
            "vwap": pl.col("notional").sum() / pl.col("quantity").sum(),
        }
    """
    grouped = df.group_by(group_by)
    named_metrics = [metrics[label].alias(label) for label in metrics]
    return grouped.agg(named_metrics)
|
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vizflow
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Project-URL: Homepage, https://github.com/vizflow/vizflow
|
|
5
|
-
Project-URL: Documentation, https://github.com/vizflow/vizflow
|
|
6
|
-
Project-URL: Repository, https://github.com/vizflow/vizflow
|
|
3
|
+
Version: 0.3.0
|
|
7
4
|
Requires-Python: >=3.9
|
|
8
5
|
Requires-Dist: polars>=0.20.0
|
|
9
6
|
Provides-Extra: dev
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
vizflow/__init__.py,sha256=_2uMkH3E9eiPvkbE8-2Rmq4J5zjXxZcgqcSa3wnDI5E,244
|
|
2
|
+
vizflow/config.py,sha256=1bc4maiFzeTVVMAcHyeJku4f5KcU-LlWGFpcHhjXylg,2356
|
|
3
|
+
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
4
|
+
vizflow/ops.py,sha256=-C-e_WYJCdfl8DHkSvufyE_tMkruq6AI2MBoCwt2Hqo,3304
|
|
5
|
+
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
vizflow-0.3.0.dist-info/METADATA,sha256=hixuCfcNqckSK81T-vosviHJoCPS60Ju6brL-tHWwdg,388
|
|
7
|
+
vizflow-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
+
vizflow-0.3.0.dist-info/RECORD,,
|
vizflow-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
vizflow/__init__.py,sha256=TMwTRqV4GoUUvrZQ-IrDTEZP77noJFKptpfWyMGZbmA,124
|
|
2
|
-
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
vizflow-0.1.0.dist-info/METADATA,sha256=DRwpVGhhmQM06_bnpBHcYDbHxJXsy4VR4Ldmlm06kJ8,569
|
|
4
|
-
vizflow-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
5
|
-
vizflow-0.1.0.dist-info/RECORD,,
|
|
File without changes
|