vizflow 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vizflow/__init__.py
CHANGED
vizflow/ops.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Core operations for data transformation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from .market import Market
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_time(
    df: pl.LazyFrame,
    market: Market,
    timestamp_col: str = "timestamp",
) -> pl.LazyFrame:
    """Attach an ``elapsed_seconds`` column measured from the session open.

    The timestamp column is read as an integer laid out as H/HHMMSSMMM,
    e.g. 93012145 is 09:30:12.145 and 142058425 is 14:20:58.425. The two
    CN sessions (09:30-11:30 and 13:00-15:00) are stitched together, so the
    afternoon session continues at 7200 seconds and 15:00:00 maps to 14400.

    Args:
        df: Input LazyFrame.
        market: Market definition; only ``market.name == "CN"`` is accepted.
        timestamp_col: Name of the integer timestamp column.

    Returns:
        The input frame with an extra ``elapsed_seconds`` Float64 column whose
        fractional part carries the milliseconds, e.g. 09:30:12.145 -> 12.145.
        Rows that match none of the session branches get a null whole-second
        part (presumably yielding a null result under Polars null
        propagation).

    Raises:
        NotImplementedError: If ``market.name`` is anything other than "CN".
    """
    if market.name != "CN":
        raise NotImplementedError(f"Market {market.name} not supported yet")

    raw = pl.col(timestamp_col)

    # Split H/HHMMSSMMM into its fields using integer arithmetic only.
    millis = raw % 1000  # 93012145 -> 145
    second = (raw // 1000) % 100  # 93012145 -> 12
    minute = (raw // 100000) % 100  # 93012145 -> 30
    hour = raw // 10000000  # 93012145 -> 9

    # Seconds elapsed within the current clock hour.
    into_hour = minute * 60 + second

    # Session predicates, checked in chronological order.
    opening_half_hour = (hour == 9) & (minute >= 30)  # 09:30-09:59
    ten_oclock_hour = hour == 10  # 10:00-10:59
    late_morning = (hour == 11) & (minute < 30)  # 11:00-11:29
    afternoon = (hour >= 13) & (hour < 15)  # 13:00-14:59
    closing_tick = (hour == 15) & (minute == 0) & (second == 0)  # 15:00:00

    # The morning session contributes 7200 s before the afternoon starts.
    whole_seconds = (
        pl.when(opening_half_hour)
        .then((minute - 30) * 60 + second)
        .when(ten_oclock_hour)
        .then(30 * 60 + into_hour)
        .when(late_morning)
        .then(90 * 60 + into_hour)
        .when(afternoon)
        .then(7200 + (hour - 13) * 3600 + into_hour)
        .when(closing_tick)
        .then(14400)
        .otherwise(None)  # no session branch matched
    )

    # Milliseconds become the fractional part of the result.
    fraction = millis.cast(pl.Float64) / 1000.0
    elapsed_seconds = whole_seconds.cast(pl.Float64) + fraction

    return df.with_columns(elapsed_seconds.alias("elapsed_seconds"))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def bin(df: pl.LazyFrame, widths: dict[str, float]) -> pl.LazyFrame:
    """Add an integer ``{col}_bin`` column for each entry in ``widths``.

    Note that this function's name shadows the builtin ``bin`` inside this
    module.

    Args:
        df: Input LazyFrame.
        widths: Column name to bin width mapping. Widths must be non-zero.

    Returns:
        LazyFrame with one ``{col}_bin`` Int64 column added per mapping entry.

    Raises:
        ValueError: If any bin width is zero. Dividing by a zero width cannot
            produce a usable integer bin index, so the bad width is reported
            here, by column name, rather than surfacing later when the lazy
            query is executed.

    Formula:
        bin_value = round(raw_value / binwidth)
        actual_value = bin_value * binwidth  # To recover
    """
    # Check every width before building any expression so that a bad entry is
    # reported immediately, regardless of its position in the mapping.
    for col, width in widths.items():
        if width == 0:
            raise ValueError(f"Bin width for column {col!r} must be non-zero")

    exprs = [
        (pl.col(col) / width).round().cast(pl.Int64).alias(f"{col}_bin")
        for col, width in widths.items()
    ]
    return df.with_columns(exprs)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def aggregate(
    df: pl.LazyFrame,
    group_by: list[str],
    metrics: dict[str, pl.Expr],
) -> pl.LazyFrame:
    """Group the frame and compute one named output column per metric.

    Each expression in ``metrics`` is aliased to its dictionary key and all
    of them are passed to a single ``agg`` call on the grouped frame.

    Args:
        df: Input LazyFrame.
        group_by: Columns to group by.
        metrics: Output column name to Polars expression mapping.

    Returns:
        Aggregated LazyFrame.

    Example:
        metrics = {
            "count": pl.len(),
            "total_qty": pl.col("quantity").sum(),
            "vwap": pl.col("notional").sum() / pl.col("quantity").sum(),
        }
    """
    named_metrics = []
    for label, expression in metrics.items():
        # The dictionary key becomes the output column name.
        named_metrics.append(expression.alias(label))

    grouped = df.group_by(group_by)
    return grouped.agg(named_metrics)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
vizflow/__init__.py,sha256=_2uMkH3E9eiPvkbE8-2Rmq4J5zjXxZcgqcSa3wnDI5E,244
|
|
2
|
+
vizflow/config.py,sha256=1bc4maiFzeTVVMAcHyeJku4f5KcU-LlWGFpcHhjXylg,2356
|
|
3
|
+
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
4
|
+
vizflow/ops.py,sha256=-C-e_WYJCdfl8DHkSvufyE_tMkruq6AI2MBoCwt2Hqo,3304
|
|
5
|
+
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
vizflow-0.3.0.dist-info/METADATA,sha256=hixuCfcNqckSK81T-vosviHJoCPS60Ju6brL-tHWwdg,388
|
|
7
|
+
vizflow-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
+
vizflow-0.3.0.dist-info/RECORD,,
|
vizflow-0.2.0.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
vizflow/__init__.py,sha256=ENq0VwprFPbrFSUXS74rX2Nd4ieTjgcBHCSLl-tZMSw,200
|
|
2
|
-
vizflow/config.py,sha256=1bc4maiFzeTVVMAcHyeJku4f5KcU-LlWGFpcHhjXylg,2356
|
|
3
|
-
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
4
|
-
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
vizflow-0.2.0.dist-info/METADATA,sha256=LJ97yfPACqShNOSR3isPmd7ahR2miFp54QewiOUKygw,388
|
|
6
|
-
vizflow-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
7
|
-
vizflow-0.2.0.dist-info/RECORD,,
|
|
File without changes
|