vizflow 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vizflow/__init__.py CHANGED
@@ -5,4 +5,8 @@ Usage:
5
5
  import vizflow as vf
6
6
  """
7
7
 
8
- __version__ = "0.1.0"
8
+ __version__ = "0.3.0"
9
+
10
+ from .config import Config
11
+ from .market import CN, CRYPTO, Market, Session
12
+ from .ops import aggregate, bin, parse_time
vizflow/config.py ADDED
@@ -0,0 +1,73 @@
1
+ """Configuration classes for VizFlow."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+
9
@dataclass
class Config:
    """Central configuration for a pipeline run.

    Attributes:
        input_dir: Directory containing input files
        output_dir: Directory for output files
        input_pattern: Pattern for input files, e.g. "{date}.feather"
        market: Market identifier, e.g. "CN", "crypto"
        columns: Mapping from semantic names to actual column names
        binwidths: Mapping from column names to bin widths
        group_by: Column names to group by (not read by any method of this
            class; presumably consumed by the aggregation step)
        horizons: List of forward return horizons in seconds
        time_cutoff: Optional time cutoff (e.g. 143000000 for 14:30:00)
    """

    input_dir: Path
    output_dir: Path
    input_pattern: str = "{date}.feather"
    market: str = "CN"
    columns: dict[str, str] = field(default_factory=dict)
    binwidths: dict[str, float] = field(default_factory=dict)
    group_by: list[str] = field(default_factory=list)
    horizons: list[int] = field(default_factory=list)
    time_cutoff: int | None = None

    def __post_init__(self) -> None:
        """Coerce the directory fields to Path objects.

        Strings and any other object implementing ``__fspath__`` are wrapped
        in ``Path``. Values that already are ``Path`` instances, and values
        that are not path-like at all, are left exactly as given.
        """
        for attr in ("input_dir", "output_dir"):
            value = getattr(self, attr)
            if isinstance(value, Path):
                continue
            # Accept str and os.PathLike alike, so callers may pass any
            # path-like object, not only plain strings.
            if isinstance(value, str) or hasattr(value, "__fspath__"):
                setattr(self, attr, Path(value))

    def col(self, semantic: str) -> str:
        """Get actual column name from semantic name.

        Args:
            semantic: Semantic column name (e.g. "timestamp", "price")

        Returns:
            Actual column name, or the semantic name if no mapping exists
        """
        return self.columns.get(semantic, semantic)

    def get_input_path(self, date: str) -> Path:
        """Get input file path for a date.

        Args:
            date: Date string, e.g. "20241001"; substituted for the
                ``{date}`` placeholder of ``input_pattern``

        Returns:
            Full path to input file (``input_dir`` joined with the file name)
        """
        return self.input_dir / self.input_pattern.format(date=date)

    def get_output_path(self, date: str, suffix: str = ".parquet") -> Path:
        """Get output file path for a date.

        Args:
            date: Date string, e.g. "20241001"
            suffix: File suffix, default ".parquet"

        Returns:
            Full path to output file (``output_dir`` joined with date + suffix)
        """
        return self.output_dir / f"{date}{suffix}"
vizflow/market.py ADDED
@@ -0,0 +1,94 @@
1
+ """Market session definitions and time handling."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+
6
+
7
@dataclass
class Session:
    """One contiguous trading window of a market day.

    Attributes:
        start: Opening time of the window, written as "HH:MM"
        end: Closing time of the window, written as "HH:MM"
    """

    # Both bounds are plain strings; this class performs no parsing or validation.
    start: str
    end: str
18
+
19
+
20
@dataclass
class Market:
    """Market definition with trading sessions.

    Attributes:
        name: Market identifier (e.g. "CN", "crypto", "KR")
        sessions: List of trading sessions
    """

    name: str
    # Quoted forward reference: the annotation is never evaluated when the
    # class is created, so it does not depend on definition order.
    sessions: "list[Session]"

    def elapsed_seconds(self, time: datetime) -> int:
        """Convert wall-clock time to continuous trading seconds.

        The calculation is selected by ``self.name``; the ``sessions``
        attribute is not consulted. Only hour, minute and second of ``time``
        are used (sub-second precision is ignored).

        CN (09:30-11:30 and 13:00-15:00, lunch break removed):
            Morning:   elapsed = seconds since 09:30
            Afternoon: elapsed = 7200 + seconds since 13:00
            The closing instant 15:00:00 is accepted; 11:30:00 is not.

        crypto (24/7):
            elapsed = seconds since midnight

        KR (09:00-15:30):
            elapsed = seconds since 09:00, closing instant 15:30:00 included

        Examples (CN):
            09:30:00 → 0
            11:29:59 → 7199
            13:00:00 → 7200
            15:00:00 → 14400

        Args:
            time: datetime object

        Returns:
            Elapsed trading seconds from market open

        Raises:
            ValueError: If the time is outside the trading hours of the
                market, or the market name is not one of the known ones
        """
        since_midnight = time.hour * 3600 + time.minute * 60 + time.second

        if self.name == "CN":
            morning_open = 9 * 3600 + 30 * 60
            morning_close = 11 * 3600 + 30 * 60
            afternoon_open = 13 * 3600
            afternoon_close = 15 * 3600

            # Morning session is half-open: [09:30:00, 11:30:00)
            if morning_open <= since_midnight < morning_close:
                return since_midnight - morning_open
            # Afternoon session includes the closing instant: [13:00:00, 15:00:00]
            if afternoon_open <= since_midnight <= afternoon_close:
                return (morning_close - morning_open) + (since_midnight - afternoon_open)

        elif self.name == "crypto":
            # 24/7: simple seconds since midnight
            return since_midnight

        elif self.name == "KR":
            day_open = 9 * 3600
            day_close = 15 * 3600 + 30 * 60
            # Compare at second resolution so 15:30:01-15:30:59 is rejected;
            # a minute-only check would let the whole 15:30 minute through.
            if day_open <= since_midnight <= day_close:
                return since_midnight - day_open

        raise ValueError(f"Time {time} is outside trading hours for market {self.name}")
76
+
77
+
78
+ # === Presets ===
79
+
80
# Two sessions of 2 hours each: 4 hours = 14,400 trading seconds in total.
CN = Market(
    "CN",
    [
        Session("09:30", "11:30"),  # Morning
        Session("13:00", "15:00"),  # Afternoon
    ],
)

# A single session spanning the whole day.
CRYPTO = Market("crypto", [Session("00:00", "24:00")])
vizflow/ops.py ADDED
@@ -0,0 +1,108 @@
1
+ """Core operations for data transformation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import polars as pl
6
+
7
+ from .market import Market
8
+
9
+
10
def parse_time(
    df: pl.LazyFrame,
    market: Market,
    timestamp_col: str = "timestamp",
) -> pl.LazyFrame:
    """Append an ``elapsed_seconds`` column derived from an integer timestamp.

    The timestamp is read as decimal digits H/HHMMSSMMM, e.g.
    93012145 = 09:30:12.145 and 142058425 = 14:20:58.425. Elapsed time is
    counted from the CN open at 09:30 with the lunch break removed, so the
    afternoon session starts at 7200.

    Args:
        df: Input LazyFrame
        market: Market definition; only a market named "CN" is accepted
        timestamp_col: Name of the integer timestamp column

    Returns:
        LazyFrame with an added Float64 ``elapsed_seconds`` column whose
        fractional part carries the milliseconds,
        e.g. 09:30:12.145 → 12.145. Rows matching none of the session
        branches get a null whole-second part (the sum is then expected to
        be null as well).

    Raises:
        NotImplementedError: If market is not CN
    """
    if market.name != "CN":
        raise NotImplementedError(f"Market {market.name} not supported yet")

    ts = pl.col(timestamp_col)

    # Split the digits: 93012145 → hour 9, minute 30, second 12, millis 145
    hour = ts // 10000000
    minute = (ts // 100000) % 100
    second = (ts // 1000) % 100
    millis = ts % 1000

    # Session predicates, checked in this order by the chain below
    in_0930_0959 = (hour == 9) & (minute >= 30)
    in_1000_1059 = hour == 10
    in_1100_1129 = (hour == 11) & (minute < 30)
    in_1300_1459 = (hour >= 13) & (hour < 15)
    at_close = (hour == 15) & (minute == 0) & (second == 0)

    # Whole seconds since the open; the morning contributes 7200 seconds
    whole_seconds = (
        pl.when(in_0930_0959)
        .then((minute - 30) * 60 + second)
        .when(in_1000_1059)
        .then(30 * 60 + minute * 60 + second)
        .when(in_1100_1129)
        .then(90 * 60 + minute * 60 + second)
        .when(in_1300_1459)
        .then(7200 + (hour - 13) * 3600 + minute * 60 + second)
        .when(at_close)
        .then(14400)
        .otherwise(None)
    )

    # Milliseconds become the fractional part of the result
    fraction = millis.cast(pl.Float64) / 1000.0
    total = whole_seconds.cast(pl.Float64) + fraction

    return df.with_columns(total.alias("elapsed_seconds"))
62
+
63
+
64
def bin(df: pl.LazyFrame, widths: dict[str, float]) -> pl.LazyFrame:
    """Attach an integer bin index column for each requested column.

    For every ``name: width`` entry a new column ``{name}_bin`` is added,
    computed as ``round(value / width)`` and cast to Int64. Multiplying the
    bin index by the width recovers the bin's representative value.

    Args:
        df: Input LazyFrame
        widths: Column name to bin width mapping

    Returns:
        LazyFrame with {col}_bin columns added
    """
    bin_columns = []
    for name, step in widths.items():
        scaled = pl.col(name) / step
        bin_columns.append(scaled.round().cast(pl.Int64).alias(f"{name}_bin"))
    return df.with_columns(bin_columns)
83
+
84
+
85
def aggregate(
    df: pl.LazyFrame,
    group_by: list[str],
    metrics: dict[str, pl.Expr],
) -> pl.LazyFrame:
    """Group the frame and compute named aggregation expressions.

    Each expression in ``metrics`` is aliased to its dictionary key and all
    of them are passed to a single ``agg`` call on the grouped frame.

    Args:
        df: Input LazyFrame
        group_by: Columns to group by
        metrics: Name to Polars expression mapping

    Returns:
        Aggregated LazyFrame

    Example:
        metrics = {
            "count": pl.len(),
            "total_qty": pl.col("quantity").sum(),
            "vwap": pl.col("notional").sum() / pl.col("quantity").sum(),
        }
    """
    named_metrics = []
    for label, expression in metrics.items():
        named_metrics.append(expression.alias(label))

    grouped = df.group_by(group_by)
    return grouped.agg(named_metrics)
@@ -1,9 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vizflow
3
- Version: 0.1.0
4
- Project-URL: Homepage, https://github.com/vizflow/vizflow
5
- Project-URL: Documentation, https://github.com/vizflow/vizflow
6
- Project-URL: Repository, https://github.com/vizflow/vizflow
3
+ Version: 0.3.0
7
4
  Requires-Python: >=3.9
8
5
  Requires-Dist: polars>=0.20.0
9
6
  Provides-Extra: dev
@@ -0,0 +1,8 @@
1
+ vizflow/__init__.py,sha256=_2uMkH3E9eiPvkbE8-2Rmq4J5zjXxZcgqcSa3wnDI5E,244
2
+ vizflow/config.py,sha256=1bc4maiFzeTVVMAcHyeJku4f5KcU-LlWGFpcHhjXylg,2356
3
+ vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
4
+ vizflow/ops.py,sha256=-C-e_WYJCdfl8DHkSvufyE_tMkruq6AI2MBoCwt2Hqo,3304
5
+ vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ vizflow-0.3.0.dist-info/METADATA,sha256=hixuCfcNqckSK81T-vosviHJoCPS60Ju6brL-tHWwdg,388
7
+ vizflow-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ vizflow-0.3.0.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- vizflow/__init__.py,sha256=TMwTRqV4GoUUvrZQ-IrDTEZP77noJFKptpfWyMGZbmA,124
2
- vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- vizflow-0.1.0.dist-info/METADATA,sha256=DRwpVGhhmQM06_bnpBHcYDbHxJXsy4VR4Ldmlm06kJ8,569
4
- vizflow-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- vizflow-0.1.0.dist-info/RECORD,,