vizflow 0.5.6__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vizflow/__init__.py +2 -1
- vizflow/io.py +9 -3
- vizflow/ops.py +6 -16
- vizflow/schema_evolution.py +48 -1
- vizflow/viz.py +35 -0
- {vizflow-0.5.6.dist-info → vizflow-0.5.8.dist-info}/METADATA +1 -1
- vizflow-0.5.8.dist-info/RECORD +11 -0
- vizflow-0.5.6.dist-info/RECORD +0 -10
- {vizflow-0.5.6.dist-info → vizflow-0.5.8.dist-info}/WHEEL +0 -0
vizflow/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ Usage:
|
|
|
5
5
|
import vizflow as vf
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.5.6"
|
|
8
|
+
__version__ = "0.5.8"
|
|
9
9
|
|
|
10
10
|
from .config import Config, get_config, set_config
|
|
11
11
|
from .io import (
|
|
@@ -19,6 +19,7 @@ from .io import (
|
|
|
19
19
|
)
|
|
20
20
|
from .market import CN, CRYPTO, Market, Session
|
|
21
21
|
from .ops import aggregate, bin, forward_return, mark_to_close, parse_time, sign_by_side
|
|
22
|
+
from .viz import add_tod
|
|
22
23
|
from .schema_evolution import (
|
|
23
24
|
JYAO_V20251114,
|
|
24
25
|
SCHEMAS,
|
vizflow/io.py
CHANGED
|
@@ -167,6 +167,8 @@ def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
|
167
167
|
if schema:
|
|
168
168
|
df = _apply_schema_evolution(df, schema)
|
|
169
169
|
|
|
170
|
+
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
|
|
171
|
+
|
|
170
172
|
return df
|
|
171
173
|
|
|
172
174
|
|
|
@@ -213,7 +215,7 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
|
|
|
213
215
|
df = _apply_schema_evolution(df, schema)
|
|
214
216
|
date = _extract_date_from_path(f, config.trade_pattern)
|
|
215
217
|
if date:
|
|
216
|
-
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").alias("data_date"))
|
|
218
|
+
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
|
|
217
219
|
dfs.append(df)
|
|
218
220
|
|
|
219
221
|
return pl.concat(dfs)
|
|
@@ -246,6 +248,8 @@ def scan_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
|
246
248
|
if schema:
|
|
247
249
|
df = _apply_schema_evolution(df, schema)
|
|
248
250
|
|
|
251
|
+
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
|
|
252
|
+
|
|
249
253
|
return df
|
|
250
254
|
|
|
251
255
|
|
|
@@ -281,7 +285,7 @@ def scan_alphas(config: Config | None = None) -> pl.LazyFrame:
|
|
|
281
285
|
df = _apply_schema_evolution(df, schema)
|
|
282
286
|
# Convert data_date to Date type (may be Int64 from feather or String from CSV)
|
|
283
287
|
df = df.with_columns(
|
|
284
|
-
pl.col("data_date").cast(pl.String).str.to_date("%Y%m%d")
|
|
288
|
+
pl.col("data_date").cast(pl.String).str.to_date("%Y%m%d").cast(pl.Date)
|
|
285
289
|
)
|
|
286
290
|
dfs.append(df)
|
|
287
291
|
|
|
@@ -314,6 +318,8 @@ def scan_univ(date: str, config: Config | None = None) -> pl.LazyFrame:
|
|
|
314
318
|
if schema:
|
|
315
319
|
df = _apply_schema_evolution(df, schema)
|
|
316
320
|
|
|
321
|
+
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
|
|
322
|
+
|
|
317
323
|
return df
|
|
318
324
|
|
|
319
325
|
|
|
@@ -351,7 +357,7 @@ def scan_univs(config: Config | None = None) -> pl.LazyFrame:
|
|
|
351
357
|
df = _apply_schema_evolution(df, schema)
|
|
352
358
|
date = _extract_date_from_path(f, config.univ_pattern)
|
|
353
359
|
if date:
|
|
354
|
-
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").alias("data_date"))
|
|
360
|
+
df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
|
|
355
361
|
dfs.append(df)
|
|
356
362
|
|
|
357
363
|
return pl.concat(dfs)
|
vizflow/ops.py
CHANGED
|
@@ -11,19 +11,20 @@ def parse_time(
|
|
|
11
11
|
df: pl.LazyFrame,
|
|
12
12
|
timestamp_col: str = "ticktime",
|
|
13
13
|
) -> pl.LazyFrame:
|
|
14
|
-
"""Parse HHMMSSMMM timestamp to
|
|
14
|
+
"""Parse HHMMSSMMM timestamp to elapsed milliseconds.
|
|
15
15
|
|
|
16
|
-
Adds two columns:
|
|
17
|
-
- tod_{timestamp_col}: pl.Time (time-of-day HH:MM:SS.mmm) - good for plotting
|
|
16
|
+
Adds one column:
|
|
18
17
|
- elapsed_{timestamp_col}: pl.Int64 (milliseconds since market open)
|
|
19
18
|
|
|
19
|
+
For time-of-day column (pl.Time) for plotting, use vf.add_tod() separately.
|
|
20
|
+
|
|
20
21
|
Args:
|
|
21
22
|
df: Input LazyFrame
|
|
22
23
|
timestamp_col: Column with integer HHMMSSMMM format timestamps
|
|
23
24
|
e.g., 93012145 = 09:30:12.145, 142058425 = 14:20:58.425
|
|
24
25
|
|
|
25
26
|
Returns:
|
|
26
|
-
LazyFrame with
|
|
27
|
+
LazyFrame with elapsed column added
|
|
27
28
|
|
|
28
29
|
Raises:
|
|
29
30
|
RuntimeError: If config not set via set_config()
|
|
@@ -33,7 +34,7 @@ def parse_time(
|
|
|
33
34
|
>>> config = vf.Config(market="CN", input_dir=".", output_dir=".")
|
|
34
35
|
>>> vf.set_config(config)
|
|
35
36
|
>>> df = vf.parse_time(df, "ticktime")
|
|
36
|
-
>>> # Creates: tod_ticktime (pl.Time), elapsed_ticktime (pl.Int64)
|
|
37
|
+
>>> # Creates: elapsed_ticktime (pl.Int64)
|
|
37
38
|
"""
|
|
38
39
|
config = get_config()
|
|
39
40
|
|
|
@@ -48,19 +49,8 @@ def parse_time(
|
|
|
48
49
|
(pl.col(timestamp_col) % 1000).alias("_ms"),
|
|
49
50
|
])
|
|
50
51
|
|
|
51
|
-
# Add time-of-day column (pl.Time)
|
|
52
|
-
# Convert to nanoseconds since midnight
|
|
53
|
-
tod_ns = (
|
|
54
|
-
pl.col("_hour") * 3_600_000_000_000
|
|
55
|
-
+ pl.col("_minute") * 60_000_000_000
|
|
56
|
-
+ pl.col("_second") * 1_000_000_000
|
|
57
|
-
+ pl.col("_ms") * 1_000_000
|
|
58
|
-
)
|
|
59
|
-
df = df.with_columns(tod_ns.cast(pl.Time).alias(f"tod_{timestamp_col}"))
|
|
60
|
-
|
|
61
52
|
# Add elapsed milliseconds (int)
|
|
62
53
|
# CN market: 09:30-11:30 (morning), 13:00-15:00 (afternoon)
|
|
63
|
-
# Using user's hardcoded logic
|
|
64
54
|
elapsed_ms = (
|
|
65
55
|
pl.when(pl.col("_hour") < 12)
|
|
66
56
|
.then(
|
vizflow/schema_evolution.py
CHANGED
|
@@ -353,7 +353,7 @@ JYAO_V20251114 = SchemaEvolution(
|
|
|
353
353
|
# Time columns
|
|
354
354
|
"TimeStamp": ColumnSpec(rename_to="timestamp", parse_dtype=pl.Int64),
|
|
355
355
|
"GlobalExTime": ColumnSpec(rename_to="global_exchange_ts", parse_dtype=pl.Int64),
|
|
356
|
-
"DataDate": ColumnSpec(rename_to="data_date", parse_dtype=pl.String),
|
|
356
|
+
"DataDate": ColumnSpec(rename_to="data_date", parse_dtype=pl.String, cast_dtype=pl.Date),
|
|
357
357
|
# Volume
|
|
358
358
|
"Volume": ColumnSpec(
|
|
359
359
|
rename_to="volume",
|
|
@@ -371,6 +371,52 @@ JYAO_V20251114 = SchemaEvolution(
|
|
|
371
371
|
)
|
|
372
372
|
|
|
373
373
|
|
|
374
|
+
# =============================================================================
|
|
375
|
+
# JYAO Univ Format (v2025-12-30)
|
|
376
|
+
# =============================================================================
|
|
377
|
+
|
|
378
|
+
JYAO_UNIV_V20251230 = SchemaEvolution(
|
|
379
|
+
columns={
|
|
380
|
+
# ID
|
|
381
|
+
"ukey": ColumnSpec(parse_dtype=pl.Int64),
|
|
382
|
+
# Price columns (Float64)
|
|
383
|
+
"ydclose": ColumnSpec(parse_dtype=pl.Float64),
|
|
384
|
+
"preclose": ColumnSpec(parse_dtype=pl.Float64),
|
|
385
|
+
"open": ColumnSpec(parse_dtype=pl.Float64),
|
|
386
|
+
"close": ColumnSpec(parse_dtype=pl.Float64),
|
|
387
|
+
"upper_limit_price": ColumnSpec(parse_dtype=pl.Float64),
|
|
388
|
+
"lower_limit_price": ColumnSpec(parse_dtype=pl.Float64),
|
|
389
|
+
"tick_size": ColumnSpec(parse_dtype=pl.Float64),
|
|
390
|
+
# Lot size columns (parse Float64 → cast Int64)
|
|
391
|
+
"trade_min_size": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
|
|
392
|
+
"trade_unit_size": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
|
|
393
|
+
"qty_unit": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
|
|
394
|
+
# Average/aggregated columns (Float64)
|
|
395
|
+
"trade_max_size": ColumnSpec(parse_dtype=pl.Float64),
|
|
396
|
+
"adv": ColumnSpec(parse_dtype=pl.Float64),
|
|
397
|
+
"roll_spread": ColumnSpec(parse_dtype=pl.Float64),
|
|
398
|
+
"buy_avg_volume": ColumnSpec(parse_dtype=pl.Float64),
|
|
399
|
+
"sell_avg_volume": ColumnSpec(parse_dtype=pl.Float64),
|
|
400
|
+
"avg_touch_size_mean": ColumnSpec(parse_dtype=pl.Float64),
|
|
401
|
+
"avg_touch_order_size": ColumnSpec(parse_dtype=pl.Float64),
|
|
402
|
+
# Risk columns (Float64)
|
|
403
|
+
"TotalRisk": ColumnSpec(parse_dtype=pl.Float64),
|
|
404
|
+
"SpecRisk": ColumnSpec(parse_dtype=pl.Float64),
|
|
405
|
+
# Boolean columns (TRUE/FALSE strings)
|
|
406
|
+
"is_price_limited": ColumnSpec(parse_dtype=pl.Boolean),
|
|
407
|
+
"is_t0": ColumnSpec(parse_dtype=pl.Boolean),
|
|
408
|
+
# Integer columns
|
|
409
|
+
"category": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
|
|
410
|
+
"is_ST": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64), # 0/1 numeric
|
|
411
|
+
# String columns
|
|
412
|
+
"UNIVERSE": ColumnSpec(parse_dtype=pl.String),
|
|
413
|
+
"INDUSTRY": ColumnSpec(parse_dtype=pl.String),
|
|
414
|
+
"INDEX": ColumnSpec(parse_dtype=pl.String),
|
|
415
|
+
},
|
|
416
|
+
null_values=["", "NA"],
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
|
|
374
420
|
# =============================================================================
|
|
375
421
|
# Schema Registry
|
|
376
422
|
# =============================================================================
|
|
@@ -378,6 +424,7 @@ JYAO_V20251114 = SchemaEvolution(
|
|
|
378
424
|
SCHEMAS: dict[str, SchemaEvolution] = {
|
|
379
425
|
"ylin_v20251204": YLIN_V20251204,
|
|
380
426
|
"jyao_v20251114": JYAO_V20251114,
|
|
427
|
+
"jyao_univ_v20251230": JYAO_UNIV_V20251230,
|
|
381
428
|
}
|
|
382
429
|
|
|
383
430
|
|
vizflow/viz.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Visualization utilities for VizFlow."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def add_tod(
|
|
7
|
+
df: pl.LazyFrame,
|
|
8
|
+
timestamp_col: str = "ticktime",
|
|
9
|
+
) -> pl.LazyFrame:
|
|
10
|
+
"""Add time-of-day column for plotting.
|
|
11
|
+
|
|
12
|
+
Converts HHMMSSMMM integer timestamp to pl.Time for visualization.
|
|
13
|
+
Note: pl.Time type is not supported by Delta Lake - use this only
|
|
14
|
+
for plotting, not for data that will be written to Delta Lake.
|
|
15
|
+
|
|
16
|
+
Args:
|
|
17
|
+
df: Input LazyFrame with HHMMSSMMM timestamp column
|
|
18
|
+
timestamp_col: Column with integer HHMMSSMMM format timestamps
|
|
19
|
+
e.g., 93012145 = 09:30:12.145, 142058425 = 14:20:58.425
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
LazyFrame with tod_{timestamp_col} (pl.Time) column added
|
|
23
|
+
|
|
24
|
+
Example:
|
|
25
|
+
>>> df = vf.add_tod(df, "ticktime")
|
|
26
|
+
>>> # Creates: tod_ticktime (pl.Time)
|
|
27
|
+
"""
|
|
28
|
+
# Parse HHMMSSMMM to nanoseconds since midnight
|
|
29
|
+
tod_ns = (
|
|
30
|
+
(pl.col(timestamp_col) // 10000000) * 3_600_000_000_000
|
|
31
|
+
+ (pl.col(timestamp_col) // 100000 % 100) * 60_000_000_000
|
|
32
|
+
+ (pl.col(timestamp_col) // 1000 % 100) * 1_000_000_000
|
|
33
|
+
+ (pl.col(timestamp_col) % 1000) * 1_000_000
|
|
34
|
+
)
|
|
35
|
+
return df.with_columns(tod_ns.cast(pl.Time).alias(f"tod_{timestamp_col}"))
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
vizflow/__init__.py,sha256=KGksMU9wGsdGSbd2OgSHM2ofMZbTwgaCN92ZDEgASwU,614
|
|
2
|
+
vizflow/config.py,sha256=nPZPXlqQbaY8u_FAdtPShvb0mdx3e2TRaQ2CILzliAU,7192
|
|
3
|
+
vizflow/io.py,sha256=1T7t-L1ijrfEkE-gr4f45yiupJKA4-DxbJhsyN_tLnI,11939
|
|
4
|
+
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
+
vizflow/ops.py,sha256=QO8airzHFfKw8nGsStsecic2Z1dmq1rciSyAfSElh7M,10421
|
|
6
|
+
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
vizflow/schema_evolution.py,sha256=3_qFIQJAgXUK0vQzanb355YVjytmfOACkelZlykIO8w,16349
|
|
8
|
+
vizflow/viz.py,sha256=dzcY72hWMVbxWIyjwfUW3Ot3CunaP7O4GLVUzzOjkbY,1212
|
|
9
|
+
vizflow-0.5.8.dist-info/METADATA,sha256=uapEZDP5ntZ1_a8uzexB_K45cIpK9rdOiqq87fSv_4M,388
|
|
10
|
+
vizflow-0.5.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
11
|
+
vizflow-0.5.8.dist-info/RECORD,,
|
vizflow-0.5.6.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
vizflow/__init__.py,sha256=_jP6bp5U2iBFkPGTSLNwvOQay9XU_opNdrylF22iR9s,589
|
|
2
|
-
vizflow/config.py,sha256=nPZPXlqQbaY8u_FAdtPShvb0mdx3e2TRaQ2CILzliAU,7192
|
|
3
|
-
vizflow/io.py,sha256=5gucUD68zBhfnVOmFRbDyxqVM5ikzPWRTeOY20gXQOM,11612
|
|
4
|
-
vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
|
|
5
|
-
vizflow/ops.py,sha256=oR44HYKrfaXLh0SmbfXefl714UESSIC5lTNJBrR1kto,10858
|
|
6
|
-
vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
|
|
8
|
-
vizflow-0.5.6.dist-info/METADATA,sha256=oadPQYbrOT8G2aPZ-QZ1_VZ0uqRbde62ddsZnnmDKHE,388
|
|
9
|
-
vizflow-0.5.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
-
vizflow-0.5.6.dist-info/RECORD,,
|
|
File without changes
|