vizflow 0.5.6__py3-none-any.whl → 0.5.8__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
vizflow/__init__.py CHANGED
@@ -5,7 +5,7 @@ Usage:
5
5
  import vizflow as vf
6
6
  """
7
7
 
8
- __version__ = "0.5.5"
8
+ __version__ = "0.5.8"
9
9
 
10
10
  from .config import Config, get_config, set_config
11
11
  from .io import (
@@ -19,6 +19,7 @@ from .io import (
19
19
  )
20
20
  from .market import CN, CRYPTO, Market, Session
21
21
  from .ops import aggregate, bin, forward_return, mark_to_close, parse_time, sign_by_side
22
+ from .viz import add_tod
22
23
  from .schema_evolution import (
23
24
  JYAO_V20251114,
24
25
  SCHEMAS,
vizflow/io.py CHANGED
@@ -167,6 +167,8 @@ def scan_trade(date: str, config: Config | None = None) -> pl.LazyFrame:
167
167
  if schema:
168
168
  df = _apply_schema_evolution(df, schema)
169
169
 
170
+ df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
171
+
170
172
  return df
171
173
 
172
174
 
@@ -213,7 +215,7 @@ def scan_trades(config: Config | None = None) -> pl.LazyFrame:
213
215
  df = _apply_schema_evolution(df, schema)
214
216
  date = _extract_date_from_path(f, config.trade_pattern)
215
217
  if date:
216
- df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").alias("data_date"))
218
+ df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
217
219
  dfs.append(df)
218
220
 
219
221
  return pl.concat(dfs)
@@ -246,6 +248,8 @@ def scan_alpha(date: str, config: Config | None = None) -> pl.LazyFrame:
246
248
  if schema:
247
249
  df = _apply_schema_evolution(df, schema)
248
250
 
251
+ df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
252
+
249
253
  return df
250
254
 
251
255
 
@@ -281,7 +285,7 @@ def scan_alphas(config: Config | None = None) -> pl.LazyFrame:
281
285
  df = _apply_schema_evolution(df, schema)
282
286
  # Convert data_date to Date type (may be Int64 from feather or String from CSV)
283
287
  df = df.with_columns(
284
- pl.col("data_date").cast(pl.String).str.to_date("%Y%m%d")
288
+ pl.col("data_date").cast(pl.String).str.to_date("%Y%m%d").cast(pl.Date)
285
289
  )
286
290
  dfs.append(df)
287
291
 
@@ -314,6 +318,8 @@ def scan_univ(date: str, config: Config | None = None) -> pl.LazyFrame:
314
318
  if schema:
315
319
  df = _apply_schema_evolution(df, schema)
316
320
 
321
+ df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
322
+
317
323
  return df
318
324
 
319
325
 
@@ -351,7 +357,7 @@ def scan_univs(config: Config | None = None) -> pl.LazyFrame:
351
357
  df = _apply_schema_evolution(df, schema)
352
358
  date = _extract_date_from_path(f, config.univ_pattern)
353
359
  if date:
354
- df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").alias("data_date"))
360
+ df = df.with_columns(pl.lit(date).str.to_date("%Y%m%d").cast(pl.Date).alias("data_date"))
355
361
  dfs.append(df)
356
362
 
357
363
  return pl.concat(dfs)
vizflow/ops.py CHANGED
@@ -11,19 +11,20 @@ def parse_time(
11
11
  df: pl.LazyFrame,
12
12
  timestamp_col: str = "ticktime",
13
13
  ) -> pl.LazyFrame:
14
- """Parse HHMMSSMMM timestamp to time-of-day and elapsed milliseconds.
14
+ """Parse HHMMSSMMM timestamp to elapsed milliseconds.
15
15
 
16
- Adds two columns:
17
- - tod_{timestamp_col}: pl.Time (time-of-day HH:MM:SS.mmm) - good for plotting
16
+ Adds one column:
18
17
  - elapsed_{timestamp_col}: pl.Int64 (milliseconds since market open)
19
18
 
19
+ For time-of-day column (pl.Time) for plotting, use vf.add_tod() separately.
20
+
20
21
  Args:
21
22
  df: Input LazyFrame
22
23
  timestamp_col: Column with integer HHMMSSMMM format timestamps
23
24
  e.g., 93012145 = 09:30:12.145, 142058425 = 14:20:58.425
24
25
 
25
26
  Returns:
26
- LazyFrame with tod and elapsed columns added
27
+ LazyFrame with elapsed column added
27
28
 
28
29
  Raises:
29
30
  RuntimeError: If config not set via set_config()
@@ -33,7 +34,7 @@ def parse_time(
33
34
  >>> config = vf.Config(market="CN", input_dir=".", output_dir=".")
34
35
  >>> vf.set_config(config)
35
36
  >>> df = vf.parse_time(df, "ticktime")
36
- >>> # Creates: tod_ticktime (pl.Time), elapsed_ticktime (pl.Int64)
37
+ >>> # Creates: elapsed_ticktime (pl.Int64)
37
38
  """
38
39
  config = get_config()
39
40
 
@@ -48,19 +49,8 @@ def parse_time(
48
49
  (pl.col(timestamp_col) % 1000).alias("_ms"),
49
50
  ])
50
51
 
51
- # Add time-of-day column (pl.Time)
52
- # Convert to nanoseconds since midnight
53
- tod_ns = (
54
- pl.col("_hour") * 3_600_000_000_000
55
- + pl.col("_minute") * 60_000_000_000
56
- + pl.col("_second") * 1_000_000_000
57
- + pl.col("_ms") * 1_000_000
58
- )
59
- df = df.with_columns(tod_ns.cast(pl.Time).alias(f"tod_{timestamp_col}"))
60
-
61
52
  # Add elapsed milliseconds (int)
62
53
  # CN market: 09:30-11:30 (morning), 13:00-15:00 (afternoon)
63
- # Using user's hardcoded logic
64
54
  elapsed_ms = (
65
55
  pl.when(pl.col("_hour") < 12)
66
56
  .then(
@@ -353,7 +353,7 @@ JYAO_V20251114 = SchemaEvolution(
353
353
  # Time columns
354
354
  "TimeStamp": ColumnSpec(rename_to="timestamp", parse_dtype=pl.Int64),
355
355
  "GlobalExTime": ColumnSpec(rename_to="global_exchange_ts", parse_dtype=pl.Int64),
356
- "DataDate": ColumnSpec(rename_to="data_date", parse_dtype=pl.String),
356
+ "DataDate": ColumnSpec(rename_to="data_date", parse_dtype=pl.String, cast_dtype=pl.Date),
357
357
  # Volume
358
358
  "Volume": ColumnSpec(
359
359
  rename_to="volume",
@@ -371,6 +371,52 @@ JYAO_V20251114 = SchemaEvolution(
371
371
  )
372
372
 
373
373
 
374
+ # =============================================================================
375
+ # JYAO Univ Format (v2025-12-30)
376
+ # =============================================================================
377
+
378
+ JYAO_UNIV_V20251230 = SchemaEvolution(
379
+ columns={
380
+ # ID
381
+ "ukey": ColumnSpec(parse_dtype=pl.Int64),
382
+ # Price columns (Float64)
383
+ "ydclose": ColumnSpec(parse_dtype=pl.Float64),
384
+ "preclose": ColumnSpec(parse_dtype=pl.Float64),
385
+ "open": ColumnSpec(parse_dtype=pl.Float64),
386
+ "close": ColumnSpec(parse_dtype=pl.Float64),
387
+ "upper_limit_price": ColumnSpec(parse_dtype=pl.Float64),
388
+ "lower_limit_price": ColumnSpec(parse_dtype=pl.Float64),
389
+ "tick_size": ColumnSpec(parse_dtype=pl.Float64),
390
+ # Lot size columns (parse Float64 → cast Int64)
391
+ "trade_min_size": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
392
+ "trade_unit_size": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
393
+ "qty_unit": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
394
+ # Average/aggregated columns (Float64)
395
+ "trade_max_size": ColumnSpec(parse_dtype=pl.Float64),
396
+ "adv": ColumnSpec(parse_dtype=pl.Float64),
397
+ "roll_spread": ColumnSpec(parse_dtype=pl.Float64),
398
+ "buy_avg_volume": ColumnSpec(parse_dtype=pl.Float64),
399
+ "sell_avg_volume": ColumnSpec(parse_dtype=pl.Float64),
400
+ "avg_touch_size_mean": ColumnSpec(parse_dtype=pl.Float64),
401
+ "avg_touch_order_size": ColumnSpec(parse_dtype=pl.Float64),
402
+ # Risk columns (Float64)
403
+ "TotalRisk": ColumnSpec(parse_dtype=pl.Float64),
404
+ "SpecRisk": ColumnSpec(parse_dtype=pl.Float64),
405
+ # Boolean columns (TRUE/FALSE strings)
406
+ "is_price_limited": ColumnSpec(parse_dtype=pl.Boolean),
407
+ "is_t0": ColumnSpec(parse_dtype=pl.Boolean),
408
+ # Integer columns
409
+ "category": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64),
410
+ "is_ST": ColumnSpec(parse_dtype=pl.Float64, cast_dtype=pl.Int64), # 0/1 numeric
411
+ # String columns
412
+ "UNIVERSE": ColumnSpec(parse_dtype=pl.String),
413
+ "INDUSTRY": ColumnSpec(parse_dtype=pl.String),
414
+ "INDEX": ColumnSpec(parse_dtype=pl.String),
415
+ },
416
+ null_values=["", "NA"],
417
+ )
418
+
419
+
374
420
  # =============================================================================
375
421
  # Schema Registry
376
422
  # =============================================================================
@@ -378,6 +424,7 @@ JYAO_V20251114 = SchemaEvolution(
378
424
  SCHEMAS: dict[str, SchemaEvolution] = {
379
425
  "ylin_v20251204": YLIN_V20251204,
380
426
  "jyao_v20251114": JYAO_V20251114,
427
+ "jyao_univ_v20251230": JYAO_UNIV_V20251230,
381
428
  }
382
429
 
383
430
 
vizflow/viz.py ADDED
@@ -0,0 +1,35 @@
1
+ """Visualization utilities for VizFlow."""
2
+
3
+ import polars as pl
4
+
5
+
6
+ def add_tod(
7
+ df: pl.LazyFrame,
8
+ timestamp_col: str = "ticktime",
9
+ ) -> pl.LazyFrame:
10
+ """Add time-of-day column for plotting.
11
+
12
+ Converts HHMMSSMMM integer timestamp to pl.Time for visualization.
13
+ Note: pl.Time type is not supported by Delta Lake - use this only
14
+ for plotting, not for data that will be written to Delta Lake.
15
+
16
+ Args:
17
+ df: Input LazyFrame with HHMMSSMMM timestamp column
18
+ timestamp_col: Column with integer HHMMSSMMM format timestamps
19
+ e.g., 93012145 = 09:30:12.145, 142058425 = 14:20:58.425
20
+
21
+ Returns:
22
+ LazyFrame with tod_{timestamp_col} (pl.Time) column added
23
+
24
+ Example:
25
+ >>> df = vf.add_tod(df, "ticktime")
26
+ >>> # Creates: tod_ticktime (pl.Time)
27
+ """
28
+ # Parse HHMMSSMMM to nanoseconds since midnight
29
+ tod_ns = (
30
+ (pl.col(timestamp_col) // 10000000) * 3_600_000_000_000
31
+ + (pl.col(timestamp_col) // 100000 % 100) * 60_000_000_000
32
+ + (pl.col(timestamp_col) // 1000 % 100) * 1_000_000_000
33
+ + (pl.col(timestamp_col) % 1000) * 1_000_000
34
+ )
35
+ return df.with_columns(tod_ns.cast(pl.Time).alias(f"tod_{timestamp_col}"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vizflow
3
- Version: 0.5.6
3
+ Version: 0.5.8
4
4
  Requires-Python: >=3.9
5
5
  Requires-Dist: polars>=0.20.0
6
6
  Provides-Extra: dev
@@ -0,0 +1,11 @@
1
+ vizflow/__init__.py,sha256=KGksMU9wGsdGSbd2OgSHM2ofMZbTwgaCN92ZDEgASwU,614
2
+ vizflow/config.py,sha256=nPZPXlqQbaY8u_FAdtPShvb0mdx3e2TRaQ2CILzliAU,7192
3
+ vizflow/io.py,sha256=1T7t-L1ijrfEkE-gr4f45yiupJKA4-DxbJhsyN_tLnI,11939
4
+ vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
5
+ vizflow/ops.py,sha256=QO8airzHFfKw8nGsStsecic2Z1dmq1rciSyAfSElh7M,10421
6
+ vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ vizflow/schema_evolution.py,sha256=3_qFIQJAgXUK0vQzanb355YVjytmfOACkelZlykIO8w,16349
8
+ vizflow/viz.py,sha256=dzcY72hWMVbxWIyjwfUW3Ot3CunaP7O4GLVUzzOjkbY,1212
9
+ vizflow-0.5.8.dist-info/METADATA,sha256=uapEZDP5ntZ1_a8uzexB_K45cIpK9rdOiqq87fSv_4M,388
10
+ vizflow-0.5.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ vizflow-0.5.8.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- vizflow/__init__.py,sha256=_jP6bp5U2iBFkPGTSLNwvOQay9XU_opNdrylF22iR9s,589
2
- vizflow/config.py,sha256=nPZPXlqQbaY8u_FAdtPShvb0mdx3e2TRaQ2CILzliAU,7192
3
- vizflow/io.py,sha256=5gucUD68zBhfnVOmFRbDyxqVM5ikzPWRTeOY20gXQOM,11612
4
- vizflow/market.py,sha256=MtNz_nnZxC66Aq-i2PXEwaFCTknijFWYZUUv6798k2s,2493
5
- vizflow/ops.py,sha256=oR44HYKrfaXLh0SmbfXefl714UESSIC5lTNJBrR1kto,10858
6
- vizflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- vizflow/schema_evolution.py,sha256=puwuuJ8LAzelHq1JTwQD968J7GYLvgAsCaHJnDHzu4U,14025
8
- vizflow-0.5.6.dist-info/METADATA,sha256=oadPQYbrOT8G2aPZ-QZ1_VZ0uqRbde62ddsZnnmDKHE,388
9
- vizflow-0.5.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- vizflow-0.5.6.dist-info/RECORD,,