ygo 1.0.5__py3-none-any.whl → 1.0.6b0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ygo might be problematic. Click here for more details.

qdf/errors.py CHANGED
@@ -14,18 +14,52 @@ class ParseError(Exception):
14
14
  message: str
15
15
 
16
16
  def __str__(self):
17
- return f"ParseError(message={self.message})"
17
+ return self.message
18
+
19
+ def __repr__(self):
20
+ return self.__str__()
18
21
 
19
22
  @dataclass
20
23
  class CalculateError(Exception):
21
24
  message: str
22
25
 
23
26
  def __str__(self):
24
- return f"CalculateError(message={self.message})"
27
+ return self.message
28
+
29
+ def __repr__(self):
30
+ return self.__str__()
31
+
32
+ @dataclass
33
+ class CompileError(Exception):
34
+ message: str
35
+
36
+ def __str__(self):
37
+ return self.message
38
+
39
+ def __repr__(self):
40
+ return self.__str__()
25
41
 
26
42
  @dataclass
27
43
  class PolarsError(Exception):
28
44
  message: str
29
45
 
30
46
  def __str__(self):
31
- return f"PolarsError(message={self.message})"
47
+ return self.message
48
+
49
+ def __repr__(self):
50
+ return self.__str__()
51
+
52
+ @dataclass
53
+ class FailError:
54
+ expr: str
55
+ error: Exception
56
+
57
+ def __str__(self):
58
+ return f"""
59
+ [失败表达式]: {self.expr}
60
+ [错误类型]: {self.error.__class__.__name__}
61
+ [错误信息]: \n{self.error}
62
+ """
63
+
64
+ def __repr__(self):
65
+ return self.__str__()
qdf/expr.py CHANGED
@@ -41,40 +41,40 @@ with warnings.catch_warnings():
41
41
  start: expr
42
42
  ?expr: ternary_expr
43
43
  ?ternary_expr: or_expr
44
- | or_expr "?" or_expr ":" ternary_expr -> ternary
44
+ | or_expr "?" or_expr ":" ternary_expr -> ternary
45
45
  ?or_expr: and_expr
46
- | or_expr "|" and_expr -> or_
46
+ | or_expr "|" and_expr -> or_
47
47
  ?and_expr: comp_expr
48
- | and_expr "&" comp_expr -> and_
48
+ | and_expr "&" comp_expr -> and_
49
49
  ?comp_expr: eq_expr
50
- | comp_expr "<" eq_expr -> lt
51
- | comp_expr ">" eq_expr -> gt
52
- | comp_expr "<=" eq_expr -> le
53
- | comp_expr ">=" eq_expr -> ge
50
+ | comp_expr "<" eq_expr -> lt
51
+ | comp_expr ">" eq_expr -> gt
52
+ | comp_expr "<=" eq_expr -> le
53
+ | comp_expr ">=" eq_expr -> ge
54
54
  ?eq_expr: arith_expr
55
- | eq_expr "==" arith_expr -> eq
56
- | eq_expr "!=" arith_expr -> neq
55
+ | eq_expr "==" arith_expr -> eq
56
+ | eq_expr "!=" arith_expr -> neq
57
57
  ?arith_expr: term
58
- | arith_expr "+" term -> add
59
- | arith_expr "-" term -> sub
58
+ | arith_expr "+" term -> add
59
+ | arith_expr "-" term -> sub
60
60
  ?term: pow_expr
61
- | term "*" pow_expr -> mul
62
- | term "/" pow_expr -> div
63
- | term "//" pow_expr -> floordiv // 取整
64
- | term "%" pow_expr -> mod // 求余
61
+ | term "*" pow_expr -> mul
62
+ | term "/" pow_expr -> div
63
+ | term "//" pow_expr -> floordiv // 取整
64
+ | term "%" pow_expr -> mod // 求余
65
65
  ?pow_expr: factor
66
- | factor "**" pow_expr -> pow
66
+ | factor "**" pow_expr -> pow
67
67
  ?factor: atom
68
- | "-" factor -> neg
69
- | "!" factor -> not_
70
- | "~" factor -> not_
68
+ | "-" factor -> neg
69
+ | "!" factor -> not_
70
+ | "~" factor -> not_
71
71
  ?atom: function
72
- | NAME
73
- | NUMBER
74
- | FLOAT
75
- | "(" expr ")"
76
- | implicit_mul // 隐式乘法
77
- | attribute_access // 新增:属性访问
72
+ | NAME
73
+ | NUMBER
74
+ | FLOAT
75
+ | "(" expr ")"
76
+ | implicit_mul // 隐式乘法
77
+ | attribute_access // 新增:属性访问
78
78
  implicit_mul: (NUMBER | FLOAT) NAME -> implicit_mul // 隐式乘法
79
79
  attribute_access: atom "." NAME -> attribute_access // 新增:属性访问
80
80
  function: NAME "(" expr_list ")" -> function
@@ -83,7 +83,7 @@ with warnings.catch_warnings():
83
83
  expr_list: (expr | keyword_arg) ("," (expr | keyword_arg))* // 支持关键字参数
84
84
  NAME: /[a-zA-Z_$,][a-zA-Z0-9_$]*/
85
85
  NUMBER: /\d+/ // regex for numbers
86
- FLOAT: /\d+\.\d+/
86
+ FLOAT: /\d+\.\d+([eE][+-]?\d+)?/ | /\d+[eE][+-]?\d+/ // 支持科学计数法
87
87
  %import common.WS
88
88
  %ignore WS
89
89
  """
@@ -181,7 +181,6 @@ parser = Lark(grammar, parser='lalr', transformer=ExprParser())
181
181
  def parse_expr(expression: str) -> Expr:
182
182
  return parser.parse(expression).children[0]
183
183
 
184
-
185
184
  class Expr:
186
185
 
187
186
  def __init__(self, expr: str | None = None):
@@ -203,6 +202,13 @@ class Expr:
203
202
  expr.alias = alias if alias is not None else str(expr)
204
203
  return expr
205
204
 
205
+ def __hash__(self):
206
+ return hash(str(self).strip())
207
+
208
+ def __eq__(self, other):
209
+ return str(self).strip() == str(other).strip()
210
+
211
+
206
212
  def to_rpn(self) -> list[Token]:
207
213
  """生成逆波兰表达式: (后缀表达式: 运算符在后)"""
208
214
  rpn = list()
@@ -285,6 +291,7 @@ class Expr:
285
291
  else:
286
292
  self.fn_name, self.args = expr_.fn_name, expr_.args
287
293
 
294
+
288
295
  @property
289
296
  def n_args(self) -> int:
290
297
  """返回表达式的参数个数"""
qdf/qdf.py CHANGED
@@ -10,18 +10,16 @@ from __future__ import annotations
10
10
 
11
11
  import importlib.util
12
12
  import sys
13
- from dataclasses import dataclass
13
+ from functools import lru_cache
14
14
  from pathlib import Path
15
15
 
16
16
  import polars as pl
17
17
  from toolz import partial
18
- from functools import lru_cache
19
18
 
20
19
  import ygo
21
20
  import ylog
22
- from .errors import CalculateError, PolarsError
21
+ from .errors import CalculateError, CompileError, PolarsError, FailError
23
22
  from .expr import Expr
24
- import time
25
23
 
26
24
  # 动态加载模块
27
25
  module_name = "udf"
@@ -32,43 +30,32 @@ sys.modules[module_name] = module
32
30
  spec.loader.exec_module(module)
33
31
 
34
32
 
35
- @dataclass
36
- class FailInfo:
37
- expr: str
38
- error: Exception
39
-
40
- def __str__(self):
41
- return f"""
42
- expr={self.expr}
43
- =================================================
44
- {self.error}
45
- =================================================
46
- """
47
-
48
- def __repr__(self):
49
- return self.__str__()
50
-
51
33
  @lru_cache(maxsize=512)
52
34
  def parse_expr(expr: str) -> Expr:
53
35
  return Expr(expr)
54
36
 
37
+
55
38
  class QDF:
56
39
 
57
40
  def __init__(self,
58
- data: pl.LazyFrame,
41
+ data: pl.LazyFrame | pl.DataFrame,
59
42
  index: tuple[str] = ("date", "time", "asset"),
60
- align: bool = True,):
61
- self.data = data.with_columns(pl.col(pl.Decimal).cast(pl.Float32))
62
- self.dims = [self.data.select(index_).drop_nulls().unique().count().collect().item() for index_ in index]
43
+ align: bool = True, ):
44
+ assert isinstance(data, (pl.LazyFrame, pl.DataFrame)), "data must be a polars DataFrame or LazyFrame"
45
+ self.data = data.with_columns(pl.col(pl.Decimal).cast(pl.Float32).round(5))
46
+ if isinstance(self.data, pl.LazyFrame):
47
+ self.data = self.data.collect()
48
+ self.index = index
49
+ self.dims = [self.data[name].drop_nulls().n_unique() for name in index]
63
50
  if align:
64
51
  lev_vals: list[pl.DataFrame] = [self.data.select(name).drop_nulls().unique() for name in index]
65
52
  full_index = lev_vals[0]
66
53
  for lev_val in lev_vals[1:]:
67
54
  full_index = full_index.join(lev_val, how="cross")
68
- self.data = full_index.join(self.data, on=index, how='left') #.sort(index).collect().lazy()
69
-
70
- self.index = index
55
+ self.data = full_index.join(self.data, on=index, how='left').sort(index)
71
56
  self.failed = list()
57
+ self._expr_cache = dict() # type: dict[Expr, str]
58
+ self._cur_expr_cache = dict()
72
59
 
73
60
  def __str__(self):
74
61
  return self.data.__str__()
@@ -81,53 +68,63 @@ class QDF:
81
68
  setattr(module, name, func)
82
69
 
83
70
  def _compile_expr(self, expr: str, cover: bool):
84
- expr_parsed = Expr(expr)
85
- alias = expr_parsed.alias # if expr_parsed.alias is not None else str(expr_parsed)
86
- current_cols = set(self.data.collect_schema().keys())
87
- columns = self.data.collect_schema().names()
88
- if alias in current_cols and not cover:
89
- return alias
90
-
91
- def calc(expr_: Expr):
92
- alias_ = expr_.alias
93
- # _cols = self.data.collect_schema().names()
94
- if alias_ in current_cols and not cover:
95
- # 已存在:直接select数据源
96
- return alias_
97
- func = getattr(module, expr_.fn_name)
98
- _params = ygo.fn_signature_params(func)
99
- if "dims" in _params:
100
- func = partial(func, dims=self.dims)
101
- args = list()
102
- kwargs = dict()
103
- for arg in expr_.args:
104
- if isinstance(arg, Expr):
105
- args.append(pl.col(calc(arg)))
106
- elif isinstance(arg, dict):
107
- kwargs.update(arg)
108
- elif isinstance(arg, str):
109
- args.append(pl.col(arg))
110
- else:
111
- args.append(arg) # or args.append(pl.lit(arg))
112
- try:
113
- expr_pl: pl.Expr = func(*args, **kwargs).alias(alias_)
114
- except Exception as e:
115
- raise CalculateError(f"{expr_.fn_name}({', '.join([str(arg) for arg in args])})\n{e}")
116
- try:
117
- self.data = self.data.with_columns(expr_pl)
118
- except Exception as e:
119
- raise PolarsError(f"{expr_}\n{e}")
120
- return alias_
121
-
122
- calc(expr_parsed)
123
-
124
- columns.append(alias)
125
- drop = current_cols.difference(set(columns))
126
- self.data = self.data.drop(*drop)
127
-
128
- return alias
129
-
130
- def sql(self, *exprs: str, cover: bool = False,) -> pl.LazyFrame:
71
+ """str表达式 -> polars 表达式"""
72
+ try:
73
+ expr_parsed = Expr(expr)
74
+ alias = expr_parsed.alias # if expr_parsed.alias is not None else str(expr_parsed)
75
+ current_cols = set(self.data.columns)
76
+ # columns = self.data.columns
77
+ if alias in current_cols and not cover:
78
+ return pl.col(alias), alias
79
+ # 如果该表达式已有对应列,直接复用
80
+ if expr_parsed in self._expr_cache and not cover:
81
+ expr_pl: pl.Expr = pl.col(self._expr_cache[expr_parsed]).alias(alias)
82
+ return expr_pl, alias
83
+ elif expr_parsed in self._cur_expr_cache and not cover:
84
+ expr_pl: pl.Expr = pl.col(self._cur_expr_cache[expr_parsed]).alias(alias)
85
+ return expr_pl, alias
86
+
87
+ def recur_compile(expr_: Expr):
88
+ """递归编译"""
89
+ alias_ = expr_.alias
90
+ if alias_ in current_cols and not cover:
91
+ # 已存在:直接select数据源
92
+ return pl.col(alias_)
93
+ if expr_ in self._expr_cache:
94
+ return pl.col(self._expr_cache[expr_]).alias(alias_)
95
+ elif expr_ in self._cur_expr_cache:
96
+ return pl.col(self._cur_expr_cache[expr_]).alias(alias_)
97
+ func = getattr(module, expr_.fn_name)
98
+ _params = ygo.fn_signature_params(func)
99
+ if "dims" in _params:
100
+ func = partial(func, dims=self.dims)
101
+ args = list()
102
+ kwargs = dict()
103
+ for arg in expr_.args:
104
+ if isinstance(arg, Expr):
105
+ args.append(recur_compile(arg))
106
+ elif isinstance(arg, dict):
107
+ kwargs.update(arg)
108
+ elif isinstance(arg, str):
109
+ args.append(pl.col(arg))
110
+ else:
111
+ args.append(arg) # or args.append(pl.lit(arg))
112
+ try:
113
+ expr_pl: pl.Expr = func(*args, **kwargs).alias(alias_)
114
+ self._cur_expr_cache[expr_] = alias_
115
+ return expr_pl
116
+ except Exception as e:
117
+ raise CompileError(message=f"{expr_.fn_name}({', '.join([str(arg) for arg in args])})\n{e}") from e
118
+
119
+ return recur_compile(expr_parsed), alias
120
+ except (CalculateError, CompileError, PolarsError) as e:
121
+ # 已经是你自己的错误类
122
+ raise e
123
+ except Exception as e:
124
+ # 所有未处理的错误统一抛出为 CompileError
125
+ raise CompileError(message=f"[编译器外层]\n{e}") from e
126
+
127
+ def sql(self, *exprs: str, cover: bool = False, ) -> pl.LazyFrame:
131
128
  """
132
129
  表达式查询
133
130
  Parameters
@@ -144,15 +141,26 @@ class QDF:
144
141
  """
145
142
  self.failed = list()
146
143
  exprs_to_add = list()
144
+ exprs_select = list()
145
+ self._cur_expr_cache = {}
146
+ data = self.data.lazy()
147
+
147
148
  for expr in exprs:
148
149
  try:
149
- compiled = self._compile_expr(expr, cover)
150
+ compiled, alias = self._compile_expr(expr, cover)
150
151
  if compiled is not None:
151
152
  exprs_to_add.append(compiled)
153
+ exprs_select.append(alias)
152
154
  except Exception as e:
153
- self.failed.append(FailInfo(expr, e))
155
+ self.failed.append(FailError(expr, e))
154
156
  if self.failed:
155
157
  ylog.warning(f"QDF.sql 失败:{len(self.failed)}/{len(exprs)}: \n {self.failed}")
156
- final_df = self.data.with_columns(exprs_to_add).select(*self.index, *exprs_to_add).fill_nan(None).drop_nulls().sort(self.index)
157
- return final_df.collect()
158
-
158
+ for expr in exprs_to_add:
159
+ data = data.with_columns(expr).fill_nan(None)
160
+ try:
161
+ self.data = data.collect()
162
+ final_df = self.data.select(*self.index, *exprs_to_add)
163
+ self._expr_cache.update(self._cur_expr_cache)
164
+ return final_df
165
+ except Exception as e:
166
+ raise PolarsError(message=f"LazyFrame.collect() 阶段出错\n{e}") from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygo
3
- Version: 1.0.5
3
+ Version: 1.0.6b0
4
4
  Project-URL: homepage, https://github.com/link-yundi/ygo
5
5
  Project-URL: repository, https://github.com/link-yundi/ygo
6
6
  Requires-Python: >=3.8
@@ -8,6 +8,7 @@ Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: clickhouse-driver>=0.2.9
10
10
  Requires-Dist: dynaconf>=3.2.11
11
+ Requires-Dist: exchange-calendars>=4.2.8
11
12
  Requires-Dist: joblib>=1.4.2
12
13
  Requires-Dist: lark>=1.2.2
13
14
  Requires-Dist: loguru>=0.7.3
@@ -1,7 +1,7 @@
1
1
  qdf/__init__.py,sha256=XwH17ae6kX5Grhb_odgDqkNf6N-ambTVi3LoGzq1Fmc,7519
2
- qdf/errors.py,sha256=vSzX6S_72O1CA5xwzyC2hrVdPCqp6gfqV09Lh-TZ-ns,651
3
- qdf/expr.py,sha256=qRksAdc9TFUy-_li3DBDuDqsy8iWYTqDdCW3TmtPsZ0,8268
4
- qdf/qdf.py,sha256=zTFBLanMPTXfgW_lno8UI5Sg5TXUkw4Bnx6d3dnO2fw,5340
2
+ qdf/errors.py,sha256=lJhhjDRdQOOKUFGlLQ9ELK4AexXBwYQSYus_V-kc5K8,1180
3
+ qdf/expr.py,sha256=ck_BHMCV29Q8-szci1_v4ud964QI7JoRRcmA0ppupsc,8454
4
+ qdf/qdf.py,sha256=mMNr6fn6oak1R2_LBGKcWvPdbmM6avxFHXI9lJmzXNU,6641
5
5
  qdf/udf/__init__.py,sha256=DdrSGaCB__5C1YL0vd_5rjIB3KLrAKn3h3k9k50L0jA,313
6
6
  qdf/udf/base_udf.py,sha256=6VDaCIGNLJxZ7UsoIDWtTH6PzUDj89b8FiwN-TEat2g,3437
7
7
  qdf/udf/cs_udf.py,sha256=HT3EKBwAhOxOFDQnpfwb4YcMTT3-lqFXkdysdn5_FI4,3179
@@ -16,10 +16,10 @@ ycat/yck.py,sha256=FlGMBuKEngB4TwFXMp4P3dLg9IfFmUg3eDqXzQ0kQoI,2738
16
16
  ygo/__init__.py,sha256=FMN06Tfa8_oV26eklBZCtGTyHZ6MghHxHj4PS_FSXCA,222
17
17
  ygo/exceptions.py,sha256=4Kd92kpwpsXHJJkSv4OqcN--PEEvIGGvDDgOOsk68gg,385
18
18
  ygo/ygo.py,sha256=vCMUur_41yY0QB4gj8K5wBZHql_cbmANhI8QwPRCTmo,11613
19
- ygo-1.0.5.dist-info/licenses/LICENSE,sha256=6AKUWQ1xe-jwPSFv_H6FMQLNNWb7AYqzuEUTwlP2S8M,1067
19
+ ygo-1.0.6b0.dist-info/licenses/LICENSE,sha256=6AKUWQ1xe-jwPSFv_H6FMQLNNWb7AYqzuEUTwlP2S8M,1067
20
20
  ylog/__init__.py,sha256=2sIp4PHNoQMCi0QtIarTI4raACd7SdRHNY7fY5hKYwc,397
21
- ylog/core.py,sha256=To9kTRVUy5tpg299h0psDov3MFsBUhOHplqT2S_B4w4,7920
22
- ygo-1.0.5.dist-info/METADATA,sha256=_PzF9Of2_ECmmB3f9G09anaRVXgUlpGcuiH4mM6R6H4,2035
23
- ygo-1.0.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
24
- ygo-1.0.5.dist-info/top_level.txt,sha256=FGbsOtsHgqWzZ9mGRSTCg0pLZEErR1lq5TFQSy2TL1w,18
25
- ygo-1.0.5.dist-info/RECORD,,
21
+ ylog/core.py,sha256=jmz9JhklbVCQz-zahEXV6P-LEHnqU6opnY4CUEyo8Ss,7924
22
+ ygo-1.0.6b0.dist-info/METADATA,sha256=1aSVsl0EZMP7YzHEW1omgNvKY6TE3QbW_rOhnNqBIFU,2078
23
+ ygo-1.0.6b0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
24
+ ygo-1.0.6b0.dist-info/top_level.txt,sha256=FGbsOtsHgqWzZ9mGRSTCg0pLZEErR1lq5TFQSy2TL1w,18
25
+ ygo-1.0.6b0.dist-info/RECORD,,
ylog/core.py CHANGED
@@ -94,7 +94,7 @@ class _Logger:
94
94
  colorize=True,
95
95
  backtrace=self.debug_mode,
96
96
  diagnose=self.debug_mode,
97
- enqueue=True # 异步写入
97
+ # enqueue=True # 异步写入
98
98
  )
99
99
 
100
100
  def _setup_file_logging(self, retention_days: int, error_retention_days: int):
@@ -128,7 +128,7 @@ class _Logger:
128
128
  compression="zip",
129
129
  backtrace=True,
130
130
  diagnose=self.debug_mode,
131
- enqueue=True, # 异步写入
131
+ # enqueue=True, # 异步写入
132
132
  filter=lambda record, lvl=level: record["level"].name == lvl,
133
133
  catch=True # 捕获格式化异常
134
134
  )
File without changes