ygo 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ygo might be problematic. Click here for more details.

qdf/expr.py ADDED
@@ -0,0 +1,301 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ ---------------------------------------------
4
+ Created on 2025/3/3 19:52
5
+ @author: ZhangYundi
6
+ @email: yundi.xxii@outlook.com
7
+ ---------------------------------------------
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ import warnings
14
+ from dataclasses import dataclass
15
+
16
+ from lark import Lark, Transformer, v_args
17
+
18
+ from .errors import ParseError
19
+
20
+
21
+ # 基类
22
class Token:
    """Common base type for the tokens produced by ``Expr.to_rpn``."""


@dataclass
class OperatorToken(Token):
    """Token carrying an operator / function name."""
    value: str


@dataclass
class OperandToken(Token):
    """Token carrying an operand, i.e. a non-expression argument (string or number)."""
    value: str | float | int
36
+
37
+
38
# Lark grammar of the expression mini-language, ordered from the loosest binding
# rule (ternary) down to atoms.
#
# A raw string is used so that the regex escapes (``\d``, ``\.``) are passed to
# Lark verbatim without Python emitting an "invalid escape sequence" warning.
# That warning is raised when the module is compiled, so wrapping the assignment
# in ``warnings.catch_warnings()`` could not silence it; the string value itself
# is unchanged by switching to a raw literal.
#
# NOTE(review): the first-character class of NAME contains ``,`` — this looks
# unintended (``Expr._parse`` pads commas with a space before parsing); confirm
# before changing the terminal.
grammar = r"""
    start: expr
    ?expr: ternary_expr
    ?ternary_expr: or_expr
        | or_expr "?" or_expr ":" ternary_expr -> ternary
    ?or_expr: and_expr
        | or_expr "|" and_expr -> or_
    ?and_expr: comp_expr
        | and_expr "&" comp_expr -> and_
    ?comp_expr: eq_expr
        | comp_expr "<" eq_expr -> lt
        | comp_expr ">" eq_expr -> gt
        | comp_expr "<=" eq_expr -> le
        | comp_expr ">=" eq_expr -> ge
    ?eq_expr: arith_expr
        | eq_expr "==" arith_expr -> eq
        | eq_expr "!=" arith_expr -> neq
    ?arith_expr: term
        | arith_expr "+" term -> add
        | arith_expr "-" term -> sub
    ?term: pow_expr
        | term "*" pow_expr -> mul
        | term "/" pow_expr -> div
        | term "//" pow_expr -> floordiv // 取整
        | term "%" pow_expr -> mod // 求余
    ?pow_expr: factor
        | factor "**" pow_expr -> pow
    ?factor: atom
        | "-" factor -> neg
        | "!" factor -> not_
        | "~" factor -> not_
    ?atom: function
        | NAME
        | NUMBER
        | FLOAT
        | "(" expr ")"
        | implicit_mul // 隐式乘法
        | attribute_access // 新增:属性访问
    implicit_mul: (NUMBER | FLOAT) NAME -> implicit_mul // 隐式乘法
    attribute_access: atom "." NAME -> attribute_access // 新增:属性访问
    function: NAME "(" expr_list ")" -> function
    // expr_list: expr ("," expr)*
    keyword_arg: NAME "=" expr -> keyword_arg // 关键字参数
    expr_list: (expr | keyword_arg) ("," (expr | keyword_arg))* // 支持关键字参数
    NAME: /[a-zA-Z_$,][a-zA-Z0-9_$]*/
    NUMBER: /\d+/ // regex for numbers
    FLOAT: /\d+\.\d+/
    %import common.WS
    %ignore WS
"""
90
+
91
+
92
def _make_node_callback(fn_name):
    """Build a transformer callback that wraps a rule's children in ``Expr.new(fn_name, children)``."""

    def _callback(self, children):
        return Expr.new(fn_name, children)

    _callback.__name__ = fn_name
    return _callback


class ExprParser(Transformer):
    """Lark transformer turning grammar rules into ``Expr`` nodes and terminals into plain values."""

    # ---- terminals -------------------------------------------------------
    def NAME(self, name):
        return str(name)

    def NUMBER(self, number):
        return int(number)

    def FLOAT(self, number):
        return float(number)

    # ---- rules that simply become Expr.new(<name>, children) ---------------
    add = _make_node_callback("add")
    sub = _make_node_callback("sub")
    mul = _make_node_callback("mul")
    div = _make_node_callback("div")
    floordiv = _make_node_callback("floordiv")
    mod = _make_node_callback("mod")
    pow = _make_node_callback("pow")
    not_ = _make_node_callback("not_")
    and_ = _make_node_callback("and_")
    or_ = _make_node_callback("or_")
    eq = _make_node_callback("eq")
    neq = _make_node_callback("neq")
    lt = _make_node_callback("lt")
    gt = _make_node_callback("gt")
    le = _make_node_callback("le")
    ge = _make_node_callback("ge")
    # "<number> <name>" is treated as a multiplication of the two children.
    implicit_mul = _make_node_callback("mul")

    # ---- rules with dedicated handling -------------------------------------
    @v_args(inline=True)
    def ternary(self, a, b, c):
        """``a ? b : c`` becomes an ``if_`` node with the three operands."""
        return Expr.new("if_", [a, b, c])

    def neg(self, items):
        """Fold the sign into numeric literals; otherwise build a ``neg`` node."""
        operand = items[0]
        if not isinstance(operand, (int, float)):
            return Expr.new("neg", items)
        return -operand

    def attribute_access(self, items):
        """Join the children with dots into a single string."""
        return ".".join(items)

    def keyword_arg(self, item):
        """``name=value`` becomes a one-entry dict."""
        key, value = item
        return {key: value}

    def function(self, items):
        """A call becomes an ``Expr`` named after the function, with the argument list as args."""
        fn_name, arg_list = items[0], items[1]
        return Expr.new(fn_name, arg_list)

    def expr_list(self, items):
        """The argument list is passed through unchanged."""
        return items
176
+
177
+
178
# Module-level LALR parser built from ``grammar``; an ``ExprParser`` instance is
# handed to Lark as the transformer.
parser = Lark(grammar, parser="lalr", transformer=ExprParser())


def parse_expr(expression: str) -> Expr:
    """Parse ``expression`` and return the first child of the parser's result."""
    parsed = parser.parse(expression)
    first_child = parsed.children[0]
    return first_child
183
+
184
+
185
class Expr:
    """A parsed expression node: a function/operator name plus its arguments.

    Attributes
    ----------
    fn_name:
        Name of the operator or function; empty/None for a leaf that only
        wraps a single value in ``args``.
    args:
        Child arguments: nested ``Expr`` objects or plain values.
    alias:
        Name given via ``... AS alias``, otherwise the string form of the
        expression.
    """

    # Operators rendered as a prefix symbol.
    _UNARY_SYMBOLS = {"neg": "-", "not_": "!"}
    # Operators rendered infix.  "ge"/"le" are the names the parser emits;
    # "gte"/"lte" are kept so nodes built by hand with those names still render.
    _BINARY_SYMBOLS = {
        "add": "+",
        "mul": "*",
        "div": "/",
        "sub": "-",
        "floordiv": "//",
        "mod": "%",
        "pow": "**",
        "and_": "&",
        "or_": "|",
        "gt": ">",
        "ge": ">=",
        "gte": ">=",
        "lt": "<",
        "le": "<=",
        "lte": "<=",
        "eq": "==",
        "neq": "!=",
    }

    def __init__(self, expr: str | None = None):
        """Create an empty node, or parse ``expr`` when a non-empty string is given.

        Raises
        ------
        ParseError
            If parsing fails; the original exception is chained as the cause.
        """
        self.fn_name: str | None = ""
        self.args: list | None = None
        self.alias: str | None = None
        if expr:
            try:
                self._parse(expr)
            except Exception as e:
                raise ParseError(f"{expr}\n{e}") from e

    @classmethod
    def new(cls, fn_name: str | None, args: list | None, alias: str | None = None):
        """Build a node directly from its parts; ``alias`` defaults to ``str(node)``."""
        expr = cls()
        expr.fn_name = fn_name
        expr.args = args
        expr.alias = alias if alias is not None else str(expr)
        return expr

    def to_rpn(self) -> list[Token]:
        """Return the expression in reverse Polish (postfix) order: operands first, operator last."""
        rpn = list()

        # Depth-first walk: emit every argument, then the node's own operator.
        def _traverse(node: Expr):
            if node.args is not None:
                for child in node.args:
                    if isinstance(child, Expr):
                        _traverse(child)
                    else:
                        rpn.append(OperandToken(child))
            rpn.append(OperatorToken(node.fn_name))

        _traverse(self)

        return rpn

    def __str__(self):
        args = self.args or []
        if not self.fn_name:
            # Leaf node (no operator): render the wrapped value itself.
            return str(args[0]) if args else ""
        if self.fn_name == "if_":
            cond, body, orelse = args
            return f"{cond}?{body}:{orelse}"
        if self.fn_name in self._UNARY_SYMBOLS:
            return f"{self._UNARY_SYMBOLS[self.fn_name]}{args[0]}"
        if self.fn_name in self._BINARY_SYMBOLS:
            symbol = self._BINARY_SYMBOLS[self.fn_name]
            return f"({symbol.join(str(arg) for arg in args)})"
        return f"{self.fn_name}({', '.join(str(arg) for arg in args)})"

    def __repr__(self):
        return self.__str__()

    def _parse(self, expr):
        """Normalise the raw text, split off an optional ``AS alias`` and parse the rest."""
        # Reserved words used as function names map to their underscore-suffixed
        # counterparts.  The word boundary keeps names that merely end in one
        # of these words (e.g. ``floor(``) untouched.
        expr = re.sub(r"\b(if|not|and|or)\(", r"\1_(", expr)
        expr = expr.replace("$", "").replace("\n", "")
        # "!" is an alias of "~", but the "!=" operator must be left intact.
        expr = re.sub(r"!(?!=)", "~", expr)
        expr = expr.replace(",", ", ")

        new_expr = expr
        match = re.search(r'(?i)(.+?)\s+AS\s+(\w+)', new_expr)
        alias = None
        if match:
            new_expr = match.group(1).strip()
            alias = match.group(2).strip()

        expr_ = parse_expr(new_expr)
        self.alias = alias if alias is not None else str(expr_)
        if not isinstance(expr_, Expr):
            # A bare name or literal: keep it as the single argument of a leaf.
            self.args = [expr_]
        else:
            self.fn_name, self.args = expr_.fn_name, expr_.args

    @property
    def n_args(self) -> int:
        """Number of arguments of this node (0 when no arguments are set)."""
        return len(self.args) if self.args is not None else 0

    @property
    def depth(self) -> int:
        """Nesting depth: 1 for a node without ``Expr`` arguments, plus the deepest child."""
        child_depths = (arg.depth for arg in self.args or [] if isinstance(arg, Expr))
        return 1 + max(child_depths, default=0)
qdf/qdf.py ADDED
@@ -0,0 +1,158 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ ---------------------------------------------
4
+ Created on 2025/3/5 21:40
5
+ @author: ZhangYundi
6
+ @email: yundi.xxii@outlook.com
7
+ ---------------------------------------------
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import importlib.util
12
+ import sys
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+
16
+ import polars as pl
17
+ from toolz import partial
18
+ from functools import lru_cache
19
+
20
+ import ygo
21
+ import ylog
22
+ from .errors import CalculateError, PolarsError
23
+ from .expr import Expr
24
+ import time
25
+
26
# Dynamically load the sibling ``udf`` package from its ``__init__.py`` and
# expose it as ``module``; UDFs are looked up on (and registered onto) this object.
module_name = "udf"
module_path = Path(__file__).parent / "udf" / "__init__.py"
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
# Registered in sys.modules under the top-level name "udf" before execution —
# presumably so the relative imports inside udf/__init__.py can resolve; confirm.
sys.modules[module_name] = module
spec.loader.exec_module(module)
33
+
34
+
35
@dataclass
class FailInfo:
    """An expression that could not be evaluated, paired with the exception it raised."""
    expr: str
    error: Exception

    def __str__(self):
        rule = "================================================="
        return f"\nexpr={self.expr}\n{rule}\n{self.error}\n{rule}\n"

    def __repr__(self):
        # The repr deliberately mirrors the human-readable form.
        return str(self)
50
+
51
@lru_cache(maxsize=512)
def parse_expr(expr: str) -> Expr:
    """Build the ``Expr`` for ``expr``; results are memoised for up to 512 distinct strings."""
    parsed = Expr(expr)
    return parsed
54
+
55
class QDF:
    """Evaluate string expressions against a polars ``LazyFrame``.

    Expressions are parsed into ``Expr`` trees and every node is resolved to a
    function looked up on the dynamically loaded ``udf`` module.
    """

    def __init__(self,
                 data: pl.LazyFrame,
                 index: tuple[str] = ("date", "time", "asset"),
                 align: bool = True, ):
        """
        Parameters
        ----------
        data: pl.LazyFrame
            Source data; Decimal columns are cast to Float32.
        index: tuple[str]
            Names of the index columns.
        align: bool
            When True, the data is left-joined onto the cross product of the
            distinct non-null values of every index column.
        """
        self.data = data.with_columns(pl.col(pl.Decimal).cast(pl.Float32))
        # Number of distinct non-null values of each index column.
        self.dims = [self.data.select(index_).drop_nulls().unique().count().collect().item() for index_ in index]
        if align:
            lev_vals: list[pl.LazyFrame] = [self.data.select(name).drop_nulls().unique() for name in index]
            full_index = lev_vals[0]
            for lev_val in lev_vals[1:]:
                full_index = full_index.join(lev_val, how="cross")
            self.data = full_index.join(self.data, on=index, how='left')

        self.index = index
        self.failed = list()

    def __str__(self):
        return self.data.__str__()

    def __repr__(self):
        return self.data.__str__()

    def register_udf(self, func: callable, name: str = None):
        """Expose ``func`` to expressions under ``name`` (defaults to ``func.__name__``)."""
        name = name if name is not None else func.__name__
        setattr(module, name, func)

    def _compile_expr(self, expr: str, cover: bool):
        """Parse ``expr``, add its result column to ``self.data`` and return the column name.

        Intermediate columns created for nested sub-expressions are dropped
        again once the final column has been added.

        Raises
        ------
        CalculateError
            If building the polars expression for a node fails.
        PolarsError
            If adding the expression to the frame fails.
        """
        expr_parsed = Expr(expr)
        alias = expr_parsed.alias
        # Resolve the schema once; the same snapshot serves both lookups below.
        columns = self.data.collect_schema().names()
        current_cols = set(columns)
        if alias in current_cols and not cover:
            return alias

        def calc(expr_: Expr):
            alias_ = expr_.alias
            if alias_ in current_cols and not cover:
                # Column already exists: use it as is.
                return alias_
            func = getattr(module, expr_.fn_name)
            _params = ygo.fn_signature_params(func)
            if "dims" in _params:
                func = partial(func, dims=self.dims)
            args = list()
            kwargs = dict()
            for arg in expr_.args:
                if isinstance(arg, Expr):
                    # Nested expression: compute it first, then refer to its column.
                    args.append(pl.col(calc(arg)))
                elif isinstance(arg, dict):
                    # Keyword argument parsed as a one-entry dict.
                    kwargs.update(arg)
                elif isinstance(arg, str):
                    args.append(pl.col(arg))
                else:
                    args.append(arg)
            try:
                expr_pl: pl.Expr = func(*args, **kwargs).alias(alias_)
            except Exception as e:
                raise CalculateError(f"{expr_.fn_name}({', '.join([str(arg) for arg in args])})\n{e}") from e
            try:
                self.data = self.data.with_columns(expr_pl)
            except Exception as e:
                raise PolarsError(f"{expr_}\n{e}") from e
            return alias_

        calc(expr_parsed)

        # Drop the intermediate columns: everything that is neither an original
        # column nor the requested result.  This must look at the schema *after*
        # the computation; the pre-computation snapshot can never contain them.
        keep = current_cols | {alias}
        temporaries = [name for name in self.data.collect_schema().names() if name not in keep]
        if temporaries:
            self.data = self.data.drop(*temporaries)

        return alias

    def sql(self, *exprs: str, cover: bool = False, ) -> pl.DataFrame:
        """
        Evaluate expressions and return the collected result.

        Parameters
        ----------
        exprs: str
            Expressions, e.g. "ts_mean(close, 5) as close_ma5"
        cover: bool
            What to do when the result column already exists (default False):
            - True: recompute and overwrite the existing column
            - False: skip the computation and return the existing column

        Returns
        -------
        polars.DataFrame
            The index columns plus one column per successfully evaluated
            expression; NaN is replaced by null, rows containing nulls are
            dropped and the result is sorted by the index. Failures are
            collected in ``self.failed`` and reported through ``ylog.warning``.
        """
        self.failed = list()
        exprs_to_add = list()
        for expr in exprs:
            try:
                compiled = self._compile_expr(expr, cover)
                if compiled is not None:
                    exprs_to_add.append(compiled)
            except Exception as e:
                # Best effort: one failing expression must not abort the others.
                self.failed.append(FailInfo(expr, e))
        if self.failed:
            ylog.warning(f"QDF.sql 失败:{len(self.failed)}/{len(exprs)}: \n {self.failed}")
        final_df = self.data.with_columns(exprs_to_add).select(*self.index, *exprs_to_add).fill_nan(None).drop_nulls().sort(self.index)
        return final_df.collect()
158
+
qdf/udf/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ ---------------------------------------------
4
+ Created on 2025/3/4 20:20
5
+ @author: ZhangYundi
6
+ @email: yundi.xxii@outlook.com
7
+ ---------------------------------------------
8
+ """
9
+
10
+ from .base_udf import *
11
+ from .cs_udf import *
12
+ from .ts_udf import *
13
+ from .d_udf import *
14
+ from .ind_udf import *
qdf/udf/base_udf.py ADDED
@@ -0,0 +1,145 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ ---------------------------------------------
4
+ Created on 2025/3/4 20:28
5
+ @author: ZhangYundi
6
+ @email: yundi.xxii@outlook.com
7
+ ---------------------------------------------
8
+ """
9
+
10
+ import polars as pl
11
+ import math
12
+
13
+ """
14
+ 基本算子:一元算子、二元算子、三元算子 以及 polars 支持的表达式(剔除数据泄露的)
15
+ """
16
+ # ======================== 一元算子 ========================
17
+
18
def not_(expr: pl.Expr):
    """Invert ``expr`` with the ``~`` operator."""
    return ~expr


def neg(expr: pl.Expr):
    """Arithmetic negation of ``expr``."""
    return -expr


def abs(expr: pl.Expr):
    """Absolute value, delegated to ``expr.abs()``."""
    return expr.abs()


def log(expr: pl.Expr, base=math.e):
    """Logarithm of ``expr`` in ``base`` (``math.e`` unless given)."""
    return expr.log(base=base)


def sqrt(expr: pl.Expr):
    """Square root, delegated to ``expr.sqrt()``."""
    return expr.sqrt()


def square(expr: pl.Expr):
    """``expr`` raised to the power of 2."""
    return expr ** 2


def cube(expr: pl.Expr):
    """``expr`` raised to the power of 3."""
    return expr ** 3
37
+
38
+
39
def cbrt(expr: pl.Expr):
    """Cube root of ``expr``.

    Delegates to polars' native ``Expr.cbrt`` so that negative inputs give the
    real root (e.g. -8 -> -2); raising to the fractional power ``1 / 3``
    produces NaN for negative floating-point values.
    """
    return expr.cbrt()
40
+
41
+
42
def sin(expr: pl.Expr):
    """Sine, delegated to ``expr.sin()``."""
    return expr.sin()


def sinh(expr: pl.Expr):
    """Hyperbolic sine, delegated to ``expr.sinh()``."""
    return expr.sinh()


def arcsin(expr: pl.Expr):
    """Inverse sine, delegated to ``expr.arcsin()``."""
    return expr.arcsin()


def arcsinh(expr: pl.Expr):
    """Inverse hyperbolic sine, delegated to ``expr.arcsinh()``."""
    return expr.arcsinh()


def cos(expr: pl.Expr):
    """Cosine, delegated to ``expr.cos()``."""
    return expr.cos()


def cosh(expr: pl.Expr):
    """Hyperbolic cosine, delegated to ``expr.cosh()``."""
    return expr.cosh()


def arccos(expr: pl.Expr):
    """Inverse cosine, delegated to ``expr.arccos()``."""
    return expr.arccos()


def arccosh(expr: pl.Expr):
    """Inverse hyperbolic cosine, delegated to ``expr.arccosh()``."""
    return expr.arccosh()


def tan(expr: pl.Expr):
    """Tangent, delegated to ``expr.tan()``."""
    return expr.tan()


def tanh(expr: pl.Expr):
    """Hyperbolic tangent, delegated to ``expr.tanh()``."""
    return expr.tanh()


def arctan(expr: pl.Expr):
    """Inverse tangent, delegated to ``expr.arctan()``."""
    return expr.arctan()


def arctanh(expr: pl.Expr):
    """Inverse hyperbolic tangent, delegated to ``expr.arctanh()``."""
    return expr.arctanh()
66
+
67
+
68
def sign(expr: pl.Expr):
    """Sign of ``expr``, delegated to ``expr.sign()``."""
    return expr.sign()


def sigmoid(expr: pl.Expr):
    """Logistic function ``1 / (1 + exp(-expr))``."""
    decay = (-expr).exp()
    return 1 / (1 + decay)


# def all(expr: pl.Expr, ignore_nulls: bool = True): return expr.all(ignore_nulls=ignore_nulls)


# def any(expr: pl.Expr, ignore_nulls: bool = True): return expr.any(ignore_nulls=ignore_nulls)

def cot(expr: pl.Expr):
    """Cotangent, delegated to ``expr.cot()``."""
    return expr.cot()


def degrees(expr: pl.Expr):
    """Delegates to ``expr.degrees()``."""
    return expr.degrees()


def exp(expr: pl.Expr):
    """Exponential, delegated to ``expr.exp()``."""
    return expr.exp()


def log1p(expr: pl.Expr):
    """Delegates to ``expr.log1p()``."""
    return expr.log1p()


def clip(expr: pl.Expr, lower_bound, upper_bound):
    """Limit ``expr`` to ``[lower_bound, upper_bound]`` via ``expr.clip``."""
    return expr.clip(lower_bound, upper_bound)
88
+
89
+ # ======================== 二元算子 ========================
90
def add(left: pl.Expr, right: pl.Expr):
    """``left + right``."""
    return left + right


def sub(left: pl.Expr, right: pl.Expr):
    """``left - right``."""
    return left - right


def mul(left: pl.Expr, right: pl.Expr):
    """``left * right``."""
    return left * right


def div(left: pl.Expr, right: pl.Expr):
    """True division ``left / right``."""
    return left / right


def floordiv(left: pl.Expr, right: pl.Expr):
    """Floor division ``left // right``."""
    return left // right


def mod(left: pl.Expr, right: pl.Expr):
    """Remainder ``left % right``."""
    return left % right


def lt(left: pl.Expr, right: pl.Expr):
    """``left < right``."""
    return left < right


def le(left: pl.Expr, right: pl.Expr):
    """``left <= right``."""
    return left <= right


def gt(left: pl.Expr, right: pl.Expr):
    """``left > right``."""
    return left > right


def ge(left: pl.Expr, right: pl.Expr):
    """``left >= right``."""
    return left >= right


def eq(left: pl.Expr, right: pl.Expr):
    """``left == right``."""
    return left == right


def neq(left: pl.Expr, right: pl.Expr):
    """``left != right``."""
    return left != right


def and_(left: pl.Expr, right: pl.Expr):
    """``left & right``."""
    return left & right


def or_(left: pl.Expr, right: pl.Expr):
    """``left | right``."""
    return left | right
128
+
129
def max(*exprs: pl.Expr):
    """Row-wise maximum across ``exprs`` via ``pl.max_horizontal``."""
    return pl.max_horizontal(*exprs)


def min(*exprs: pl.Expr):
    """Row-wise minimum across ``exprs`` via ``pl.min_horizontal``."""
    return pl.min_horizontal(*exprs)


def sum(*exprs: pl.Expr):
    """Row-wise sum across ``exprs`` via ``pl.sum_horizontal``."""
    return pl.sum_horizontal(*exprs)
134
+
135
+
136
+ # ======================== 三元 ========================
137
def if_(cond: pl.Expr, body: pl.Expr, or_else: pl.Expr):
    """Conditional expression: ``body`` where ``cond`` holds, ``or_else`` otherwise."""
    branch = pl.when(cond).then(body)
    return branch.otherwise(or_else)
139
+
140
def fib(high: pl.Expr, low: pl.Expr, ratio: float = 0.618):
    """
    Compute the Fibonacci retracement level between ``low`` and ``high``.
    ratio: a golden-section proportion such as 0.236 | 0.382 | 0.618
    """
    span = high - low
    return low + span * ratio