xlin 0.1.39__py2.py3-none-any.whl → 0.2.3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xlin/dataframe_util.py +52 -0
- xlin/datetime_util.py +157 -3
- xlin/file_util.py +0 -3
- xlin/image_util.py +11 -0
- xlin/metric.py +1 -1
- {xlin-0.1.39.dist-info → xlin-0.2.3.dist-info}/METADATA +4 -1
- xlin-0.2.3.dist-info/RECORD +17 -0
- xlin-0.1.39.dist-info/RECORD +0 -17
- {xlin-0.1.39.dist-info → xlin-0.2.3.dist-info}/LICENSE +0 -0
- {xlin-0.1.39.dist-info → xlin-0.2.3.dist-info}/WHEEL +0 -0
xlin/dataframe_util.py
CHANGED
@@ -306,3 +306,55 @@ def grouped_row(df: pd.DataFrame, key_col="query"):
|
|
306
306
|
for i, row in df.iterrows():
|
307
307
|
grouped[row[key_col]].append(row)
|
308
308
|
return grouped
|
309
|
+
|
310
|
+
def select_sub_df(
    df: pd.DataFrame,
    start_date: str,
    end_date: str,
    lookback_window: int = 0,
    lookforward_window: int = 0,
    include_end_date: bool = False,
) -> pd.DataFrame:
    """Return the rows of ``df`` whose date index falls inside a date window.

    The window is half-open, ``[start, end)``: rows stamped exactly at the
    (possibly extended) end are excluded. With ``include_end_date=True`` the
    end is pushed forward by one day, so every row stamped on ``end_date``
    itself is included, but nothing from the following day.

    The caller's DataFrame is never modified. If its index is not already a
    ``DatetimeIndex``, a shallow copy is re-indexed instead.

    Args:
        df: DataFrame indexed by date (a ``DatetimeIndex`` or values that
            ``pd.to_datetime`` can parse).
        start_date: First date of the window, e.g. ``'YYYY-MM-DD'``.
        end_date: End date of the window, e.g. ``'YYYY-MM-DD'``.
        lookback_window: Days to move the start earlier. Values <= 0 are
            ignored.
        lookforward_window: Days to move the end later. Values <= 0 are
            ignored.
        include_end_date: Whether rows dated on the end date are included.

    Returns:
        The matching rows, sorted by index. Empty (with the same columns)
        when no row falls inside the window.
    """
    # Work on a shallow copy so that converting the index does not leak back
    # into the caller's object.
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.copy(deep=False)
        df.index = pd.to_datetime(df.index)

    # The result is always returned in chronological order.
    if not df.index.is_monotonic_increasing:
        df = df.sort_index()

    # Build the bounds in the index's own timezone so comparisons are valid
    # for both tz-naive and tz-aware indexes.
    tz = df.index.tz
    start = pd.Timestamp(start_date, tz=tz)
    end = pd.Timestamp(end_date, tz=tz)

    if lookback_window > 0:
        start -= pd.Timedelta(days=lookback_window)
    if lookforward_window > 0:
        end += pd.Timedelta(days=lookforward_window)
    if include_end_date:
        end += pd.Timedelta(days=1)

    # Label slicing (df[start:end]) is inclusive on BOTH ends, which would
    # return the end-date row even when include_end_date is False. An explicit
    # half-open mask gives the documented behaviour.
    in_window = (df.index >= start) & (df.index < end)
    return df.loc[in_window]
|
xlin/datetime_util.py
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
|
2
2
|
|
3
|
+
from typing import Literal, Optional, Union
|
3
4
|
import datetime
|
4
5
|
import random
|
5
6
|
|
7
|
+
import pandas as pd
|
8
|
+
|
6
9
|
|
7
10
|
date_str = datetime.datetime.now().strftime("%Y%m%d")
|
8
11
|
datetime_str = datetime.datetime.now().strftime("%Y%m%d_%Hh%Mm%Ss")
|
9
12
|
|
10
13
|
|
11
|
-
def random_timestamp(start_timestamp=None, end_timestamp=None):
|
14
|
+
def random_timestamp(start_timestamp: Optional[float]=None, end_timestamp: Optional[float]=None):
|
12
15
|
if start_timestamp is None:
|
13
16
|
start_timestamp = datetime.datetime(2024, 1, 1).timestamp()
|
14
17
|
if end_timestamp is None:
|
@@ -16,6 +19,157 @@ def random_timestamp(start_timestamp=None, end_timestamp=None):
|
|
16
19
|
return random.uniform(start_timestamp, end_timestamp)
|
17
20
|
|
18
21
|
|
19
|
-
def
|
20
|
-
|
22
|
+
def random_datetime(
    start_datetime: Optional[datetime.datetime] = None,
    end_datetime: Optional[datetime.datetime] = None,
) -> datetime.datetime:
    """Return a uniformly random datetime between two bounds.

    Args:
        start_datetime: Lower bound. Defaults to 2024-01-01 00:00:00 (naive).
        end_datetime: Upper bound. Defaults to the current local time (naive).

    Returns:
        A datetime between the bounds. If either bound is timezone-aware the
        result carries that timezone (the start bound's tzinfo wins);
        otherwise the result is naive local time.
    """
    if start_datetime is None:
        start_datetime = datetime.datetime(2024, 1, 1)
    if end_datetime is None:
        end_datetime = datetime.datetime.now()

    # Keep the caller's timezone: fromtimestamp() without tz always yields a
    # naive local datetime, which cannot even be compared with aware bounds.
    tz = start_datetime.tzinfo
    if tz is None:
        tz = end_datetime.tzinfo

    random_timestamp_value = random.uniform(start_datetime.timestamp(), end_datetime.timestamp())
    return datetime.datetime.fromtimestamp(random_timestamp_value, tz=tz)
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
# 初始化中美节假日(可缓存)懒加载
|
41
|
+
us_holidays = None # US(categories=US.supported_categories)
|
42
|
+
cn_holidays = None # CN(categories=CN.supported_categories)
|
43
|
+
|
44
|
+
|
45
|
+
def format_datetime_with_holiday(
    dt: Union[datetime.datetime, str, pd.Series, float],
    language: Literal["zh", "en"] = "zh",
    with_time: bool = True,
    with_weekday: bool = True,
    with_holiday: bool = True,
) -> Union[str, pd.Series]:
    """Format a point in time as date [+ time] [+ weekday] [+ CN/US holidays].

    Example output (``language="zh"``):
    ``2024年01月01日 10:00:00 星期一 [假期: 🇨🇳 元旦, 🇺🇸 New Year's Day]``

    Args:
        dt: Value to format: a ``datetime``, a string parseable by
            ``pd.to_datetime``, a POSIX timestamp (``int`` or ``float``,
            interpreted in local time), or a ``pandas.Series`` of such values
            (formatted element-wise).
        language: ``"zh"`` or ``"en"``; selects the date format, weekday
            names and holiday label.
        with_time: Append the time as ``HH:MM:SS``.
        with_weekday: Append the weekday name.
        with_holiday: Append Chinese and US holiday names when the date is a
            holiday in either calendar.

    Returns:
        The formatted string, or a ``pandas.Series`` of strings when ``dt``
        is a Series.

    Raises:
        ValueError: If a value is not a datetime, string or numeric timestamp.
        ImportError: If ``with_holiday`` is true and the ``holidays`` package
            is not installed.
    """
    language_dict = {
        "zh": {
            "weekday": ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"],
            "holiday": "假期",
            "date_format": "%Y年%m月%d日",
            "time_format": "%H:%M:%S",
        },
        "en": {
            "weekday": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
            "holiday": "Holiday",
            "date_format": "%Y-%m-%d",
            "time_format": "%H:%M:%S",
        },
    }

    def _format_one(d: Union[datetime.datetime, str, int, float]) -> str:
        if isinstance(d, str):
            d = pd.to_datetime(d)
        elif isinstance(d, (int, float)) and not isinstance(d, bool):
            # Integer epoch seconds are timestamps too; bool is an int
            # subclass but is never a meaningful timestamp.
            d = datetime.datetime.fromtimestamp(d)

        if not isinstance(d, datetime.datetime):
            raise ValueError("输入必须是 datetime, timestamp, str 或 pandas.Series 类型。")

        labels = language_dict[language]
        formatted = d.strftime(labels["date_format"])
        if with_time:
            formatted += " " + d.strftime(labels["time_format"])
        if with_weekday:
            formatted += " " + labels["weekday"][d.weekday()]
        if not with_holiday:
            return formatted

        # Lazily build the module-level holiday calendars on first use.
        # Compare against None explicitly: the calendars are dict-like, so an
        # initialised but still-empty calendar is falsy and would be rebuilt.
        global us_holidays, cn_holidays
        if us_holidays is None or cn_holidays is None:
            try:
                from holidays.countries import US, CN
            except ImportError as err:
                raise ImportError("请安装 'holidays' 库以支持节假日查询。可以使用 'pip install holidays' 安装。") from err
            us_holidays = US(categories=US.supported_categories)
            cn_holidays = CN(categories=CN.supported_categories)

        tags = []
        if d in cn_holidays:
            tags.append(f"🇨🇳 {cn_holidays[d]}")
        if d in us_holidays:
            tags.append(f"🇺🇸 {us_holidays[d]}")

        if tags:
            holiday_str = labels["holiday"]
            formatted += f" [{holiday_str}: " + ", ".join(tags) + "]"
        return formatted

    if isinstance(dt, pd.Series):
        return dt.apply(_format_one)
    return _format_one(dt)
|
124
|
+
|
125
|
+
|
126
|
+
def format_timedelta(
    delta: datetime.timedelta,
    language: Literal["zh", "en"] = "zh",
) -> str:
    """Format a timedelta as a compact human-readable string.

    The duration is taken as an absolute value, rounded half-up to whole
    seconds, and split into days / hours / minutes / seconds. Units whose
    value is zero are omitted.

    Args:
        delta: Duration to format. Negative durations are formatted by
            magnitude (the sign is dropped).
        language: ``"zh"`` or ``"en"``; selects the unit labels.

    Returns:
        The units concatenated without separators, e.g. ``"1天3小时5分"`` or
        ``"45秒"``. A zero duration yields ``"0秒"`` (``"0seconds"`` for
        ``"en"``).
    """
    language_dict = {
        "zh": {
            "days": "天",
            "hours": "小时",
            "minutes": "分",
            "seconds": "秒",
        },
        "en": {
            "days": "days",
            "hours": "hours",
            "minutes": "minutes",
            "seconds": "seconds",
        },
    }
    labels = language_dict[language]

    # Negative durations are reported by magnitude.
    delta = abs(delta)

    # Round half-up to whole seconds, then derive EVERY unit from that single
    # total. Taking days from delta.days while computing hours from the full
    # total would count the days twice (1 day 3 h -> "1天27小时").
    total_seconds = int(delta.total_seconds() + 0.5)
    days, remainder = divmod(total_seconds, 86400)
    hours, remainder = divmod(remainder, 3600)
    minutes, seconds = divmod(remainder, 60)

    # Keep only the non-zero units, largest first.
    parts = [
        f"{value}{labels[unit]}"
        for unit, value in (
            ("days", days),
            ("hours", hours),
            ("minutes", minutes),
            ("seconds", seconds),
        )
        if value > 0
    ]

    # An all-zero duration (e.g. timedelta(0)) still needs some output.
    return "".join(parts) if parts else f"0{labels['seconds']}"
|
xlin/file_util.py
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
from typing import *
|
2
|
-
from collections import defaultdict
|
3
2
|
from pathlib import Path
|
4
3
|
import os
|
5
|
-
import asyncio
|
6
4
|
import shutil
|
7
5
|
|
8
|
-
import pandas as pd
|
9
6
|
from loguru import logger
|
10
7
|
|
11
8
|
def auto_retry_to_get_data(retry_times, request, data_key="data", *args, **kwargs):
|
xlin/image_util.py
CHANGED
@@ -6,6 +6,17 @@ from PIL import Image, ImageDraw, ImageFont
|
|
6
6
|
import uuid
|
7
7
|
import os
|
8
8
|
|
9
|
+
import requests
|
10
|
+
|
11
|
+
|
12
|
+
def read_image_http_url(image_url: str, timeout: float = 30.0) -> Image.Image:
    """Download an image over HTTP(S) and open it with Pillow.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The image opened from the response body.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response.
    """
    response = requests.get(image_url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to Pillow,
    # which would only surface as a confusing "cannot identify image" error.
    response.raise_for_status()

    # Wrap the raw bytes in a file-like object so Pillow can read them.
    return Image.open(BytesIO(response.content))
|
9
20
|
|
10
21
|
def image_to_base64(image: Image.Image) -> str:
|
11
22
|
buffered = BytesIO()
|
xlin/metric.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
|
2
|
-
def stream_average(avg_pre, cur_num_index, cur_num):
|
2
|
+
def stream_average(avg_pre: float, cur_num_index: int, cur_num: float) -> float:
|
3
3
|
"""
|
4
4
|
calculate the average of number in a stream
|
5
5
|
given nums: List[float], calculate avg[n] = sum(nums[:n]) / len(nums[:n]) = sum(nums[:n]) / n
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: xlin
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.2.3
|
4
4
|
Summary: toolbox for LinXueyuan
|
5
5
|
License: MIT
|
6
6
|
Author: LinXueyuanStdio
|
@@ -18,12 +18,15 @@ Classifier: Programming Language :: Python :: 3.9
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.10
|
19
19
|
Classifier: Programming Language :: Python :: 3.11
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
21
|
+
Requires-Dist: Pillow
|
22
|
+
Requires-Dist: holidays
|
21
23
|
Requires-Dist: loguru
|
22
24
|
Requires-Dist: pandas
|
23
25
|
Requires-Dist: pyexcel
|
24
26
|
Requires-Dist: pyexcel-xls
|
25
27
|
Requires-Dist: pyexcel-xlsx
|
26
28
|
Requires-Dist: pyyaml
|
29
|
+
Requires-Dist: requests
|
27
30
|
Requires-Dist: tqdm
|
28
31
|
Requires-Dist: xlsxwriter
|
29
32
|
Description-Content-Type: text/markdown
|
@@ -0,0 +1,17 @@
|
|
1
|
+
xlin/__init__.py,sha256=CIhMAGhFgqwC6w16MzKcwo2mDjmaRUAcrlZFR3Am--I,321
|
2
|
+
xlin/dataframe_util.py,sha256=zWpkGN-C9V9qVAVH8K4ElkPVu9pq4MjDbxwjJKSOO2o,12151
|
3
|
+
xlin/datetime_util.py,sha256=MHi827LBuAOX6SSMb31staNBjmtnNOXwg7JDk73_pLU,6212
|
4
|
+
xlin/file_util.py,sha256=QG7UiO-hePEB02H53lAWFanQvKlfFTPMhSPyQPma4dU,7199
|
5
|
+
xlin/image_util.py,sha256=hSNQ5suCrxFXpQwP-wfUT1ig3SfEdC6msuVp2k7J7b8,8438
|
6
|
+
xlin/jsonlist_util.py,sha256=dLgrgrSTvg_1plVRCEnilajPM_s3vYdVx2bCTqrZAN8,11316
|
7
|
+
xlin/metric.py,sha256=mBaHy4ZkBbJxq7bCEF9NHdCRTaMcIIqfUj5JS8ElW98,1718
|
8
|
+
xlin/multiprocess_util.py,sha256=-tskCWQlBBCOPycXLj9Y2MugYg-tHF_QYYWW7c1ixOk,17300
|
9
|
+
xlin/statistic.py,sha256=ioJJjL4qwHiwNPeBFBB67keoAIbB-uZM51zkDYviar0,17037
|
10
|
+
xlin/text_util.py,sha256=ejFD8-j8tLCbPlCPFg0Tu3MEMPEpF7R5_IpXXjl6qzA,735
|
11
|
+
xlin/timing_util.py,sha256=nNVKtSXel-Cc8SF_BqPRNkyNDOjGqOMxTol-L1vpON4,1340
|
12
|
+
xlin/xlsx_util.py,sha256=uSmXcDvIhi5Sq0LGidMXy0wErNBXdjaoa6EftYVjTXs,947
|
13
|
+
xlin/yaml_util.py,sha256=kICi7G3Td5q2MaSXXt85qNTWoHMgjzt7pvn7r3C4dME,183
|
14
|
+
xlin-0.2.3.dist-info/LICENSE,sha256=60ys6rRtc1dZOP8UjSUr9fAqhZudT3WpKe5WbMCralM,1066
|
15
|
+
xlin-0.2.3.dist-info/METADATA,sha256=xI6UKJaM3MLUlp_YhkVaGA-50HNOKZzVHc6zWFZCbfE,8061
|
16
|
+
xlin-0.2.3.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
|
17
|
+
xlin-0.2.3.dist-info/RECORD,,
|
xlin-0.1.39.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
xlin/__init__.py,sha256=CIhMAGhFgqwC6w16MzKcwo2mDjmaRUAcrlZFR3Am--I,321
|
2
|
-
xlin/dataframe_util.py,sha256=Z8k3_XLMP5B13IMov2dQJhe-7dPh_YUJGokibWSx8II,10460
|
3
|
-
xlin/datetime_util.py,sha256=jzdF-58PTb_ofBy6F-LBDnEmsTQ9jvoCgqKZJmyDtqE,687
|
4
|
-
xlin/file_util.py,sha256=mYTABNywdYoSfh1RLJcH7l1FzgKTFWN2-JZMFzv-ehw,7270
|
5
|
-
xlin/image_util.py,sha256=j1QlVXS-aikTDFDINbTmxjZi6CokPDTVlQ6_ABctMWQ,8109
|
6
|
-
xlin/jsonlist_util.py,sha256=dLgrgrSTvg_1plVRCEnilajPM_s3vYdVx2bCTqrZAN8,11316
|
7
|
-
xlin/metric.py,sha256=N7wJ35y-C-IaBr1I1CJ_37lTG7gA69zmn9Xg6xSwKoI,1690
|
8
|
-
xlin/multiprocess_util.py,sha256=-tskCWQlBBCOPycXLj9Y2MugYg-tHF_QYYWW7c1ixOk,17300
|
9
|
-
xlin/statistic.py,sha256=ioJJjL4qwHiwNPeBFBB67keoAIbB-uZM51zkDYviar0,17037
|
10
|
-
xlin/text_util.py,sha256=ejFD8-j8tLCbPlCPFg0Tu3MEMPEpF7R5_IpXXjl6qzA,735
|
11
|
-
xlin/timing_util.py,sha256=nNVKtSXel-Cc8SF_BqPRNkyNDOjGqOMxTol-L1vpON4,1340
|
12
|
-
xlin/xlsx_util.py,sha256=uSmXcDvIhi5Sq0LGidMXy0wErNBXdjaoa6EftYVjTXs,947
|
13
|
-
xlin/yaml_util.py,sha256=kICi7G3Td5q2MaSXXt85qNTWoHMgjzt7pvn7r3C4dME,183
|
14
|
-
xlin-0.1.39.dist-info/LICENSE,sha256=60ys6rRtc1dZOP8UjSUr9fAqhZudT3WpKe5WbMCralM,1066
|
15
|
-
xlin-0.1.39.dist-info/METADATA,sha256=d9F6S7LBxur58Jm817Kww54kVOBeWTRvc4R0AgOWYfw,7992
|
16
|
-
xlin-0.1.39.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
|
17
|
-
xlin-0.1.39.dist-info/RECORD,,
|
File without changes
|
File without changes
|