syutils 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syutils-0.0.1/PKG-INFO +10 -0
- syutils-0.0.1/setup.cfg +4 -0
- syutils-0.0.1/setup.py +15 -0
- syutils-0.0.1/syutils/__init__.py +1 -0
- syutils-0.0.1/syutils/syutils.py +333 -0
- syutils-0.0.1/syutils.egg-info/PKG-INFO +10 -0
- syutils-0.0.1/syutils.egg-info/SOURCES.txt +9 -0
- syutils-0.0.1/syutils.egg-info/dependency_links.txt +1 -0
- syutils-0.0.1/syutils.egg-info/not-zip-safe +1 -0
- syutils-0.0.1/syutils.egg-info/requires.txt +2 -0
- syutils-0.0.1/syutils.egg-info/top_level.txt +1 -0
syutils-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: syutils
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: High frequency functions and class
|
|
5
|
+
Home-page: https://gitee.com/wdy0401/syutils
|
|
6
|
+
Author: wangdeyang
|
|
7
|
+
Author-email: wdy0401@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Dist: trade_date
|
|
10
|
+
Requires-Dist: py7zr
|
syutils-0.0.1/setup.cfg
ADDED
syutils-0.0.1/setup.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Packaging script for the ``syutils`` distribution.
from setuptools import setup, find_packages
setup(name='syutils',
      version='0.0.1',
      description='High frequency functions and class',
      url='https://gitee.com/wdy0401/syutils',
      author='wangdeyang',
      author_email='wdy0401@gmail.com',
      license='MIT',
      # Package list is discovered automatically from the source tree.
      packages=find_packages(),
      install_requires=[
          # Libraries this package depends on
          "trade_date",
          "py7zr",
      ],
      zip_safe=False)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .syutils import *
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import re
|
|
3
|
+
import sys
|
|
4
|
+
import os
|
|
5
|
+
import datetime
|
|
6
|
+
import json
|
|
7
|
+
import subprocess
|
|
8
|
+
import time
|
|
9
|
+
import pickle
|
|
10
|
+
from datetime import date
|
|
11
|
+
from functools import wraps
|
|
12
|
+
|
|
13
|
+
def run_func_limit_time(func, func_args, limit_time):
    """Run ``func(*func_args)`` in a child process with a time limit.

    A process is used instead of a thread because a thread cannot be
    stopped from the outside.

    Example::

        run_func_limit_time(time.sleep, [0.1], 2)   # -> 0

    :param func: callable used as the ``multiprocessing.Process`` target
    :param func_args: sequence of positional arguments passed to ``func``
    :param limit_time: maximum number of seconds to wait for the child
    :return: 0 if the child finished with exit code 0 within the limit,
        otherwise -1 (timed out or exited with a non-zero code)
    """
    from multiprocessing import Process
    worker = Process(target=func, args=func_args)
    worker.start()
    worker.join(timeout=limit_time)
    if worker.exitcode == 0:
        return 0
    # exitcode is None while the child is still running and non-zero when it
    # failed; in both cases make sure it is stopped.
    worker.terminate()
    # Join after terminate so the child is reaped; the wait is bounded so a
    # child that does not stop cannot block the caller indefinitely.
    worker.join(timeout=1)
    return -1
|
|
30
|
+
def get_last_line(filename):
    """Return the last line of a file without reading the whole file.

    The file is read backwards from its end in growing windows until the
    window contains at least two lines, which guarantees that the final
    line is complete.

    :param filename: path of the file to inspect
    :return: the last line as ``bytes`` (the file is opened in binary
        mode), or ``None`` when the file is empty or does not exist
    """
    chunk = 4096  # files usually occupy at least 4k on disk
    try:
        total = os.path.getsize(filename)
        if total == 0:
            return None
        # First window: the trailing partial block plus one full block.
        # If that does not hold more than one line, the window grows
        # exponentially.
        window = (total % chunk or chunk) + chunk
        attempt = 0
        # Seeking relative to the end requires binary mode.
        with open(filename, 'rb') as handle:
            while window < total:  # the window may not exceed the file size
                handle.seek(-window, 2)  # 2 == seek relative to end of file
                tail = handle.readlines()
                if len(tail) >= 2:
                    # Two or more lines: the last one is fully contained.
                    return tail[-1]
                attempt += 1
                window = chunk * 2 ** attempt
            # The window reached the file size: read everything.
            handle.seek(0)
            return handle.readlines()[-1]
    except FileNotFoundError:
        print(f'''get last line {filename} not found!''')
|
|
59
|
+
|
|
60
|
+
def read_gz_flow(filename):
    """Lazily yield the lines of a gzip-compressed file.

    :param filename: path of the ``.gz`` file
    :return: generator of lines decoded with the default codec and with
        surrounding whitespace stripped
    """
    import gzip
    # The with-statement closes the file; no explicit close() is needed.
    with gzip.open(filename, 'rb') as stream:
        for raw_line in stream:
            yield raw_line.decode().strip()
|
|
66
|
+
def read_7z_flow(filename):
    """Lazily yield the lines of the first member of a 7z archive.

    :param filename: path of the ``.7z`` archive
    :return: generator of lines decoded with the default codec and with
        surrounding whitespace stripped
    """
    from py7zr import SevenZipFile
    # The context manager closes the archive even when the consumer stops
    # iterating early or decoding raises.
    with SevenZipFile(filename) as archive:
        first_member = archive.getnames()[0]
        # read() is given a list of target names; the result is indexed
        # by member name.
        content = archive.read([first_member])[first_member]
        for raw_line in content:
            yield raw_line.decode().strip()
|
|
74
|
+
def read_zip_flow(filename):
    """Pick the line reader that matches the archive's file extension.

    :param filename: path ending in ``.7z`` or ``.gz``
    :return: the generator from ``read_7z_flow`` or ``read_gz_flow``;
        ``None`` for any other extension
    """
    if filename.endswith('.7z'):
        return read_7z_flow(filename)
    if filename.endswith('.gz'):
        return read_gz_flow(filename)
    return None
|
|
79
|
+
def read_7z_csv_pd(filename, **argv):
    """Load the first member of a 7z archive with ``pandas.read_csv``.

    :param filename: path of the ``.7z`` archive
    :param argv: keyword arguments forwarded to ``pandas.read_csv``
    :return: whatever ``pandas.read_csv`` returns for the member content
    """
    import pandas as pd
    from py7zr import SevenZipFile
    # The context manager guarantees the archive is closed, including when
    # parsing fails.
    with SevenZipFile(filename) as archive:
        first_member = archive.getnames()[0]
        # read() is given a list of target names; the result is indexed
        # by member name.
        content = archive.read([first_member])[first_member]
        return pd.read_csv(content, **argv)
|
|
86
|
+
|
|
87
|
+
def error_callback(error):
    """Print ``error`` to standard output with an ``Error info:`` prefix."""
    message = f"Error info: {error}"
    print(message)
|
|
89
|
+
|
|
90
|
+
def getsize(file_path):
    """Return the size of a file as a human-readable string.

    The size is divided by 1024 until it is below 1024 or the largest
    unit (TB) is reached, then rounded to two decimals.

    :param file_path: path of the file to measure
    :return: a string such as ``"10 B"`` or ``"1.5 KB"``; ``None`` (after
        printing a message) when the path does not exist
    """
    if not os.path.exists(file_path):
        print(f"{file_path} not exists!")
        return None
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    size = os.path.getsize(file_path)
    unit_index = 0
    # Stop at the last unit so very large sizes are reported in TB instead
    # of running past the unit table.
    while size >= 1024 and unit_index < len(units) - 1:
        size /= 1024
        unit_index += 1
    return f"{round(size, 2)} {units[unit_index]}"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def handle_mid_call(func, args, pkl_filepath=None, reload=False, force_save=False, ifprint=True, dump_protocol=None, **kwargs):
    """Cache the result of ``func(*args, **kwargs)`` in a pickle file.

    If ``pkl_filepath`` exists (and ``reload`` is false) its content is
    loaded and returned; otherwise the function is run and its return
    value is saved to ``pkl_filepath``.

    :param func: callable producing the value
    :param args: sequence of positional arguments for ``func``
    :param pkl_filepath: cache file path; when ``None`` the function is
        simply called and nothing is cached
    :param reload: when true, ignore an existing cache file and recompute
    :param force_save: when true, keep the cache file regardless of size;
        otherwise a cache file smaller than 1 KB is deleted again
    :param ifprint: print progress messages
    :param dump_protocol: ``protocol`` argument passed to ``pickle.dump``
    :param kwargs: keyword arguments for ``func``
    :return: the cached or freshly computed value
    """
    if pkl_filepath is None:
        # Without a path there is nothing to read from or write to.
        return func(*args, **kwargs)

    if os.path.exists(pkl_filepath) and not reload:
        if ifprint:
            print(f'Read data from {pkl_filepath}, {getsize(pkl_filepath)}')
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # cache files that come from a trusted location.
        with open(pkl_filepath, 'rb') as f:
            return pickle.load(f)

    res = func(*args, **kwargs)
    with open(pkl_filepath, 'wb') as f:
        pickle.dump(res, f, protocol=dump_protocol)

    if force_save:
        if ifprint:
            print(f'Save data to {pkl_filepath}, {getsize(pkl_filepath)}')
        return res

    size_text = getsize(pkl_filepath)
    if size_text.split(' ')[1] == 'B':
        # A unit of "B" means the file is smaller than 1 KB.
        print(f'Data too small {pkl_filepath}, {size_text}')
        # os.remove avoids the shell, so paths with spaces or shell
        # metacharacters are handled correctly.
        os.remove(pkl_filepath)
    elif ifprint:
        print(f'Save data to {pkl_filepath}, {size_text}')
    return res
|
|
129
|
+
|
|
130
|
+
# 定义一个函数,等待文件,默认等待时间为1秒
|
|
131
|
+
def wait_file(file, sec=1):
    """Block until ``file`` exists as a regular file.

    :param file: path to wait for
    :param sec: seconds to sleep between checks (default 1)
    """
    while not os.path.isfile(file):
        time.sleep(sec)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# 定义一个函数,等待文件列表,默认等待时间为1秒
|
|
139
|
+
def wait_files(files, sec=1):
    """Block until every path in ``files`` exists as a regular file.

    :param files: iterable of paths to wait for
    :param sec: seconds to sleep between checks (default 1)
    """
    while not all(os.path.isfile(path) for path in files):
        time.sleep(sec)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def read_cmd(cmd):
    """Stream the standard output of a shell command line by line.

    :param cmd: command line, executed through the shell
    :return: generator of output lines, stripped and decoded as UTF-8

    The command runs with ``shell=True``; do not pass untrusted text.
    When the generator finishes or is closed, the output pipe is closed
    and the child process is waited for.
    """
    # Popen as a context manager closes stdout and reaps the child on exit.
    with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as process:
        # Iterating the pipe blocks until a line is available and stops at
        # end of file, so no polling loop is needed.
        for raw_line in process.stdout:
            yield str(raw_line.strip(), encoding="utf-8")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def run_cmd(cmd, dryrun=0):
    """Print a shell command and, unless ``dryrun`` is truthy, execute it.

    :param cmd: command line passed to ``os.system``
    :param dryrun: when truthy only print the command
    """
    if dryrun:
        print("Dryrun ", cmd)
        return
    print("Realrun", cmd)
    os.system(cmd)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def printt(*args, **kw):
    """``print`` with the current local time prepended.

    The timestamp format is ``YYYYmmdd HH:MM:SS``; all positional and
    keyword arguments are forwarded to ``print``.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d %H:%M:%S")
    print(stamp, *args, **kw)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def except_msg(msg):
    """Show ``msg``, wait for the user to press Enter, then exit.

    The prompt asks the user (in Chinese) to press Enter to quit.

    :param msg: message shown before the prompt
    :raises SystemExit: always, after the prompt has been answered
    """
    input(f"\n\n{msg}\n按回车键退出\n\n")
    # sys.exit is always available, whereas the bare exit() helper is only
    # installed by the site module for interactive use.
    sys.exit()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def dump_dict(d):
    """Serialize ``d`` to pretty-printed JSON.

    Keys are sorted, indentation is two spaces and non-ASCII characters
    are written as-is.
    """
    options = {"sort_keys": True, "indent": 2, "ensure_ascii": False}
    return json.dumps(d, **options)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def err(*args, **kw):
    """``print`` to standard error; arguments are forwarded to ``print``."""
    print(*args, file=sys.stderr, **kw)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def mkpath(path):
    """Create directory ``path`` (with parents) if nothing exists there.

    Errors raised while creating the directory are printed, not raised.
    """
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except Exception as e:
            print("Error:", e)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def get_dir(file):
    """Return the directory of ``file`` after resolving symbolic links."""
    resolved = os.path.realpath(file)
    return os.path.dirname(resolved)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def get_ip():
    """Return the local IP address used for outgoing traffic.

    Rough shell equivalent::

        ip route get 1 | awk '{print $NF;exit}'

    :return: the address the socket is bound to after connecting towards
        8.8.8.8, or ``"127.0.0.1"`` when that cannot be determined
    """
    import socket
    ip = "127.0.0.1"
    try:
        # The with-statement closes the socket; creating it inside the try
        # means a failure to create it is handled like any other error.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            ip = s.getsockname()[0]
    except OSError:
        # No usable route or socket: keep the loopback default.
        pass
    return ip
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def get_internet_ip():
    """Query an external service for this machine's public IP address.

    :return: list of dotted-quad strings found in the service response
    :raises requests.RequestException: when the HTTP request fails or
        times out
    """
    import requests
    import re
    # A timeout keeps the call from hanging if the service is unreachable.
    req = requests.get("http://txt.go.sohu.com/ip/soip", timeout=10)
    # Search the response body text (not the Response object) and escape
    # the dots so they only match literal dots.
    return re.findall(r"\d+\.\d+\.\d+\.\d+", req.text)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def make_dir(dir_name):
    """Create ``dir_name`` if it does not exist; alias of ``mkpath``."""
    mkpath(path=dir_name)
|
|
221
|
+
|
|
222
|
+
def uniq_symbol(old_symbol, market=0):
    """Normalize a stock symbol to the ``NNNNNN.SH`` / ``NNNNNN.SZ`` form.

    The input is converted to an upper-case string and left-padded with
    zeros to six characters. Accepted forms:

    * ``NNNNNN`` with ``market`` 1 (SH) or 2 (SZ)
    * ``SHNNNNNN`` / ``SZNNNNNN``
    * ``NNNNNN.SH`` / ``NNNNNN.SZ``
    * ``NNNNNN_1`` (SH) / ``NNNNNN_2`` (SZ)
    * ``NNNNNN`` alone: a first digit of 6 or 5 maps to SH, else SZ

    :param old_symbol: symbol in any of the forms above (str or int)
    :param market: 0 to infer the market, 1 for SH, 2 for SZ
    :return: the normalized symbol
    :raises Exception: when the symbol or market is not supported
    """
    symbol = str(old_symbol).upper()
    symbol = symbol.rjust(6, "0")
    market = int(market)

    if market > 0 and re.match(r"^\d{6}$", symbol):
        if market == 1:
            return f"{symbol[0:6]}.SH"
        if market == 2:
            return f"{symbol[0:6]}.SZ"
        raise Exception(
            f"ERROR: not vaild symbol format {symbol} {market} uniq_symbol"
        )

    # [HZ] rather than [H|Z]: inside a character class "|" is a literal
    # character, not alternation.
    if re.match(r"^S[HZ]\d{6}$", symbol):
        return f"{symbol[2:]}.{symbol[0:2]}"
    if re.match(r"^\d{6}\.S[HZ]$", symbol):
        return symbol

    if re.match(r"^\d{6}_\d$", symbol):
        suffix_to_market = {"1": "SH", "2": "SZ"}
        exchange = suffix_to_market.get(symbol[-1])
        if exchange is None:
            # An unknown suffix is rejected like every other bad format
            # instead of yielding an empty string.
            raise Exception(
                f"ERROR: symbol format not support {symbol} uniq_symbol"
            )
        return f"{symbol[:6]}.{exchange}"

    if re.match(r"^\d{6}$", symbol):
        exchange = "SH" if symbol[0] in ("6", "5") else "SZ"
        return f"{symbol[:6]}.{exchange}"

    raise Exception(
        f"ERROR: symbol format not support {symbol} uniq_symbol"
    )
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def cmd_parse(cmd):
    """Parse an argv-style list into a dictionary.

    Usage::

        cmd = cmd_parse(sys.argv)

    The first element (the program name) is skipped. An argument starting
    with ``-`` becomes a key (leading dashes removed) with value ``True``;
    if the next argument does not start with ``-`` it becomes that key's
    value. Any other argument becomes a key with value ``True``. An
    argument consisting only of dashes is ignored.

    :param cmd: iterable of argument strings, program name first
    :return: dict mapping option names to ``True`` or their string value
    """
    argDict = {}
    pending_key = None  # option that may still receive a value
    for position, token in enumerate(cmd):
        if position == 0:
            continue
        if token.startswith("-"):
            key = token.lstrip("-")
            pending_key = None
            if key:
                argDict[key] = True
                pending_key = key
        elif pending_key is not None:
            argDict[pending_key] = token
            pending_key = None
        else:
            argDict[token] = True
    return argDict
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def timer(func):
    """Decorator that prints how many seconds each call of ``func`` took."""
    @wraps(func)
    def timed(*args, **kwargs):
        began = datetime.datetime.now()
        outcome = func(*args, **kwargs)
        finished = datetime.datetime.now()
        elapse = (finished - began).total_seconds()
        print(f"||{func.__name__}|| Using time: {elapse} s")
        return outcome
    return timed
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def time_used(*dargs):
    """Decorator factory that prints start time, end time and duration.

    Usage::

        @time_used("label")
        def job(): ...

    :param dargs: labels; the whole tuple is printed in the message
    :return: a decorator that wraps the function and prints one line
        (in Chinese) per call with the start time, end time and elapsed
        time in whole seconds
    """
    def time_(f):
        @wraps(f)
        def count_time(*args, **kw):
            start = datetime.datetime.now()
            f_res = f(*args, **kw)
            end = datetime.datetime.now()
            # total_seconds() includes the days component, so durations
            # beyond 24 hours are reported correctly; int() drops the
            # sub-second part.
            interval = int((end - start).total_seconds())
            times = datetime.timedelta(seconds=interval)
            print("[{}]开始时间为:{}, 结束时间:{},耗时{}".format(dargs, start.strftime('%Y-%m-%d %H:%M:%S'), end.strftime('%Y-%m-%d %H:%M:%S'), times))
            return f_res
        return count_time
    return time_
|
|
307
|
+
|
|
308
|
+
class TestDict(unittest.TestCase):
    """Unit tests for ``cmd_parse`` and ``uniq_symbol``."""

    def test_cmd_parse(self):
        """Options take the following value; bare words become flags."""
        argv = "NAN -a 1 --b 2 e f -c 3 w".split(" ")
        parsed = cmd_parse(argv)
        for key, expected in (("a", "1"), ("b", "2"), ("e", True)):
            self.assertEqual(parsed[key], expected)
        with self.assertRaises(KeyError):
            parsed["empty"]

    def test_uniq_symbol(self):
        """Every supported input form maps to the canonical symbol."""
        accepted = [
            (("SH600000",), "600000.SH"),
            (("600000.SH",), "600000.SH"),
            (("600000_1",), "600000.SH"),
            (("600000", 1), "600000.SH"),
            (("sz000001",), "000001.SZ"),
            (("000001.sz",), "000001.SZ"),
            (("000001_2",), "000001.SZ"),
            (("000001", 2), "000001.SZ"),
        ]
        for call_args, expected in accepted:
            self.assertEqual(uniq_symbol(*call_args), expected)
        rejected = [("SH6000000",), ("600000", 3)]
        for call_args in rejected:
            with self.assertRaises(Exception):
                uniq_symbol(*call_args)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
# Run the unit tests defined in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: syutils
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: High frequency functions and class
|
|
5
|
+
Home-page: https://gitee.com/wdy0401/syutils
|
|
6
|
+
Author: wangdeyang
|
|
7
|
+
Author-email: wdy0401@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Dist: trade_date
|
|
10
|
+
Requires-Dist: py7zr
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
syutils
|