upplib 3.1.7__py3-none-any.whl → 3.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upplib/file_text.py ADDED
@@ -0,0 +1,980 @@
1
+ from upplib import *
2
+ from datetime import datetime, timezone, timedelta
3
+ from typing import Any, Optional, Union
4
+ from upplib.common_package import *
5
+ from upplib.file import get_file
6
+ from collections import defaultdict
7
+
8
+
9
def get_latest_file(file_path: str = None,
                    path_prefix: str = None,
                    prefix: str = None,
                    path_contain: str = None,
                    contain: str = None,
                    path_suffix: str = None,
                    suffix: str = None,
                    full_path: bool = None) -> None | tuple[str, str] | str:
    """
    Pick one file out of the ``get_file`` listing, which is ordered by file name.

    The listing is requested with ``sort_asc=False`` and its last element is used.
    NOTE(review): whether that element really is the "latest" file depends on how
    ``get_file`` orders its result -- confirm against ``get_file``.

    full_path : None  -> return the tuple (short_name, full_name)
                True  -> return only the full path
                False -> return only the bare file name
    Returns None when nothing matches.
    """
    matches = get_file(file_path=file_path,
                       path_prefix=path_prefix,
                       prefix=prefix,
                       path_contain=path_contain,
                       contain=contain,
                       sort_asc=False,
                       path_suffix=path_suffix,
                       suffix=suffix)
    if len(matches) == 0:
        return None
    chosen = matches[-1]
    if chosen is None:
        return None
    # Only the platform's own separator is used to cut off the directory part.
    separator = '\\' if is_win() else '/'
    short_name = chosen.split(separator)[-1] if separator in chosen else chosen
    if full_path is None:
        return short_name, chosen
    if full_path:
        return chosen
    return short_name
39
+
40
+
41
def to_print(*args,
             time_prefix: bool = False,
             line_with_space_count: int = None,
             interval: int = None) -> str:
    """
    Join the arguments into one line, print it and return it (nothing is written to a file).

    Each argument is rendered with ``json.dumps`` when ``is_json_serializable``
    accepts it, otherwise with ``str``; the pieces are joined with a single space.

    time_prefix           : only the literal True prepends 'YYYY-mm-dd HH:MM:SS '
    line_with_space_count : number of spaces put in front of the line
    interval              : when given, printing is skipped unless at least this much
                            has elapsed (measured with ``get_timestamp``) since the
                            last print recorded under 'to_print_time'; a printed line
                            then also gets ``to_datetime()`` prepended.
                            NOTE(review): unit is presumably seconds -- confirm
                            against ``get_timestamp``.
    Returns the indented line even when printing was skipped.
    """
    rendered = [json.dumps(item) if is_json_serializable(item) else str(item) for item in args]
    message = ' '.join(rendered).strip()
    if time_prefix is True:
        message = datetime.today().strftime('%Y-%m-%d %H:%M:%S') + ' ' + message
    if message is None or str(message) == '':
        # Nothing to say: fall back to the current date-time string.
        message = to_datetime(r_str=True)
    indent = ' ' * (line_with_space_count or 0)
    throttled = interval is not None
    if not throttled or get_timestamp() - get_thread_local_index_data().get('to_print_time', 0) >= interval:
        get_thread_local_index_data()['to_print_time'] = get_timestamp()
        stamp = str(to_datetime()) + ' ' if throttled else ''
        print(indent + stamp + str(message))
    return indent + message
65
+
66
+
67
def to_log(*args,
           time_prefix: bool = False,
           line_with_space_count: int = None,
           interval: int = None) -> str:
    """
    Print a log line through ``to_print`` and return it.

    time_prefix : forwarded as is, except that None is turned into True.
                  With True the line looks like: 2024-11-07 10:23:47 aaa
                  NOTE(review): the default is False, so a plain ``to_log('aaa')``
                  gets no time prefix -- confirm this is intended.
    line_with_space_count, interval : forwarded unchanged to ``to_print``.
    """
    use_time_prefix = True if time_prefix is None else time_prefix
    return to_print(*args,
                    time_prefix=use_time_prefix,
                    line_with_space_count=line_with_space_count,
                    interval=interval)
80
+
81
+
82
def to_print_file(*args,
                  file_path: str = None,
                  file_name: str = None,
                  file_name_with_date: bool = False,
                  file_name_prefix: str = None,
                  file_name_suffix: str = None,
                  line_with_space_count: int = None,
                  mode: str = 'a',
                  interval: int = None) -> str:
    """
    Print the arguments with ``to_print`` and also write the line to a '.txt' file.

    The options are first passed through ``get_and_save_data_to_thread``, whose
    returned values replace the ones given here.
    NOTE(review): presumably it remembers options per thread -- confirm.

    Target file: directory ``file_path`` (default 'to_print_file'), name
    ``file_name_prefix + file_name + file_name_suffix`` where a missing
    ``file_name`` is today's date, e.g. to_print_file/2020-01-01.txt
    Returns the path reported by ``to_txt``.
    """
    (file_path, file_name, file_name_prefix, file_name_suffix, interval,
     line_with_space_count) = get_and_save_data_to_thread(
        file_path=file_path,
        file_name=file_name,
        file_name_prefix=file_name_prefix,
        file_name_suffix=file_name_suffix,
        interval=interval,
        mode=mode,
        file_name_with_date=file_name_with_date,
        line_with_space_count=line_with_space_count,
        fun_name=to_print_file
    )
    printed = to_print(*args, line_with_space_count=line_with_space_count, interval=interval)
    head = '' if file_name_prefix is None else file_name_prefix
    stem = datetime.today().strftime('%Y-%m-%d') if file_name is None else file_name
    tail = '' if file_name_suffix is None else file_name_suffix
    folder = str(file_path if file_path is not None else 'to_print_file')
    return to_txt(data_param=[printed],
                  file_name=head + stem + tail,
                  file_path=folder,
                  mode=mode,
                  fixed_name=True,
                  suffix='.txt')
117
+
118
+
119
def to_print_txt(*args,
                 file_path: str = None,
                 file_name_with_date: bool = False,
                 file_name: str = None,
                 file_name_prefix: str = None,
                 file_name_suffix: str = None,
                 line_with_space_count: int = None,
                 mode: str = 'a',
                 interval: int = None) -> str:
    """
    Print the arguments with ``to_print`` and also write the line to a '.txt' file.

    The options are first passed through ``get_and_save_data_to_thread``, whose
    returned values replace the ones given here.
    NOTE(review): presumably it remembers options per thread -- confirm.

    Target file: directory ``file_path`` (default 'to_print_txt'), name
    ``file_name_prefix + file_name + file_name_suffix`` where a missing
    ``file_name`` is today's date, e.g. to_print_txt/2020-01-01.txt
    Returns the path reported by ``to_txt``.
    """
    (file_path, file_name, file_name_prefix, file_name_suffix, interval,
     line_with_space_count) = get_and_save_data_to_thread(
        file_path=file_path,
        file_name=file_name,
        file_name_prefix=file_name_prefix,
        file_name_suffix=file_name_suffix,
        interval=interval,
        mode=mode,
        file_name_with_date=file_name_with_date,
        line_with_space_count=line_with_space_count,
        fun_name=to_print_txt
    )
    printed = to_print(*args, line_with_space_count=line_with_space_count, interval=interval)
    head = '' if file_name_prefix is None else file_name_prefix
    stem = datetime.today().strftime('%Y-%m-%d') if file_name is None else file_name
    tail = '' if file_name_suffix is None else file_name_suffix
    folder = str(file_path if file_path is not None else 'to_print_txt')
    return to_txt(data_param=[printed],
                  file_name=head + stem + tail,
                  file_path=folder,
                  mode=mode,
                  fixed_name=True,
                  suffix='.txt')
153
+
154
+
155
def to_log_file(*args,
                file_path: str = None,
                file_name_with_date: bool = False,
                file_name: str = None,
                file_name_prefix: str = None,
                file_name_suffix: str = None,
                line_with_space_count: int = None,
                time_prefix: bool = True,
                mode: str = 'a',
                interval: int = None) -> None:
    """
    Print the arguments with ``to_log`` and also write the line to a '.log' file.

    The options are first passed through ``get_and_save_data_to_thread``, whose
    returned values replace the ones given here.
    NOTE(review): presumably it remembers options per thread -- confirm.

    Target file: directory ``file_path`` (default 'to_log_file'), name
    ``file_name_prefix + file_name + file_name_suffix`` where a missing
    ``file_name`` is today's date, e.g. to_log_file/2020-01-01.log
    Nothing is returned.
    """
    (file_path, file_name, file_name_prefix, file_name_suffix, interval,
     line_with_space_count) = get_and_save_data_to_thread(
        file_path=file_path,
        file_name=file_name,
        file_name_prefix=file_name_prefix,
        file_name_suffix=file_name_suffix,
        interval=interval,
        mode=mode,
        file_name_with_date=file_name_with_date,
        line_with_space_count=line_with_space_count,
        fun_name=to_log_file
    )
    logged = to_log(*args,
                    time_prefix=time_prefix,
                    line_with_space_count=line_with_space_count,
                    interval=interval)
    head = '' if file_name_prefix is None else file_name_prefix
    stem = datetime.today().strftime('%Y-%m-%d') if file_name is None else file_name
    tail = '' if file_name_suffix is None else file_name_suffix
    folder = str(file_path if file_path is not None else 'to_log_file')
    to_txt(data_param=[logged],
           file_name=head + stem + tail,
           file_path=folder,
           fixed_name=True,
           mode=mode,
           suffix='.log')
191
+
192
+
193
def to_log_txt(*args,
               file_path: str = None,
               file_name_with_date: bool = False,
               file_name: str = None,
               file_name_prefix: str = None,
               file_name_suffix: str = None,
               line_with_space_count: int = None,
               time_prefix: bool = True,
               mode: str = 'a',
               interval: int = None) -> None:
    """
    Print the arguments with ``to_log`` and also write the line to a '.txt' file.

    The options are first passed through ``get_and_save_data_to_thread``, whose
    returned values replace the ones given here.
    NOTE(review): presumably it remembers options per thread -- confirm.

    Target file: directory ``file_path`` (default 'to_log_txt'), name
    ``file_name_prefix + file_name + file_name_suffix`` where a missing
    ``file_name`` is today's date, e.g. to_log_txt/2020-01-01.txt
    Nothing is returned.
    """
    (file_path, file_name, file_name_prefix, file_name_suffix, interval,
     line_with_space_count) = get_and_save_data_to_thread(
        file_path=file_path,
        file_name=file_name,
        file_name_prefix=file_name_prefix,
        file_name_suffix=file_name_suffix,
        interval=interval,
        mode=mode,
        file_name_with_date=file_name_with_date,
        line_with_space_count=line_with_space_count,
        fun_name=to_log_txt
    )
    logged = to_log(*args,
                    time_prefix=time_prefix,
                    line_with_space_count=line_with_space_count,
                    interval=interval)
    head = '' if file_name_prefix is None else file_name_prefix
    stem = datetime.today().strftime('%Y-%m-%d') if file_name is None else file_name
    tail = '' if file_name_suffix is None else file_name_suffix
    folder = str(file_path if file_path is not None else 'to_log_txt')
    to_txt(data_param=[logged],
           file_name=head + stem + tail,
           file_path=folder,
           mode=mode,
           fixed_name=True,
           suffix='.txt')
229
+
230
+
231
def check_file(file_name: str = None) -> None:
    r"""
    Make sure the directory ``file_name`` exists, creating every missing level.

    Nested paths are supported, for example: C:\Users\yangpu\Desktop\study\a\b\c\d\e\f
    Both '/' and the platform separator are understood, because the work is
    delegated to ``os.makedirs`` instead of splitting on a single separator.

    file_name : directory path; None or '' does nothing.
    Raises OSError when a level cannot be created (e.g. a parent is a regular file).
    """
    if file_name is None or file_name == '':
        return
    # An existing path (whatever its kind) is left untouched.
    if os.path.exists(file_name):
        return
    # exist_ok avoids failing when another thread/process creates the directory
    # between the check above and this call.
    os.makedirs(file_name, exist_ok=True)
245
+
246
+
247
def to_txt(data_param: Any,
           file_name: str = 'txt',
           file_path: str = 'txt',
           fixed_name: bool = False,
           mode: str = 'a',
           suffix: str = '.txt',
           sep_list: str = '\t',
           file_name_is_date: bool = False) -> str:
    r"""
    Write data to a text file, one line per element, and return the file path.

    data_param : the data; a set is turned into a list, a list is written element by
                 element, anything else is written as a single line. Every value is
                 rendered with ``to_str``.
    file_name  : file name, default 'txt'.
                 When it contains a directory part, for example
                 C:\Users\yangpu\Desktop\study\abc\d\e\f\a.sql , the pieces are taken
                 from it: file_name=a,
                          file_path=C:\Users\yangpu\Desktop\study\abc\d\e\f,
                          fixed_name=True,
                          suffix=.sql
                 A plain name such as abc is used as given; an extension inside the
                 name always replaces ``suffix``.
    file_path  : directory of the file; created through ``check_file`` when missing
    fixed_name : True -> the file is exactly file_name + suffix,
                 False -> the name is produced by ``get_file_name``
    mode       : mode handed to ``open`` (default 'a', append)
    suffix     : file suffix, default .txt
    sep_list   : separator used to join an element that is itself a list/tuple/set;
                 default \t. With None such an element is rendered as one value by ``to_str``.
    file_name_is_date : forwarded to ``get_file_name`` as ``is_date``
    """
    file_name = str(file_name)
    for sep in ['\\', '/']:
        f_n = file_name.split(sep)
        if len(f_n) > 1:
            # The name carries its own directory: it overrides file_path.
            file_name = f_n[-1]
            file_path = sep.join(f_n[0:-1])
            if '.' in file_name:
                suffix = '.' + file_name.split('.')[-1]
                file_name = file_name[0:file_name.rfind('.')]
            fixed_name = True

    # Normalise and create the target directory.
    while file_path.endswith('/'):
        file_path = file_path[0:-1]
    check_file(file_path)

    # An extension inside file_name wins over the suffix argument.
    if '.' in file_name:
        suffix = '.' + file_name.split('.')[-1]
        file_name = file_name[0:file_name.rfind('.')]

    # Build the final file name.
    if fixed_name:
        file_name = file_name + suffix
    else:
        file_name = get_file_name(file_name, suffix, is_date=file_name_is_date)
    file_name_path = file_name
    if file_path != '':
        file_name_path = file_path + '/' + file_name

    if isinstance(data_param, set):
        data_param = list(data_param)
    # The context manager closes the file even when rendering or writing raises.
    with open(file_name_path, mode, encoding='utf-8') as text_file:
        if not isinstance(data_param, list):
            text_file.write(to_str(data_param) + '\n')
        else:
            for one in data_param:
                if isinstance(one, (list, tuple, set)) and sep_list is not None:
                    text_file.write(str(sep_list).join([to_str(x) for x in one]) + '\n')
                else:
                    text_file.write(to_str(one) + '\n')
    return file_name_path
315
+
316
+
317
def to_txt_file(data_param: Any,
                file_name: str = None,
                mode: str = 'a') -> str:
    """
    Write data through ``to_txt`` to a fixed file name, keeping the caller's extension.

    data_param : data handed to ``to_txt``
    file_name  : target name; when None it comes from
                 ``get_file_name('to_txt_file', is_date=True)``.
                 The extension is read from the last path segment only, and only
                 that trailing extension is cut off the name, so dots in a
                 directory name or a repeated extension inside the name are left
                 alone. Without an extension '.txt' is used.
    mode       : mode handed to ``open`` by ``to_txt`` (default 'a')
    Returns the path reported by ``to_txt`` (default directory 'to_txt_file').
    """
    if file_name is None:
        file_name = get_file_name('to_txt_file', is_date=True)
    suffix = '.txt'
    # Look for the extension in the last path segment, whichever separator is used.
    base_name = file_name.replace('\\', '/').rsplit('/', 1)[-1]
    if '.' in base_name:
        suffix = '.' + base_name.rsplit('.', 1)[-1]
        # file_name ends with base_name, hence with suffix: drop just that tail.
        file_name = file_name[:len(file_name) - len(suffix)]
    return to_txt(data_param=data_param,
                  file_name=file_name,
                  file_path='to_txt_file',
                  suffix=suffix,
                  fixed_name=True,
                  mode=mode)
337
+
338
+
339
def to_file(data_param: Any,
            file_name: str = None,
            mode: str = 'a') -> str:
    """
    Write data through ``to_txt`` to a fixed file name, keeping the caller's extension.

    data_param : data handed to ``to_txt``
    file_name  : target name; when None it comes from
                 ``get_file_name('to_file', is_date=True)``.
                 The extension is read from the last path segment only, and only
                 that trailing extension is cut off the name, so dots in a
                 directory name or a repeated extension inside the name are left
                 alone. Without an extension '.txt' is used.
    mode       : mode handed to ``open`` by ``to_txt`` (default 'a')
    Returns the path reported by ``to_txt`` (default directory 'file').
    """
    if file_name is None:
        file_name = get_file_name('to_file', is_date=True)
    suffix = '.txt'
    # Look for the extension in the last path segment, whichever separator is used.
    base_name = file_name.replace('\\', '/').rsplit('/', 1)[-1]
    if '.' in base_name:
        suffix = '.' + base_name.rsplit('.', 1)[-1]
        # file_name ends with base_name, hence with suffix: drop just that tail.
        file_name = file_name[:len(file_name) - len(suffix)]
    return to_txt(data_param=data_param,
                  file_name=file_name,
                  file_path='file',
                  suffix=suffix,
                  fixed_name=True,
                  mode=mode)
359
+
360
+
361
def to_list(file_name: str = 'a.txt',
            sep: str = None,
            sep_line: str = None,
            sep_line_contain: str = None,
            sep_line_prefix: str = None,
            sep_line_suffix: str = None,
            sep_all: str = None,
            ignore_start_with: list[str] | set[str] | str = None,
            ignore_end_with: list[str] | set[str] | str | None = None,
            start_index: int = None,
            start_line: str = None,
            end_index: int = None,
            end_line: str = None,
            count: int = None,
            sheet_index: int = 1,
            column_index: list[str] | set[str] | str | None = None,
            column_date: list[str] | set[str] | str | None = None,
            column_datetime: list[str] | set[str] | str | None = None) -> list:
    """
    Read a file into a list; the result is always a list.

    Files ending in .xls / .xlsx go to ``to_list_from_excel``, everything else to
    ``to_list_from_txt``.

    Text files (every line is stripped):
    sep               : split every line with this separator
    sep_line, sep_line_contain, sep_line_prefix, sep_line_suffix :
                        separator-line options, forwarded to ``to_list_from_txt``
    sep_all           : join the whole file into one string, then split it with this
    ignore_start_with : skip lines starting with this / any of these
    ignore_end_with   : skip lines ending with this / any of these
    start_index       : first line to read, numbered from 1, inclusive
    start_line        : start reading at the first line containing this text, inclusive
    end_index         : line to stop at, numbered from 1, exclusive
    end_line          : stop at the first line containing this text, exclusive
    count             : number of lines to read

    Excel files (every cell is stripped):
    sheet_index       : sheet number, from 1
    column_index      : column(s) to keep, from 1; a single column gives a flat list,
                        several ('1,2,3,4' or [1,2,3,4]) give a nested list
    column_date       : columns holding dates, same rules as column_index
    column_datetime   : columns holding date-times, same rules as column_index
    """
    if file_name.endswith(('.xls', '.xlsx')):
        return to_list_from_excel(file_name=file_name,
                                  sheet_index=sheet_index,
                                  column_index=column_index,
                                  column_date=column_date,
                                  column_datetime=column_datetime)
    # to_list_from_txt names these two filters line_ignore_*; passing them under
    # their to_list names would raise TypeError (unexpected keyword argument).
    return to_list_from_txt(file_name=file_name,
                            sep=sep,
                            sep_line=sep_line,
                            sep_line_contain=sep_line_contain,
                            sep_line_prefix=sep_line_prefix,
                            sep_line_suffix=sep_line_suffix,
                            sep_all=sep_all,
                            line_ignore_start_with=ignore_start_with,
                            line_ignore_end_with=ignore_end_with,
                            start_index=start_index,
                            start_line=start_line,
                            end_index=end_index,
                            end_line=end_line,
                            count=count)
420
+
421
+
422
def _excel_column_numbers(spec):
    """Turn one column spec (int, '1,2,3' or list) into a list of ints; None stays None."""
    if spec is None:
        return None
    if isinstance(spec, int):
        return [spec]
    if isinstance(spec, str):
        return [int(part) for part in spec.split(',')]
    if isinstance(spec, list):
        return [int(part) for part in spec]
    # Other types (set, tuple, ...) are not interpreted and select no column.
    return []


def _excel_collect_row(cells, read_text, read_date, read_datetime,
                       wanted, date_cols, datetime_cols, single):
    """Build the output of one sheet row from its raw cells, honouring the column selections."""
    row_data = []
    for position, cell in enumerate(cells, start=1):
        text = read_text(cell)
        if date_cols is not None and position in date_cols:
            value = read_date(cell, text)
        elif datetime_cols is not None and position in datetime_cols:
            value = read_datetime(cell, text)
        elif wanted is None or position in wanted:
            value = text
        else:
            # Column not selected.
            continue
        if single:
            # Exactly one column is selected: the row is that value, not a list.
            row_data = value
        else:
            row_data.append(value)
    return row_data


def to_list_from_excel(file_name: str = 'a.xls',
                       sheet_index: int = 1,
                       column_index: list | int | str | None = None,
                       column_date: list | int | str | None = None,
                       column_datetime: list | int | str | None = None) -> list:
    """
    Read one sheet of an .xls / .xlsx file into a list; every cell is stripped text.

    sheet_index     : sheet number, from 1
    column_index    : column(s) to keep, from 1. When it is given, the date and
                      date-time columns are added to the kept columns. If exactly
                      one column ends up selected the result is a flat list,
                      otherwise ('1,2,3,4' or [1,2,3,4]) a nested list[list].
                      None keeps every column.
    column_date     : columns converted with ``to_date``, same rules as column_index
    column_datetime : columns converted with ``to_datetime``, same rules as column_index
                      (a column listed as date is not treated as date-time)
    Returns [] when ``file_is_empty`` reports the file as empty.
    """
    if file_is_empty(file_name):
        return []
    data_list = []
    wanted = _excel_column_numbers(column_index)
    date_cols = _excel_column_numbers(column_date)
    datetime_cols = _excel_column_numbers(column_datetime)
    combined = [number
                for columns in (wanted, date_cols, datetime_cols) if columns is not None
                for number in columns]
    if len(combined) > 0 and wanted is not None:
        wanted = combined
    single = wanted is not None and len(wanted) == 1

    # .xls: date cells are stored as serial numbers and converted through xlrd.
    if file_name.endswith('.xls'):
        book = xlrd.open_workbook(file_name)
        sheet = book.sheet_by_index(sheet_index - 1)
        for i in range(sheet.nrows):
            data_list.append(_excel_collect_row(
                sheet.row_values(i),
                lambda raw: str(raw).strip(),
                lambda raw, text: to_date(xlrd.xldate_as_datetime(to_int(raw), 0)),
                lambda raw, text: to_datetime(xlrd.xldate_as_datetime(to_int(raw), 0)),
                wanted, date_cols, datetime_cols, single))

    # .xlsx: date cells are converted from their text form.
    if file_name.endswith('.xlsx'):
        wb = openpyxl.load_workbook(filename=file_name, read_only=True)
        try:
            ws = wb[wb.sheetnames[sheet_index - 1]]
            for cells in ws.rows:
                data_list.append(_excel_collect_row(
                    cells,
                    lambda cell: str(cell.value).strip(),
                    lambda cell, text: to_date(text),
                    lambda cell, text: to_datetime(text),
                    wanted, date_cols, datetime_cols, single))
        finally:
            # A read-only workbook keeps the file open until it is closed explicitly.
            wb.close()
    return data_list
538
+
539
+
540
def to_list_from_txt_with_blank_line(file_name: str = 'a.txt') -> list:
    """
    Read a text file as groups of lines separated by blank lines.

    The result is a list(list); several blank lines in a row count as one separator.
    """
    return to_list_from_txt(file_name=file_name, sep_line='')
547
+
548
+
549
def to_list_json_from_txt(file_name: str = 'a.txt',
                          start_index: int = None,
                          start_line: str = None,
                          start_line_exclude: str | list[str] | set[str] = None,
                          end_index: int = None,
                          end_line: str = None,
                          end_line_exclude: str | list[str] | set[str] = None,
                          count: int = None) -> list:
    """
    Read a text file line by line and parse every non-empty line as JSON.

    The result is a list[json]; empty lines are dropped. Example input:
    {"accessKey":"1","signature":"4","timestamp":"1747639787"}
    {"accessKey":"2","signature":"5","timestamp":"1747639787"}
    {"accessKey":"3","signature":"6","timestamp":"1747639787"}

    All range options are forwarded unchanged to ``to_list_from_txt``.
    """
    options = dict(start_index=start_index,
                   start_line=start_line,
                   start_line_exclude=start_line_exclude,
                   end_index=end_index,
                   end_line=end_line,
                   end_line_exclude=end_line_exclude,
                   count=count,
                   line_json=True)
    return to_list_from_txt(file_name, **options)
575
+
576
+
577
def _txt_affixes(value):
    """Normalise an ignore-prefix/suffix option into a tuple of strings (empty when unusable)."""
    if isinstance(value, (list, set)):
        return tuple(str(one) for one in value)
    if isinstance(value, str):
        return (value,)
    # None and any other type filter nothing.
    return ()


def _txt_has_marker(line, markers):
    """Tell whether *line* contains the marker, or any marker of a list/set."""
    if isinstance(markers, (list, set)):
        return any(line.find(str(one)) > -1 for one in markers)
    return line.find(str(markers)) > -1


def to_list_from_txt(file_name: str = 'a.txt',
                     sep: str = None,
                     sep_line: str = None,
                     sep_line_contain: str = None,
                     sep_line_prefix: str = None,
                     sep_line_suffix: str = None,
                     sep_line_with_space_count: int = None,
                     sep_is_front: bool = True,
                     sep_all: str = None,
                     line_ignore_start_with: list | int | str = None,
                     line_ignore_end_with: list | int | str = None,
                     line_ignore_empty: bool | None = None,
                     line_join: str = None,
                     line_must_start_with: str = None,
                     line_must_contain: str = None,
                     line_json: bool = None,
                     start_index: int = None,
                     start_line: str = None,
                     start_line_exclude: str | list[str] | set[str] = None,
                     end_index: int = None,
                     end_line: str = None,
                     end_line_exclude: str | list[str] | set[str] = None,
                     count: int = None) -> list:
    """
    Read a UTF-8 text file into a list; every line is stripped first.

    The steps run in this order: line selection, start/end_line_exclude grouping,
    sep_all, separator-line grouping, sep, line_join, line_json.

    sep                       : split every line: list(str) -> list(list(str)),
                                or list(list(str)) -> list(list(list(str)))
    sep_line                  : a line equal to this is a separator line: list(str) -> list(list(str))
    sep_line_with_space_count : this many blank lines in a row act as a separator
    sep_line_contain          : a line containing this is a separator line
    sep_line_prefix           : a line starting with this is a separator line
    sep_line_suffix           : a line ending with this is a separator line
    sep_is_front              : True keeps a non-blank separator line with the group before it,
                                False makes it the first line of the following group
    sep_all                   : join all lines into one string, then split it with this
    line_ignore_start_with    : skip lines starting with this str / any item of this list or set
    line_ignore_end_with      : skip lines ending with this str / any item of this list or set
    line_ignore_empty         : skip empty lines
    line_must_start_with      : keep only lines starting with this
    line_must_contain         : keep only lines containing this
    line_join                 : join every element with this: list(list(str)) -> list(str)
    line_json                 : parse every non-empty element as JSON: list(str) -> list(json)
    start_index               : first line to read, numbered from 1, inclusive
    start_line                : start at the first line containing this text, inclusive
    start_line_exclude        : used together with end_line_exclude: the lines between a start
                                marker line and an end marker line (both excluded) become one
                                element, ' '.join(lines)
    end_index                 : line to stop at, numbered from 1, exclusive
    end_line                  : stop at the first line containing this text, exclusive
    end_line_exclude          : see start_line_exclude
    count                     : number of lines to take; lines rejected by the line_* filters
                                still count
    Returns [] when ``file_is_empty`` reports the file as empty.
    """
    if file_is_empty(file_name=file_name):
        return []
    data_list = []
    # The context manager releases the file handle as soon as the lines are read.
    with open(file_name, 'r', encoding='utf-8') as text_file:
        d_list = text_file.readlines()
    # Number of lines taken so far.
    c = 0
    # None means "no marker requested"; False means "marker not reached yet".
    start_flag = False if start_line is not None else None
    end_flag = False if end_line is not None else None
    skip_prefixes = _txt_affixes(line_ignore_start_with)
    skip_suffixes = _txt_affixes(line_ignore_end_with)
    for i, raw_line in enumerate(d_list):
        line = raw_line.strip()
        # Before the start position.
        if start_index is not None and i + 1 < to_int(start_index):
            continue
        # At or after the end position.
        if end_index is not None and i + 1 >= to_int(end_index):
            continue
        # Enough lines taken.
        if count is not None and c >= to_int(count):
            continue
        # Start marker: once seen it stays set.
        if start_flag is not None and not start_flag and line.find(start_line) > -1:
            start_flag = True
        # End marker: once seen it stays set.
        if end_flag is not None and not end_flag and line.find(end_line) > -1:
            end_flag = True
        if start_flag is not None and not start_flag:
            # A start marker was requested and has not been reached yet.
            continue
        elif end_flag is not None and end_flag:
            # An end marker was requested and has been reached.
            continue
        c += 1
        if line.startswith(skip_prefixes) or line.endswith(skip_suffixes):
            continue
        if line_ignore_empty and len(line) == 0:
            continue
        if line_must_start_with is not None and not line.startswith(str(line_must_start_with)):
            continue
        if line_must_contain is not None and str(line_must_contain) not in line:
            continue
        data_list.append(line)

    # Marker-pair grouping: the lines between the markers become ' '.join(lines).
    if start_line_exclude is not None and end_line_exclude is not None:
        selected_lines = data_list
        data_list = []
        start_flag = False
        start_flag_once = False
        end_flag = False
        one_data_list = []
        for selected in selected_lines:
            line = selected.strip()
            if not start_flag and _txt_has_marker(line, start_line_exclude):
                start_flag = True
                start_flag_once = True
            if not end_flag and _txt_has_marker(line, end_line_exclude):
                end_flag = True
            if not start_flag:
                # Still in front of a start marker.
                continue
            elif end_flag:
                # End marker reached: close the current group and reset the state.
                start_flag = False
                end_flag = False
                start_flag_once = False
                data_list.append(' '.join(one_data_list).strip())
                one_data_list = []
                continue
            # The start marker line itself contributes no text.
            if start_flag_once:
                start_flag_once = False
                line = ''
            if start_flag and not end_flag:
                one_data_list.append(line)
        # A group that was opened but never closed is kept as well.
        if len(one_data_list):
            data_list.append(' '.join(one_data_list).strip())

    if sep_all is not None:
        # Re-split the whole content: list(str) -> str -> list(str).
        data_list = ''.join(data_list).split(str(sep_all))
    # Separator lines: list(str) -> list(list).
    if any(option is not None for option in
           (sep_line, sep_line_prefix, sep_line_contain, sep_line_suffix, sep_line_with_space_count)):
        r_list = []
        # Lines of the group being collected.
        one_list = []
        # Blank lines seen in a row.
        space_count = 0
        for d_o in data_list:
            space_count = space_count + 1 if not d_o.strip() else 0
            # Blank lines never enter a group.
            if len(d_o.strip()) and sep_is_front:
                one_list.append(d_o)
            if ((sep_line is not None and d_o == sep_line) or
                    (sep_line_contain is not None and d_o.find(sep_line_contain) != -1) or
                    (sep_line_prefix is not None and d_o.startswith(sep_line_prefix)) or
                    (sep_line_suffix is not None and d_o.endswith(sep_line_suffix))):
                if len(one_list):
                    r_list.append(one_list)
                one_list = []
            if len(d_o.strip()) and not sep_is_front:
                one_list.append(d_o)
            # Split after the requested number of consecutive blank lines.
            if sep_line_with_space_count is not None and space_count == sep_line_with_space_count:
                if len(one_list):
                    r_list.append(one_list)
                one_list = []
                space_count = 0
        # The last group has no separator after it.
        if len(one_list):
            r_list.append(one_list)
        data_list = r_list
    # Split inside every line.
    if sep is not None:
        r_list = []
        for line in data_list:
            if isinstance(line, str):
                # list(str) case
                r_list.append(line.split(str(sep)))
            elif isinstance(line, list):
                # list(list) case
                r_list.append([o_line.split(str(sep)) for o_line in line])
        data_list = r_list
    # Join every element into one str.
    if line_join is not None:
        data_list = [str(line_join).join(x) for x in data_list]
    # Join every non-empty element into one str and parse it as JSON.
    if line_json:
        glue = str('' if line_join is None else line_join)
        data_list = [json.loads(glue.join(x))
                     for x in data_list if x is not None and len(str(x))]
    return data_list
818
+
819
+
820
def to_str_from_file(file_name: str = 'a.txt',
                     str_join: str = ' ',
                     ignore_start_with: list | int | str | None = None,
                     ignore_end_with: list | int | str | None = None,
                     start_index: int = None,
                     start_line: str = None,
                     end_index: int = None,
                     end_line: str = None,
                     count: int = None) -> str:
    """
    Read a file and return its selected lines joined into one str.

    The lines are joined with ``str_join`` (default: one space); every other option
    is forwarded unchanged to ``to_data_from_file``, called with ``r_str=True``.
    """
    options = dict(file_name=file_name,
                   ignore_start_with=ignore_start_with,
                   ignore_end_with=ignore_end_with,
                   str_join=str_join,
                   start_index=start_index,
                   start_line=start_line,
                   end_index=end_index,
                   end_line=end_line,
                   count=count)
    return to_data_from_file(r_str=True, **options)
840
+
841
+
842
def to_json_from_file(file_name: str = 'a.txt',
                      start_index: int = None,
                      start_line: str = None,
                      end_index: int = None,
                      end_line: str = None,
                      count: int = None) -> dict[str, Any]:
    """
    Read a file and return its content parsed as JSON.

    Lines starting with '//', '/*' or '#' are requested to be ignored; the range
    options are forwarded unchanged to ``to_data_from_file``, called with
    ``r_json=True``.
    """
    comment_markers = ['//', '/*', '#']
    return to_data_from_file(file_name=file_name,
                             start_index=start_index,
                             start_line=start_line,
                             end_index=end_index,
                             end_line=end_line,
                             ignore_start_with=comment_markers,
                             count=count,
                             r_json=True)
857
+
858
+
859
def to_data_from_file(file_name: str = 'a.txt',
                      sep: str = None,
                      sep_line: str = None,
                      sep_all: str = None,
                      ignore_start_with: list | int | str | None = None,
                      ignore_end_with: list | int | str | None = None,
                      start_index: int = None,
                      start_line: str = None,
                      end_index: int = None,
                      end_line: str = None,
                      count: int = None,
                      sheet_index: int = 1,
                      column_index: list | int | str | None = None,
                      column_date: list | int | str | None = None,
                      column_datetime: list | int | str | None = None,
                      r_json: bool = False,
                      str_join: str = '',
                      r_str: bool = False) -> str | dict[str, Any]:
    """
    A layer on top of ``to_list`` that can post-process the list.

    r_str    : return ``str_join.join(list)`` as one str (checked first)
    r_json   : return ``json.loads(str_join.join(list))``
    str_join : text placed between the elements when joining, default ''
    With neither flag set, the list from ``to_list`` is returned as is.
    All other options are forwarded unchanged to ``to_list``.
    """
    rows = to_list(file_name=file_name,
                   sep=sep,
                   sep_line=sep_line,
                   sep_all=sep_all,
                   ignore_start_with=ignore_start_with,
                   ignore_end_with=ignore_end_with,
                   start_index=start_index,
                   start_line=start_line,
                   end_index=end_index,
                   end_line=end_line,
                   count=count,
                   sheet_index=sheet_index,
                   column_index=column_index,
                   column_date=column_date,
                   column_datetime=column_datetime)
    if r_str:
        return str_join.join(rows)
    if r_json:
        return json.loads(str_join.join(rows))
    return rows
899
+
900
+
901
def to_excel(data_list: set | list | tuple | None,
             file_name: str = None,
             file_path: str = 'excel') -> None:
    """
    Export data to an .xlsx file in ``file_path``.

    data_list : rows to write; None is treated like an empty collection, so an
                empty workbook is saved. A list or set element becomes one row,
                its dict/list items written as JSON text and the others as str.
                Any other element becomes a single-cell row: JSON text when
                ``is_json_serializable`` accepts it, otherwise str.
    file_name : base name handed to ``get_file_name`` (default 'excel')
    file_path : target directory, created through ``check_file`` when missing
    """
    if file_name is None:
        file_name = 'excel'
    file_name = str(file_name)
    file_path = file_path.rstrip('/')
    check_file(file_path)
    excel_obj = openpyxl.Workbook()
    # First worksheet of the new workbook.
    excel_obj_sheet = excel_obj[excel_obj.sheetnames[0]]
    for one_data in (data_list if data_list is not None else []):
        if isinstance(one_data, (list, set)):
            excel_obj_sheet.append(
                [json.dumps(one) if isinstance(one, (dict, list)) else str(one) for one in one_data])
        else:
            cell = json.dumps(one_data) if is_json_serializable(one_data) else str(one_data)
            excel_obj_sheet.append([cell])

    excel_obj.save(file_path + '/' + get_file_name(file_name, '.xlsx', True))
935
+
936
+
937
def to_csv(data_list: set | list | tuple | dict,
           file_name: str = None,
           file_path: str = 'csv') -> None:
    """
    Export data to a csv file in ``file_path``.

    data_list : either rows,
                    data_list = [['Name', 'Age', 'Gender'],
                                 ['Alice', 25, 'Female'],
                                 ['Bob', 30, 'Male'],
                                 ['Charlie', 35, 'Male']]
                or records,
                    data_list = [{
                        "a": 1,
                        "b": 2,
                    },{
                        "a": 1,
                        "b": 2,
                    }]
                Records are written as a header row (the keys of the first record)
                followed by one row of values per record. Rows may be lists or
                tuples. A single dict is treated as one record; a set or tuple of
                rows is accepted like a list.
    file_name : base name handed to ``get_file_name`` (default 'csv'), e.g. file_name = 'data'
    file_path : target directory, created through ``check_file`` when missing
    Raises KeyError when a later record lacks a key of the first record.
    """
    if file_name is None:
        file_name = 'csv'
    file_name = get_file_name(file_name, '.csv', True)
    file_path = file_path.rstrip('/')
    check_file(file_path)
    # Bring every accepted container to a list so the first element can be inspected.
    records = [data_list] if isinstance(data_list, dict) else list(data_list)
    if records and isinstance(records[0], dict):
        title_list = list(records[0])
        d_list = [title_list]
        d_list.extend([one_data[k] for k in title_list] for one_data in records)
    else:
        # Plain rows (lists or tuples) are written as they are.
        d_list = records
    with open(file_path + '/' + file_name, 'w', newline='') as f:
        csv.writer(f).writerows(d_list)