zhmiscellanylite 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of zhmiscellanylite might be problematic. Click here for more details.

@@ -0,0 +1 @@
1
+ from . import processing, misc, fileio, string, math, list, dict
@@ -0,0 +1,11 @@
1
def is_junction(entry):
    """
    Report whether a directory entry is a Windows reparse point (e.g. a junction).

    `entry` must expose `stat(follow_symlinks=False)` (as `os.DirEntry` does).
    Returns False on every non-Windows platform, and also whenever the stat
    call or the attribute inspection fails for any reason.
    """
    import sys

    reparse_point_flag = 0x400  # FILE_ATTRIBUTE_REPARSE_POINT

    if sys.platform == "win32":
        try:
            info = entry.stat(follow_symlinks=False)
            # st_file_attributes is only present on Windows stat results.
            if hasattr(info, "st_file_attributes"):
                return bool(info.st_file_attributes & reparse_point_flag)
        except Exception:
            pass
    return False
@@ -0,0 +1,3 @@
1
def print_dict(ldict):
    """Print `ldict` to stdout as JSON indented by four spaces."""
    from json import dumps

    rendered = dumps(ldict, indent=4)
    print(rendered)
@@ -0,0 +1,673 @@
1
def read_json_file(file_path):
    """
    Read JSON data from a file and return the decoded value.

    If the file does not exist it is created containing an empty JSON object
    (`{}`) and an empty dict is returned.

    The file is opened directly instead of checking for existence first, so
    there is no window between the check and the open. UTF-8 is used
    explicitly so the result does not depend on the platform's locale encoding.

    Raises:
        json.JSONDecodeError: if the file exists but does not hold valid JSON.
    """
    import json
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        # Seed the missing file so later reads/writes find a valid document.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write('{}')
        return {}
15
+
16
+
17
def write_json_file(file_path, data):
    """
    Serialise `data` as JSON (four-space indent) into `file_path`,
    replacing any previous content of that file.
    """
    from json import dump

    with open(file_path, 'w') as target:
        dump(data, target, indent=4)
24
+
25
+
26
def create_folder(folder_name):
    """
    Create `folder_name` (including missing parents) if nothing exists at that path.

    If the path already exists - as a directory or as anything else - the
    call is a no-op.
    """
    import os
    if not os.path.exists(folder_name):
        # exist_ok closes the gap between the check above and the creation:
        # another thread/process creating the same folder in between must not
        # turn into a FileExistsError.
        os.makedirs(folder_name, exist_ok=True)
30
+
31
+
32
def remove_folder(folder_name):
    """Recursively delete `folder_name`; do nothing when the path does not exist."""
    import os
    import shutil

    if not os.path.exists(folder_name):
        return
    shutil.rmtree(folder_name)
37
+
38
+
39
def base_name_no_ext(file_path):
    """Return the final path component of `file_path` with its last extension removed."""
    import os

    stem, _extension = os.path.splitext(os.path.basename(file_path))
    return stem
44
+
45
+
46
def convert_name_to_filename(name):
    """
    Map `name` to a file-name form by passing the pairs "/" -> "[",
    ":" -> "]" and "." -> "+" to `zhmiscellany.string.multi_replace`.
    """
    import zhmiscellany.string

    substitutions = [("/","["), (":","]"), (".","+")]
    return zhmiscellany.string.multi_replace(name, substitutions)
49
+
50
+
51
def convert_filename_to_name(filename):
    """
    Reverse mapping of `convert_name_to_filename`: passes the pairs
    "[" -> "/", "]" -> ":" and "+" -> "." to `zhmiscellany.string.multi_replace`.
    """
    import zhmiscellany.string

    substitutions = [("[","/"), ("]",":"), ("+",".")]
    return zhmiscellany.string.multi_replace(filename, substitutions)
54
+
55
+
56
def recursive_copy_files(source_dir, destination_dir, prints=False):
    """
    Mirror the files under `source_dir` into `destination_dir`.

    The directory structure of the source is recreated in the destination.
    A file is copied (with `shutil.copy2`, which also copies metadata) when it
    is missing in the destination or when its modification time differs from
    the source's; otherwise it is skipped. Files that exist only in the
    destination are left untouched.

    Args:
        source_dir: directory to copy from.
        destination_dir: directory to copy into; created if the source
            directory exists and the destination does not.
        prints: when True, progress messages are printed. When False the
            function prints nothing.
    """
    import os
    import shutil

    # Make sure the destination root exists even when the source has no
    # sub-directories (otherwise the first copy2 would fail).
    if os.path.isdir(source_dir):
        os.makedirs(destination_dir, exist_ok=True)

    if prints:
        print('Validating matching directory structure')
    for root, dirs, _ in os.walk(source_dir):
        for dir_name in dirs:
            relative_dir = os.path.relpath(os.path.join(root, dir_name), source_dir)
            dest_dir_path = os.path.join(destination_dir, relative_dir)
            if not os.path.exists(dest_dir_path):
                if prints:
                    print(f'Creating missing directory {dest_dir_path}')
                os.makedirs(dest_dir_path, exist_ok=True)

    if prints:
        print('Copying files from source to destination, skipping duplicates')
    for root, _, files in os.walk(source_dir):
        for file_name in files:
            source_file = os.path.join(root, file_name)
            rel_path = os.path.relpath(source_file, source_dir)
            dest_file = os.path.join(destination_dir, rel_path)
            # copy2 preserves mtime, so equal mtimes mean "already copied".
            if os.path.exists(dest_file) and os.path.getmtime(source_file) == os.path.getmtime(dest_file):
                continue
            if prints:
                print(f'Copying {source_file}')
            shutil.copy2(source_file, dest_file)
95
+
96
+
97
def empty_directory(directory_path):
    """
    Delete the contents of `directory_path` while keeping the directory itself.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively. Symbolic links are removed as links - their targets
    are never touched - and dangling links are removed as well.

    Raises:
        FileNotFoundError / NotADirectoryError: propagated from `os.listdir`
            when `directory_path` is missing or not a directory.
    """
    import os
    import shutil
    for item in os.listdir(directory_path):
        item_path = os.path.join(directory_path, item)
        # Check for links first: shutil.rmtree refuses to operate on a
        # symlink, and isfile/isdir both report False for a dangling link.
        if os.path.islink(item_path) or os.path.isfile(item_path):
            os.unlink(item_path)
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)
109
+
110
+
111
def abs_listdir(path):
    """
    List the entries of a directory as absolute paths.

    Raises:
        FileNotFoundError: when the path does not exist.
        NotADirectoryError: when the path exists but is not a directory.
    """
    import os

    base = os.path.abspath(path)

    # Validate up front so the caller gets a descriptive error.
    if not os.path.exists(base):
        raise FileNotFoundError(f"Directory not found: {base}")
    if not os.path.isdir(base):
        raise NotADirectoryError(f"Path is not a directory: {base}")

    return [os.path.join(base, entry_name) for entry_name in os.listdir(base)]
132
+
133
+
134
def delete_ends_with(directory, string_endswith, avoid=()):
    """
    Delete every entry directly inside `directory` whose path ends with
    `string_endswith`, except those listed in `avoid`.

    Args:
        directory: directory to scan (not recursive).
        string_endswith: suffix (or tuple of suffixes) passed to `str.endswith`.
        avoid: collection of paths to keep. Entries are compared against the
            absolute paths produced by `abs_listdir`, so they must be given
            in that same absolute form to match.
    """
    import os
    for file in abs_listdir(directory):
        if file.endswith(string_endswith) and file not in avoid:
            os.remove(file)
141
+
142
+
143
def read_bytes_section(file_path, section_start, section_end):
    """
    Return the bytes of `file_path` from offset `section_start` up to (not
    including) offset `section_end`.
    """
    with open(file_path, 'rb') as handle:
        # Position at the start offset, then read the span length.
        handle.seek(section_start)
        span = section_end - section_start
        return handle.read(span)
149
+
150
+
151
def copy_file_with_overwrite(src, dst):
    """
    Copy `src` to `dst` (data and metadata, via `shutil.copy2`), removing any
    existing file at `dst` first.

    Raises:
        shutil.SameFileError: when `src` and `dst` refer to the same file.
            This is checked *before* `dst` is removed; without the check the
            file would be deleted and the copy would then fail, losing the data.
        FileNotFoundError: when `src` does not exist (raised before `dst` is
            removed if `dst` exists).
    """
    import os
    import shutil
    if os.path.exists(dst):
        if os.path.samefile(src, dst):
            raise shutil.SameFileError(f"{src!r} and {dst!r} are the same file")
        os.remove(dst)
    shutil.copy2(src, dst)
157
+
158
+
159
def fast_dill_dumps(object):
    """
    Serialise `object` to bytes, trying `pickle` first and falling back to
    `dill` when pickle cannot handle it.

    Both serialisers are called with protocol 5. `dill` is imported only when
    the fallback is actually needed.
    """
    import pickle
    try:
        # pickle is much faster so at least attempt to use it at first
        return pickle.dumps(object, protocol=5)
    except Exception:
        # `except Exception` (not a bare except) so KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        import dill
        return dill.dumps(object, protocol=5)
167
+
168
+
169
def fast_dill_loads(data):
    """
    Deserialise bytes produced by `fast_dill_dumps`, trying `pickle` first and
    falling back to `dill` when pickle fails.

    `dill` is imported only when the fallback is actually needed.

    SECURITY: unpickling executes code chosen by whoever produced `data`.
    Only call this on data from a trusted source.
    """
    import pickle
    try:
        # pickle is much faster so at least attempt to use it at first
        return pickle.loads(data)
    except Exception:
        # `except Exception` (not a bare except) so KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        import dill
        return dill.loads(data)
177
+
178
+
179
# Module-level zstandard compressor / decompressor, built on first use.
zstd_comp = None
zstd_decomp = None


def _get_std_objects():
    """
    Return the shared `(compressor, decompressor)` pair, creating both
    (compressor at level 4) the first time either one is missing.
    """
    global zstd_comp, zstd_decomp
    both_ready = zstd_comp is not None and zstd_decomp is not None
    if not both_ready:
        # Imported here so zstandard is only needed once compression is used.
        import zstandard as zstd
        zstd_comp = zstd.ZstdCompressor(level=4)
        zstd_decomp = zstd.ZstdDecompressor()
    return zstd_comp, zstd_decomp
189
+
190
+
191
def save_object_to_file(object, file_name, compressed=False):
    """
    Serialise `object` with `fast_dill_dumps` and write it to `file_name`.

    Args:
        object: the value to store.
        file_name: destination path; overwritten if it exists.
        compressed: when True the serialised bytes are zstd-compressed
            before being written.

    The payload is fully built before the file is opened, so a failure while
    serialising does not leave a truncated/empty file behind. The zstd
    objects are only requested when `compressed` is True.
    """
    payload = fast_dill_dumps(object)
    if compressed:
        compressor, _ = _get_std_objects()
        payload = compressor.compress(payload)
    with open(file_name, 'wb') as f:
        f.write(payload)
198
+
199
+
200
def load_object_from_file(file_name, compressed=False):
    """
    Load an object previously written by `save_object_to_file`.

    Args:
        file_name: path of the file to read.
        compressed: must match the value used when the file was saved; when
            True the file content is zstd-decompressed before deserialising.

    The zstd objects are only requested when `compressed` is True.

    SECURITY: the content is unpickled (see `fast_dill_loads`); only load
    files from a trusted source.
    """
    with open(file_name, 'rb') as f:
        raw = f.read()
    if compressed:
        _, decompressor = _get_std_objects()
        raw = decompressor.decompress(raw)
    return fast_dill_loads(raw)
207
+
208
+
209
def pickle_and_encode(obj):
    """
    Pickle an object and URL-safe encode it.

    The object is serialised with `fast_dill_dumps`, zstd-compressed, and the
    result is returned as a URL-safe Base64 `str`.
    """
    import base64
    zstd_comp, zstd_decomp = _get_std_objects()
    pickled_data = zstd_comp.compress(fast_dill_dumps(obj))  # Serialize the object
    encoded_data = base64.urlsafe_b64encode(pickled_data).decode()  # Base64 encode
    return encoded_data
216
+
217
+
218
def decode_and_unpickle(encoded_str):
    """
    Decode a URL-safe encoded string and unpickle the object.

    Inverse of `pickle_and_encode`: Base64-decodes the string,
    zstd-decompresses the bytes and deserialises them with `fast_dill_loads`.

    SECURITY: the payload is unpickled; only decode strings from a trusted
    source.
    """
    import base64
    zstd_comp, zstd_decomp = _get_std_objects()
    pickled_data = base64.urlsafe_b64decode(encoded_str)  # Decode from Base64
    obj = fast_dill_loads(zstd_decomp.decompress(pickled_data))  # Deserialize
    return obj
225
+
226
+
227
def list_files_by_modified_time(directory):
    """
    Return the absolute paths of the regular files directly inside
    `directory`, most recently modified first.
    """
    import os

    timed_paths = []
    for path in abs_listdir(directory):
        if os.path.isfile(os.path.join(directory, path)):
            timed_paths.append((path, os.path.getmtime(path)))

    # Newest first; the sort is stable, so ties keep listing order.
    timed_paths.sort(key=lambda pair: pair[1], reverse=True)
    return [path for path, _mtime in timed_paths]
233
+
234
+
235
def get_script_path():
    """
    Return the path of the running program: `sys.executable` when
    `sys.frozen` is set (standalone executable), otherwise `sys.argv[0]`.
    """
    import sys

    running_frozen = getattr(sys, 'frozen', False)
    return sys.executable if running_frozen else sys.argv[0]
244
+
245
+
246
def chdir_to_script_dir():
    """
    Change the working directory to the directory containing the running
    script or executable (as reported by `get_script_path`).

    When the script path has no directory component (e.g. the program was
    started as `python script.py`), the script already lives in the current
    working directory, so nothing is changed - `os.chdir('')` would raise.
    """
    import os
    script_dir = os.path.dirname(get_script_path())
    if script_dir:
        os.chdir(script_dir)
249
+
250
+
251
def cache(function, *args, _cache_compressed=False, **kwargs):
    """
    Caches the result of a function call to disk.

    A hash is computed from the function (its source when available), the
    positional and keyword arguments and the compression flag. If a file for
    that hash exists in the `zhmiscellany_cache` folder (relative to the
    current working directory) its content is returned; otherwise `function`
    is called, the result is saved and returned.

    Args:
        function: the callable to run.
        *args, **kwargs: forwarded to `function` and included in the hash.
        _cache_compressed: store/load the cache file zstd-compressed.

    Notes:
        - pandas DataFrame/Series arguments are hashed from their metadata and
          only their first 100 rows, so two frames that differ only beyond
          row 100 map to the same cache entry.
        - Callables whose source cannot be retrieved are hashed via `str()`.
          NOTE(review): for some objects that string may include a memory
          address and therefore differ between runs - confirm if relied upon.
    """
    import os
    import re
    import inspect
    import orjson
    import hashlib
    from datetime import datetime

    cache_folder = 'zhmiscellany_cache'

    def sort_normalized(items):
        """Sort normalized set members deterministically."""
        try:
            return sorted(items)
        except TypeError:
            # Mixed or unorderable members (e.g. ints with strs, dicts):
            # order them by their canonical JSON encoding instead.
            return sorted(items, key=lambda item: orjson.dumps(item, option=orjson.OPT_SORT_KEYS))

    def normalize_for_json(obj):
        """Recursively normalize objects to be JSON-serializable."""
        if callable(obj):
            try:
                return ('__callable__', inspect.getsource(obj))
            except (OSError, TypeError):
                return ('__callable__', str(obj))

        # Handle dict-like objects (including bidict, defaultdict, etc.)
        if isinstance(obj, dict):
            # Convert non-string keys to strings
            return {str(k): normalize_for_json(v) for k, v in obj.items()}

        # Handle lists and tuples
        if isinstance(obj, (list, tuple)):
            return type(obj)(normalize_for_json(item) for item in obj)

        # Handle sets (frozenset included - orjson cannot serialize either)
        if isinstance(obj, (set, frozenset)):
            return sort_normalized([normalize_for_json(item) for item in obj])

        # Handle pandas DataFrames - OPTIMIZED FOR HASHING
        if hasattr(obj, '__class__') and obj.__class__.__name__ == 'DataFrame':
            try:
                import pandas as pd
                if isinstance(obj, pd.DataFrame):
                    # For hashing purposes, use a much faster representation
                    # We use the pandas internal hash which is very fast
                    import pandas.util as pd_util

                    # Fast hash based on shape, columns, dtypes (as a single string), and a sample of data
                    return {
                        '__type__': 'DataFrame',
                        'shape': obj.shape,
                        'columns': list(obj.columns),
                        'dtypes_str': str(obj.dtypes.to_dict()),  # Single conversion instead of dict comp
                        'index_name': obj.index.name,
                        # Use pandas' built-in hash on a sample for speed
                        'hash': str(pd_util.hash_pandas_object(obj.iloc[:min(100, len(obj))]).sum())
                    }
            except ImportError:
                pass

        # Handle pandas Series - OPTIMIZED
        if hasattr(obj, '__class__') and obj.__class__.__name__ == 'Series':
            try:
                import pandas as pd
                if isinstance(obj, pd.Series):
                    import pandas.util as pd_util
                    return {
                        '__type__': 'Series',
                        'dtype': str(obj.dtype),
                        'name': obj.name,
                        'shape': obj.shape,
                        'hash': str(pd_util.hash_pandas_object(obj.iloc[:min(100, len(obj))]).sum())
                    }
            except ImportError:
                pass

        # Handle bytes
        if isinstance(obj, bytes):
            return ('__bytes__', obj.hex())

        # Handle datetime
        if isinstance(obj, datetime):
            return ('__datetime__', obj.isoformat())

        # Handle custom objects with __dict__
        if hasattr(obj, '__dict__') and not isinstance(obj, type):
            return {f'__{obj.__class__.__name__}__': normalize_for_json(obj.__dict__)}

        # Return primitives as-is
        return obj

    def get_hash_orjson(data):
        # Pre-process the data to handle non-string keys and other issues
        normalized_data = normalize_for_json(data)

        json_bytes = orjson.dumps(
            normalized_data,
            option=orjson.OPT_SORT_KEYS
        )
        # md5 is used only as a cache key, not for security.
        return hashlib.md5(json_bytes).hexdigest()

    seed = {
        'function': function,
        'args': args,
        'kwargs': kwargs,
        'compressed': _cache_compressed
    }

    seed_hash = get_hash_orjson(seed)

    # functools.partial objects and callable instances have no __name__;
    # fall back to the type name instead of raising AttributeError.
    function_name = getattr(function, '__name__', type(function).__name__)
    # Names such as '<lambda>' contain characters that are not valid in
    # file names on Windows; replace them so the cache file can be created.
    safe_name = re.sub(r'[<>:"/\\|?*]', '_', function_name)

    cache_file = f'{cache_folder}/cache_{safe_name}_{seed_hash}.pkl'

    if os.path.exists(cache_file):
        return load_object_from_file(cache_file, compressed=_cache_compressed)
    else:
        result = function(*args, **kwargs)
        # Use this module's own helpers throughout (as for load/save above).
        create_folder(cache_folder)
        save_object_to_file(result, cache_file, compressed=_cache_compressed)
        return result
366
+
367
+
368
def load_all_cached():
    """
    Loads all cached objects from the cache folder.

    Looks in `zhmiscellany_cache` (relative to the current working directory)
    for files written by `cache()` - named `cache_<function>_<hash>.pkl` - and
    also accepts file names containing `fn_cache_`, the pattern this function
    matched previously.

    Files are loaded uncompressed; entries stored with
    `_cache_compressed=True` are not supported here.

    Returns:
        list of the loaded objects.

    Raises:
        Exception: when the cache folder is missing or holds no cache files.
    """
    import os
    cache_folder = 'zhmiscellany_cache'
    if not os.path.exists(cache_folder):
        raise Exception('Nothing has been cached yet')

    def is_cache_file(path):
        # Match on the file name only, so the folder's own location can
        # never influence which files are selected.
        name = os.path.basename(path)
        return name.startswith('cache_') or 'fn_cache_' in name

    files = [file for file in abs_listdir(cache_folder) if is_cache_file(file)]
    if not files:
        raise Exception('Nothing has been cached yet')
    return [load_object_from_file(file) for file in files]
383
+
384
+
385
def list_files_recursive(folder):
    """
    Recursively collect the paths of all files below `folder`.

    Directory symlinks and junctions are not descended into. Entries for
    which `is_file()` is true are always collected (that check runs before
    the symlink check). Folders that raise PermissionError or
    FileNotFoundError while being scanned contribute whatever was collected
    up to that point.
    """
    import os
    from ._fileio_supportfuncs import is_junction

    collected = []
    try:
        for entry in os.scandir(folder):
            if entry.is_file():
                collected.append(entry.path)
                continue
            if entry.is_symlink() or is_junction(entry):
                continue
            if entry.is_dir():
                collected.extend(list_files_recursive(entry.path))
    except (PermissionError, FileNotFoundError):
        pass
    return collected
403
+
404
+
405
def list_files_recursive_multiprocessed(dir_path, return_folders=False):
    """
    Recursively list the files below `dir_path`, handing deeper sub-trees to
    `zhmiscellany.processing.batch_multiprocess`.

    The first levels (up to `max_python_depth`) are walked in this process;
    each directory found beyond that depth becomes one task that walks its
    whole sub-tree. Directory symlinks and junctions are not descended into.

    Returns:
        The list of file paths, or `(files, folders)` when `return_folders`
        is true.
    """
    import os
    import zhmiscellany.processing

    def is_junction(entry):
        try:
            info = entry.stat(follow_symlinks=False)
            # On Windows, st_file_attributes is available.
            # FILE_ATTRIBUTE_REPARSE_POINT (0x400) indicates a reparse point (e.g. junction).
            return hasattr(info, "st_file_attributes") and bool(info.st_file_attributes & 0x400)
        except Exception:
            return False

    def traversal(dir_path, depth):
        depth += 1
        found_files = []
        found_folders = []
        deferred = []
        try:
            for entry in os.scandir(dir_path):
                if entry.is_file():
                    found_files.append(entry.path)
                    continue
                if entry.is_symlink() or is_junction(entry):
                    continue
                if not entry.is_dir():
                    continue
                found_folders.append(entry.path)
                if depth > max_python_depth:
                    # Too deep for the in-process pass: queue a task. The very
                    # negative depth makes that task recurse without deferring again.
                    deferred.append((traversal, (entry.path, -99999)))
                else:
                    sub_files, sub_folders, sub_tasks = traversal(entry.path, depth)
                    found_files.extend(sub_files)
                    found_folders.extend(sub_folders)
                    deferred.extend(sub_tasks)
        except (PermissionError, FileNotFoundError):
            pass
        return (found_files, found_folders, deferred)

    max_python_depth = 1
    files, folders, tasks = traversal(dir_path, 0)
    for task_result in zhmiscellany.processing.batch_multiprocess(tasks):
        files.extend(task_result[0])
        folders.extend(task_result[1])
    if return_folders:
        return files, folders
    return files
452
+
453
+
454
def encode_safe_filename(s, max_length=16):
    """
    Turn a string into a short token usable in URLs and file names.

    The URL-safe Base64 form of `s` (padding stripped) is returned when it
    fits in `max_length` characters; otherwise the MD5 hex digest of `s`,
    cut to `max_length` characters, is returned.
    """
    import base64
    import hashlib

    token = base64.urlsafe_b64encode(s.encode()).decode().rstrip("=")
    if len(token) <= max_length:
        return token
    # Too long: fall back to a (truncated) hash.
    return hashlib.md5(s.encode()).hexdigest()[:max_length]
462
+
463
+
464
def list_files_recursive_cache_optimised_multiprocessed(dir_path, show_timings=False, cache_in_temp=True):
    """
    Recursively list the files below `dir_path`, using an on-disk cache keyed
    by folder modification times to avoid rescanning unchanged folders.

    First call (no cache file): the tree is walked (deeper sub-trees via
    `zhmiscellany.processing.batch_multiprocess`), and a mapping of
    folder -> files plus a mapping of folder -> mtime is saved.

    Later calls: the mtime of every known folder is re-read; folders whose
    mtime changed are re-listed (and newly appeared sub-folders walked), and
    folders that can no longer be stat'ed are dropped from the result. The
    cache file itself is only rewritten when more than
    `fully_update_cache_threshold` folders changed.

    The root `dir_path` is tracked like any other folder, so files added to
    or removed from the root itself are picked up. Caches written before the
    root was tracked are handled by treating the root as changed.

    Args:
        dir_path: root directory to list.
        show_timings: report phase timings through `zhmiscellany.misc.time_it`.
        cache_in_temp: store the cache file in the system temp directory;
            otherwise in a local `zhmiscellany_cache` folder.

    Returns:
        list of file paths.

    SECURITY: the cache file is unpickled on load; with `cache_in_temp=True`
    it lives in the shared temp directory.
    """
    import os
    import zhmiscellany.processing
    import zhmiscellany.fileio
    import tempfile
    from collections import defaultdict
    import random
    from itertools import chain
    import zhmiscellany.misc

    def is_junction(entry):
        try:
            st = entry.stat(follow_symlinks=False)
            # On Windows, st_file_attributes is available.
            # FILE_ATTRIBUTE_REPARSE_POINT (0x400) indicates a reparse point (e.g. junction).
            return hasattr(st, "st_file_attributes") and bool(st.st_file_attributes & 0x400)
        except Exception:
            return False

    def traversal(dir_path, depth=0):
        # Walk dir_path; returns ({folder: [files]}, [sub-folders], [deferred tasks]).
        depth += 1
        files = defaultdict(list)
        folders = []
        tasks = []
        try:
            for entry in os.scandir(dir_path):
                if entry.is_file():
                    files[dir_path].append(entry.path)
                elif entry.is_symlink() or is_junction(entry):
                    continue
                elif entry.is_dir():
                    folders.append(entry.path)
                    if depth > max_python_depth:
                        # Deferred to a worker; the very negative depth makes
                        # the worker recurse fully without deferring again.
                        tasks.append((traversal, (entry.path, -99999)))
                    else:
                        new_files, new_folders, new_tasks = traversal(entry.path, depth)
                        files.update(new_files)
                        folders.extend(new_folders)
                        tasks.extend(new_tasks)
        except (PermissionError, FileNotFoundError):
            pass
        return (files, folders, tasks)

    def list_folder(folder):
        # Non-recursive listing of one folder: ({folder: [files]}, [sub-folders]).
        files, folders = defaultdict(list), []
        try:
            for entry in os.scandir(folder):
                if entry.is_file():
                    files[folder].append(entry.path)
                elif entry.is_symlink() or is_junction(entry):
                    continue
                elif entry.is_dir():
                    folders.append(entry.path)
        except (PermissionError, FileNotFoundError):
            pass
        return files, folders

    def split_into_n_groups(lst, n):
        # Split lst into at most n contiguous, near-equal, non-empty slices.
        avg_size = len(lst) // n
        remainder = len(lst) % n
        sublists = []

        start = 0
        for i in range(n):
            end = start + avg_size + (1 if i < remainder else 0)  # Distribute remainder
            sublists.append(lst[start:end])
            start = end
        sublists = [sublist for sublist in sublists if sublist]
        return sublists

    def get_m_times(folders):
        # Return {folder: mtime} for every folder whose mtime can be read.
        groups = split_into_n_groups(folders, scan_mtime_worker_count)

        def atom(folders):
            mtimes = {}
            for folder in folders:
                try:
                    mtimes[folder] = os.path.getmtime(folder)
                except Exception:
                    # Folder vanished or is unreadable: leave it out.
                    pass
            return mtimes

        tasks = [(atom, (group,)) for group in groups]
        if not tasks:
            # Nothing to stat; do not hand an empty batch to the worker pool.
            return {}
        results = zhmiscellany.processing.batch_multiprocess(tasks)
        mtimes = {}
        for i in results:
            mtimes.update(i)
        return mtimes

    # parameters
    scan_mtime_worker_count = 8
    scan_changed_folders_thread_group_count = 64
    fully_update_cache_threshold = 2**10
    cache_compression = False
    # end parameters

    if cache_in_temp:
        cache_folder = tempfile.gettempdir()
    else:
        cache_folder = 'zhmiscellany_cache'
        zhmiscellany.fileio.create_folder(cache_folder)

    cache_id = encode_safe_filename(dir_path)
    cache_file = f'GFI_{cache_id}.pkl'
    cache_file = os.path.join(cache_folder, cache_file)

    if show_timings: zhmiscellany.misc.time_it(None, 'lfrcom')
    if show_timings: zhmiscellany.misc.time_it(None, 'lfrcomt')

    max_python_depth = 1
    if not os.path.exists(cache_file):
        files, folders, tasks = traversal(dir_path)
        if show_timings: zhmiscellany.misc.time_it('initial traversal', 'lfrcom')

        file_groups = zhmiscellany.processing.batch_multiprocess(tasks) if tasks else []
        if show_timings: zhmiscellany.misc.time_it('multiprocessed deep traversal', 'lfrcom')
        for group in file_groups:
            files.update(group[0])
            folders.extend(group[1])
        if show_timings: zhmiscellany.misc.time_it('extending data', 'lfrcom')

        # traversal() only reports sub-folders; track the root as well so
        # changes directly inside dir_path are detected on later calls.
        folders.append(dir_path)
        folders = get_m_times(folders)
        zhmiscellany.fileio.save_object_to_file((files, folders), cache_file, compressed=cache_compression)
        if show_timings: zhmiscellany.misc.time_it('creating cache', 'lfrcom')
        return list(chain.from_iterable(files.values()))
    else:
        files, folders = zhmiscellany.fileio.load_object_from_file(cache_file, compressed=cache_compression)
        if show_timings: zhmiscellany.misc.time_it('loading cache', 'lfrcom')

        if dir_path not in folders:
            # Cache written without the root: None never equals a real mtime,
            # so the root is treated as changed and re-listed.
            folders[dir_path] = None

        fl_list = list(folders.keys())
        new_folders = get_m_times(fl_list)
        if show_timings: zhmiscellany.misc.time_it(f'getting m times of {len(fl_list)} folders', 'lfrcom')

        # Folders whose mtime could not be read no longer exist (or are
        # unreadable); their cached files must not be returned.
        for folder in fl_list:
            if folder not in new_folders:
                files.pop(folder, None)

        changed_folders = []
        for folder, mtime in new_folders.items():
            if folders[folder] != mtime:
                changed_folders.append(folder)
        random.shuffle(changed_folders)
        if show_timings: zhmiscellany.misc.time_it(f'creating {len(changed_folders)} changed folders', 'lfrcom')

        for i in changed_folders:  # clear files that might not exist
            # it is possible that the only thing that changed in a folder is another folder, so in that case it would not be inside the file dict
            files.pop(i, None)

        if show_timings: zhmiscellany.misc.time_it(f'filtering files for changes', 'lfrcom')

        def atom(_folders):
            # Re-list each changed folder and fully walk sub-folders that
            # were not known before.
            atom_files, atom_folders = defaultdict(list), []
            for _folder in _folders:
                fil, fol = list_folder(_folder)
                atom_files.update(fil)
                for fold in fol:
                    if fold not in new_folders:
                        atom_folders.append(fold)
                        fil, fo, _ = traversal(fold, -99999)
                        atom_files.update(fil)
                        atom_folders.extend(fo)
            return atom_files, atom_folders

        groups = split_into_n_groups(changed_folders, scan_changed_folders_thread_group_count)
        tasks = [(atom, (group,)) for group in groups]
        if not tasks:
            results = []
        else:
            results = zhmiscellany.processing.batch_threading(tasks)
        if show_timings: zhmiscellany.misc.time_it('multithreading processing changed folders', 'lfrcom')

        new_new_folders = []
        for fi, fo in results:
            files.update(fi)
            new_new_folders.extend(fo)

        if len(changed_folders) > fully_update_cache_threshold:
            new_folders.update(get_m_times(new_new_folders))
            # Same clock id as every other phase timing in this function.
            if show_timings: zhmiscellany.misc.time_it(f'get m times of {len(new_new_folders)} new folders', 'lfrcom')
            # Must use the same compression setting the cache is loaded with.
            zhmiscellany.fileio.save_object_to_file((files, new_folders), cache_file, compressed=cache_compression)
            if show_timings: zhmiscellany.misc.time_it(f'writing to cache', 'lfrcom')

        ret = list(chain.from_iterable(files.values()))
        if show_timings: zhmiscellany.misc.time_it('Everything together', 'lfrcomt')
        return ret
646
+
647
+
648
def save_chunk(name, data):
    """
    Store `data` as a new uniquely named `chunk_*.pkl` file inside the
    folder `name`, creating the folder when needed.
    """
    import zhmiscellany.string

    create_folder(name)
    unique_id = zhmiscellany.string.get_universally_unique_string()
    save_object_to_file(data, f'{name}/chunk_{unique_id}.pkl')
653
+
654
+
655
def load_chunks(name):
    """
    Load every entry found in the folder `name` and return the objects as a
    list. The folder is created first when it does not exist, in which case
    the result is an empty list.
    """
    create_folder(name)
    return [load_object_from_file(chunk_path) for chunk_path in abs_listdir(name)]
662
+
663
+
664
def clear_chunks(name):
    """Remove everything inside the folder `name`; do nothing when it does not exist."""
    import os

    if not os.path.exists(name):
        return
    empty_directory(name)
668
+
669
+
670
def list_drives():
    """
    Return the drive roots ("A:\\" through "Z:\\") for which
    `os.path.exists` reports True, in alphabetical order.
    """
    import os
    import string

    candidates = (f"{letter}:\\" for letter in string.ascii_uppercase)
    return [root for root in candidates if os.path.exists(root)]