unctools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
unctools/detector.py ADDED
@@ -0,0 +1,531 @@
1
+ """
2
+ Path detection utilities for UNC paths, network drives, and substituted drives.
3
+
4
+ This module provides functions to detect and identify different types of paths,
5
+ including UNC paths, network drives, and substituted (subst) drives.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ import logging
11
+ import subprocess
12
+ from pathlib import Path
13
+ from typing import Dict, List, Optional, Set, Tuple, Union, Any
14
+
15
+ # Import from our own modules
16
+ from .converter import get_mappings
17
+
18
# Module-wide logger, named after this module
logger = logging.getLogger(__name__)

# Labels returned by the detection functions to classify a path or drive
PATH_TYPE_UNKNOWN = "unknown"
PATH_TYPE_LOCAL = "local"
PATH_TYPE_UNC = "unc"
PATH_TYPE_NETWORK = "network"
PATH_TYPE_SUBST = "subst"
PATH_TYPE_REMOVABLE = "removable"
PATH_TYPE_CDROM = "cdrom"
PATH_TYPE_RAMDISK = "ramdisk"

# Platform switch: the Windows-only helpers below are skipped everywhere else
IS_WINDOWS = os.name == 'nt'

# Stays False unless every optional Windows import below succeeds
HAVE_WIN32API = False
if IS_WINDOWS:
    try:
        import ctypes
        import win32api
        import win32file
    except ImportError:
        logger.debug("win32api and/or ctypes modules not available. Some Windows-specific features will use fallbacks.")
    else:
        HAVE_WIN32API = True

# Shared memo for detection results; holds drive types (str) and subst flags (bool)
_path_type_cache: Dict[str, Any] = {}
48
+
49
def _clear_path_type_cache() -> None:
    """
    Empty the module's path type detection cache.

    The cache dictionary is emptied in place, so every function that reads
    it sees the cleared state on its next call.
    """
    # In-place mutation only: the module-level name is never rebound here.
    _path_type_cache.clear()
53
+
54
def is_unc_path(path: Union[str, Path]) -> bool:
    r"""
    Determine if a path is a UNC path (starts with \\server\share).

    Forward slashes are treated as backslashes, so //server/share is
    recognised as well. Win32 device-namespace paths (\\?\... and \\.\...)
    are only reported as UNC when they use the explicit UNC form
    (\\?\UNC\server\share); \\?\C:\dir and \\.\Device are not UNC paths.

    Args:
        path: The path to check.

    Returns:
        True if the path is a UNC path, False otherwise.
    """
    path_str = str(path).replace('/', '\\')
    if not path_str.startswith('\\\\'):
        return False

    # "\\?\" and "\\.\" introduce the Win32 device namespace, not a server
    # name. Only their "UNC\" sub-form addresses a network share.
    if path_str[2:4] in ('?\\', '.\\'):
        return path_str[4:8].upper() == 'UNC\\'

    return True
66
+
67
def _get_drive_type_windows(drive_letter: str) -> int:
    """
    Query Windows for the numeric drive type of a drive root.

    Args:
        drive_letter: The drive letter to check (e.g., 'C:'). A trailing
            backslash is appended when it is missing.

    Returns:
        The drive type code reported by the drive-type API, or 0 when not
        running on Windows or when the lookup raises.
    """
    if not IS_WINDOWS:
        return 0

    root = drive_letter if drive_letter.endswith('\\') else drive_letter + '\\'

    try:
        if not HAVE_WIN32API:
            # pywin32 is unavailable: call kernel32 directly through ctypes
            return ctypes.windll.kernel32.GetDriveTypeW(root)
        return win32file.GetDriveType(root)
    except Exception as e:
        logger.warning(f"Failed to get drive type for {root}: {e}")
        return 0
93
+
94
def get_drive_type(drive: Union[str, Path]) -> str:
    """
    Classify a drive by its kind.

    Results are memoised per upper-cased drive key in the module cache.

    Args:
        drive: The drive letter or path to check. When it starts with a
            drive letter and colon, only that prefix is used.

    Returns:
        One of the PATH_TYPE_* strings:
        - 'local': Local fixed drive
        - 'network': Network drive
        - 'subst': Substituted drive (not a physical device)
        - 'removable': Removable drive (e.g., USB)
        - 'cdrom': CD-ROM drive
        - 'ramdisk': RAM disk
        - 'unknown': Unknown, could not determine, or not on Windows
    """
    drive_text = str(drive)

    # Drive types are a Windows-only concept
    if not IS_WINDOWS:
        return PATH_TYPE_UNKNOWN

    prefix = re.match(r'^([A-Za-z]:)', drive_text)
    drive_letter = prefix.group(1) if prefix else drive_text

    cache_key = drive_letter.upper()
    if cache_key in _path_type_cache:
        return _path_type_cache[cache_key]

    # Numeric codes returned by the Windows drive-type query
    DRIVE_UNKNOWN = 0
    DRIVE_NO_ROOT_DIR = 1
    DRIVE_REMOVABLE = 2
    DRIVE_FIXED = 3
    DRIVE_REMOTE = 4
    DRIVE_CDROM = 5
    DRIVE_RAMDISK = 6

    # Codes that translate straight to a label with no further probing
    direct_labels = {
        DRIVE_REMOTE: PATH_TYPE_NETWORK,
        DRIVE_REMOVABLE: PATH_TYPE_REMOVABLE,
        DRIVE_CDROM: PATH_TYPE_CDROM,
        DRIVE_RAMDISK: PATH_TYPE_RAMDISK,
    }

    code = _get_drive_type_windows(drive_letter)

    if code in direct_labels:
        result = direct_labels[code]
    elif code in (DRIVE_FIXED, DRIVE_UNKNOWN, DRIVE_NO_ROOT_DIR):
        # A subst drive is reported as fixed, or as unknown / no-root when
        # its target is missing, so the subst table decides in these cases.
        if is_subst_drive(drive_letter):
            result = PATH_TYPE_SUBST
        elif code == DRIVE_FIXED:
            result = PATH_TYPE_LOCAL
        else:
            result = PATH_TYPE_UNKNOWN
    else:
        result = PATH_TYPE_UNKNOWN

    _path_type_cache[cache_key] = result
    return result
170
+
171
def is_network_drive(drive: Union[str, Path, None]) -> bool:
    """
    Tell whether a drive is classified as a network drive.

    Args:
        drive: The drive letter or path to check. None is accepted and is
            never a network drive.

    Returns:
        True if get_drive_type() reports the drive as 'network',
        False otherwise.
    """
    return drive is not None and get_drive_type(drive) == PATH_TYPE_NETWORK
186
+
187
def is_subst_drive(drive: Union[str, Path, None]) -> bool:
    """
    Determine if a drive is a substituted (subst) drive.

    The answer comes from the output of the Windows 'subst' command and is
    cached per drive letter under a 'subst_<LETTER>:' key.

    Args:
        drive: The drive letter or path to check. None, and any value that
            does not start with a drive letter and colon, is never a subst
            drive.

    Returns:
        True if the drive is a substituted drive, False otherwise (including
        on non-Windows platforms and when the 'subst' command fails).
    """
    # Handle None input
    if drive is None:
        return False

    # Only a "<letter>:" prefix can name a subst drive. Rejecting anything
    # else here matters: an empty string would otherwise yield the pattern
    # "\\: => (.*)", which matches every line of the subst listing.
    prefix = re.match(r'^([A-Za-z]:)', str(drive))
    if prefix is None:
        return False
    drive_letter = prefix.group(1)

    # Only applicable to Windows
    if not IS_WINDOWS:
        return False

    # Check cache
    cache_key = f"subst_{drive_letter.upper()}"
    if cache_key in _path_type_cache:
        return _path_type_cache[cache_key]

    try:
        # 'subst' with no arguments lists mappings as "X:\: => <target>"
        output = subprocess.check_output(['subst'], text=True, stderr=subprocess.STDOUT)
    except Exception as e:
        # Best effort: a failed lookup is reported as "not subst" and is
        # deliberately not cached, so a later call can retry.
        logger.warning(f"Failed to check if {drive_letter} is a subst drive: {e}")
        return False

    drive_pattern = re.escape(drive_letter.upper()) + r'\\: => (.*)'
    result = re.search(drive_pattern, output) is not None

    # Cache the result
    _path_type_cache[cache_key] = result

    return result
236
+
237
def get_subst_target(drive: Union[str, Path, None]) -> Optional[str]:
    """
    Get the target path of a substituted (subst) drive.

    Args:
        drive: The drive letter or path to check. None, and any value that
            does not start with a drive letter and colon, yields None.

    Returns:
        The target path of the subst drive as listed by the 'subst' command,
        or None if the drive is not a subst drive, the platform is not
        Windows, or the command fails.
    """
    # Without a "<letter>:" prefix there is nothing to look up. This also
    # prevents an empty drive string from matching (and returning the
    # target of) an unrelated line of the subst listing.
    prefix = re.match(r'^([A-Za-z]:)', str(drive))
    if prefix is None:
        return None
    drive_letter = prefix.group(1)

    # Only applicable to Windows
    if not IS_WINDOWS:
        return None

    # Check if it's a subst drive first
    if not is_subst_drive(drive_letter):
        return None

    try:
        # 'subst' with no arguments lists mappings as "X:\: => <target>"
        output = subprocess.check_output(['subst'], text=True, stderr=subprocess.STDOUT)
    except Exception as e:
        logger.warning(f"Failed to get subst target for {drive_letter}: {e}")
        return None

    drive_pattern = re.escape(drive_letter.upper()) + r'\\: => (.*)'
    listing = re.search(drive_pattern, output)
    return listing.group(1) if listing else None
278
+
279
def get_network_target(drive: Union[str, Path, None]) -> Optional[str]:
    """
    Get the UNC path target of a network drive.

    The drive is first looked up in the mappings returned by get_mappings();
    if it is not listed there, the output of 'net use <drive>' is parsed as a
    fallback.

    Args:
        drive: The drive letter or path to check.

    Returns:
        The UNC path target of the network drive, or None if the drive is not a
        network drive or if the target cannot be determined.
    """
    # Handle None input
    if drive is None:
        return None

    # Extract drive letter if a full path was provided
    drive_str = str(drive)
    prefix = re.match(r'^([A-Za-z]:)', drive_str)
    drive_letter = prefix.group(1) if prefix else drive_str

    # Only applicable to Windows
    if not IS_WINDOWS:
        return None

    # Check if it's a network drive first
    if not is_network_drive(drive_letter):
        return None

    # Invert the mappings, which are iterated as (UNC path, drive) pairs,
    # into an upper-cased drive key -> UNC path lookup table.
    reverse_mappings = {
        mapped_drive.rstrip('\\').upper(): unc_path
        for unc_path, mapped_drive in get_mappings().items()
    }

    drive_key = drive_letter.upper().rstrip('\\')
    if drive_key in reverse_mappings:
        return reverse_mappings[drive_key]

    # If not found in our mappings, try using net use as fallback
    try:
        output = subprocess.check_output(['net', 'use', drive_letter], text=True, stderr=subprocess.STDOUT)

        # Capture the remainder of the "Remote name" line rather than
        # stopping at the first whitespace: share names may contain spaces
        # (e.g. \\server\Team Share) and would otherwise be truncated.
        remote = re.search(r'Remote name\s+(\\\\[^\r\n]+)', output, re.IGNORECASE)
        if remote:
            return remote.group(1).rstrip()
    except Exception as e:
        logger.debug(f"Failed to get network target with 'net use' for {drive_letter}: {e}")

    # If all else fails, return None
    return None
338
+
339
def get_path_type(path: Union[str, Path]) -> str:
    r"""
    Classify a path by the kind of location it refers to.

    Forward slashes are converted to backslashes before classification.
    Results are memoised in the module cache under a 'type_<path>' key.
    NOTE(review): entries are keyed by the full path string and are only
    removed when the cache is cleared, so the cache grows with the number
    of distinct paths seen.

    Args:
        path: The path to check.

    Returns:
        One of the PATH_TYPE_* strings:
        - 'unc': UNC path (\\server\share)
        - 'network': Path on a network drive
        - 'subst': Path on a substituted drive
        - 'local': Path on a local fixed drive
        - 'removable': Path on a removable drive
        - 'cdrom': Path on a CD-ROM drive
        - 'ramdisk': Path on a RAM disk
        - 'unknown': Unknown or could not determine
    """
    normalized = str(path).replace('/', '\\')
    cache_key = f"type_{normalized}"

    try:
        return _path_type_cache[cache_key]
    except KeyError:
        pass  # not classified yet

    if is_unc_path(normalized):
        result = PATH_TYPE_UNC
    else:
        # Anything else is classified by its drive letter, if it has one
        drive = re.match(r'^([A-Za-z]:)', normalized)
        result = get_drive_type(drive.group(1)) if drive else PATH_TYPE_UNKNOWN

    _path_type_cache[cache_key] = result
    return result
380
+
381
def detect_path_issues(path: Union[str, Path]) -> List[str]:
    """
    Detect potential issues with a path.

    Checks performed:
    - on Windows, paths longer than 260 characters without the extended-length
      prefix;
    - UNC paths that lack a server or share name, and (on Windows) UNC servers
      that are not in the Intranet security zone;
    - network drives without a detectable UNC target;
    - subst drives without a detectable or existing target.

    Args:
        path: The path to check.

    Returns:
        A list of potential issues with the path, or an empty list if no issues
        were found.
    """
    issues: List[str] = []
    path_str = str(path)
    path_type = get_path_type(path_str)

    # get_path_type() classifies the path after turning '/' into '\'. The
    # prefix and UNC checks below must look at the same spelling, otherwise
    # a path such as //server/share is classified as UNC and then wrongly
    # reported as having no server or share name.
    normalized = path_str.replace('/', '\\')

    # Check if the path is too long for Windows
    # NOTE(review): MAX_PATH is documented as including the terminating NUL;
    # confirm whether a path of exactly 260 characters should be flagged too.
    if IS_WINDOWS and len(path_str) > 260 and not normalized.startswith('\\\\?\\'):
        issues.append("Path exceeds Windows MAX_PATH limit (260 characters)")

    if path_type == PATH_TYPE_UNC:
        # Check for no server or share name
        if not re.match(r'\\\\[^\\]+\\[^\\]+', normalized):
            issues.append("Invalid UNC path: missing server or share name")

        # Check for potential security zone issues on Windows
        if IS_WINDOWS:
            server_match = re.match(r'\\\\([^\\]+)', normalized)
            if server_match:
                server = server_match.group(1)
                if not is_server_in_intranet_zone(server):
                    issues.append(f"UNC server '{server}' is not in the Intranet security zone")

    elif path_type == PATH_TYPE_NETWORK:
        drive_match = re.match(r'^([A-Za-z]:)', path_str)
        if drive_match:
            drive = drive_match.group(1)
            if get_network_target(drive) is None:
                issues.append(f"Network drive {drive} has no detectable UNC target")

    elif path_type == PATH_TYPE_SUBST:
        drive_match = re.match(r'^([A-Za-z]:)', path_str)
        if drive_match:
            drive = drive_match.group(1)
            target = get_subst_target(drive)
            if target is None:
                issues.append(f"Substituted drive {drive} has no detectable target")
            elif not os.path.exists(target):
                issues.append(f"Substituted drive {drive} points to non-existent target: {target}")

    return issues
433
+
434
def get_network_mappings() -> Dict[str, str]:
    """
    Return the known network drive mappings keyed by drive.

    The table from get_mappings() is iterated as (UNC path, drive) pairs and
    inverted; trailing backslashes are stripped from each drive key.

    Returns:
        A dictionary mapping drive letters to UNC paths. Empty when not on
        Windows or when the mappings cannot be obtained.
    """
    if not IS_WINDOWS:
        return {}

    try:
        return {
            drive.rstrip('\\'): unc_path
            for unc_path, drive in get_mappings().items()
        }
    except Exception as e:
        logger.warning(f"Failed to get network mappings: {e}")
        return {}
457
+
458
def is_server_in_intranet_zone(server: str) -> bool:
    """
    Check if a server is in the local intranet security zone.

    Only the current user's ZoneMap settings (HKEY_CURRENT_USER) are
    consulted:

    1. ZoneMap\\Domains\\<server>: the zone stored for the 'file' protocol,
       or failing that for '*' (any protocol), decides the answer.
    2. ZoneMap\\Ranges\\<RangeN>: each entry stores its address pattern in
       the ':Range' string value and its zone in a protocol-named value.
       The server matches when it equals the pattern or fits its '*'
       wildcards.

    Limitations: the server name is compared textually (no DNS resolution),
    and dashed address ranges such as '10.0.0.1-10.0.0.50' are not expanded.

    Args:
        server: The server name to check.

    Returns:
        True if the server is in the intranet zone, False otherwise (including
        on non-Windows platforms and when the registry cannot be read).
    """
    # Only applicable to Windows
    if not IS_WINDOWS:
        return False

    # Zone number of "Local intranet"
    intranet_zone = 1
    # UNC access is evaluated under the "file" scheme; "*" covers every scheme
    protocol_values = ("file", "*")
    zone_map = r"Software\Microsoft\Windows\CurrentVersion\Internet Settings\ZoneMap"

    try:
        import fnmatch
        import winreg

        def assigned_zone(key):
            """Return the zone stored on key for UNC access, or None if unset."""
            for value_name in protocol_values:
                try:
                    zone, _ = winreg.QueryValueEx(key, value_name)
                except FileNotFoundError:
                    continue
                return zone
            return None

        # 1) Explicit per-server entry under Domains
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER, zone_map + "\\Domains\\" + server) as key:
                zone = assigned_zone(key)
                if zone is not None:
                    return zone == intranet_zone

                # NOTE(review): numbered value names are kept from the
                # earlier implementation for backward compatibility; they
                # are not part of the documented ZoneMap layout.
                i = 0
                while True:
                    try:
                        value, _ = winreg.QueryValueEx(key, str(i))
                    except FileNotFoundError:
                        break
                    if value == intranet_zone:
                        return True
                    i += 1
        except FileNotFoundError:
            pass

        # 2) Address ranges
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER, zone_map + "\\Ranges") as ranges_key:
                wanted = server.lower()
                index = 0
                while True:
                    try:
                        range_name = winreg.EnumKey(ranges_key, index)
                    except OSError:
                        break  # EnumKey raises once the subkeys are exhausted
                    index += 1

                    try:
                        with winreg.OpenKey(ranges_key, range_name) as range_key:
                            try:
                                range_spec, _ = winreg.QueryValueEx(range_key, ":Range")
                            except FileNotFoundError:
                                continue
                            # ':Range' is the address pattern (a string);
                            # the zone number lives in the protocol values.
                            if not isinstance(range_spec, str):
                                continue
                            if (fnmatch.fnmatchcase(wanted, range_spec.lower())
                                    and assigned_zone(range_key) == intranet_zone):
                                return True
                    except OSError:
                        # Unreadable entry: skip it and keep scanning
                        continue
        except FileNotFoundError:
            pass

        return False
    except Exception as e:
        logger.warning(f"Failed to check if server {server} is in intranet zone: {e}")
        return False