tamar-file-hub-client 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,153 +1,186 @@
1
- """
2
- File utility functions
3
- """
4
- import hashlib
5
- import mimetypes
6
- from pathlib import Path
7
- from typing import Generator, Optional, BinaryIO, Union
8
-
9
-
10
# Extension -> MIME type table, consulted first so that common file types resolve
# to the same value on every platform. Built once at import time.
_EXTENSION_MIME_MAP = {
    '.csv': 'text/csv',
    '.txt': 'text/plain',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.html': 'text/html',
    '.htm': 'text/html',
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
    '.mp3': 'audio/mpeg',
    '.wav': 'audio/wav',
    '.mp4': 'video/mp4',
    '.avi': 'video/x-msvideo',
    '.mov': 'video/quicktime',
    '.zip': 'application/zip',
    '.rar': 'application/vnd.rar',
    '.7z': 'application/x-7z-compressed',
    '.tar': 'application/x-tar',
    '.gz': 'application/gzip',
}


def get_file_mime_type(file_path: Union[str, Path]) -> str:
    """
    Return the MIME type for a file path.

    Resolution order:
        1. The built-in extension table (case-insensitive on the suffix).
        2. Content sniffing through the optional ``magic`` package.
        3. ``mimetypes.guess_type``.
        4. ``application/octet-stream``.

    Args:
        file_path: Path of the file.

    Returns:
        The MIME type string.
    """
    file_path = Path(file_path)

    known = _EXTENSION_MIME_MAP.get(file_path.suffix.lower())
    if known is not None:
        return known

    try:
        import magic
        return magic.Magic(mime=True).from_file(str(file_path))
    except (ImportError, OSError):
        # Either magic is not installed or the file cannot be opened for sniffing.
        # Both cases fall back to the name-based guess, so the result no longer
        # depends on whether the optional package happens to be present.
        pass

    guessed, _ = mimetypes.guess_type(str(file_path))
    return guessed or "application/octet-stream"
71
-
72
-
73
def get_file_extension(file_name: str) -> str:
    """
    Return the final extension of a file name in lower case.

    Args:
        file_name: File name (or path).

    Returns:
        The extension including the leading dot, or an empty string when
        the name has no extension.
    """
    suffix = Path(file_name).suffix
    return suffix.lower()
84
-
85
-
86
def humanize_file_size(size_bytes: int) -> str:
    """
    Format a byte count as a human-readable string.

    The value is divided by 1024 until it drops below 1024 or the TB unit
    has been passed; anything larger is reported in PB.

    Args:
        size_bytes: File size in bytes.

    Returns:
        The size with two decimals and a unit, e.g. ``"1.50 KB"``.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = size_bytes
    index = 0
    while index < len(units):
        if value < 1024.0:
            return f"{value:.2f} {units[index]}"
        value = value / 1024.0
        index += 1
    return f"{value:.2f} PB"
101
-
102
-
103
def calculate_file_hash(file_path: Union[str, Path], algorithm: str = "sha256") -> str:
    """
    Compute the hash of a file's content.

    The file is read in 8192-byte blocks so it is never loaded whole.

    Args:
        file_path: Path of the file.
        algorithm: Name of the hash algorithm passed to ``hashlib.new``
            (md5, sha1, sha256, ...).

    Returns:
        The digest as a hexadecimal string.
    """
    source = Path(file_path)
    digest = hashlib.new(algorithm)

    with open(source, "rb") as stream:
        block = stream.read(8192)
        while block:
            digest.update(block)
            block = stream.read(8192)

    return digest.hexdigest()
122
-
123
-
124
def split_file_chunks(
    file_obj: BinaryIO,
    chunk_size: int = 1024 * 1024,  # 1 MB by default
    start_offset: int = 0
) -> Generator[tuple[bytes, int, bool], None, None]:
    """
    Split a binary file object into consecutive chunks.

    The generator reads one chunk ahead of the one it yields, so the
    "last chunk" flag reflects real end-of-data. A payload whose length is an
    exact multiple of ``chunk_size`` still gets its final chunk flagged, and a
    short read that is not end-of-data does not end the iteration early.
    Because of the read-ahead, the file position is up to one chunk past the
    chunk just yielded.

    Args:
        file_obj: Seekable binary file object.
        chunk_size: Chunk size in bytes.
        start_offset: Offset at which reading starts.

    Yields:
        (chunk data, offset of the chunk, whether this is the last chunk)
    """
    file_obj.seek(start_offset)
    offset = start_offset

    current = file_obj.read(chunk_size)
    while current:
        # Only an empty follow-up read proves that `current` is the final chunk.
        upcoming = file_obj.read(chunk_size)
        yield current, offset, not upcoming
        offset += len(current)
        current = upcoming
1
+ """
2
+ File utility functions
3
+ """
4
+ import hashlib
5
+ import mimetypes
6
+ from pathlib import Path
7
+ from typing import Generator, Optional, BinaryIO, Union
8
+
9
+
10
# Extension -> MIME type table, consulted first so that common file types resolve
# to the same value on every platform. Built once at import time.
_EXTENSION_MIME_MAP = {
    '.csv': 'text/csv',
    '.txt': 'text/plain',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.html': 'text/html',
    '.htm': 'text/html',
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
    '.mp3': 'audio/mpeg',
    '.wav': 'audio/wav',
    '.mp4': 'video/mp4',
    '.avi': 'video/x-msvideo',
    '.mov': 'video/quicktime',
    '.zip': 'application/zip',
    '.rar': 'application/vnd.rar',
    '.7z': 'application/x-7z-compressed',
    '.tar': 'application/x-tar',
    '.gz': 'application/gzip',
}


def _json_file_mime_type(file_path: Path) -> str:
    """Classify an existing ``.json`` file by whether its content parses as JSON."""
    import json

    try:
        raw = file_path.read_bytes()
    except OSError:
        # Unreadable file (permissions, directory, ...): do not claim it is JSON.
        return 'text/plain'

    # utf-8-sig accepts UTF-8 with or without a BOM; latin-1 decodes any byte
    # sequence, so the fallback below can never fail.
    try:
        text = raw.decode('utf-8-sig')
    except UnicodeDecodeError:
        text = raw.decode('latin-1')

    text = text.strip()
    if not text:
        # Empty file: nothing to validate, trust the extension.
        return _EXTENSION_MIME_MAP['.json']

    try:
        json.loads(text)
    except (json.JSONDecodeError, RecursionError):
        # Malformed (or pathologically nested) content: report text/plain so the
        # server does not reject the upload for a content/type mismatch.
        return 'text/plain'
    return 'application/json'


def get_file_mime_type(file_path: Union[str, Path]) -> str:
    """
    Return the MIME type for a file path.

    Resolution order:
        1. For an existing ``.json`` file, the content is validated:
           parseable or empty -> ``application/json``, otherwise ``text/plain``.
        2. The built-in extension table (case-insensitive on the suffix).
        3. Content sniffing through the optional ``magic`` package.
        4. ``mimetypes.guess_type``.
        5. ``application/octet-stream``.

    Args:
        file_path: Path of the file.

    Returns:
        The MIME type string.
    """
    file_path = Path(file_path)
    extension = file_path.suffix.lower()

    if extension == '.json' and file_path.exists():
        return _json_file_mime_type(file_path)

    known = _EXTENSION_MIME_MAP.get(extension)
    if known is not None:
        return known

    try:
        import magic
        return magic.Magic(mime=True).from_file(str(file_path))
    except (ImportError, OSError):
        # Either magic is not installed or the file cannot be opened for sniffing.
        # Both cases fall back to the name-based guess, so the result no longer
        # depends on whether the optional package happens to be present.
        pass

    guessed, _ = mimetypes.guess_type(str(file_path))
    return guessed or "application/octet-stream"
104
+
105
+
106
def get_file_extension(file_name: str) -> str:
    """
    Return the final extension of a file name in lower case.

    Args:
        file_name: File name (or path).

    Returns:
        The extension including the leading dot, or an empty string when
        the name has no extension.
    """
    suffix = Path(file_name).suffix
    return suffix.lower()
117
+
118
+
119
def humanize_file_size(size_bytes: int) -> str:
    """
    Format a byte count as a human-readable string.

    The value is divided by 1024 until it drops below 1024 or the TB unit
    has been passed; anything larger is reported in PB.

    Args:
        size_bytes: File size in bytes.

    Returns:
        The size with two decimals and a unit, e.g. ``"1.50 KB"``.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = size_bytes
    index = 0
    while index < len(units):
        if value < 1024.0:
            return f"{value:.2f} {units[index]}"
        value = value / 1024.0
        index += 1
    return f"{value:.2f} PB"
134
+
135
+
136
def calculate_file_hash(file_path: Union[str, Path], algorithm: str = "sha256") -> str:
    """
    Compute the hash of a file's content.

    The file is read in 8192-byte blocks so it is never loaded whole.

    Args:
        file_path: Path of the file.
        algorithm: Name of the hash algorithm passed to ``hashlib.new``
            (md5, sha1, sha256, ...).

    Returns:
        The digest as a hexadecimal string.
    """
    source = Path(file_path)
    digest = hashlib.new(algorithm)

    with open(source, "rb") as stream:
        block = stream.read(8192)
        while block:
            digest.update(block)
            block = stream.read(8192)

    return digest.hexdigest()
155
+
156
+
157
def split_file_chunks(
    file_obj: BinaryIO,
    chunk_size: int = 1024 * 1024,  # 1 MB by default
    start_offset: int = 0
) -> Generator[tuple[bytes, int, bool], None, None]:
    """
    Split a binary file object into consecutive chunks.

    The generator reads one chunk ahead of the one it yields, so the
    "last chunk" flag reflects real end-of-data. A payload whose length is an
    exact multiple of ``chunk_size`` still gets its final chunk flagged, and a
    short read that is not end-of-data does not end the iteration early.
    Because of the read-ahead, the file position is up to one chunk past the
    chunk just yielded.

    Args:
        file_obj: Seekable binary file object.
        chunk_size: Chunk size in bytes.
        start_offset: Offset at which reading starts.

    Yields:
        (chunk data, offset of the chunk, whether this is the last chunk)
    """
    file_obj.seek(start_offset)
    offset = start_offset

    current = file_obj.read(chunk_size)
    while current:
        # Only an empty follow-up read proves that `current` is the final chunk.
        upcoming = file_obj.read(chunk_size)
        yield current, offset, not upcoming
        offset += len(current)
        current = upcoming
@@ -0,0 +1,226 @@
1
+ """
2
+ Automatic detection of the end user's real IP address
3
+ Obtains the real user IP address from the current HTTP request context
4
+ """
5
+
6
+ import os
7
+ import threading
8
+ from typing import Optional, Dict, Any, Callable
9
+ from contextvars import ContextVar
10
+
11
+ # ContextVar holding the user IP of the current request (None when unset)
12
+ current_user_ip: ContextVar[Optional[str]] = ContextVar('current_user_ip', default=None)
13
+
14
+ # Optional custom IP extractor callable; None until one is registered
15
+ _custom_ip_extractor: Optional[Callable[[], Optional[str]]] = None
16
+
17
+ # Thread-local storage used as a fallback for the user IP
18
+ _thread_local = threading.local()
19
+
20
+
21
def set_user_ip_extractor(extractor: Callable[[], Optional[str]]):
    """
    Register a custom user-IP extractor.

    The callable is stored in the module-level ``_custom_ip_extractor`` slot,
    replacing any extractor registered earlier.

    Args:
        extractor: Zero-argument function returning the user IP, or None
            when it cannot determine one.
    """
    global _custom_ip_extractor
    _custom_ip_extractor = extractor
30
+
31
+
32
def set_current_user_ip(ip: str):
    """
    Record the user IP for the current request (typically called when a
    request starts).

    Args:
        ip: The user's real IP address.
    """
    current_user_ip.set(ip)
    # Mirror the value into thread-local storage, which serves as the fallback.
    setattr(_thread_local, 'user_ip', ip)
42
+
43
+
44
def get_current_user_ip() -> Optional[str]:
    """
    Return the real IP address of the current user, if it can be determined.

    Sources are tried in this order and the first non-empty value wins:
        1. The ``current_user_ip`` ContextVar
        2. The custom IP extractor, when one is registered
        3. Automatic detection from common web frameworks
        4. The ``USER_IP`` / ``CLIENT_IP`` environment variables
        5. Thread-local storage

    Returns:
        The user's IP address, or None when no source provides one.
    """
    # 1. ContextVar takes priority.
    ip = current_user_ip.get(None)
    if ip:
        return ip

    # 2. Custom extractor. It is best-effort: ordinary failures are ignored so
    #    the remaining sources are still tried. Only Exception is caught, so
    #    KeyboardInterrupt / SystemExit are never swallowed.
    if _custom_ip_extractor:
        try:
            ip = _custom_ip_extractor()
        except Exception:
            ip = None
        if ip:
            return ip

    # 3. Common web frameworks.
    ip = _auto_detect_from_web_frameworks()
    if ip:
        return ip

    # 4. Environment variables.
    ip = os.environ.get('USER_IP') or os.environ.get('CLIENT_IP')
    if ip:
        return ip

    # 5. Thread-local fallback; getattr with a default cannot raise here.
    return getattr(_thread_local, 'user_ip', None)
87
+
88
+
89
+ def _auto_detect_from_web_frameworks() -> Optional[str]:
90
+ """
91
+ 从常见Web框架中自动检测用户IP
92
+ """
93
+ # Flask
94
+ try:
95
+ from flask import request
96
+ if request:
97
+ return _extract_ip_from_headers(request.environ)
98
+ except (ImportError, RuntimeError):
99
+ pass
100
+
101
+ # Django
102
+ try:
103
+ from django.http import HttpRequest
104
+ from django.utils.deprecation import MiddlewareMixin
105
+ # Django需要通过中间件设置,这里只能检查是否有请求对象
106
+ import django
107
+ from django.core.context_processors import request as django_request
108
+ # Django的请求需要通过其他方式获取,这里先跳过
109
+ except ImportError:
110
+ pass
111
+
112
+ # FastAPI/Starlette
113
+ try:
114
+ from starlette.requests import Request
115
+ # FastAPI需要在路由处理器中获取,这里先跳过
116
+ except ImportError:
117
+ pass
118
+
119
+ # Tornado
120
+ try:
121
+ import tornado.web
122
+ # Tornado需要在RequestHandler中获取,这里先跳过
123
+ except ImportError:
124
+ pass
125
+
126
+ return None
127
+
128
+
129
+ def _extract_ip_from_headers(environ: Dict[str, Any]) -> Optional[str]:
130
+ """
131
+ 从HTTP环境变量中提取用户真实IP
132
+
133
+ Args:
134
+ environ: WSGI environ字典或类似的HTTP环境变量
135
+
136
+ Returns:
137
+ 用户真实IP,优先级: X-Forwarded-For > X-Real-IP > CF-Connecting-IP > Remote-Addr
138
+ """
139
+ # X-Forwarded-For: 最常用的代理头,包含原始客户端IP
140
+ forwarded_for = environ.get('HTTP_X_FORWARDED_FOR')
141
+ if forwarded_for:
142
+ # 取第一个IP(原始客户端IP),忽略代理IP
143
+ return forwarded_for.split(',')[0].strip()
144
+
145
+ # X-Real-IP: Nginx常用的真实IP头
146
+ real_ip = environ.get('HTTP_X_REAL_IP')
147
+ if real_ip:
148
+ return real_ip.strip()
149
+
150
+ # CF-Connecting-IP: Cloudflare的连接IP
151
+ cf_ip = environ.get('HTTP_CF_CONNECTING_IP')
152
+ if cf_ip:
153
+ return cf_ip.strip()
154
+
155
+ # Remote-Addr: 直接连接的IP(可能是代理IP)
156
+ remote_addr = environ.get('REMOTE_ADDR')
157
+ if remote_addr:
158
+ return remote_addr.strip()
159
+
160
+ return None
161
+
162
+
163
def clear_current_user_ip():
    """Clear the user IP of the current request (typically called when a request ends)."""
    current_user_ip.set(None)
    # Drop the thread-local mirror only when this thread actually has one.
    if hasattr(_thread_local, 'user_ip'):
        del _thread_local.user_ip
170
+
171
+
172
+ # Flask integration decorator
173
def flask_auto_user_ip(app=None):
    """
    Install automatic user-IP detection on a Flask application.

    A ``before_request`` hook stores the IP extracted from the request
    environ (or clears any value left over from an earlier request when none
    is found), and a ``teardown_request`` hook clears it again. Teardown hooks
    also run when the view raised an unhandled exception, so the IP cannot
    leak into the next request handled by the same worker.

    Usage:
        from flask import Flask
        from file_hub_client.utils.ip_detector import flask_auto_user_ip

        app = Flask(__name__)
        flask_auto_user_ip(app)

    Args:
        app: The Flask application. When omitted, a decorator that accepts the
            application is returned instead.

    Returns:
        The application instance, or the decorator when ``app`` is None.
    """
    def decorator(app_instance):
        @app_instance.before_request
        def extract_user_ip():
            from flask import request
            ip = _extract_ip_from_headers(request.environ)
            if ip:
                set_current_user_ip(ip)
            else:
                # Never keep an IP recorded by a previous request.
                clear_current_user_ip()

        @app_instance.teardown_request
        def clear_user_ip(exc=None):
            clear_current_user_ip()

        return app_instance

    if app is None:
        return decorator
    return decorator(app)
203
+
204
+
205
+ # Context manager
206
class UserIPContext:
    """
    Context manager that sets the current user IP for the duration of a block.

    Usage:
        with UserIPContext("192.168.1.100"):
            # Inside this block the SDK automatically uses this IP
            client.upload_file(...)
    """

    def __init__(self, user_ip: str):
        # Token returned by ContextVar.set(); stays None until the context is entered.
        self.token = None
        self.user_ip = user_ip

    def __enter__(self):
        token = current_user_ip.set(self.user_ip)
        self.token = token
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.token is None:
            return
        # Restore whatever value the ContextVar held before __enter__.
        current_user_ip.reset(self.token)