xiaoshiai-hub 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,831 @@
1
+ """
2
+ Upload utilities for XiaoShi AI Hub SDK using GitPython
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+ import json
8
+ import hashlib
9
+ from pathlib import Path
10
+ from typing import List, Optional, Union, Callable, Dict, Any
11
+ from urllib.parse import urlparse, urlunparse
12
+ from datetime import datetime, timezone
13
+
14
+ from xiaoshiai_hub.client import DEFAULT_BASE_URL
15
+
16
+ try:
17
+ from git import Repo, GitCommandError, InvalidGitRepositoryError
18
+ except ImportError:
19
+ raise ImportError(
20
+ "GitPython is required for upload functionality. "
21
+ "Install it with: pip install gitpython"
22
+ )
23
+
24
+ try:
25
+ from tqdm.auto import tqdm
26
+ except ImportError:
27
+ tqdm = None
28
+
29
+ from .exceptions import HubException, AuthenticationError, RepositoryNotFoundError, EncryptionError
30
+
31
+
32
class UploadError(HubException):
    """Signals that an upload to the Hub could not be completed."""
35
+
36
+
37
+ def _build_git_url(
38
+ base_url: Optional[str],
39
+ organization: str,
40
+ repo_type: str,
41
+ repo_name: str,
42
+ username: Optional[str] = None,
43
+ password: Optional[str] = None,
44
+ token: Optional[str] = None,
45
+ ) -> str:
46
+ """
47
+ Build Git repository URL with authentication.
48
+
49
+ Args:
50
+ base_url: Base URL of the Hub API
51
+ organization: Organization name
52
+ repo_type: Repository type ("models" or "datasets")
53
+ repo_name: Repository name
54
+ username: Username for authentication
55
+ password: Password for authentication
56
+ token: Token for authentication
57
+
58
+ Returns:
59
+ Git repository URL with embedded credentials
60
+ """
61
+ # Parse base URL to get the host
62
+ base_url = (base_url or DEFAULT_BASE_URL).rstrip('/')
63
+ parsed = urlparse(base_url)
64
+ host = parsed.netloc
65
+ scheme = parsed.scheme or 'https'
66
+
67
+ # Build repository path
68
+ repo_path = f"moha/{organization}/{repo_type}/{repo_name}.git"
69
+
70
+ # Add authentication to URL
71
+ if token:
72
+ # Use token as username with empty password
73
+ netloc = f"oauth2:{token}@{host}"
74
+ elif username and password:
75
+ netloc = f"{username}:{password}@{host}"
76
+ else:
77
+ netloc = host
78
+
79
+ # Construct full URL
80
+ git_url = urlunparse((scheme, netloc, repo_path, '', '', ''))
81
+
82
+ return git_url
83
+
84
+
85
+ def _calculate_file_hash(file_path: Path) -> str:
86
+ """
87
+ Calculate SHA256 hash of a file.
88
+
89
+ Args:
90
+ file_path: Path to the file
91
+
92
+ Returns:
93
+ Hexadecimal hash string
94
+ """
95
+ sha256_hash = hashlib.sha256()
96
+ with open(file_path, "rb") as f:
97
+ for byte_block in iter(lambda: f.read(4096), b""):
98
+ sha256_hash.update(byte_block)
99
+ return sha256_hash.hexdigest()
100
+
101
+
102
+ def _should_encrypt_file(file_path: str, encryption_exclude: List[str]) -> bool:
103
+ """
104
+ Check if a file should be encrypted based on exclude patterns.
105
+
106
+ Args:
107
+ file_path: Relative path of the file (relative to folder_path)
108
+ encryption_exclude: List of patterns to exclude from encryption
109
+
110
+ Returns:
111
+ True if file should be encrypted, False otherwise
112
+ """
113
+ import fnmatch
114
+
115
+ if not encryption_exclude:
116
+ return True
117
+
118
+ for pattern in encryption_exclude:
119
+ # Match against the full relative path
120
+ if fnmatch.fnmatch(file_path, pattern):
121
+ return False
122
+
123
+ return True
124
+
125
+
126
+ def _create_encryption_metadata(
127
+ encrypted_files: List[Dict[str, Any]],
128
+ algorithm: str,
129
+ version: str = "1.0"
130
+ ) -> Dict[str, Any]:
131
+ """
132
+ Create encryption metadata structure.
133
+
134
+ Args:
135
+ encrypted_files: List of encrypted file information
136
+ algorithm: Encryption algorithm used
137
+ version: Metadata format version
138
+
139
+ Returns:
140
+ Encryption metadata dictionary
141
+ """
142
+ return {
143
+ "version": version,
144
+ "createAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
145
+ "files": encrypted_files
146
+ }
147
+
148
+
149
+ def _write_encryption_metadata(repo_path: Path, metadata: Dict[str, Any]) -> None:
150
+ """
151
+ Write encryption metadata to .moha_encryption file.
152
+
153
+ Args:
154
+ repo_path: Path to the repository
155
+ metadata: Encryption metadata dictionary
156
+ """
157
+ metadata_file = repo_path / ".moha_encryption"
158
+ with open(metadata_file, 'w', encoding='utf-8') as f:
159
+ json.dump(metadata, f, indent=2, ensure_ascii=False)
160
+ print(f"Encryption metadata written to .moha_encryption")
161
+
162
+
163
+ def _count_files_in_directory(directory: str, ignore_patterns: Optional[List[str]] = None) -> int:
164
+ """
165
+ Count total number of files in a directory.
166
+
167
+ Args:
168
+ directory: Directory path
169
+ ignore_patterns: List of patterns to ignore (e.g., ['.git', '__pycache__'])
170
+
171
+ Returns:
172
+ Total number of files
173
+ """
174
+ import fnmatch
175
+
176
+ count = 0
177
+
178
+ for root, dirs, files in os.walk(directory):
179
+ # Get relative path from directory
180
+ rel_root = os.path.relpath(root, directory)
181
+
182
+ if ignore_patterns:
183
+ dirs[:] = [d for d in dirs if not any(
184
+ fnmatch.fnmatch(d, pattern) for pattern in ignore_patterns
185
+ )]
186
+
187
+ if ignore_patterns:
188
+ filtered_files = []
189
+ for f in files:
190
+ # Construct relative file path
191
+ if rel_root == '.':
192
+ rel_file_path = f
193
+ else:
194
+ rel_file_path = os.path.join(rel_root, f)
195
+
196
+ # Check if file matches any ignore pattern
197
+ should_ignore = any(
198
+ fnmatch.fnmatch(rel_file_path, pattern) for pattern in ignore_patterns
199
+ )
200
+ if not should_ignore:
201
+ filtered_files.append(f)
202
+ files = filtered_files
203
+
204
+ count += len(files)
205
+
206
+ return count
207
+
208
+
209
def upload_folder(
    folder_path: Union[str, Path],
    repo_id: str,
    repo_type: str = "models",
    revision: str = "main",
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    base_url: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    ignore_patterns: Optional[List[str]] = None,  # files skipped during upload
    encryption_key: Optional[Union[str, bytes]] = None,
    encryption_exclude: Optional[List[str]] = None,  # files left unencrypted
    encryption_algorithm: Optional[str] = None,
    temp_dir: Optional[Union[str, Path]] = None,
    skip_lfs: Optional[bool] = True,
) -> str:
    """
    Upload a folder to a repository using Git.

    This function shallow-clones the repository into a temporary directory,
    copies files from the folder over the clone, optionally encrypts them in
    place, commits the changes, and pushes to the remote repository.

    Args:
        folder_path: Path to the folder to upload
        repo_id: Repository ID in the format "organization/repo_name"
        repo_type: Type of repository ("models" or "datasets")
        revision: Branch to upload to (default: "main")
        commit_message: Commit message (default: "Upload folder from <folder name>")
        commit_description: Additional commit description
        base_url: Base URL of the Hub API
        username: Username for authentication
        password: Password for authentication
        token: Token for authentication (preferred over username/password)
        ignore_patterns: List of fnmatch patterns to ignore (e.g., ['*.pyc', '__pycache__']).
            Directories are matched by name, files by their path relative to
            folder_path. '.git' is always ignored so the source folder's own
            Git metadata can never overwrite the clone's.
        encryption_key: Encryption key (string for symmetric, PEM for asymmetric).
            When given, copied files are encrypted before committing.
        encryption_exclude: List of file patterns to exclude from encryption (e.g., ['*.txt', 'README.md'])
        encryption_algorithm: Encryption algorithm to use (default: 'aes-256-cbc')
            - Symmetric: 'aes-256-cbc', 'aes-256-gcm'
            - Asymmetric: 'rsa-oaep', 'rsa-pkcs1v15' (requires RSA public key in PEM format)
        temp_dir: Custom temporary directory path for cloning repository (default: system temp directory)
        skip_lfs: Skip LFS files when cloning the repository (default: True)

    Returns:
        Commit hash of the uploaded changes, or the current HEAD commit hash
        when there is nothing to commit

    Raises:
        ValueError: If repo_id is malformed or folder_path is not a directory
        FileNotFoundError: If folder_path does not exist
        AuthenticationError: If cloning or pushing fails with an authentication error
        UploadError: If cloning, pushing or another Git operation fails
        EncryptionError: If encryption_algorithm is not a supported algorithm

    Example:
        >>> commit_hash = upload_folder(
        ...     folder_path="./my_model",
        ...     repo_id="demo/my-model",
        ...     repo_type="models",
        ...     commit_message="Upload model files",
        ...     token="your-token",
        ... )

        >>> # Upload to encrypted repository
        >>> commit_hash = upload_folder(
        ...     folder_path="./my_model",
        ...     repo_id="demo/encrypted-model",
        ...     repo_type="models",
        ...     encryption_key="my-secret-key",
        ...     encryption_exclude=["README.md", "*.txt"],
        ...     token="your-token",
        ... )
    """
    import tempfile
    import fnmatch
    from .client import HubClient

    parts = repo_id.split('/')
    if len(parts) != 2:
        raise ValueError(f"Invalid repo_id format: {repo_id}. Expected 'organization/repo_name'")

    organization, repo_name = parts
    # NOTE(review): the client is constructed but never used below; kept in
    # case the constructor validates its arguments - confirm and drop if not.
    client = HubClient(
        base_url=base_url,
        username=username,
        password=password,
        token=token,
    )
    is_encrypted = bool(encryption_key)

    # Validate folder path
    folder_path = Path(folder_path)
    if not folder_path.exists():
        raise FileNotFoundError(f"Folder not found: {folder_path}")
    if not folder_path.is_dir():
        raise ValueError(f"Path is not a directory: {folder_path}")

    # Always skip '.git': copying the source folder's Git metadata on top of
    # the fresh clone would corrupt it. A new list is built so the caller's
    # list is never mutated.
    effective_ignore = [*(ignore_patterns or []), ".git"]

    if encryption_exclude is None:
        encryption_exclude = []

    def _is_ignored(candidate: str) -> bool:
        return any(fnmatch.fnmatch(candidate, pattern) for pattern in effective_ignore)

    # Build Git URL
    git_url = _build_git_url(
        base_url=base_url,
        organization=organization,
        repo_type=repo_type,
        repo_name=repo_name,
        username=username,
        password=password,
        token=token,
    )

    if temp_dir is not None:
        # Work in a unique sub-directory of the caller-provided directory so
        # only that sub-directory is removed afterwards.
        import uuid
        temp_dir_path = Path(temp_dir)
        temp_dir_path.mkdir(parents=True, exist_ok=True)
        unique_dir = temp_dir_path / f"upload_{uuid.uuid4().hex[:8]}"
        unique_dir.mkdir(parents=True, exist_ok=True)
        temp_context = None
        working_temp_dir = unique_dir
    else:
        temp_context = tempfile.TemporaryDirectory()
        working_temp_dir = Path(temp_context.name)
        unique_dir = None

    repo = None
    try:
        repo_path = working_temp_dir / "repo"

        try:
            try:
                # With GIT_LFS_SKIP_SMUDGE set, LFS content is not downloaded
                # during the clone (only pointer files are checked out).
                env_mapping = {'GIT_LFS_SKIP_SMUDGE': "1"} if skip_lfs else None
                repo = Repo.clone_from(
                    git_url,
                    repo_path,
                    branch=revision,
                    depth=1,
                    env=env_mapping,
                )
            except GitCommandError as e:
                if "authentication" in str(e).lower():
                    raise AuthenticationError(f"Authentication failed: {e}") from e
                raise UploadError(f"Failed to clone repository: {e}") from e

            # Number of files to upload once ignored files are filtered out
            total_files = _count_files_in_directory(str(folder_path), effective_ignore)
            print(f"Copying files from {folder_path} to repository...")
            progress_bar = None
            if tqdm is not None and total_files > 0:
                progress_bar = tqdm(
                    total=total_files,
                    unit='file',
                    desc="Copying files",
                    leave=True,
                )

            files_copied = 0
            files_to_encrypt = []  # copied files that must be encrypted

            for root, dirs, files in os.walk(folder_path):
                # In-place assignment prunes ignored directories from the walk
                dirs[:] = [d for d in dirs if not _is_ignored(d)]

                # Path of the current directory relative to folder_path
                rel_root = os.path.relpath(root, folder_path)
                if rel_root == '.':
                    dest_root = repo_path
                else:
                    dest_root = repo_path / rel_root

                # Create destination directory
                dest_root.mkdir(parents=True, exist_ok=True)

                # Copy files
                for file in files:
                    # Construct relative file path (relative to folder_path)
                    if rel_root == '.':
                        rel_file_path = file
                    else:
                        rel_file_path = os.path.join(rel_root, file)

                    # Check ignore patterns using full relative path
                    if _is_ignored(rel_file_path):
                        continue

                    src_file = Path(root) / file
                    dest_file = dest_root / file

                    # Copy file
                    shutil.copy2(src_file, dest_file)
                    files_copied += 1

                    # Remember files that need encryption (matched by full relative path)
                    if is_encrypted and _should_encrypt_file(rel_file_path, encryption_exclude):
                        files_to_encrypt.append(dest_file)

                    if progress_bar is not None:
                        progress_bar.update(1)

            if progress_bar is not None:
                progress_bar.close()

            print(f"Copied {files_copied} files")

            # Encryption metadata file inside the clone
            metadata_file = repo_path / ".moha_encryption"

            # Encrypt
            if is_encrypted and files_to_encrypt:
                # Determine encryption algorithm
                from .encryption import EncryptionAlgorithm, encrypt_file as encrypt_file_func

                if encryption_algorithm is None:
                    algorithm = EncryptionAlgorithm.AES_256_CBC
                else:
                    try:
                        algorithm = EncryptionAlgorithm(encryption_algorithm)
                    except ValueError as e:
                        raise EncryptionError(
                            f"Invalid encryption algorithm: {encryption_algorithm}. "
                            f"Supported algorithms: {', '.join([a.value for a in EncryptionAlgorithm])}"
                        ) from e

                print(f"Encrypting {len(files_to_encrypt)} files using {algorithm.value}...")
                encrypt_progress = None
                if tqdm is not None:
                    encrypt_progress = tqdm(
                        total=len(files_to_encrypt),
                        unit='file',
                        desc="Encrypting files",
                        leave=True,
                    )

                # Collect metadata of the encrypted files
                encrypted_files_metadata = []

                for file_path in files_to_encrypt:
                    # Encrypt the copied file
                    encrypt_file_func(file_path, encryption_key, algorithm)

                    # Size and hash are taken after encryption
                    encrypted_size = file_path.stat().st_size
                    encrypted_hash = _calculate_file_hash(file_path)

                    # Path relative to repo_path; as_posix() keeps forward
                    # slashes on every platform so the metadata is portable.
                    rel_path = file_path.relative_to(repo_path)

                    encrypted_files_metadata.append({
                        "path": rel_path.as_posix(),
                        "algorithm": algorithm.value,
                        "encryptedSize": encrypted_size,
                        "encryptedHash": encrypted_hash,
                    })

                    if encrypt_progress is not None:
                        encrypt_progress.update(1)

                if encrypt_progress is not None:
                    encrypt_progress.close()

                print(f"Encrypted {len(files_to_encrypt)} files")

                # Write the encryption metadata file
                encryption_metadata = _create_encryption_metadata(
                    encrypted_files=encrypted_files_metadata,
                    algorithm=algorithm.value
                )
                _write_encryption_metadata(repo_path, encryption_metadata)
            else:
                # Nothing was encrypted: remove a metadata file that may exist
                if metadata_file.exists():
                    metadata_file.unlink()
                    print("Removed .moha_encryption file (non-encrypted upload)")

            repo.git.add(A=True)

            if not repo.is_dirty() and not repo.untracked_files:
                print("No changes to commit")
                return repo.head.commit.hexsha

            if commit_message is None:
                commit_message = f"Upload folder from {folder_path.name}"

            full_message = commit_message
            if commit_description:
                full_message = f"{commit_message}\n\n{commit_description}"

            # Commit changes
            print(f"Committing changes: {commit_message}")
            commit = repo.index.commit(full_message)
            try:
                origin = repo.remote(name='origin')
                origin.push(refspec=f'{revision}:{revision}')
            except GitCommandError as e:
                if "authentication" in str(e).lower():
                    raise AuthenticationError(f"Authentication failed during push: {e}") from e
                raise UploadError(f"Failed to push changes: {e}") from e

            print(f"Successfully uploaded to {repo_id}")
            print(f"Commit hash: {commit.hexsha}")

            return commit.hexsha

        except (GitCommandError, InvalidGitRepositoryError) as e:
            raise UploadError(f"Git operation failed: {e}") from e
    finally:
        # Release Git resources before deleting the working directory
        if repo is not None:
            try:
                repo.close()
            except Exception:
                pass  # Best effort: cleanup must not mask the real outcome
        # Clean up temporary directory
        if temp_context is not None:
            try:
                temp_context.cleanup()
            except Exception:
                pass  # Ignore cleanup errors
        elif unique_dir is not None:
            shutil.rmtree(unique_dir, ignore_errors=True)
530
+
531
+
532
def upload_file(
    path_or_fileobj: Union[str, Path, bytes],
    path_in_repo: str,
    repo_id: str,
    repo_type: str = "models",
    revision: str = "main",
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    base_url: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    encryption_key: Optional[Union[str, bytes]] = None,
    encryption_algorithm: Optional[str] = None,
    temp_dir: Optional[Union[str, Path]] = None,
    skip_lfs: Optional[bool] = True,  # skip downloading LFS content when cloning; only pointers are needed
) -> str:
    """
    Upload a single file to a repository.

    The repository is shallow-cloned into a temporary directory, the file is
    written at ``path_in_repo`` (and encrypted in place when an encryption key
    is given), the .moha_encryption metadata file is kept in sync, and the
    result is committed and pushed.

    Args:
        path_or_fileobj: Path to the file or file content as bytes
        path_in_repo: Relative path where the file should be stored in the repository
        repo_id: Repository ID in the format "organization/repo_name"
        repo_type: Type of repository ("models" or "datasets")
        revision: Branch to upload to (default: "main")
        commit_message: Commit message (default: "Upload <path_in_repo>")
        commit_description: Additional commit description
        base_url: Base URL of the Hub API
        username: Username for authentication
        password: Password for authentication
        token: Token for authentication
        encryption_key: Encryption key (string for symmetric, PEM for asymmetric).
            When given, the file is encrypted before committing.
        encryption_algorithm: Encryption algorithm to use (default: 'aes-256-cbc')
            - Symmetric: 'aes-256-cbc', 'aes-256-gcm'
            - Asymmetric: 'rsa-oaep', 'rsa-pkcs1v15' (requires RSA public key in PEM format)
        temp_dir: Custom temporary directory path for cloning repository (default: system temp directory)
        skip_lfs: Skip LFS files when cloning the repository (default: True)

    Returns:
        Commit hash of the uploaded file, or the current HEAD commit hash when
        there is nothing to commit

    Raises:
        ValueError: If repo_id is malformed, or path_in_repo is empty, absolute,
            contains '..' or points into '.git'
        FileNotFoundError: If path_or_fileobj is a path that does not exist
        AuthenticationError: If cloning or pushing fails with an authentication error
        UploadError: If cloning, pushing or another Git operation fails
        EncryptionError: If encryption_algorithm is not a supported algorithm

    Example:
        >>> commit_hash = upload_file(
        ...     path_or_fileobj="./config.yaml",
        ...     path_in_repo="config.yaml",
        ...     repo_id="demo/my-model",
        ...     commit_message="Upload config file",
        ...     token="your-token",
        ... )

        >>> # Upload to encrypted repository
        >>> commit_hash = upload_file(
        ...     path_or_fileobj="./model.bin",
        ...     path_in_repo="model.bin",
        ...     repo_id="demo/encrypted-model",
        ...     encryption_key="my-secret-key",
        ...     token="your-token",
        ... )
    """
    import tempfile
    from .client import HubClient

    # Parse repo_id
    parts = repo_id.split('/')
    if len(parts) != 2:
        raise ValueError(f"Invalid repo_id format: {repo_id}. Expected 'organization/repo_name'")

    organization, repo_name = parts

    # Reject destinations that would escape the clone or touch its Git
    # metadata: joining an absolute path or one containing '..' onto the
    # repository path would write outside the working tree.
    target_rel = Path(path_in_repo)
    if (
        not target_rel.parts
        or target_rel.anchor
        or ".." in target_rel.parts
        or target_rel.parts[0] == ".git"
    ):
        raise ValueError(
            f"Invalid path_in_repo: {path_in_repo!r}. "
            "Expected a relative path inside the repository"
        )

    # Validate the source before cloning so a bad path fails fast
    source_is_bytes = isinstance(path_or_fileobj, (bytes, bytearray))
    source_path = None
    if not source_is_bytes:
        source_path = Path(path_or_fileobj)
        if not source_path.exists():
            raise FileNotFoundError(f"File not found: {source_path}")

    # NOTE(review): the client is constructed but never used below; kept in
    # case the constructor validates its arguments - confirm and drop if not.
    client = HubClient(
        base_url=base_url,
        username=username,
        password=password,
        token=token,
    )
    is_encrypted = bool(encryption_key)

    # Build Git URL
    git_url = _build_git_url(
        base_url=base_url,
        organization=organization,
        repo_type=repo_type,
        repo_name=repo_name,
        username=username,
        password=password,
        token=token,
    )

    # Temporary directory that holds the cloned repository
    if temp_dir is not None:
        # Work in a unique sub-directory of the caller-provided directory so
        # only that sub-directory is removed afterwards.
        import uuid
        temp_dir_path = Path(temp_dir)
        temp_dir_path.mkdir(parents=True, exist_ok=True)
        unique_dir = temp_dir_path / f"upload_{uuid.uuid4().hex[:8]}"
        unique_dir.mkdir(parents=True, exist_ok=True)
        temp_context = None
        working_temp_dir = unique_dir
    else:
        # Use the system temporary directory
        temp_context = tempfile.TemporaryDirectory()
        working_temp_dir = Path(temp_context.name)
        unique_dir = None

    repo = None
    try:
        repo_path = working_temp_dir / "repo"

        try:
            # Shallow clone (depth=1): only the latest commit is fetched,
            # which keeps the transferred data small.
            try:
                env_mapping = {'GIT_LFS_SKIP_SMUDGE': "1"} if skip_lfs else None
                repo = Repo.clone_from(
                    git_url,
                    repo_path,
                    branch=revision,
                    depth=1,
                    env=env_mapping,
                )
            except GitCommandError as e:
                if "authentication" in str(e).lower():
                    raise AuthenticationError(f"Authentication failed: {e}") from e
                raise UploadError(f"Failed to clone repository: {e}") from e

            # Prepare file path in repository
            file_path = repo_path / path_in_repo
            file_path.parent.mkdir(parents=True, exist_ok=True)

            # Write file content
            if source_is_bytes:
                # Write bytes directly
                file_path.write_bytes(path_or_fileobj)
            else:
                # Copy from source file
                shutil.copy2(source_path, file_path)

            print(f"Added file: {path_in_repo}")

            # Encryption metadata file inside the clone
            metadata_file = repo_path / ".moha_encryption"
            metadata_updated = False

            # Encrypt file if an encryption key was supplied
            if is_encrypted:
                # Determine encryption algorithm
                from .encryption import EncryptionAlgorithm, encrypt_file as encrypt_file_func

                if encryption_algorithm is None:
                    algorithm = EncryptionAlgorithm.AES_256_CBC
                else:
                    try:
                        algorithm = EncryptionAlgorithm(encryption_algorithm)
                    except ValueError as e:
                        raise EncryptionError(
                            f"Invalid encryption algorithm: {encryption_algorithm}. "
                            f"Supported algorithms: {', '.join([a.value for a in EncryptionAlgorithm])}"
                        ) from e

                print(f"Encrypting file: {path_in_repo} using {algorithm.value}")
                encrypt_file_func(file_path, encryption_key, algorithm)
                print("File encrypted")

                # Size and hash are taken after encryption
                encrypted_size = file_path.stat().st_size
                encrypted_hash = _calculate_file_hash(file_path)

                # Load the existing metadata file or start a new one
                if metadata_file.exists():
                    with open(metadata_file, 'r', encoding='utf-8') as f:
                        encryption_metadata = json.load(f)
                else:
                    encryption_metadata = {
                        "version": "1.0",
                        "createAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
                        "files": []
                    }

                file_info = {
                    "path": path_in_repo,
                    "algorithm": algorithm.value,
                    "encryptedSize": encrypted_size,
                    "encryptedHash": encrypted_hash,
                }

                # Replace the entry for this path if present, else append.
                # setdefault/get tolerate metadata lacking "files"/"path".
                entries = encryption_metadata.setdefault("files", [])
                for index, entry in enumerate(entries):
                    if entry.get("path") == path_in_repo:
                        entries[index] = file_info
                        break
                else:
                    entries.append(file_info)

                # Write the metadata file
                _write_encryption_metadata(repo_path, encryption_metadata)
                metadata_updated = True
            else:
                # Plain upload: drop this file from the metadata if it is listed
                if metadata_file.exists():
                    with open(metadata_file, 'r', encoding='utf-8') as f:
                        encryption_metadata = json.load(f)

                    entries = encryption_metadata.get("files", [])
                    remaining = [
                        entry for entry in entries
                        if entry.get("path") != path_in_repo
                    ]

                    if len(remaining) < len(entries):
                        # The file was listed and has been removed
                        encryption_metadata["files"] = remaining
                        if not remaining:
                            # No encrypted files left: delete the metadata file
                            metadata_file.unlink()
                            print("Removed .moha_encryption file (no encrypted files remaining)")
                        else:
                            # Update the metadata file
                            _write_encryption_metadata(repo_path, encryption_metadata)
                            print(f"Updated .moha_encryption file (removed {path_in_repo})")
                        metadata_updated = True

            # Add file to git; '--' stops a path starting with '-' from being
            # parsed as an option.
            repo.git.add("--", path_in_repo)

            # Stage the metadata file as well when it changed
            if metadata_updated:
                if metadata_file.exists():
                    repo.git.add(".moha_encryption")
                else:
                    # The file was deleted: make sure it is removed from git too
                    try:
                        repo.git.rm(".moha_encryption")
                    except GitCommandError:
                        # The file may not be tracked by git; ignore the error
                        pass

            # Check if there are changes
            if not repo.is_dirty() and not repo.untracked_files:
                print("No changes to commit (file already exists with same content)")
                return repo.head.commit.hexsha

            # Create commit message
            if commit_message is None:
                commit_message = f"Upload {path_in_repo}"

            full_message = commit_message
            if commit_description:
                full_message = f"{commit_message}\n\n{commit_description}"

            # Commit changes
            print(f"Committing changes: {commit_message}")
            commit = repo.index.commit(full_message)

            # Push to remote
            print(f"Pushing to {revision}...")
            try:
                origin = repo.remote(name='origin')
                origin.push(refspec=f'{revision}:{revision}')
            except GitCommandError as e:
                if "authentication" in str(e).lower():
                    raise AuthenticationError(f"Authentication failed during push: {e}") from e
                raise UploadError(f"Failed to push changes: {e}") from e

            print(f"Successfully uploaded {path_in_repo} to {repo_id}")
            print(f"Commit hash: {commit.hexsha}")

            return commit.hexsha

        except (GitCommandError, InvalidGitRepositoryError) as e:
            raise UploadError(f"Git operation failed: {e}") from e
    finally:
        # Release Git resources before deleting the working directory
        if repo is not None:
            try:
                repo.close()
            except Exception:
                pass  # Best effort: cleanup must not mask the real outcome
        # Clean up temporary directory
        if temp_context is not None:
            try:
                temp_context.cleanup()
            except Exception:
                pass  # Ignore cleanup errors
        elif unique_dir is not None:
            shutil.rmtree(unique_dir, ignore_errors=True)
829
+
830
+
831
+