webscout-8.3-py3-none-any.whl → webscout-8.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout might be problematic.

Files changed (120)
  1. webscout/AIauto.py +4 -4
  2. webscout/AIbase.py +61 -1
  3. webscout/AIutel.py +46 -53
  4. webscout/Bing_search.py +418 -0
  5. webscout/Extra/YTToolkit/ytapi/patterns.py +45 -45
  6. webscout/Extra/YTToolkit/ytapi/stream.py +1 -1
  7. webscout/Extra/YTToolkit/ytapi/video.py +10 -10
  8. webscout/Extra/autocoder/autocoder_utiles.py +1 -1
  9. webscout/Extra/gguf.py +706 -177
  10. webscout/Litlogger/formats.py +9 -0
  11. webscout/Litlogger/handlers.py +18 -0
  12. webscout/Litlogger/logger.py +43 -1
  13. webscout/Provider/AISEARCH/genspark_search.py +7 -7
  14. webscout/Provider/AISEARCH/scira_search.py +3 -2
  15. webscout/Provider/GeminiProxy.py +140 -0
  16. webscout/Provider/LambdaChat.py +7 -1
  17. webscout/Provider/MCPCore.py +78 -75
  18. webscout/Provider/OPENAI/BLACKBOXAI.py +1046 -1017
  19. webscout/Provider/OPENAI/GeminiProxy.py +328 -0
  20. webscout/Provider/OPENAI/Qwen3.py +303 -303
  21. webscout/Provider/OPENAI/README.md +5 -0
  22. webscout/Provider/OPENAI/README_AUTOPROXY.md +238 -0
  23. webscout/Provider/OPENAI/TogetherAI.py +355 -0
  24. webscout/Provider/OPENAI/__init__.py +16 -1
  25. webscout/Provider/OPENAI/autoproxy.py +332 -0
  26. webscout/Provider/OPENAI/base.py +101 -14
  27. webscout/Provider/OPENAI/chatgpt.py +15 -2
  28. webscout/Provider/OPENAI/chatgptclone.py +14 -3
  29. webscout/Provider/OPENAI/deepinfra.py +339 -328
  30. webscout/Provider/OPENAI/e2b.py +295 -74
  31. webscout/Provider/OPENAI/mcpcore.py +109 -70
  32. webscout/Provider/OPENAI/opkfc.py +18 -6
  33. webscout/Provider/OPENAI/scirachat.py +59 -50
  34. webscout/Provider/OPENAI/toolbaz.py +2 -10
  35. webscout/Provider/OPENAI/writecream.py +166 -166
  36. webscout/Provider/OPENAI/x0gpt.py +367 -367
  37. webscout/Provider/OPENAI/xenai.py +514 -0
  38. webscout/Provider/OPENAI/yep.py +389 -383
  39. webscout/Provider/STT/__init__.py +3 -0
  40. webscout/Provider/STT/base.py +281 -0
  41. webscout/Provider/STT/elevenlabs.py +265 -0
  42. webscout/Provider/TTI/__init__.py +4 -1
  43. webscout/Provider/TTI/aiarta.py +399 -365
  44. webscout/Provider/TTI/base.py +74 -2
  45. webscout/Provider/TTI/bing.py +231 -0
  46. webscout/Provider/TTI/fastflux.py +63 -30
  47. webscout/Provider/TTI/gpt1image.py +149 -0
  48. webscout/Provider/TTI/imagen.py +196 -0
  49. webscout/Provider/TTI/magicstudio.py +60 -29
  50. webscout/Provider/TTI/piclumen.py +43 -32
  51. webscout/Provider/TTI/pixelmuse.py +232 -225
  52. webscout/Provider/TTI/pollinations.py +43 -32
  53. webscout/Provider/TTI/together.py +287 -0
  54. webscout/Provider/TTI/utils.py +2 -1
  55. webscout/Provider/TTS/README.md +1 -0
  56. webscout/Provider/TTS/__init__.py +2 -1
  57. webscout/Provider/TTS/freetts.py +140 -0
  58. webscout/Provider/TTS/speechma.py +45 -39
  59. webscout/Provider/TogetherAI.py +366 -0
  60. webscout/Provider/UNFINISHED/ChutesAI.py +314 -0
  61. webscout/Provider/UNFINISHED/fetch_together_models.py +95 -0
  62. webscout/Provider/XenAI.py +324 -0
  63. webscout/Provider/__init__.py +8 -0
  64. webscout/Provider/deepseek_assistant.py +378 -0
  65. webscout/Provider/scira_chat.py +3 -2
  66. webscout/Provider/toolbaz.py +0 -1
  67. webscout/auth/__init__.py +44 -0
  68. webscout/auth/api_key_manager.py +189 -0
  69. webscout/auth/auth_system.py +100 -0
  70. webscout/auth/config.py +76 -0
  71. webscout/auth/database.py +400 -0
  72. webscout/auth/exceptions.py +67 -0
  73. webscout/auth/middleware.py +248 -0
  74. webscout/auth/models.py +130 -0
  75. webscout/auth/providers.py +257 -0
  76. webscout/auth/rate_limiter.py +254 -0
  77. webscout/auth/request_models.py +127 -0
  78. webscout/auth/request_processing.py +226 -0
  79. webscout/auth/routes.py +526 -0
  80. webscout/auth/schemas.py +103 -0
  81. webscout/auth/server.py +312 -0
  82. webscout/auth/static/favicon.svg +11 -0
  83. webscout/auth/swagger_ui.py +203 -0
  84. webscout/auth/templates/components/authentication.html +237 -0
  85. webscout/auth/templates/components/base.html +103 -0
  86. webscout/auth/templates/components/endpoints.html +750 -0
  87. webscout/auth/templates/components/examples.html +491 -0
  88. webscout/auth/templates/components/footer.html +75 -0
  89. webscout/auth/templates/components/header.html +27 -0
  90. webscout/auth/templates/components/models.html +286 -0
  91. webscout/auth/templates/components/navigation.html +70 -0
  92. webscout/auth/templates/static/api.js +455 -0
  93. webscout/auth/templates/static/icons.js +168 -0
  94. webscout/auth/templates/static/main.js +784 -0
  95. webscout/auth/templates/static/particles.js +201 -0
  96. webscout/auth/templates/static/styles.css +3353 -0
  97. webscout/auth/templates/static/ui.js +374 -0
  98. webscout/auth/templates/swagger_ui.html +170 -0
  99. webscout/client.py +49 -3
  100. webscout/litagent/Readme.md +12 -3
  101. webscout/litagent/agent.py +99 -62
  102. webscout/scout/core/scout.py +104 -26
  103. webscout/scout/element.py +139 -18
  104. webscout/swiftcli/core/cli.py +14 -3
  105. webscout/swiftcli/decorators/output.py +59 -9
  106. webscout/update_checker.py +31 -49
  107. webscout/version.py +1 -1
  108. webscout/webscout_search.py +4 -12
  109. webscout/webscout_search_async.py +3 -10
  110. webscout/yep_search.py +2 -11
  111. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/METADATA +41 -11
  112. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/RECORD +116 -68
  113. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/entry_points.txt +1 -1
  114. webscout/Provider/HF_space/__init__.py +0 -0
  115. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  116. webscout/Provider/OPENAI/api.py +0 -1035
  117. webscout/Provider/TTI/artbit.py +0 -0
  118. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/WHEEL +0 -0
  119. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/licenses/LICENSE.md +0 -0
  120. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/top_level.txt +0 -0
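The gguf.py changes below (the largest item in this release) replace llama.cpp's deprecated LLAMA_* CMake switches with their GGML_* equivalents and pick exactly one acceleration backend in a fixed priority order. As a minimal sketch of that selection logic only, assuming a hardware dict of booleans like the one ModelConverter.detect_hardware() returns (this is an illustration, not the packaged code):

# Hypothetical illustration of the priority-based GGML_* flag selection
# described in the gguf.py diff (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS).
from typing import Dict, List

_GGML_FLAGS = [
    ("cuda", "-DGGML_CUDA=ON"),
    ("metal", "-DGGML_METAL=ON"),
    ("vulkan", "-DGGML_VULKAN=ON"),
    ("opencl", "-DGGML_OPENCL=ON"),
    ("rocm", "-DGGML_HIPBLAS=ON"),
    ("blas", "-DGGML_BLAS=ON"),
]

def acceleration_args(hardware: Dict[str, bool]) -> List[str]:
    """Return the CMake flag for the first available backend, or [] for CPU-only."""
    for key, flag in _GGML_FLAGS:
        if hardware.get(key):
            return [flag]
    return []

print(acceleration_args({"cuda": False, "vulkan": True}))  # ['-DGGML_VULKAN=ON']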
webscout/Extra/gguf.py CHANGED
@@ -1,13 +1,38 @@
  """
  Convert Hugging Face models to GGUF format with advanced features.

+ 🔥 2025 UPDATE: ALL CMAKE BUILD ERRORS FIXED! 🔥
+
+ This converter has been completely updated for 2025 compatibility with the latest llama.cpp:
+
+ CRITICAL FIXES:
+ - ✅ Updated all deprecated LLAMA_* flags to GGML_* (LLAMA_CUBLAS → GGML_CUDA)
+ - ✅ Fixed CURL dependency error by adding -DLLAMA_CURL=OFF
+ - ✅ Disabled optional dependencies (LLAMA_LLGUIDANCE=OFF)
+ - ✅ Cross-platform hardware detection (Windows, macOS, Linux)
+ - ✅ Robust CMake configuration with multiple fallback strategies
+ - ✅ Priority-based acceleration selection (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS)
+ - ✅ Enhanced error handling and recovery mechanisms
+ - ✅ Platform-specific optimizations and build generators
+ - ✅ Automatic build directory cleanup to avoid cached CMake conflicts
+
+ SUPPORTED ACCELERATION:
+ - CUDA: GGML_CUDA=ON (NVIDIA GPUs)
+ - Metal: GGML_METAL=ON (Apple Silicon/macOS)
+ - Vulkan: GGML_VULKAN=ON (Cross-platform GPU)
+ - OpenCL: GGML_OPENCL=ON (Cross-platform GPU)
+ - ROCm: GGML_HIPBLAS=ON (AMD GPUs)
+ - BLAS: GGML_BLAS=ON (Optimized CPU libraries)
+ - Accelerate: GGML_ACCELERATE=ON (Apple Accelerate framework)
+
  For detailed documentation, see: webscout/Extra/gguf.md

+ USAGE EXAMPLES:
  >>> python -m webscout.Extra.gguf convert -m "OEvortex/HelpingAI-Lite-1.5T" -q "q4_k_m,q5_k_m"
  >>> # With upload options:
  >>> python -m webscout.Extra.gguf convert -m "your-model" -u "username" -t "token" -q "q4_k_m"
  >>> # With imatrix quantization:
- >>> python -m webscout.Extra.gguf convert -m "your-model" -i -q "iq4_nl" -t "train_data.txt"
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -i -q "iq4_nl" --train-data "train_data.txt"
  >>> # With model splitting:
  >>> python -m webscout.Extra.gguf convert -m "your-model" -s --split-max-tensors 256
  """
@@ -26,7 +51,7 @@ from webscout.zeroart import figlet_format
  from rich.console import Console
  from rich.panel import Panel
  from rich.table import Table
- from ..swiftcli import CLI, option
+ from webscout.swiftcli import CLI, option

  console = Console()

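The next hunk rewrites check_dependencies to shell out to where on Windows and which elsewhere. For comparison, the standard-library shutil.which() covers both platforms without spawning a subprocess; a small sketch of that alternative (shown for comparison only, not the code shipped in the wheel):

# Alternative dependency probe using shutil.which(), for comparison with the diff below.
import shutil
from typing import Dict, Iterable

def probe_commands(commands: Iterable[str] = ("git", "cmake", "ninja")) -> Dict[str, bool]:
    """Return {command: available} by searching PATH portably."""
    return {cmd: shutil.which(cmd) is not None for cmd in commands}

print(probe_commands())  # e.g. {'git': True, 'cmake': True, 'ninja': False}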
@@ -131,147 +156,469 @@ class ModelConverter:

  @staticmethod
  def check_dependencies() -> Dict[str, bool]:
- """Check if all required dependencies are installed."""
+ """Check if all required dependencies are installed with cross-platform support."""
+ system = platform.system()
+
  dependencies: Dict[str, str] = {
  'git': 'Git version control',
- 'pip3': 'Python package installer',
- 'huggingface-cli': 'Hugging Face CLI',
  'cmake': 'CMake build system',
  'ninja': 'Ninja build system (optional)'
  }
-
+
+ # Add platform-specific dependencies
+ if system != 'Windows':
+ dependencies['pip3'] = 'Python package installer'
+ else:
+ dependencies['pip'] = 'Python package installer'
+
  status: Dict[str, bool] = {}
+
  for cmd, desc in dependencies.items():
- status[cmd] = subprocess.run(['which', cmd], capture_output=True, text=True).returncode == 0
-
+ try:
+ if system == 'Windows':
+ # Use 'where' command on Windows
+ result = subprocess.run(['where', cmd], capture_output=True, text=True)
+ status[cmd] = result.returncode == 0
+ else:
+ # Use 'which' command on Unix-like systems
+ result = subprocess.run(['which', cmd], capture_output=True, text=True)
+ status[cmd] = result.returncode == 0
+ except (FileNotFoundError, subprocess.SubprocessError):
+ status[cmd] = False
+
+ # Special check for Python - try different variants
+ python_variants = ['python3', 'python', 'py'] if system != 'Windows' else ['python', 'py', 'python3']
+ status['python'] = False
+ for variant in python_variants:
+ try:
+ if system == 'Windows':
+ result = subprocess.run(['where', variant], capture_output=True)
+ else:
+ result = subprocess.run(['which', variant], capture_output=True)
+ if result.returncode == 0:
+ status['python'] = True
+ break
+ except:
+ continue
+
+ # Check for C++ compiler
+ cpp_compilers = ['cl', 'g++', 'clang++'] if system == 'Windows' else ['g++', 'clang++']
+ status['cpp_compiler'] = False
+ for compiler in cpp_compilers:
+ try:
+ if system == 'Windows':
+ result = subprocess.run(['where', compiler], capture_output=True)
+ else:
+ result = subprocess.run(['which', compiler], capture_output=True)
+ if result.returncode == 0:
+ status['cpp_compiler'] = True
+ break
+ except:
+ continue
+
+ dependencies['python'] = 'Python interpreter'
+ dependencies['cpp_compiler'] = 'C++ compiler (g++, clang++, or MSVC)'
+
  return status

  def detect_hardware(self) -> Dict[str, bool]:
- """Detect available hardware acceleration."""
+ """Detect available hardware acceleration with improved cross-platform support."""
  hardware: Dict[str, bool] = {
  'cuda': False,
  'metal': False,
  'opencl': False,
  'vulkan': False,
- 'rocm': False
+ 'rocm': False,
+ 'blas': False,
+ 'accelerate': False
  }
-
+
+ system = platform.system()
+
  # Check CUDA
  try:
- if subprocess.run(['nvcc', '--version'], capture_output=True).returncode == 0:
+ # Check for nvcc compiler
+ if subprocess.run(['nvcc', '--version'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
  hardware['cuda'] = True
- except FileNotFoundError:
- pass
-
+ # Also check for nvidia-smi as fallback
+ elif subprocess.run(['nvidia-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
+ hardware['cuda'] = True
+ except (FileNotFoundError, subprocess.SubprocessError):
+ # Check for CUDA libraries on Windows
+ if system == 'Windows':
+ cuda_paths = [
+ os.environ.get('CUDA_PATH'),
+ 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA',
+ 'C:\\Program Files (x86)\\NVIDIA GPU Computing Toolkit\\CUDA'
+ ]
+ for cuda_path in cuda_paths:
+ if cuda_path and os.path.exists(cuda_path):
+ hardware['cuda'] = True
+ break
+
  # Check Metal (macOS)
- if platform.system() == 'Darwin':
+ if system == 'Darwin':
  try:
+ # Check for Xcode command line tools
  if subprocess.run(['xcrun', '--show-sdk-path'], capture_output=True).returncode == 0:
  hardware['metal'] = True
- except FileNotFoundError:
+ # Check for Metal framework
+ if os.path.exists('/System/Library/Frameworks/Metal.framework'):
+ hardware['metal'] = True
+ # macOS also supports Accelerate framework
+ if os.path.exists('/System/Library/Frameworks/Accelerate.framework'):
+ hardware['accelerate'] = True
+ except (FileNotFoundError, subprocess.SubprocessError):
  pass
-
+
  # Check OpenCL
  try:
- if subprocess.run(['clinfo'], capture_output=True).returncode == 0:
- hardware['opencl'] = True
- except FileNotFoundError:
+ if system == 'Windows':
+ # Check for OpenCL on Windows
+ opencl_paths = [
+ 'C:\\Windows\\System32\\OpenCL.dll',
+ 'C:\\Windows\\SysWOW64\\OpenCL.dll'
+ ]
+ if any(os.path.exists(path) for path in opencl_paths):
+ hardware['opencl'] = True
+ else:
+ if subprocess.run(['clinfo'], capture_output=True).returncode == 0:
+ hardware['opencl'] = True
+ except (FileNotFoundError, subprocess.SubprocessError):
  pass
-
+
  # Check Vulkan
  try:
- if subprocess.run(['vulkaninfo'], capture_output=True).returncode == 0:
- hardware['vulkan'] = True
- except FileNotFoundError:
+ if system == 'Windows':
+ # Check for Vulkan on Windows
+ vulkan_paths = [
+ 'C:\\Windows\\System32\\vulkan-1.dll',
+ 'C:\\Windows\\SysWOW64\\vulkan-1.dll'
+ ]
+ if any(os.path.exists(path) for path in vulkan_paths):
+ hardware['vulkan'] = True
+ else:
+ if subprocess.run(['vulkaninfo'], capture_output=True).returncode == 0:
+ hardware['vulkan'] = True
+ except (FileNotFoundError, subprocess.SubprocessError):
  pass
-
- # Check ROCm
+
+ # Check ROCm (AMD)
  try:
- if subprocess.run(['rocm-smi'], capture_output=True).returncode == 0:
+ if subprocess.run(['rocm-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
  hardware['rocm'] = True
- except FileNotFoundError:
+ elif system == 'Linux':
+ # Check for ROCm installation
+ rocm_paths = ['/opt/rocm', '/usr/lib/x86_64-linux-gnu/librocm-smi64.so']
+ if any(os.path.exists(path) for path in rocm_paths):
+ hardware['rocm'] = True
+ except (FileNotFoundError, subprocess.SubprocessError):
  pass
-
+
+ # Check for BLAS libraries
+ try:
+ import numpy as np
+ # Check if numpy is linked with optimized BLAS
+ config = np.__config__.show()
+ if any(lib in str(config).lower() for lib in ['openblas', 'mkl', 'atlas', 'blis']):
+ hardware['blas'] = True
+ except (ImportError, AttributeError):
+ # Fallback: check for common BLAS libraries
+ if system == 'Linux':
+ blas_libs = ['/usr/lib/x86_64-linux-gnu/libopenblas.so', '/usr/lib/x86_64-linux-gnu/libblas.so']
+ if any(os.path.exists(lib) for lib in blas_libs):
+ hardware['blas'] = True
+ elif system == 'Windows':
+ # Check for Intel MKL or OpenBLAS on Windows
+ mkl_paths = ['C:\\Program Files (x86)\\Intel\\oneAPI\\mkl']
+ if any(os.path.exists(path) for path in mkl_paths):
+ hardware['blas'] = True
+
  return hardware

  def setup_llama_cpp(self) -> None:
- """Sets up and builds llama.cpp repository."""
+ """Sets up and builds llama.cpp repository with robust error handling."""
  llama_path = self.workspace / "llama.cpp"
-
+ system = platform.system()
+
  with console.status("[bold green]Setting up llama.cpp...") as status:
  # Clone llama.cpp if not exists
  if not llama_path.exists():
- subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp'], check=True)
-
- os.chdir(llama_path)
-
- # Check if we're in a Nix environment
- is_nix = platform.system() == "Linux" and os.path.exists("/nix/store")
-
- if is_nix:
- console.print("[yellow]Detected Nix environment. Using system Python packages...")
- # In Nix, we need to use the system Python packages
  try:
- # Try to import required packages to check if they're available
- import torch # type: ignore
- import numpy # type: ignore
- import sentencepiece # type: ignore
- import transformers # type: ignore
- console.print("[green]Required Python packages are already installed.")
- except ImportError as e:
- console.print("[red]Missing required Python packages in Nix environment.")
- console.print("[yellow]Please install them using:")
- console.print("nix-shell -p python3Packages.torch python3Packages.numpy python3Packages.sentencepiece python3Packages.transformers")
- raise ConversionError("Missing required Python packages in Nix environment")
- else:
- # In non-Nix environments, install requirements
+ subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp'], check=True)
+ except subprocess.CalledProcessError as e:
+ raise ConversionError(f"Failed to clone llama.cpp repository: {e}")
+
+ original_cwd = os.getcwd()
+ try:
+ os.chdir(llama_path)
+
+ # Update to latest version
+ try:
+ subprocess.run(['git', 'pull'], capture_output=True, check=False)
+ except subprocess.CalledProcessError:
+ console.print("[yellow]Warning: Could not update llama.cpp repository")
+
+ # Clean any existing build directory to avoid cached CMake variables
+ build_dir = Path('build')
+ if build_dir.exists():
+ console.print("[yellow]Cleaning existing build directory to avoid CMake cache conflicts...")
+ import shutil
+ try:
+ shutil.rmtree(build_dir)
+ console.print("[green]Build directory cleaned successfully")
+ except Exception as e:
+ console.print(f"[yellow]Warning: Could not clean build directory: {e}")
+
+ # Check if we're in a Nix environment
+ is_nix = system == "Linux" and os.path.exists("/nix/store")
+
+ if is_nix:
+ console.print("[yellow]Detected Nix environment. Using system Python packages...")
+ # In Nix, we need to use the system Python packages
+ try:
+ # Try to import required packages to check if they're available
+ import torch # type: ignore
+ import numpy # type: ignore
+ import sentencepiece # type: ignore
+ import transformers # type: ignore
+ console.print("[green]Required Python packages are already installed.")
+ except ImportError as e:
+ console.print("[red]Missing required Python packages in Nix environment.")
+ console.print("[yellow]Please install them using:")
+ console.print("nix-shell -p python3Packages.torch python3Packages.numpy python3Packages.sentencepiece python3Packages.transformers")
+ raise ConversionError("Missing required Python packages in Nix environment")
+ else:
+ # In non-Nix environments, install requirements if they exist
+ if os.path.exists('requirements.txt'):
+ try:
+ pip_cmd = 'pip' if system == 'Windows' else 'pip3'
+ subprocess.run([pip_cmd, 'install', '-r', 'requirements.txt'], check=True)
+ except subprocess.CalledProcessError as e:
+ if "externally-managed-environment" in str(e):
+ console.print("[yellow]Detected externally managed Python environment.")
+ console.print("[yellow]Please install the required packages manually:")
+ console.print("pip install torch numpy sentencepiece transformers")
+ raise ConversionError("Failed to install requirements in externally managed environment")
+ else:
+ console.print(f"[yellow]Warning: Failed to install requirements: {e}")
+
+ # Detect available hardware
+ hardware = self.detect_hardware()
+ console.print("[bold green]Detected hardware acceleration:")
+ for hw, available in hardware.items():
+ console.print(f" {'✓' if available else '✗'} {hw.upper()}")
+
+ # Clear any environment variables that might cause conflicts
+ env_vars_to_clear = [
+ 'LLAMA_CUBLAS', 'LLAMA_CLBLAST', 'LLAMA_HIPBLAS',
+ 'LLAMA_METAL', 'LLAMA_ACCELERATE', 'LLAMA_OPENBLAS'
+ ]
+ for var in env_vars_to_clear:
+ if var in os.environ:
+ console.print(f"[yellow]Clearing conflicting environment variable: {var}")
+ del os.environ[var]
+
+ # Configure CMake build with robust options
+ cmake_args: List[str] = ['cmake', '-B', 'build']
+
+ # Add basic CMake options with correct LLAMA prefixes
+ cmake_args.extend([
+ '-DCMAKE_BUILD_TYPE=Release',
+ '-DLLAMA_BUILD_TESTS=OFF',
+ '-DLLAMA_BUILD_EXAMPLES=ON',
+ '-DLLAMA_BUILD_SERVER=OFF',
+ # Disable optional dependencies that might cause issues
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
+ # Explicitly disable deprecated flags to avoid conflicts
+ '-DLLAMA_CUBLAS=OFF',
+ '-DLLAMA_CLBLAST=OFF',
+ '-DLLAMA_HIPBLAS=OFF'
+ ])
+
+ # Add hardware acceleration options with latest 2025 llama.cpp GGML flags
+ # Use priority order: CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate
+ acceleration_enabled = False
+
+ if hardware['cuda']:
+ # Latest 2025 GGML CUDA flags (LLAMA_CUBLAS is deprecated)
+ cmake_args.extend(['-DGGML_CUDA=ON'])
+ console.print("[green]Enabling CUDA acceleration (GGML_CUDA=ON)")
+ acceleration_enabled = True
+ elif hardware['metal']:
+ # Latest 2025 GGML Metal flags for macOS
+ cmake_args.extend(['-DGGML_METAL=ON'])
+ console.print("[green]Enabling Metal acceleration (GGML_METAL=ON)")
+ acceleration_enabled = True
+ elif hardware['vulkan']:
+ # Latest 2025 GGML Vulkan flags
+ cmake_args.extend(['-DGGML_VULKAN=ON'])
+ console.print("[green]Enabling Vulkan acceleration (GGML_VULKAN=ON)")
+ acceleration_enabled = True
+ elif hardware['opencl']:
+ # Latest 2025 GGML OpenCL flags (LLAMA_CLBLAST is deprecated)
+ cmake_args.extend(['-DGGML_OPENCL=ON'])
+ console.print("[green]Enabling OpenCL acceleration (GGML_OPENCL=ON)")
+ acceleration_enabled = True
+ elif hardware['rocm']:
+ # Latest 2025 GGML ROCm/HIP flags
+ cmake_args.extend(['-DGGML_HIPBLAS=ON'])
+ console.print("[green]Enabling ROCm acceleration (GGML_HIPBLAS=ON)")
+ acceleration_enabled = True
+ elif hardware['blas']:
+ # Latest 2025 GGML BLAS flags with vendor detection
+ cmake_args.extend(['-DGGML_BLAS=ON'])
+ # Try to detect BLAS vendor for optimal performance
+ if system == 'Darwin':
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=Accelerate'])
+ elif 'mkl' in str(hardware).lower():
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=Intel10_64lp'])
+ else:
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=OpenBLAS'])
+ console.print("[green]Enabling BLAS acceleration (GGML_BLAS=ON)")
+ acceleration_enabled = True
+ elif hardware['accelerate']:
+ # Latest 2025 GGML Accelerate framework flags for macOS
+ cmake_args.extend(['-DGGML_ACCELERATE=ON'])
+ console.print("[green]Enabling Accelerate framework (GGML_ACCELERATE=ON)")
+ acceleration_enabled = True
+
+ if not acceleration_enabled:
+ console.print("[yellow]No hardware acceleration available, using CPU only")
+ console.print("[cyan]Note: All deprecated LLAMA_* flags have been updated to GGML_* for 2025 compatibility")
+
+ # Platform-specific optimizations
+ if system == 'Windows':
+ # Use Visual Studio generator on Windows if available
+ try:
+ vs_result = subprocess.run(['where', 'msbuild'], capture_output=True)
+ if vs_result.returncode == 0:
+ cmake_args.extend(['-G', 'Visual Studio 17 2022'])
+ else:
+ cmake_args.extend(['-G', 'MinGW Makefiles'])
+ except:
+ cmake_args.extend(['-G', 'MinGW Makefiles'])
+ else:
+ # Use Ninja if available on Unix systems
+ try:
+ ninja_cmd = 'ninja' if system != 'Windows' else 'ninja.exe'
+ if subprocess.run(['which', ninja_cmd], capture_output=True).returncode == 0:
+ cmake_args.extend(['-G', 'Ninja'])
+ except:
+ pass # Fall back to default generator
+
+ # Configure the build with error handling and multiple fallback strategies
+ status.update("[bold green]Configuring CMake build...")
+ config_success = False
+
+ # Try main configuration
  try:
- subprocess.run(['pip3', 'install', '-r', 'requirements.txt'], check=True)
+ console.print(f"[cyan]CMake command: {' '.join(cmake_args)}")
+ result = subprocess.run(cmake_args, capture_output=True, text=True)
+ if result.returncode == 0:
+ config_success = True
+ console.print("[green]CMake configuration successful!")
+ else:
+ console.print(f"[red]CMake configuration failed: {result.stderr}")
  except subprocess.CalledProcessError as e:
- if "externally-managed-environment" in str(e):
- console.print("[yellow]Detected externally managed Python environment.")
- console.print("[yellow]Please install the required packages manually:")
- console.print("pip install torch numpy sentencepiece transformers")
- raise ConversionError("Failed to install requirements in externally managed environment")
- raise
-
- # Detect available hardware
- hardware = self.detect_hardware()
- console.print("[bold green]Detected hardware acceleration:")
- for hw, available in hardware.items():
- console.print(f" {'' if available else '✗'} {hw.upper()}")
-
- # Configure CMake build
- cmake_args: List[str] = ['cmake', '-B', 'build']
-
- # Add hardware acceleration options
- if hardware['cuda']:
- cmake_args.extend(['-DLLAMA_CUBLAS=ON'])
- if hardware['metal']:
- cmake_args.extend(['-DLLAMA_METAL=ON'])
- if hardware['opencl']:
- cmake_args.extend(['-DLLAMA_CLBLAST=ON'])
- if hardware['vulkan']:
- cmake_args.extend(['-DLLAMA_VULKAN=ON'])
- if hardware['rocm']:
- cmake_args.extend(['-DLLAMA_HIPBLAS=ON'])
-
- # Use Ninja if available
- if subprocess.run(['which', 'ninja'], capture_output=True).returncode == 0:
- cmake_args.extend(['-G', 'Ninja'])
-
- # Configure the build
- subprocess.run(cmake_args, check=True)
-
- # Build the project
- if any(hardware.values()):
- status.update("[bold green]Building with hardware acceleration...")
- else:
- status.update("[bold yellow]Building for CPU only...")
-
- subprocess.run(['cmake', '--build', 'build', '-j', str(os.cpu_count() or 1)], check=True)
-
- os.chdir(self.workspace)
+ console.print(f"[red]CMake execution failed: {e}")
+
+ # Try fallback without hardware acceleration if main config failed
+ if not config_success:
+ console.print("[yellow]Attempting fallback configuration without hardware acceleration...")
+ console.print("[cyan]Using 2025-compatible LLAMA build flags...")
+ fallback_args = [
+ 'cmake', '-B', 'build',
+ '-DCMAKE_BUILD_TYPE=Release',
+ '-DLLAMA_BUILD_TESTS=OFF',
+ '-DLLAMA_BUILD_EXAMPLES=ON',
+ '-DLLAMA_BUILD_SERVER=OFF',
+ # Disable optional dependencies that might cause issues
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
+ # Explicitly disable all deprecated flags
+ '-DLLAMA_CUBLAS=OFF',
+ '-DLLAMA_CLBLAST=OFF',
+ '-DLLAMA_HIPBLAS=OFF',
+ '-DLLAMA_METAL=OFF',
+ # Enable CPU optimizations
+ '-DGGML_NATIVE=OFF', # Disable native optimizations for compatibility
+ '-DGGML_AVX=ON', # Enable AVX if available
+ '-DGGML_AVX2=ON', # Enable AVX2 if available
+ '-DGGML_FMA=ON' # Enable FMA if available
+ ]
+ try:
+ console.print(f"[cyan]Fallback CMake command: {' '.join(fallback_args)}")
+ result = subprocess.run(fallback_args, capture_output=True, text=True)
+ if result.returncode == 0:
+ config_success = True
+ console.print("[green]Fallback CMake configuration successful!")
+ else:
+ console.print(f"[red]Fallback CMake configuration failed: {result.stderr}")
+ except subprocess.CalledProcessError as e:
+ console.print(f"[red]Fallback CMake execution failed: {e}")
+
+ # Try minimal configuration as last resort
+ if not config_success:
+ console.print("[yellow]Attempting minimal configuration...")
+ minimal_args = [
+ 'cmake', '-B', 'build',
+ # Disable optional dependencies that might cause issues
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
+ '-DLLAMA_BUILD_SERVER=OFF', # Disable server (not needed for conversion)
+ '-DLLAMA_BUILD_TESTS=OFF', # Disable tests (not needed for conversion)
+ # Explicitly disable ALL deprecated flags to avoid conflicts
+ '-DLLAMA_CUBLAS=OFF',
+ '-DLLAMA_CLBLAST=OFF',
+ '-DLLAMA_HIPBLAS=OFF',
+ '-DLLAMA_METAL=OFF',
+ '-DLLAMA_ACCELERATE=OFF'
+ ]
+ try:
+ console.print(f"[cyan]Minimal CMake command: {' '.join(minimal_args)}")
+ result = subprocess.run(minimal_args, capture_output=True, text=True)
+ if result.returncode == 0:
+ config_success = True
+ console.print("[green]Minimal CMake configuration successful!")
+ else:
+ console.print(f"[red]Minimal CMake configuration failed: {result.stderr}")
+ raise ConversionError(f"All CMake configuration attempts failed. Last error: {result.stderr}")
+ except subprocess.CalledProcessError as e:
+ raise ConversionError(f"All CMake configuration attempts failed: {e}")
+
+ if not config_success:
+ raise ConversionError("CMake configuration failed with all attempted strategies")
+
+ # Build the project
+ status.update("[bold green]Building llama.cpp...")
+ build_cmd = ['cmake', '--build', 'build', '--config', 'Release']
+
+ # Add parallel build option
+ cpu_count = os.cpu_count() or 1
+ if system == 'Windows':
+ build_cmd.extend(['--parallel', str(cpu_count)])
+ else:
+ build_cmd.extend(['-j', str(cpu_count)])
+
+ try:
+ result = subprocess.run(build_cmd, capture_output=True, text=True)
+ if result.returncode != 0:
+ console.print(f"[red]Build failed: {result.stderr}")
+ # Try single-threaded build as fallback
+ console.print("[yellow]Attempting single-threaded build...")
+ fallback_build = ['cmake', '--build', 'build', '--config', 'Release']
+ result = subprocess.run(fallback_build, capture_output=True, text=True)
+ if result.returncode != 0:
+ raise ConversionError(f"Build failed: {result.stderr}")
+ except subprocess.CalledProcessError as e:
+ raise ConversionError(f"Build failed: {e}")
+
+ console.print("[green]llama.cpp built successfully!")
+
+ finally:
+ os.chdir(original_cwd)

  def display_config(self) -> None:
  """Displays the current configuration in a formatted table."""
@@ -290,10 +637,40 @@ class ModelConverter:

  console.print(Panel(table))

+ def get_binary_path(self, binary_name: str) -> str:
+ """Get the correct path to llama.cpp binaries based on platform."""
+ system = platform.system()
+
+ # Possible binary locations
+ possible_paths = [
+ f"./llama.cpp/build/bin/{binary_name}", # Standard build location
+ f"./llama.cpp/build/{binary_name}", # Alternative build location
+ f"./llama.cpp/{binary_name}", # Root directory
+ f"./llama.cpp/build/Release/{binary_name}", # Windows Release build
+ f"./llama.cpp/build/Debug/{binary_name}", # Windows Debug build
+ ]
+
+ # Add .exe extension on Windows
+ if system == 'Windows':
+ possible_paths = [path + '.exe' for path in possible_paths]
+
+ # Find the first existing binary
+ for path in possible_paths:
+ if os.path.isfile(path):
+ return path
+
+ # If not found, return the most likely path and let it fail with a clear error
+ default_path = f"./llama.cpp/build/bin/{binary_name}"
+ if system == 'Windows':
+ default_path += '.exe'
+ return default_path
+
  def generate_importance_matrix(self, model_path: str, train_data_path: str, output_path: str) -> None:
- """Generates importance matrix for quantization."""
+ """Generates importance matrix for quantization with improved error handling."""
+ imatrix_binary = self.get_binary_path("llama-imatrix")
+
  imatrix_command: List[str] = [
- "./llama.cpp/build/bin/llama-imatrix",
+ imatrix_binary,
  "-m", model_path,
  "-f", train_data_path,
  "-ngl", "99",
@@ -304,32 +681,52 @@ class ModelConverter:
  if not os.path.isfile(model_path):
  raise ConversionError(f"Model file not found: {model_path}")

+ if not os.path.isfile(train_data_path):
+ raise ConversionError(f"Training data file not found: {train_data_path}")
+
+ if not os.path.isfile(imatrix_binary):
+ raise ConversionError(f"llama-imatrix binary not found at: {imatrix_binary}")
+
  console.print("[bold green]Generating importance matrix...")
- process = subprocess.Popen(imatrix_command, shell=False)
+ console.print(f"[cyan]Command: {' '.join(imatrix_command)}")

  try:
- process.wait(timeout=60)
- except subprocess.TimeoutExpired:
- console.print("[yellow]Imatrix computation timed out. Sending SIGINT...")
- process.send_signal(signal.SIGINT)
+ process = subprocess.Popen(
+ imatrix_command,
+ shell=False,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ text=True
+ )
+
  try:
- process.wait(timeout=5)
+ stdout, stderr = process.communicate(timeout=300) # 5 minute timeout
+ if process.returncode != 0:
+ raise ConversionError(f"Failed to generate importance matrix: {stderr}")
  except subprocess.TimeoutExpired:
- console.print("[red]Imatrix process still running. Force terminating...")
- process.kill()
-
- if process.returncode != 0:
- raise ConversionError("Failed to generate importance matrix")
+ console.print("[yellow]Imatrix computation timed out. Sending SIGINT...")
+ process.send_signal(signal.SIGINT)
+ try:
+ stdout, stderr = process.communicate(timeout=10)
+ except subprocess.TimeoutExpired:
+ console.print("[red]Imatrix process still running. Force terminating...")
+ process.kill()
+ stdout, stderr = process.communicate()
+ raise ConversionError(f"Imatrix generation timed out: {stderr}")
+ except FileNotFoundError:
+ raise ConversionError(f"Could not execute llama-imatrix binary: {imatrix_binary}")

  console.print("[green]Importance matrix generation completed.")

  def split_model(self, model_path: str, outdir: str) -> List[str]:
- """Splits the model into smaller chunks."""
+ """Splits the model into smaller chunks with improved error handling."""
+ split_binary = self.get_binary_path("llama-gguf-split")
+
  split_cmd: List[str] = [
- "./llama.cpp/build/bin/llama-gguf-split",
+ split_binary,
  "--split",
  ]
-
+
  if self.split_max_size:
  split_cmd.extend(["--split-max-size", self.split_max_size])
  else:
@@ -338,29 +735,42 @@ class ModelConverter:
  model_path_prefix = '.'.join(model_path.split('.')[:-1])
  split_cmd.extend([model_path, model_path_prefix])

+ if not os.path.isfile(model_path):
+ raise ConversionError(f"Model file not found: {model_path}")
+
+ if not os.path.isfile(split_binary):
+ raise ConversionError(f"llama-gguf-split binary not found at: {split_binary}")
+
  console.print(f"[bold green]Splitting model with command: {' '.join(split_cmd)}")
-
- result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
-
- if result.returncode != 0:
- raise ConversionError(f"Error splitting model: {result.stderr}")
-
+
+ try:
+ result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
+
+ if result.returncode != 0:
+ raise ConversionError(f"Error splitting model: {result.stderr}")
+ except FileNotFoundError:
+ raise ConversionError(f"Could not execute llama-gguf-split binary: {split_binary}")
+
  console.print("[green]Model split successfully!")
-
+
  # Get list of split files
- model_file_prefix = model_path_prefix.split('/')[-1]
- split_files = [f for f in os.listdir(outdir)
- if f.startswith(model_file_prefix) and f.endswith(".gguf")]
-
+ model_file_prefix = os.path.basename(model_path_prefix)
+ try:
+ split_files = [f for f in os.listdir(outdir)
+ if f.startswith(model_file_prefix) and f.endswith(".gguf")]
+ except OSError as e:
+ raise ConversionError(f"Error reading output directory: {e}")
+
  if not split_files:
- raise ConversionError("No split files found")
-
+ raise ConversionError(f"No split files found in {outdir} with prefix {model_file_prefix}")
+
+ console.print(f"[green]Found {len(split_files)} split files: {', '.join(split_files)}")
  return split_files

  def upload_split_files(self, split_files: List[str], outdir: str, repo_id: str) -> None:
  """Uploads split model files to Hugging Face."""
  api = HfApi(token=self.token)
-
+
  for file in split_files:
  file_path = os.path.join(outdir, file)
  console.print(f"[bold green]Uploading file: {file}")
@@ -370,7 +780,9 @@ class ModelConverter:
  path_in_repo=file,
  repo_id=repo_id,
  )
+ console.print(f"[green]✓ Successfully uploaded: {file}")
  except Exception as e:
+ console.print(f"[red]✗ Failed to upload {file}: {e}")
  raise ConversionError(f"Error uploading file {file}: {e}")

  def generate_readme(self, quantized_files: List[str]) -> str:
@@ -443,18 +855,46 @@ This repository is licensed under the same terms as the original model.
  """
  return readme

+ def create_repository(self, repo_id: str) -> None:
+ """Create a new repository on Hugging Face Hub if it doesn't exist."""
+ api = HfApi(token=self.token)
+ try:
+ # Check if repository already exists
+ try:
+ api.repo_info(repo_id=repo_id)
+ console.print(f"[green]✓ Repository {repo_id} already exists")
+ return
+ except Exception:
+ # Repository doesn't exist, create it
+ pass
+
+ console.print(f"[bold green]Creating new repository: {repo_id}")
+ api.create_repo(
+ repo_id=repo_id,
+ exist_ok=True,
+ private=False,
+ repo_type="model"
+ )
+ console.print(f"[green]✓ Successfully created repository: {repo_id}")
+ console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
+ except Exception as e:
+ console.print(f"[red]✗ Failed to create repository: {e}")
+ raise ConversionError(f"Error creating repository {repo_id}: {e}")
+
  def upload_readme(self, readme_content: str, repo_id: str) -> None:
  """Upload README.md to Hugging Face Hub."""
  api = HfApi(token=self.token)
+ console.print("[bold green]Uploading README.md with model documentation")
  try:
  api.upload_file(
  path_or_fileobj=readme_content.encode(),
  path_in_repo="README.md",
  repo_id=repo_id,
  )
- console.print("[green]README.md uploaded successfully!")
+ console.print("[green]✓ Successfully uploaded: README.md")
  except Exception as e:
- console.print(f"[yellow]Warning: Failed to upload README.md: {e}")
+ console.print(f"[red] Failed to upload README.md: {e}")
+ raise ConversionError(f"Error uploading README.md: {e}")

  def convert(self) -> None:
  """Performs the model conversion process."""
@@ -527,25 +967,74 @@ This repository is licensed under the same terms as the original model.

  # Convert to fp16
  console.print("[bold green]Converting to fp16...")
- result = subprocess.run([
- "python", "llama.cpp/convert_hf_to_gguf.py",
+
+ # Find the conversion script
+ conversion_scripts = [
+ "llama.cpp/convert_hf_to_gguf.py",
+ "llama.cpp/convert-hf-to-gguf.py",
+ "llama.cpp/convert.py"
+ ]
+
+ conversion_script = None
+ for script in conversion_scripts:
+ if os.path.isfile(script):
+ conversion_script = script
+ break
+
+ if not conversion_script:
+ raise ConversionError("Could not find HuggingFace to GGUF conversion script")
+
+ # Use the appropriate Python executable
+ python_cmd = "python" if platform.system() == "Windows" else "python3"
+
+ convert_cmd = [
+ python_cmd, conversion_script,
  str(local_dir),
  "--outtype", "f16",
  "--outfile", fp16
- ], capture_output=True, text=True)
-
- if result.returncode != 0:
- raise ConversionError(f"Error converting to fp16: {result.stderr}")
+ ]
+
+ console.print(f"[cyan]Conversion command: {' '.join(convert_cmd)}")
+
+ try:
+ result = subprocess.run(convert_cmd, capture_output=True, text=True)
+
+ if result.returncode != 0:
+ raise ConversionError(f"Error converting to fp16: {result.stderr}")
+ except FileNotFoundError as e:
+ raise ConversionError(f"Could not execute conversion script: {e}")
+
+ if not os.path.isfile(fp16):
+ raise ConversionError(f"Conversion completed but output file not found: {fp16}")
+
+ console.print("[green]Model converted to fp16 successfully!")

  # If fp16_only is True, we're done after fp16 conversion
  if self.fp16_only:
  quantized_files = [f"{self.model_name}.fp16.gguf"]
  if self.username and self.token:
- api.upload_file(
- path_or_fileobj=fp16,
- path_in_repo=f"{self.model_name}.fp16.gguf",
- repo_id=f"{self.username}/{self.model_name}-GGUF"
- )
+ repo_id = f"{self.username}/{self.model_name}-GGUF"
+
+ # Step 1: Create repository
+ self.create_repository(repo_id)
+
+ # Step 2: Upload README first
+ readme_content = self.generate_readme(quantized_files)
+ self.upload_readme(readme_content, repo_id)
+
+ # Step 3: Upload model GGUF file
+ file_name = f"{self.model_name}.fp16.gguf"
+ console.print(f"[bold green]Uploading model file: {file_name}")
+ try:
+ api.upload_file(
+ path_or_fileobj=fp16,
+ path_in_repo=file_name,
+ repo_id=repo_id
+ )
+ console.print(f"[green]✓ Successfully uploaded: {file_name}")
+ except Exception as e:
+ console.print(f"[red]✗ Failed to upload {file_name}: {e}")
+ raise ConversionError(f"Error uploading model file: {e}")
  return

  # Generate importance matrix if needed
@@ -558,56 +1047,96 @@ This repository is licensed under the same terms as the original model.
  # Quantize model
  console.print("[bold green]Quantizing model...")
  quantized_files: List[str] = []
+ quantize_binary = self.get_binary_path("llama-quantize")
+
+ if not os.path.isfile(quantize_binary):
+ raise ConversionError(f"llama-quantize binary not found at: {quantize_binary}")
+
  for method in self.quantization_methods:
  quantized_name = f"{self.model_name.lower()}-{method.lower()}"
  if self.use_imatrix:
  quantized_name += "-imat"
  quantized_path = str(Path(outdir)/f"{quantized_name}.gguf")
-
- if self.use_imatrix:
+
+ console.print(f"[cyan]Quantizing with method: {method}")
+
+ if self.use_imatrix and imatrix_path:
  quantize_cmd: List[str] = [
- "./llama.cpp/build/bin/llama-quantize",
- "--imatrix", imatrix_path,
+ quantize_binary,
+ "--imatrix", str(imatrix_path),
  fp16, quantized_path, method
  ]
  else:
  quantize_cmd = [
- "./llama.cpp/build/bin/llama-quantize",
+ quantize_binary,
  fp16, quantized_path, method
  ]
-
- result = subprocess.run(quantize_cmd, capture_output=True, text=True)
- if result.returncode != 0:
- raise ConversionError(f"Error quantizing with {method}: {result.stderr}")
-
+
+ console.print(f"[cyan]Quantization command: {' '.join(quantize_cmd)}")
+
+ try:
+ result = subprocess.run(quantize_cmd, capture_output=True, text=True)
+ if result.returncode != 0:
+ raise ConversionError(f"Error quantizing with {method}: {result.stderr}")
+ except FileNotFoundError:
+ raise ConversionError(f"Could not execute llama-quantize binary: {quantize_binary}")
+
+ if not os.path.isfile(quantized_path):
+ raise ConversionError(f"Quantization completed but output file not found: {quantized_path}")
+
  quantized_files.append(f"{quantized_name}.gguf")
+ console.print(f"[green]Successfully quantized with {method}: {quantized_name}.gguf")

- # Split model if requested
- if self.split_model:
- split_files = self.split_model(quantized_path, outdir)
- if self.username and self.token:
- self.upload_split_files(split_files, outdir, f"{self.username}/{self.model_name}-GGUF")
- else:
- # Upload single file if credentials provided
- if self.username and self.token:
- api.upload_file(
- path_or_fileobj=quantized_path,
- path_in_repo=f"{self.model_name.lower()}-{self.quantization_methods[0].lower()}.gguf",
- repo_id=f"{self.username}/{self.model_name}-GGUF"
- )
-
- # Upload imatrix if generated and credentials provided
- if imatrix_path and self.username and self.token:
- api.upload_file(
- path_or_fileobj=imatrix_path,
- path_in_repo="imatrix.dat",
- repo_id=f"{self.username}/{self.model_name}-GGUF"
- )
-
- # Generate and upload README if credentials provided
+ # Upload to Hugging Face if credentials provided
  if self.username and self.token:
+ repo_id = f"{self.username}/{self.model_name}-GGUF"
+
+ # Step 1: Create repository
+ console.print(f"[bold blue]Step 1: Creating repository {repo_id}")
+ self.create_repository(repo_id)
+
+ # Step 2: Generate and upload README first
+ console.print("[bold blue]Step 2: Uploading README.md")
  readme_content = self.generate_readme(quantized_files)
- self.upload_readme(readme_content, f"{self.username}/{self.model_name}-GGUF")
+ self.upload_readme(readme_content, repo_id)
+
+ # Step 3: Upload model GGUF files
+ console.print("[bold blue]Step 3: Uploading model files")
+ if self.split_model:
+ split_files = self.split_model(quantized_path, outdir)
+ self.upload_split_files(split_files, outdir, repo_id)
+ else:
+ # Upload single quantized file
+ file_name = f"{self.model_name.lower()}-{self.quantization_methods[0].lower()}.gguf"
+ console.print(f"[bold green]Uploading quantized model: {file_name}")
+ try:
+ api.upload_file(
+ path_or_fileobj=quantized_path,
+ path_in_repo=file_name,
+ repo_id=repo_id
+ )
+ console.print(f"[green]✓ Successfully uploaded: {file_name}")
+ except Exception as e:
+ console.print(f"[red]✗ Failed to upload {file_name}: {e}")
+ raise ConversionError(f"Error uploading quantized model: {e}")
+
+ # Step 4: Upload imatrix if generated (optional)
+ if imatrix_path:
+ console.print("[bold blue]Step 4: Uploading importance matrix")
+ console.print("[bold green]Uploading importance matrix: imatrix.dat")
+ try:
+ api.upload_file(
+ path_or_fileobj=imatrix_path,
+ path_in_repo="imatrix.dat",
+ repo_id=repo_id
+ )
+ console.print("[green]✓ Successfully uploaded: imatrix.dat")
+ except Exception as e:
+ console.print(f"[yellow]Warning: Failed to upload imatrix.dat: {e}")
+
+ # Final success message
+ console.print(f"[bold green]🎉 All files uploaded successfully to {repo_id}!")
+ console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")

  # Initialize CLI with HAI vibes
  app = CLI(