webscout 8.3.6-py3-none-any.whl → 2025.10.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout might be problematic; see the package's registry page for details.

Files changed (304)
  1. webscout/AIauto.py +250 -250
  2. webscout/AIbase.py +379 -379
  3. webscout/AIutel.py +60 -58
  4. webscout/Bard.py +1012 -1012
  5. webscout/Bing_search.py +417 -417
  6. webscout/DWEBS.py +529 -529
  7. webscout/Extra/Act.md +309 -309
  8. webscout/Extra/GitToolkit/__init__.py +10 -10
  9. webscout/Extra/GitToolkit/gitapi/README.md +110 -110
  10. webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
  11. webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
  12. webscout/Extra/GitToolkit/gitapi/user.py +96 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
  14. webscout/Extra/YTToolkit/README.md +375 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +956 -956
  16. webscout/Extra/YTToolkit/__init__.py +2 -2
  17. webscout/Extra/YTToolkit/transcriber.py +475 -475
  18. webscout/Extra/YTToolkit/ytapi/README.md +44 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
  20. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  21. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  22. webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
  23. webscout/Extra/YTToolkit/ytapi/https.py +88 -88
  24. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  25. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  26. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  27. webscout/Extra/YTToolkit/ytapi/query.py +39 -39
  28. webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
  29. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  30. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  31. webscout/Extra/autocoder/__init__.py +9 -9
  32. webscout/Extra/autocoder/autocoder.py +1105 -1105
  33. webscout/Extra/autocoder/autocoder_utiles.py +332 -332
  34. webscout/Extra/gguf.md +429 -429
  35. webscout/Extra/gguf.py +1213 -1213
  36. webscout/Extra/tempmail/README.md +487 -487
  37. webscout/Extra/tempmail/__init__.py +27 -27
  38. webscout/Extra/tempmail/async_utils.py +140 -140
  39. webscout/Extra/tempmail/base.py +160 -160
  40. webscout/Extra/tempmail/cli.py +186 -186
  41. webscout/Extra/tempmail/emailnator.py +84 -84
  42. webscout/Extra/tempmail/mail_tm.py +360 -360
  43. webscout/Extra/tempmail/temp_mail_io.py +291 -291
  44. webscout/Extra/weather.md +281 -281
  45. webscout/Extra/weather.py +193 -193
  46. webscout/Litlogger/README.md +10 -10
  47. webscout/Litlogger/__init__.py +15 -15
  48. webscout/Litlogger/formats.py +13 -13
  49. webscout/Litlogger/handlers.py +121 -121
  50. webscout/Litlogger/levels.py +13 -13
  51. webscout/Litlogger/logger.py +134 -134
  52. webscout/Provider/AISEARCH/Perplexity.py +332 -332
  53. webscout/Provider/AISEARCH/README.md +279 -279
  54. webscout/Provider/AISEARCH/__init__.py +33 -11
  55. webscout/Provider/AISEARCH/felo_search.py +206 -206
  56. webscout/Provider/AISEARCH/genspark_search.py +323 -323
  57. webscout/Provider/AISEARCH/hika_search.py +185 -185
  58. webscout/Provider/AISEARCH/iask_search.py +410 -410
  59. webscout/Provider/AISEARCH/monica_search.py +219 -219
  60. webscout/Provider/AISEARCH/scira_search.py +316 -314
  61. webscout/Provider/AISEARCH/stellar_search.py +177 -177
  62. webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
  63. webscout/Provider/Aitopia.py +314 -315
  64. webscout/Provider/Andi.py +3 -3
  65. webscout/Provider/Apriel.py +306 -0
  66. webscout/Provider/ChatGPTClone.py +236 -236
  67. webscout/Provider/ChatSandbox.py +343 -342
  68. webscout/Provider/Cloudflare.py +324 -324
  69. webscout/Provider/Cohere.py +208 -207
  70. webscout/Provider/Deepinfra.py +370 -369
  71. webscout/Provider/ExaAI.py +260 -260
  72. webscout/Provider/ExaChat.py +308 -387
  73. webscout/Provider/Flowith.py +221 -221
  74. webscout/Provider/GMI.py +293 -0
  75. webscout/Provider/Gemini.py +164 -162
  76. webscout/Provider/GeminiProxy.py +167 -166
  77. webscout/Provider/GithubChat.py +371 -370
  78. webscout/Provider/Groq.py +800 -800
  79. webscout/Provider/HeckAI.py +383 -379
  80. webscout/Provider/Jadve.py +282 -297
  81. webscout/Provider/K2Think.py +308 -0
  82. webscout/Provider/Koboldai.py +206 -384
  83. webscout/Provider/LambdaChat.py +423 -425
  84. webscout/Provider/Nemotron.py +244 -245
  85. webscout/Provider/Netwrck.py +248 -247
  86. webscout/Provider/OLLAMA.py +395 -394
  87. webscout/Provider/OPENAI/Cloudflare.py +394 -395
  88. webscout/Provider/OPENAI/FalconH1.py +452 -457
  89. webscout/Provider/OPENAI/FreeGemini.py +297 -299
  90. webscout/Provider/OPENAI/{monochat.py → K2Think.py} +432 -329
  91. webscout/Provider/OPENAI/NEMOTRON.py +241 -244
  92. webscout/Provider/OPENAI/PI.py +428 -427
  93. webscout/Provider/OPENAI/README.md +959 -959
  94. webscout/Provider/OPENAI/TogetherAI.py +345 -345
  95. webscout/Provider/OPENAI/TwoAI.py +466 -467
  96. webscout/Provider/OPENAI/__init__.py +33 -59
  97. webscout/Provider/OPENAI/ai4chat.py +313 -303
  98. webscout/Provider/OPENAI/base.py +249 -269
  99. webscout/Provider/OPENAI/chatglm.py +528 -0
  100. webscout/Provider/OPENAI/chatgpt.py +593 -588
  101. webscout/Provider/OPENAI/chatgptclone.py +521 -524
  102. webscout/Provider/OPENAI/chatsandbox.py +202 -177
  103. webscout/Provider/OPENAI/deepinfra.py +319 -315
  104. webscout/Provider/OPENAI/e2b.py +1665 -1665
  105. webscout/Provider/OPENAI/exaai.py +420 -420
  106. webscout/Provider/OPENAI/exachat.py +452 -452
  107. webscout/Provider/OPENAI/friendli.py +232 -232
  108. webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
  109. webscout/Provider/OPENAI/groq.py +364 -364
  110. webscout/Provider/OPENAI/heckai.py +314 -311
  111. webscout/Provider/OPENAI/llmchatco.py +337 -337
  112. webscout/Provider/OPENAI/netwrck.py +355 -354
  113. webscout/Provider/OPENAI/oivscode.py +290 -290
  114. webscout/Provider/OPENAI/opkfc.py +518 -518
  115. webscout/Provider/OPENAI/pydantic_imports.py +1 -1
  116. webscout/Provider/OPENAI/scirachat.py +535 -529
  117. webscout/Provider/OPENAI/sonus.py +308 -308
  118. webscout/Provider/OPENAI/standardinput.py +442 -442
  119. webscout/Provider/OPENAI/textpollinations.py +340 -348
  120. webscout/Provider/OPENAI/toolbaz.py +419 -413
  121. webscout/Provider/OPENAI/typefully.py +362 -362
  122. webscout/Provider/OPENAI/utils.py +295 -295
  123. webscout/Provider/OPENAI/venice.py +436 -436
  124. webscout/Provider/OPENAI/wisecat.py +387 -387
  125. webscout/Provider/OPENAI/writecream.py +166 -166
  126. webscout/Provider/OPENAI/x0gpt.py +378 -378
  127. webscout/Provider/OPENAI/yep.py +389 -389
  128. webscout/Provider/OpenGPT.py +230 -230
  129. webscout/Provider/Openai.py +244 -496
  130. webscout/Provider/PI.py +405 -404
  131. webscout/Provider/Perplexitylabs.py +430 -431
  132. webscout/Provider/QwenLM.py +272 -254
  133. webscout/Provider/STT/__init__.py +32 -2
  134. webscout/Provider/{Llama3.py → Sambanova.py} +257 -258
  135. webscout/Provider/StandardInput.py +309 -309
  136. webscout/Provider/TTI/README.md +82 -82
  137. webscout/Provider/TTI/__init__.py +33 -12
  138. webscout/Provider/TTI/aiarta.py +413 -413
  139. webscout/Provider/TTI/base.py +136 -136
  140. webscout/Provider/TTI/bing.py +243 -243
  141. webscout/Provider/TTI/gpt1image.py +149 -149
  142. webscout/Provider/TTI/imagen.py +196 -196
  143. webscout/Provider/TTI/infip.py +211 -211
  144. webscout/Provider/TTI/magicstudio.py +232 -232
  145. webscout/Provider/TTI/monochat.py +219 -219
  146. webscout/Provider/TTI/piclumen.py +214 -214
  147. webscout/Provider/TTI/pixelmuse.py +232 -232
  148. webscout/Provider/TTI/pollinations.py +232 -232
  149. webscout/Provider/TTI/together.py +288 -288
  150. webscout/Provider/TTI/utils.py +12 -12
  151. webscout/Provider/TTI/venice.py +367 -367
  152. webscout/Provider/TTS/README.md +192 -192
  153. webscout/Provider/TTS/__init__.py +33 -10
  154. webscout/Provider/TTS/parler.py +110 -110
  155. webscout/Provider/TTS/streamElements.py +333 -333
  156. webscout/Provider/TTS/utils.py +280 -280
  157. webscout/Provider/TeachAnything.py +237 -236
  158. webscout/Provider/TextPollinationsAI.py +311 -318
  159. webscout/Provider/TogetherAI.py +356 -357
  160. webscout/Provider/TwoAI.py +313 -569
  161. webscout/Provider/TypliAI.py +312 -311
  162. webscout/Provider/UNFINISHED/ChatHub.py +208 -208
  163. webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
  164. webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +294 -294
  165. webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +198 -198
  166. webscout/Provider/{Qodo.py → UNFINISHED/Qodo.py} +477 -477
  167. webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
  168. webscout/Provider/{XenAI.py → UNFINISHED/XenAI.py} +324 -324
  169. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  170. webscout/Provider/UNFINISHED/liner.py +334 -0
  171. webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
  172. webscout/Provider/UNFINISHED/puterjs.py +634 -634
  173. webscout/Provider/UNFINISHED/samurai.py +223 -223
  174. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  175. webscout/Provider/Venice.py +251 -250
  176. webscout/Provider/VercelAI.py +256 -255
  177. webscout/Provider/WiseCat.py +232 -231
  178. webscout/Provider/WrDoChat.py +367 -366
  179. webscout/Provider/__init__.py +33 -86
  180. webscout/Provider/ai4chat.py +174 -174
  181. webscout/Provider/akashgpt.py +331 -334
  182. webscout/Provider/cerebras.py +446 -340
  183. webscout/Provider/chatglm.py +394 -214
  184. webscout/Provider/cleeai.py +211 -212
  185. webscout/Provider/deepseek_assistant.py +1 -1
  186. webscout/Provider/elmo.py +282 -282
  187. webscout/Provider/geminiapi.py +208 -208
  188. webscout/Provider/granite.py +261 -261
  189. webscout/Provider/hermes.py +263 -265
  190. webscout/Provider/julius.py +223 -222
  191. webscout/Provider/learnfastai.py +309 -309
  192. webscout/Provider/llama3mitril.py +214 -214
  193. webscout/Provider/llmchat.py +243 -243
  194. webscout/Provider/llmchatco.py +290 -290
  195. webscout/Provider/meta.py +801 -801
  196. webscout/Provider/oivscode.py +309 -309
  197. webscout/Provider/scira_chat.py +384 -457
  198. webscout/Provider/searchchat.py +292 -291
  199. webscout/Provider/sonus.py +258 -258
  200. webscout/Provider/toolbaz.py +370 -364
  201. webscout/Provider/turboseek.py +274 -265
  202. webscout/Provider/typefully.py +208 -207
  203. webscout/Provider/x0gpt.py +1 -0
  204. webscout/Provider/yep.py +372 -371
  205. webscout/__init__.py +30 -31
  206. webscout/__main__.py +5 -5
  207. webscout/auth/api_key_manager.py +189 -189
  208. webscout/auth/config.py +175 -175
  209. webscout/auth/models.py +185 -185
  210. webscout/auth/routes.py +664 -664
  211. webscout/auth/simple_logger.py +236 -236
  212. webscout/cli.py +523 -523
  213. webscout/conversation.py +438 -438
  214. webscout/exceptions.py +361 -361
  215. webscout/litagent/Readme.md +298 -298
  216. webscout/litagent/__init__.py +28 -28
  217. webscout/litagent/agent.py +581 -581
  218. webscout/litagent/constants.py +59 -59
  219. webscout/litprinter/__init__.py +58 -58
  220. webscout/models.py +181 -181
  221. webscout/optimizers.py +419 -419
  222. webscout/prompt_manager.py +288 -288
  223. webscout/sanitize.py +1078 -1078
  224. webscout/scout/README.md +401 -401
  225. webscout/scout/__init__.py +8 -8
  226. webscout/scout/core/__init__.py +6 -6
  227. webscout/scout/core/crawler.py +297 -297
  228. webscout/scout/core/scout.py +706 -706
  229. webscout/scout/core/search_result.py +95 -95
  230. webscout/scout/core/text_analyzer.py +62 -62
  231. webscout/scout/core/text_utils.py +277 -277
  232. webscout/scout/core/web_analyzer.py +51 -51
  233. webscout/scout/element.py +599 -599
  234. webscout/scout/parsers/__init__.py +69 -69
  235. webscout/scout/parsers/html5lib_parser.py +172 -172
  236. webscout/scout/parsers/html_parser.py +236 -236
  237. webscout/scout/parsers/lxml_parser.py +178 -178
  238. webscout/scout/utils.py +37 -37
  239. webscout/swiftcli/Readme.md +323 -323
  240. webscout/swiftcli/__init__.py +95 -95
  241. webscout/swiftcli/core/__init__.py +7 -7
  242. webscout/swiftcli/core/cli.py +308 -308
  243. webscout/swiftcli/core/context.py +104 -104
  244. webscout/swiftcli/core/group.py +241 -241
  245. webscout/swiftcli/decorators/__init__.py +28 -28
  246. webscout/swiftcli/decorators/command.py +221 -221
  247. webscout/swiftcli/decorators/options.py +220 -220
  248. webscout/swiftcli/decorators/output.py +302 -302
  249. webscout/swiftcli/exceptions.py +21 -21
  250. webscout/swiftcli/plugins/__init__.py +9 -9
  251. webscout/swiftcli/plugins/base.py +135 -135
  252. webscout/swiftcli/plugins/manager.py +269 -269
  253. webscout/swiftcli/utils/__init__.py +59 -59
  254. webscout/swiftcli/utils/formatting.py +252 -252
  255. webscout/swiftcli/utils/parsing.py +267 -267
  256. webscout/update_checker.py +117 -117
  257. webscout/version.py +1 -1
  258. webscout/webscout_search.py +1183 -1183
  259. webscout/webscout_search_async.py +649 -649
  260. webscout/yep_search.py +346 -346
  261. webscout/zeroart/README.md +89 -89
  262. webscout/zeroart/__init__.py +134 -134
  263. webscout/zeroart/base.py +66 -66
  264. webscout/zeroart/effects.py +100 -100
  265. webscout/zeroart/fonts.py +1238 -1238
  266. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/METADATA +937 -936
  267. webscout-2025.10.11.dist-info/RECORD +300 -0
  268. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  269. webscout/Provider/AllenAI.py +0 -440
  270. webscout/Provider/Blackboxai.py +0 -793
  271. webscout/Provider/FreeGemini.py +0 -250
  272. webscout/Provider/GptOss.py +0 -207
  273. webscout/Provider/Hunyuan.py +0 -283
  274. webscout/Provider/Kimi.py +0 -445
  275. webscout/Provider/MCPCore.py +0 -322
  276. webscout/Provider/MiniMax.py +0 -207
  277. webscout/Provider/OPENAI/BLACKBOXAI.py +0 -1045
  278. webscout/Provider/OPENAI/MiniMax.py +0 -298
  279. webscout/Provider/OPENAI/Qwen3.py +0 -304
  280. webscout/Provider/OPENAI/autoproxy.py +0 -1067
  281. webscout/Provider/OPENAI/copilot.py +0 -321
  282. webscout/Provider/OPENAI/gptoss.py +0 -288
  283. webscout/Provider/OPENAI/kimi.py +0 -469
  284. webscout/Provider/OPENAI/mcpcore.py +0 -431
  285. webscout/Provider/OPENAI/multichat.py +0 -378
  286. webscout/Provider/OPENAI/qodo.py +0 -630
  287. webscout/Provider/OPENAI/xenai.py +0 -514
  288. webscout/Provider/Reka.py +0 -214
  289. webscout/Provider/UNFINISHED/fetch_together_models.py +0 -90
  290. webscout/Provider/asksteve.py +0 -220
  291. webscout/Provider/copilot.py +0 -441
  292. webscout/Provider/freeaichat.py +0 -294
  293. webscout/Provider/koala.py +0 -182
  294. webscout/Provider/lmarena.py +0 -198
  295. webscout/Provider/monochat.py +0 -275
  296. webscout/Provider/multichat.py +0 -375
  297. webscout/Provider/scnet.py +0 -244
  298. webscout/Provider/talkai.py +0 -194
  299. webscout/tempid.py +0 -128
  300. webscout-8.3.6.dist-info/RECORD +0 -327
  301. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/WHEEL +0 -0
  302. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/entry_points.txt +0 -0
  303. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/licenses/LICENSE.md +0 -0
  304. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/top_level.txt +0 -0
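To reproduce this kind of comparison locally, the sketch below downloads both wheels and diffs their text contents with Python's standard library. It assumes both versions are still downloadable from PyPI; the `wheel_diff` working directory, the `wheel_texts` helper, and the file-type filter are illustrative choices, not part of webscout or the registry tooling.

```python
# Sketch: download the two webscout wheels and print a unified diff of their contents.
import difflib
import subprocess
import zipfile
from pathlib import Path

OLD, NEW = "8.3.6", "2025.10.11"
work = Path("wheel_diff")
work.mkdir(exist_ok=True)

# Fetch each wheel without its dependencies.
for version in (OLD, NEW):
    subprocess.run(
        ["pip", "download", f"webscout=={version}", "--no-deps", "-d", str(work)],
        check=True,
    )

def wheel_texts(version: str) -> dict[str, list[str]]:
    """Map archive member names to their text lines for one wheel."""
    wheel = next(work.glob(f"webscout-{version}-*.whl"))
    out: dict[str, list[str]] = {}
    with zipfile.ZipFile(wheel) as zf:
        for name in zf.namelist():
            if name.endswith((".py", ".md", ".txt")):
                out[name] = zf.read(name).decode("utf-8", "replace").splitlines(keepends=True)
    return out

old_files, new_files = wheel_texts(OLD), wheel_texts(NEW)
for name in sorted(set(old_files) | set(new_files)):
    diff = difflib.unified_diff(
        old_files.get(name, []), new_files.get(name, []),
        fromfile=f"{OLD}/{name}", tofile=f"{NEW}/{name}",
    )
    print("".join(diff), end="")
```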
webscout/Extra/gguf.py CHANGED
@@ -1,1213 +1,1213 @@
- """
- Convert Hugging Face models to GGUF format with advanced features.
-
- 🔥 2025 UPDATE: ALL CMAKE BUILD ERRORS FIXED! 🔥
-
- This converter has been completely updated for 2025 compatibility with the latest llama.cpp:
-
- CRITICAL FIXES:
- - ✅ Updated all deprecated LLAMA_* flags to GGML_* (LLAMA_CUBLAS → GGML_CUDA)
- - ✅ Fixed CURL dependency error by adding -DLLAMA_CURL=OFF
- - ✅ Disabled optional dependencies (LLAMA_LLGUIDANCE=OFF)
- - ✅ Cross-platform hardware detection (Windows, macOS, Linux)
- - ✅ Robust CMake configuration with multiple fallback strategies
- - ✅ Priority-based acceleration selection (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS)
- - ✅ Enhanced error handling and recovery mechanisms
- - ✅ Platform-specific optimizations and build generators
- - ✅ Automatic build directory cleanup to avoid cached CMake conflicts
-
- SUPPORTED ACCELERATION:
- - CUDA: GGML_CUDA=ON (NVIDIA GPUs)
- - Metal: GGML_METAL=ON (Apple Silicon/macOS)
- - Vulkan: GGML_VULKAN=ON (Cross-platform GPU)
- - OpenCL: GGML_OPENCL=ON (Cross-platform GPU)
- - ROCm: GGML_HIPBLAS=ON (AMD GPUs)
- - BLAS: GGML_BLAS=ON (Optimized CPU libraries)
- - Accelerate: GGML_ACCELERATE=ON (Apple Accelerate framework)
-
- For detailed documentation, see: webscout/Extra/gguf.md
-
- USAGE EXAMPLES:
- >>> python -m webscout.Extra.gguf convert -m "OEvortex/HelpingAI-Lite-1.5T" -q "q4_k_m,q5_k_m"
- >>> # With upload options:
- >>> python -m webscout.Extra.gguf convert -m "your-model" -u "username" -t "token" -q "q4_k_m"
- >>> # With imatrix quantization:
- >>> python -m webscout.Extra.gguf convert -m "your-model" -i -q "iq4_nl" --train-data "train_data.txt"
- >>> # With model splitting:
- >>> python -m webscout.Extra.gguf convert -m "your-model" -s --split-max-tensors 256
- """
-
- import subprocess
- import os
- import sys
- import signal
- import tempfile
- import platform
- from pathlib import Path
- from typing import Optional, Dict, List, Any, Union, Literal, TypedDict, Set
-
- from huggingface_hub import HfApi
- from webscout.zeroart import figlet_format
- from rich.console import Console
- from rich.panel import Panel
- from rich.table import Table
- from webscout.swiftcli import CLI, option
-
- console = Console()
-
- class ConversionError(Exception):
-     """Custom exception for when things don't go as planned! ⚠️"""
-     pass
-
- class QuantizationMethod(TypedDict):
-     """Type definition for quantization method descriptions."""
-     description: str
-
- class ModelConverter:
-     """Handles the conversion of Hugging Face models to GGUF format."""
-
-     VALID_METHODS: Dict[str, str] = {
-         "fp16": "16-bit floating point - maximum accuracy, largest size",
-         "q2_k": "2-bit quantization (smallest size, lowest accuracy)",
-         "q3_k_l": "3-bit quantization (large) - balanced for size/accuracy",
-         "q3_k_m": "3-bit quantization (medium) - good balance for most use cases",
-         "q3_k_s": "3-bit quantization (small) - optimized for speed",
-         "q4_0": "4-bit quantization (version 0) - standard 4-bit compression",
-         "q4_1": "4-bit quantization (version 1) - improved accuracy over q4_0",
-         "q4_k_m": "4-bit quantization (medium) - balanced for most models",
-         "q4_k_s": "4-bit quantization (small) - optimized for speed",
-         "q5_0": "5-bit quantization (version 0) - high accuracy, larger size",
-         "q5_1": "5-bit quantization (version 1) - improved accuracy over q5_0",
-         "q5_k_m": "5-bit quantization (medium) - best balance for quality/size",
-         "q5_k_s": "5-bit quantization (small) - optimized for speed",
-         "q6_k": "6-bit quantization - highest accuracy, largest size",
-         "q8_0": "8-bit quantization - maximum accuracy, largest size"
-     }
-
-     VALID_IMATRIX_METHODS: Dict[str, str] = {
-         "iq3_m": "3-bit imatrix quantization (medium) - balanced importance-based",
-         "iq3_xxs": "3-bit imatrix quantization (extra extra small) - maximum compression",
-         "q4_k_m": "4-bit imatrix quantization (medium) - balanced importance-based",
-         "q4_k_s": "4-bit imatrix quantization (small) - optimized for speed",
-         "iq4_nl": "4-bit imatrix quantization (non-linear) - best accuracy for 4-bit",
-         "iq4_xs": "4-bit imatrix quantization (extra small) - maximum compression",
-         "q5_k_m": "5-bit imatrix quantization (medium) - balanced importance-based",
-         "q5_k_s": "5-bit imatrix quantization (small) - optimized for speed"
-     }
-
-     def __init__(
-         self,
-         model_id: str,
-         username: Optional[str] = None,
-         token: Optional[str] = None,
-         quantization_methods: str = "q4_k_m",
-         use_imatrix: bool = False,
-         train_data_file: Optional[str] = None,
-         split_model: bool = False,
-         split_max_tensors: int = 256,
-         split_max_size: Optional[str] = None
-     ) -> None:
-         self.model_id = model_id
-         self.username = username
-         self.token = token
-         self.quantization_methods = quantization_methods.split(',')
-         self.model_name = model_id.split('/')[-1]
-         self.workspace = Path(os.getcwd())
-         self.use_imatrix = use_imatrix
-         self.train_data_file = train_data_file
-         self.split_model = split_model
-         self.split_max_tensors = split_max_tensors
-         self.split_max_size = split_max_size
-         self.fp16_only = "fp16" in self.quantization_methods and len(self.quantization_methods) == 1
-
-     def validate_inputs(self) -> None:
-         """Validates all input parameters."""
-         if not '/' in self.model_id:
-             raise ValueError("Invalid model ID format. Expected format: 'organization/model-name'")
-
-         if self.use_imatrix:
-             invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_IMATRIX_METHODS]
-             if invalid_methods:
-                 raise ValueError(
-                     f"Invalid imatrix quantization methods: {', '.join(invalid_methods)}.\n"
-                     f"Valid methods are: {', '.join(self.VALID_IMATRIX_METHODS.keys())}"
-                 )
-             if not self.train_data_file and not os.path.exists("llama.cpp/groups_merged.txt"):
-                 raise ValueError("Training data file is required for imatrix quantization")
-         else:
-             invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_METHODS]
-             if invalid_methods:
-                 raise ValueError(
-                     f"Invalid quantization methods: {', '.join(invalid_methods)}.\n"
-                     f"Valid methods are: {', '.join(self.VALID_METHODS.keys())}"
-                 )
-
-         if bool(self.username) != bool(self.token):
-             raise ValueError("Both username and token must be provided for upload, or neither.")
-
-         if self.split_model and self.split_max_size:
-             try:
-                 size = int(self.split_max_size[:-1])
-                 unit = self.split_max_size[-1].upper()
-                 if unit not in ['M', 'G']:
-                     raise ValueError("Split max size must end with M or G")
-             except ValueError:
-                 raise ValueError("Invalid split max size format. Use format like '256M' or '5G'")
-
-     @staticmethod
-     def check_dependencies() -> Dict[str, bool]:
-         """Check if all required dependencies are installed with cross-platform support."""
-         system = platform.system()
-
-         dependencies: Dict[str, str] = {
-             'git': 'Git version control',
-             'cmake': 'CMake build system',
-             'ninja': 'Ninja build system (optional)'
-         }
-
-         # Add platform-specific dependencies
-         if system != 'Windows':
-             dependencies['pip3'] = 'Python package installer'
-         else:
-             dependencies['pip'] = 'Python package installer'
-
-         status: Dict[str, bool] = {}
-
-         for cmd, desc in dependencies.items():
-             try:
-                 if system == 'Windows':
-                     # Use 'where' command on Windows
-                     result = subprocess.run(['where', cmd], capture_output=True, text=True)
-                     status[cmd] = result.returncode == 0
-                 else:
-                     # Use 'which' command on Unix-like systems
-                     result = subprocess.run(['which', cmd], capture_output=True, text=True)
-                     status[cmd] = result.returncode == 0
-             except (FileNotFoundError, subprocess.SubprocessError):
-                 status[cmd] = False
-
-         # Special check for Python - try different variants
-         python_variants = ['python3', 'python', 'py'] if system != 'Windows' else ['python', 'py', 'python3']
-         status['python'] = False
-         for variant in python_variants:
-             try:
-                 if system == 'Windows':
-                     result = subprocess.run(['where', variant], capture_output=True)
-                 else:
-                     result = subprocess.run(['which', variant], capture_output=True)
-                 if result.returncode == 0:
-                     status['python'] = True
-                     break
-             except:
-                 continue
-
-         # Check for C++ compiler
-         cpp_compilers = ['cl', 'g++', 'clang++'] if system == 'Windows' else ['g++', 'clang++']
-         status['cpp_compiler'] = False
-         for compiler in cpp_compilers:
-             try:
-                 if system == 'Windows':
-                     result = subprocess.run(['where', compiler], capture_output=True)
-                 else:
-                     result = subprocess.run(['which', compiler], capture_output=True)
-                 if result.returncode == 0:
-                     status['cpp_compiler'] = True
-                     break
-             except:
-                 continue
-
-         dependencies['python'] = 'Python interpreter'
-         dependencies['cpp_compiler'] = 'C++ compiler (g++, clang++, or MSVC)'
-
-         return status
-
-     def detect_hardware(self) -> Dict[str, bool]:
-         """Detect available hardware acceleration with improved cross-platform support."""
-         hardware: Dict[str, bool] = {
-             'cuda': False,
-             'metal': False,
-             'opencl': False,
-             'vulkan': False,
-             'rocm': False,
-             'blas': False,
-             'accelerate': False
-         }
-
-         system = platform.system()
-
-         # Check CUDA
-         try:
-             # Check for nvcc compiler
-             if subprocess.run(['nvcc', '--version'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
-                 hardware['cuda'] = True
-             # Also check for nvidia-smi as fallback
-             elif subprocess.run(['nvidia-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
-                 hardware['cuda'] = True
-         except (FileNotFoundError, subprocess.SubprocessError):
-             # Check for CUDA libraries on Windows
-             if system == 'Windows':
-                 cuda_paths = [
-                     os.environ.get('CUDA_PATH'),
-                     'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA',
-                     'C:\\Program Files (x86)\\NVIDIA GPU Computing Toolkit\\CUDA'
-                 ]
-                 for cuda_path in cuda_paths:
-                     if cuda_path and os.path.exists(cuda_path):
-                         hardware['cuda'] = True
-                         break
-
-         # Check Metal (macOS)
-         if system == 'Darwin':
-             try:
-                 # Check for Xcode command line tools
-                 if subprocess.run(['xcrun', '--show-sdk-path'], capture_output=True).returncode == 0:
-                     hardware['metal'] = True
-                 # Check for Metal framework
-                 if os.path.exists('/System/Library/Frameworks/Metal.framework'):
-                     hardware['metal'] = True
-                 # macOS also supports Accelerate framework
-                 if os.path.exists('/System/Library/Frameworks/Accelerate.framework'):
-                     hardware['accelerate'] = True
-             except (FileNotFoundError, subprocess.SubprocessError):
-                 pass
-
-         # Check OpenCL
-         try:
-             if system == 'Windows':
-                 # Check for OpenCL on Windows
-                 opencl_paths = [
-                     'C:\\Windows\\System32\\OpenCL.dll',
-                     'C:\\Windows\\SysWOW64\\OpenCL.dll'
-                 ]
-                 if any(os.path.exists(path) for path in opencl_paths):
-                     hardware['opencl'] = True
-             else:
-                 if subprocess.run(['clinfo'], capture_output=True).returncode == 0:
-                     hardware['opencl'] = True
-         except (FileNotFoundError, subprocess.SubprocessError):
-             pass
-
-         # Check Vulkan
-         try:
-             if system == 'Windows':
-                 # Check for Vulkan on Windows
-                 vulkan_paths = [
-                     'C:\\Windows\\System32\\vulkan-1.dll',
-                     'C:\\Windows\\SysWOW64\\vulkan-1.dll'
-                 ]
-                 if any(os.path.exists(path) for path in vulkan_paths):
-                     hardware['vulkan'] = True
-             else:
-                 if subprocess.run(['vulkaninfo'], capture_output=True).returncode == 0:
-                     hardware['vulkan'] = True
-         except (FileNotFoundError, subprocess.SubprocessError):
-             pass
-
-         # Check ROCm (AMD)
-         try:
-             if subprocess.run(['rocm-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
-                 hardware['rocm'] = True
-             elif system == 'Linux':
-                 # Check for ROCm installation
-                 rocm_paths = ['/opt/rocm', '/usr/lib/x86_64-linux-gnu/librocm-smi64.so']
-                 if any(os.path.exists(path) for path in rocm_paths):
-                     hardware['rocm'] = True
-         except (FileNotFoundError, subprocess.SubprocessError):
-             pass
-
-         # Check for BLAS libraries
-         try:
-             import numpy as np
-             # Check if numpy is linked with optimized BLAS
-             config = np.__config__.show()
-             if any(lib in str(config).lower() for lib in ['openblas', 'mkl', 'atlas', 'blis']):
-                 hardware['blas'] = True
-         except (ImportError, AttributeError):
-             # Fallback: check for common BLAS libraries
-             if system == 'Linux':
-                 blas_libs = ['/usr/lib/x86_64-linux-gnu/libopenblas.so', '/usr/lib/x86_64-linux-gnu/libblas.so']
-                 if any(os.path.exists(lib) for lib in blas_libs):
-                     hardware['blas'] = True
-             elif system == 'Windows':
-                 # Check for Intel MKL or OpenBLAS on Windows
-                 mkl_paths = ['C:\\Program Files (x86)\\Intel\\oneAPI\\mkl']
-                 if any(os.path.exists(path) for path in mkl_paths):
-                     hardware['blas'] = True
-
-         return hardware
-
-     def setup_llama_cpp(self) -> None:
-         """Sets up and builds llama.cpp repository with robust error handling."""
-         llama_path = self.workspace / "llama.cpp"
-         system = platform.system()
-
-         with console.status("[bold green]Setting up llama.cpp...") as status:
-             # Clone llama.cpp if not exists
-             if not llama_path.exists():
-                 try:
-                     subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp'], check=True)
-                 except subprocess.CalledProcessError as e:
-                     raise ConversionError(f"Failed to clone llama.cpp repository: {e}")
-
-             original_cwd = os.getcwd()
-             try:
-                 os.chdir(llama_path)
-
-                 # Update to latest version
-                 try:
-                     subprocess.run(['git', 'pull'], capture_output=True, check=False)
-                 except subprocess.CalledProcessError:
-                     console.print("[yellow]Warning: Could not update llama.cpp repository")
-
-                 # Clean any existing build directory to avoid cached CMake variables
-                 build_dir = Path('build')
-                 if build_dir.exists():
-                     console.print("[yellow]Cleaning existing build directory to avoid CMake cache conflicts...")
-                     import shutil
-                     try:
-                         shutil.rmtree(build_dir)
-                         console.print("[green]Build directory cleaned successfully")
-                     except Exception as e:
-                         console.print(f"[yellow]Warning: Could not clean build directory: {e}")
-
-                 # Check if we're in a Nix environment
-                 is_nix = system == "Linux" and os.path.exists("/nix/store")
-
-                 if is_nix:
-                     console.print("[yellow]Detected Nix environment. Using system Python packages...")
-                     # In Nix, we need to use the system Python packages
-                     try:
-                         # Try to import required packages to check if they're available
-                         import torch # type: ignore
-                         import numpy # type: ignore
-                         import sentencepiece # type: ignore
-                         import transformers # type: ignore
-                         console.print("[green]Required Python packages are already installed.")
-                     except ImportError as e:
-                         console.print("[red]Missing required Python packages in Nix environment.")
-                         console.print("[yellow]Please install them using:")
-                         console.print("nix-shell -p python3Packages.torch python3Packages.numpy python3Packages.sentencepiece python3Packages.transformers")
-                         raise ConversionError("Missing required Python packages in Nix environment")
-                 else:
-                     # In non-Nix environments, install requirements if they exist
-                     if os.path.exists('requirements.txt'):
-                         try:
-                             pip_cmd = 'pip' if system == 'Windows' else 'pip3'
-                             subprocess.run([pip_cmd, 'install', '-r', 'requirements.txt'], check=True)
-                         except subprocess.CalledProcessError as e:
-                             if "externally-managed-environment" in str(e):
-                                 console.print("[yellow]Detected externally managed Python environment.")
-                                 console.print("[yellow]Please install the required packages manually:")
-                                 console.print("pip install torch numpy sentencepiece transformers")
-                                 raise ConversionError("Failed to install requirements in externally managed environment")
-                             else:
-                                 console.print(f"[yellow]Warning: Failed to install requirements: {e}")
-
-                 # Detect available hardware
-                 hardware = self.detect_hardware()
-                 console.print("[bold green]Detected hardware acceleration:")
-                 for hw, available in hardware.items():
-                     console.print(f" {'✓' if available else '✗'} {hw.upper()}")
-
-                 # Clear any environment variables that might cause conflicts
-                 env_vars_to_clear = [
-                     'LLAMA_CUBLAS', 'LLAMA_CLBLAST', 'LLAMA_HIPBLAS',
-                     'LLAMA_METAL', 'LLAMA_ACCELERATE', 'LLAMA_OPENBLAS'
-                 ]
-                 for var in env_vars_to_clear:
-                     if var in os.environ:
-                         console.print(f"[yellow]Clearing conflicting environment variable: {var}")
-                         del os.environ[var]
-
-                 # Configure CMake build with robust options
-                 cmake_args: List[str] = ['cmake', '-B', 'build']
-
-                 # Add basic CMake options with correct LLAMA prefixes
-                 cmake_args.extend([
-                     '-DCMAKE_BUILD_TYPE=Release',
-                     '-DLLAMA_BUILD_TESTS=OFF',
-                     '-DLLAMA_BUILD_EXAMPLES=ON',
-                     '-DLLAMA_BUILD_SERVER=OFF',
-                     # Disable optional dependencies that might cause issues
-                     '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
-                     '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
-                     # Explicitly disable deprecated flags to avoid conflicts
-                     '-DLLAMA_CUBLAS=OFF',
-                     '-DLLAMA_CLBLAST=OFF',
-                     '-DLLAMA_HIPBLAS=OFF'
-                 ])
-
-                 # Add hardware acceleration options with latest 2025 llama.cpp GGML flags
-                 # Use priority order: CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate
-                 acceleration_enabled = False
-
-                 if hardware['cuda']:
-                     # Latest 2025 GGML CUDA flags (LLAMA_CUBLAS is deprecated)
-                     cmake_args.extend(['-DGGML_CUDA=ON'])
-                     console.print("[green]Enabling CUDA acceleration (GGML_CUDA=ON)")
-                     acceleration_enabled = True
-                 elif hardware['metal']:
-                     # Latest 2025 GGML Metal flags for macOS
-                     cmake_args.extend(['-DGGML_METAL=ON'])
-                     console.print("[green]Enabling Metal acceleration (GGML_METAL=ON)")
-                     acceleration_enabled = True
-                 elif hardware['vulkan']:
-                     # Latest 2025 GGML Vulkan flags
-                     cmake_args.extend(['-DGGML_VULKAN=ON'])
-                     console.print("[green]Enabling Vulkan acceleration (GGML_VULKAN=ON)")
-                     acceleration_enabled = True
-                 elif hardware['opencl']:
-                     # Latest 2025 GGML OpenCL flags (LLAMA_CLBLAST is deprecated)
-                     cmake_args.extend(['-DGGML_OPENCL=ON'])
-                     console.print("[green]Enabling OpenCL acceleration (GGML_OPENCL=ON)")
-                     acceleration_enabled = True
-                 elif hardware['rocm']:
-                     # Latest 2025 GGML ROCm/HIP flags
-                     cmake_args.extend(['-DGGML_HIPBLAS=ON'])
-                     console.print("[green]Enabling ROCm acceleration (GGML_HIPBLAS=ON)")
-                     acceleration_enabled = True
-                 elif hardware['blas']:
-                     # Latest 2025 GGML BLAS flags with vendor detection
-                     cmake_args.extend(['-DGGML_BLAS=ON'])
-                     # Try to detect BLAS vendor for optimal performance
-                     if system == 'Darwin':
-                         cmake_args.extend(['-DGGML_BLAS_VENDOR=Accelerate'])
-                     elif 'mkl' in str(hardware).lower():
-                         cmake_args.extend(['-DGGML_BLAS_VENDOR=Intel10_64lp'])
-                     else:
-                         cmake_args.extend(['-DGGML_BLAS_VENDOR=OpenBLAS'])
-                     console.print("[green]Enabling BLAS acceleration (GGML_BLAS=ON)")
-                     acceleration_enabled = True
-                 elif hardware['accelerate']:
-                     # Latest 2025 GGML Accelerate framework flags for macOS
-                     cmake_args.extend(['-DGGML_ACCELERATE=ON'])
-                     console.print("[green]Enabling Accelerate framework (GGML_ACCELERATE=ON)")
-                     acceleration_enabled = True
-
-                 if not acceleration_enabled:
-                     console.print("[yellow]No hardware acceleration available, using CPU only")
-                     console.print("[cyan]Note: All deprecated LLAMA_* flags have been updated to GGML_* for 2025 compatibility")
-
-                 # Platform-specific optimizations
-                 if system == 'Windows':
-                     # Use Visual Studio generator on Windows if available
-                     try:
-                         vs_result = subprocess.run(['where', 'msbuild'], capture_output=True)
-                         if vs_result.returncode == 0:
-                             cmake_args.extend(['-G', 'Visual Studio 17 2022'])
-                         else:
-                             cmake_args.extend(['-G', 'MinGW Makefiles'])
-                     except:
-                         cmake_args.extend(['-G', 'MinGW Makefiles'])
-                 else:
-                     # Use Ninja if available on Unix systems
-                     try:
-                         ninja_cmd = 'ninja' if system != 'Windows' else 'ninja.exe'
-                         if subprocess.run(['which', ninja_cmd], capture_output=True).returncode == 0:
-                             cmake_args.extend(['-G', 'Ninja'])
-                     except:
-                         pass # Fall back to default generator
-
-                 # Configure the build with error handling and multiple fallback strategies
-                 status.update("[bold green]Configuring CMake build...")
-                 config_success = False
-
-                 # Try main configuration
-                 try:
-                     console.print(f"[cyan]CMake command: {' '.join(cmake_args)}")
-                     result = subprocess.run(cmake_args, capture_output=True, text=True)
-                     if result.returncode == 0:
-                         config_success = True
-                         console.print("[green]CMake configuration successful!")
-                     else:
-                         console.print(f"[red]CMake configuration failed: {result.stderr}")
-                 except subprocess.CalledProcessError as e:
-                     console.print(f"[red]CMake execution failed: {e}")
-
-                 # Try fallback without hardware acceleration if main config failed
-                 if not config_success:
-                     console.print("[yellow]Attempting fallback configuration without hardware acceleration...")
-                     console.print("[cyan]Using 2025-compatible LLAMA build flags...")
-                     fallback_args = [
-                         'cmake', '-B', 'build',
-                         '-DCMAKE_BUILD_TYPE=Release',
-                         '-DLLAMA_BUILD_TESTS=OFF',
-                         '-DLLAMA_BUILD_EXAMPLES=ON',
-                         '-DLLAMA_BUILD_SERVER=OFF',
-                         # Disable optional dependencies that might cause issues
-                         '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
-                         '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
-                         # Explicitly disable all deprecated flags
-                         '-DLLAMA_CUBLAS=OFF',
-                         '-DLLAMA_CLBLAST=OFF',
-                         '-DLLAMA_HIPBLAS=OFF',
-                         '-DLLAMA_METAL=OFF',
-                         # Enable CPU optimizations
-                         '-DGGML_NATIVE=OFF', # Disable native optimizations for compatibility
-                         '-DGGML_AVX=ON', # Enable AVX if available
-                         '-DGGML_AVX2=ON', # Enable AVX2 if available
-                         '-DGGML_FMA=ON' # Enable FMA if available
-                     ]
-                     try:
-                         console.print(f"[cyan]Fallback CMake command: {' '.join(fallback_args)}")
-                         result = subprocess.run(fallback_args, capture_output=True, text=True)
-                         if result.returncode == 0:
-                             config_success = True
-                             console.print("[green]Fallback CMake configuration successful!")
-                         else:
-                             console.print(f"[red]Fallback CMake configuration failed: {result.stderr}")
-                     except subprocess.CalledProcessError as e:
-                         console.print(f"[red]Fallback CMake execution failed: {e}")
-
-                 # Try minimal configuration as last resort
-                 if not config_success:
-                     console.print("[yellow]Attempting minimal configuration...")
-                     minimal_args = [
-                         'cmake', '-B', 'build',
-                         # Disable optional dependencies that might cause issues
-                         '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
-                         '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
-                         '-DLLAMA_BUILD_SERVER=OFF', # Disable server (not needed for conversion)
-                         '-DLLAMA_BUILD_TESTS=OFF', # Disable tests (not needed for conversion)
-                         # Explicitly disable ALL deprecated flags to avoid conflicts
-                         '-DLLAMA_CUBLAS=OFF',
-                         '-DLLAMA_CLBLAST=OFF',
-                         '-DLLAMA_HIPBLAS=OFF',
-                         '-DLLAMA_METAL=OFF',
-                         '-DLLAMA_ACCELERATE=OFF'
-                     ]
-                     try:
-                         console.print(f"[cyan]Minimal CMake command: {' '.join(minimal_args)}")
-                         result = subprocess.run(minimal_args, capture_output=True, text=True)
-                         if result.returncode == 0:
-                             config_success = True
-                             console.print("[green]Minimal CMake configuration successful!")
-                         else:
-                             console.print(f"[red]Minimal CMake configuration failed: {result.stderr}")
-                             raise ConversionError(f"All CMake configuration attempts failed. Last error: {result.stderr}")
-                     except subprocess.CalledProcessError as e:
-                         raise ConversionError(f"All CMake configuration attempts failed: {e}")
-
-                 if not config_success:
-                     raise ConversionError("CMake configuration failed with all attempted strategies")
-
-                 # Build the project
-                 status.update("[bold green]Building llama.cpp...")
-                 build_cmd = ['cmake', '--build', 'build', '--config', 'Release']
-
-                 # Add parallel build option
-                 cpu_count = os.cpu_count() or 1
-                 if system == 'Windows':
-                     build_cmd.extend(['--parallel', str(cpu_count)])
-                 else:
-                     build_cmd.extend(['-j', str(cpu_count)])
-
-                 try:
-                     result = subprocess.run(build_cmd, capture_output=True, text=True)
-                     if result.returncode != 0:
-                         console.print(f"[red]Build failed: {result.stderr}")
-                         # Try single-threaded build as fallback
-                         console.print("[yellow]Attempting single-threaded build...")
-                         fallback_build = ['cmake', '--build', 'build', '--config', 'Release']
-                         result = subprocess.run(fallback_build, capture_output=True, text=True)
-                         if result.returncode != 0:
-                             raise ConversionError(f"Build failed: {result.stderr}")
-                 except subprocess.CalledProcessError as e:
-                     raise ConversionError(f"Build failed: {e}")
-
-                 console.print("[green]llama.cpp built successfully!")
-
-             finally:
-                 os.chdir(original_cwd)
-
-     def display_config(self) -> None:
-         """Displays the current configuration in a formatted table."""
-         table = Table(title="Configuration", show_header=True, header_style="bold magenta")
-         table.add_column("Setting", style="cyan")
-         table.add_column("Value", style="green")
-
-         table.add_row("Model ID", self.model_id)
-         table.add_row("Model Name", self.model_name)
-         table.add_row("Username", self.username or "Not provided")
-         table.add_row("Token", "****" if self.token else "Not provided")
-         table.add_row("Quantization Methods", "\n".join(
-             f"{method} ({self.VALID_METHODS[method]})"
-             for method in self.quantization_methods
-         ))
-
-         console.print(Panel(table))
-
-     def get_binary_path(self, binary_name: str) -> str:
-         """Get the correct path to llama.cpp binaries based on platform."""
-         system = platform.system()
-
-         # Possible binary locations
-         possible_paths = [
-             f"./llama.cpp/build/bin/{binary_name}", # Standard build location
-             f"./llama.cpp/build/{binary_name}", # Alternative build location
-             f"./llama.cpp/{binary_name}", # Root directory
-             f"./llama.cpp/build/Release/{binary_name}", # Windows Release build
-             f"./llama.cpp/build/Debug/{binary_name}", # Windows Debug build
-         ]
-
-         # Add .exe extension on Windows
-         if system == 'Windows':
-             possible_paths = [path + '.exe' for path in possible_paths]
-
-         # Find the first existing binary
-         for path in possible_paths:
-             if os.path.isfile(path):
-                 return path
-
-         # If not found, return the most likely path and let it fail with a clear error
-         default_path = f"./llama.cpp/build/bin/{binary_name}"
-         if system == 'Windows':
-             default_path += '.exe'
-         return default_path
-
-     def generate_importance_matrix(self, model_path: str, train_data_path: str, output_path: str) -> None:
-         """Generates importance matrix for quantization with improved error handling."""
-         imatrix_binary = self.get_binary_path("llama-imatrix")
-
-         imatrix_command: List[str] = [
-             imatrix_binary,
-             "-m", model_path,
-             "-f", train_data_path,
-             "-ngl", "99",
-             "--output-frequency", "10",
-             "-o", output_path,
-         ]
-
-         if not os.path.isfile(model_path):
-             raise ConversionError(f"Model file not found: {model_path}")
-
-         if not os.path.isfile(train_data_path):
-             raise ConversionError(f"Training data file not found: {train_data_path}")
-
-         if not os.path.isfile(imatrix_binary):
-             raise ConversionError(f"llama-imatrix binary not found at: {imatrix_binary}")
-
-         console.print("[bold green]Generating importance matrix...")
-         console.print(f"[cyan]Command: {' '.join(imatrix_command)}")
-
-         try:
-             process = subprocess.Popen(
-                 imatrix_command,
-                 shell=False,
-                 stdout=subprocess.PIPE,
-                 stderr=subprocess.PIPE,
-                 text=True
-             )
-
-             try:
-                 stdout, stderr = process.communicate(timeout=300) # 5 minute timeout
-                 if process.returncode != 0:
-                     raise ConversionError(f"Failed to generate importance matrix: {stderr}")
-             except subprocess.TimeoutExpired:
-                 console.print("[yellow]Imatrix computation timed out. Sending SIGINT...")
-                 process.send_signal(signal.SIGINT)
-                 try:
-                     stdout, stderr = process.communicate(timeout=10)
-                 except subprocess.TimeoutExpired:
-                     console.print("[red]Imatrix process still running. Force terminating...")
-                     process.kill()
-                     stdout, stderr = process.communicate()
-                 raise ConversionError(f"Imatrix generation timed out: {stderr}")
-         except FileNotFoundError:
-             raise ConversionError(f"Could not execute llama-imatrix binary: {imatrix_binary}")
-
-         console.print("[green]Importance matrix generation completed.")
-
-     def split_model(self, model_path: str, outdir: str) -> List[str]:
-         """Splits the model into smaller chunks with improved error handling."""
-         split_binary = self.get_binary_path("llama-gguf-split")
-
-         split_cmd: List[str] = [
-             split_binary,
-             "--split",
-         ]
-
-         if self.split_max_size:
-             split_cmd.extend(["--split-max-size", self.split_max_size])
-         else:
-             split_cmd.extend(["--split-max-tensors", str(self.split_max_tensors)])
-
-         model_path_prefix = '.'.join(model_path.split('.')[:-1])
-         split_cmd.extend([model_path, model_path_prefix])
-
-         if not os.path.isfile(model_path):
-             raise ConversionError(f"Model file not found: {model_path}")
-
-         if not os.path.isfile(split_binary):
-             raise ConversionError(f"llama-gguf-split binary not found at: {split_binary}")
-
-         console.print(f"[bold green]Splitting model with command: {' '.join(split_cmd)}")
-
-         try:
-             result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
-
-             if result.returncode != 0:
-                 raise ConversionError(f"Error splitting model: {result.stderr}")
-         except FileNotFoundError:
-             raise ConversionError(f"Could not execute llama-gguf-split binary: {split_binary}")
-
-         console.print("[green]Model split successfully!")
-
-         # Get list of split files
-         model_file_prefix = os.path.basename(model_path_prefix)
-         try:
-             split_files = [f for f in os.listdir(outdir)
-                            if f.startswith(model_file_prefix) and f.endswith(".gguf")]
-         except OSError as e:
-             raise ConversionError(f"Error reading output directory: {e}")
-
-         if not split_files:
-             raise ConversionError(f"No split files found in {outdir} with prefix {model_file_prefix}")
-
-         console.print(f"[green]Found {len(split_files)} split files: {', '.join(split_files)}")
-         return split_files
-
-     def upload_split_files(self, split_files: List[str], outdir: str, repo_id: str) -> None:
-         """Uploads split model files to Hugging Face."""
-         api = HfApi(token=self.token)
-
-         for file in split_files:
-             file_path = os.path.join(outdir, file)
-             console.print(f"[bold green]Uploading file: {file}")
-             try:
-                 api.upload_file(
-                     path_or_fileobj=file_path,
-                     path_in_repo=file,
-                     repo_id=repo_id,
-                 )
-                 console.print(f"[green]✓ Successfully uploaded: {file}")
-             except Exception as e:
-                 console.print(f"[red]✗ Failed to upload {file}: {e}")
-                 raise ConversionError(f"Error uploading file {file}: {e}")
-
-     def generate_readme(self, quantized_files: List[str]) -> str:
-         """Generate a README.md file for the Hugging Face Hub."""
-         readme = f"""# {self.model_name} GGUF
-
- This repository contains GGUF quantized versions of [{self.model_id}](https://huggingface.co/{self.model_id}).
-
- ## About
-
- This model was converted using [Webscout](https://github.com/Webscout/webscout).
-
- ## Quantization Methods
-
- The following quantization methods were used:
-
- """
-         # Add quantization method descriptions
-         for method in self.quantization_methods:
-             if self.use_imatrix:
-                 readme += f"- `{method}`: {self.VALID_IMATRIX_METHODS[method]}\n"
-             else:
-                 readme += f"- `{method}`: {self.VALID_METHODS[method]}\n"
-
-         readme += """
- ## Available Files
-
- The following quantized files are available:
-
- """
-         # Add file information
-         for file in quantized_files:
-             readme += f"- `{file}`\n"
-
-         if self.use_imatrix:
-             readme += """
- ## Importance Matrix
-
- This model was quantized using importance matrix quantization. The `imatrix.dat` file contains the importance matrix used for quantization.
-
- """
-
-         readme += """
- ## Usage
-
- These GGUF files can be used with [llama.cpp](https://github.com/ggerganov/llama.cpp) and compatible tools.
-
- Example usage:
- ```bash
- ./main -m model.gguf -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
- ```
-
- ## Conversion Process
-
- This model was converted using the following command:
- ```bash
- python -m webscout.Extra.gguf convert \\
- -m "{self.model_id}" \\
- -q "{','.join(self.quantization_methods)}" \\
- {f'-i' if self.use_imatrix else ''} \\
- {f'--train-data "{self.train_data_file}"' if self.train_data_file else ''} \\
- {f'-s' if self.split_model else ''} \\
- {f'--split-max-tensors {self.split_max_tensors}' if self.split_model else ''} \\
- {f'--split-max-size {self.split_max_size}' if self.split_max_size else ''}
- ```
-
- ## License
-
- This repository is licensed under the same terms as the original model.
- """
-         return readme
-
-     def create_repository(self, repo_id: str) -> None:
-         """Create a new repository on Hugging Face Hub if it doesn't exist."""
-         api = HfApi(token=self.token)
-         try:
-             # Check if repository already exists
-             try:
-                 api.repo_info(repo_id=repo_id)
-                 console.print(f"[green]✓ Repository {repo_id} already exists")
-                 return
-             except Exception:
-                 # Repository doesn't exist, create it
-                 pass
-
-             console.print(f"[bold green]Creating new repository: {repo_id}")
-             api.create_repo(
-                 repo_id=repo_id,
-                 exist_ok=True,
-                 private=False,
-                 repo_type="model"
-             )
-             console.print(f"[green]✓ Successfully created repository: {repo_id}")
-             console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
-         except Exception as e:
-             console.print(f"[red]✗ Failed to create repository: {e}")
-             raise ConversionError(f"Error creating repository {repo_id}: {e}")
-
-     def upload_readme(self, readme_content: str, repo_id: str) -> None:
-         """Upload README.md to Hugging Face Hub."""
-         api = HfApi(token=self.token)
-         console.print("[bold green]Uploading README.md with model documentation")
-         try:
-             api.upload_file(
-                 path_or_fileobj=readme_content.encode(),
-                 path_in_repo="README.md",
-                 repo_id=repo_id,
-             )
-             console.print("[green]✓ Successfully uploaded: README.md")
-         except Exception as e:
-             console.print(f"[red]✗ Failed to upload README.md: {e}")
-             raise ConversionError(f"Error uploading README.md: {e}")
-
-     def convert(self) -> None:
-         """Performs the model conversion process."""
-         try:
-             # Display banner and configuration
-             console.print(f"[bold green]{figlet_format('GGUF Converter')}")
-             self.display_config()
-
-             # Validate inputs
-             self.validate_inputs()
-
-             # Check dependencies
-             deps = self.check_dependencies()
-             missing = [name for name, installed in deps.items() if not installed and name != 'ninja']
-             if missing:
-                 raise ConversionError(f"Missing required dependencies: {', '.join(missing)}")
-
-             # Setup llama.cpp
-             self.setup_llama_cpp()
-
-             # Determine if we need temporary directories (only for uploads)
-             needs_temp = bool(self.username and self.token)
-
-             if needs_temp:
-                 # Use temporary directories for upload case
-                 with tempfile.TemporaryDirectory() as outdir:
-                     with tempfile.TemporaryDirectory() as tmpdir:
-                         self._convert_with_dirs(tmpdir, outdir)
-             else:
-                 # Use current directory for local output
-                 outdir = os.getcwd()
-                 tmpdir = os.path.join(outdir, "temp_download")
-                 os.makedirs(tmpdir, exist_ok=True)
-                 try:
-                     self._convert_with_dirs(tmpdir, outdir)
-                 finally:
-                     # Clean up temporary download directory
-                     import shutil
-                     shutil.rmtree(tmpdir, ignore_errors=True)
-
-             # Display success message
-             console.print(Panel.fit(
-                 "[bold green]✓[/] Conversion completed successfully!\n\n"
-                 f"[cyan]Output files can be found in: {self.workspace / self.model_name}[/]",
-                 title="Success",
-                 border_style="green"
-             ))
-
-         except Exception as e:
-             console.print(Panel.fit(
-                 f"[bold red]✗[/] {str(e)}",
-                 title="Error",
-                 border_style="red"
-             ))
-             raise
-
-     def _convert_with_dirs(self, tmpdir: str, outdir: str) -> None:
-         """Helper method to perform conversion with given directories."""
-         fp16 = str(Path(outdir)/f"{self.model_name}.fp16.gguf")
-
-         # Download model
-         local_dir = Path(tmpdir)/self.model_name
-         console.print("[bold green]Downloading model...")
-         api = HfApi(token=self.token)
-         api.snapshot_download(
-             repo_id=self.model_id,
-             local_dir=local_dir,
-             local_dir_use_symlinks=False
-         )
-
-         # Convert to fp16
-         console.print("[bold green]Converting to fp16...")
-
-         # Find the conversion script
-         conversion_scripts = [
-             "llama.cpp/convert_hf_to_gguf.py",
-             "llama.cpp/convert-hf-to-gguf.py",
-             "llama.cpp/convert.py"
-         ]
-
-         conversion_script = None
-         for script in conversion_scripts:
-             if os.path.isfile(script):
-                 conversion_script = script
-                 break
-
-         if not conversion_script:
-             raise ConversionError("Could not find HuggingFace to GGUF conversion script")
-
-         # Use the appropriate Python executable
-         python_cmd = "python" if platform.system() == "Windows" else "python3"
-
-         convert_cmd = [
-             python_cmd, conversion_script,
-             str(local_dir),
-             "--outtype", "f16",
-             "--outfile", fp16
-         ]
-
-         console.print(f"[cyan]Conversion command: {' '.join(convert_cmd)}")
-
-         try:
-             result = subprocess.run(convert_cmd, capture_output=True, text=True)
-
-             if result.returncode != 0:
-                 raise ConversionError(f"Error converting to fp16: {result.stderr}")
-         except FileNotFoundError as e:
-             raise ConversionError(f"Could not execute conversion script: {e}")
-
-         if not os.path.isfile(fp16):
-             raise ConversionError(f"Conversion completed but output file not found: {fp16}")
-
-         console.print("[green]Model converted to fp16 successfully!")
-
-         # If fp16_only is True, we're done after fp16 conversion
-         if self.fp16_only:
-             quantized_files = [f"{self.model_name}.fp16.gguf"]
-             if self.username and self.token:
-                 repo_id = f"{self.username}/{self.model_name}-GGUF"
-
-                 # Step 1: Create repository
-                 self.create_repository(repo_id)
-
-                 # Step 2: Upload README first
-                 readme_content = self.generate_readme(quantized_files)
-                 self.upload_readme(readme_content, repo_id)
-
-                 # Step 3: Upload model GGUF file
-                 file_name = f"{self.model_name}.fp16.gguf"
-                 console.print(f"[bold green]Uploading model file: {file_name}")
-                 try:
-                     api.upload_file(
-                         path_or_fileobj=fp16,
-                         path_in_repo=file_name,
-                         repo_id=repo_id
-                     )
-                     console.print(f"[green]✓ Successfully uploaded: {file_name}")
-                 except Exception as e:
-                     console.print(f"[red]✗ Failed to upload {file_name}: {e}")
-                     raise ConversionError(f"Error uploading model file: {e}")
-             return
-
- # Generate importance matrix if needed
1041
- imatrix_path: Optional[str] = None
1042
- if self.use_imatrix:
1043
- train_data_path = self.train_data_file if self.train_data_file else "llama.cpp/groups_merged.txt"
1044
- imatrix_path = str(Path(outdir)/"imatrix.dat")
1045
- self.generate_importance_matrix(fp16, train_data_path, imatrix_path)
1046
-
1047
- # Quantize model
1048
- console.print("[bold green]Quantizing model...")
1049
- quantized_files: List[str] = []
1050
- quantize_binary = self.get_binary_path("llama-quantize")
1051
-
1052
- if not os.path.isfile(quantize_binary):
1053
- raise ConversionError(f"llama-quantize binary not found at: {quantize_binary}")
1054
-
1055
- for method in self.quantization_methods:
1056
- quantized_name = f"{self.model_name.lower()}-{method.lower()}"
1057
- if self.use_imatrix:
1058
- quantized_name += "-imat"
1059
- quantized_path = str(Path(outdir)/f"{quantized_name}.gguf")
1060
-
1061
- console.print(f"[cyan]Quantizing with method: {method}")
1062
-
1063
- if self.use_imatrix and imatrix_path:
1064
- quantize_cmd: List[str] = [
1065
- quantize_binary,
1066
- "--imatrix", str(imatrix_path),
1067
- fp16, quantized_path, method
1068
- ]
1069
- else:
1070
- quantize_cmd = [
1071
- quantize_binary,
1072
- fp16, quantized_path, method
1073
- ]
1074
-
1075
- console.print(f"[cyan]Quantization command: {' '.join(quantize_cmd)}")
1076
-
1077
- try:
1078
- result = subprocess.run(quantize_cmd, capture_output=True, text=True)
1079
- if result.returncode != 0:
1080
- raise ConversionError(f"Error quantizing with {method}: {result.stderr}")
1081
- except FileNotFoundError:
1082
- raise ConversionError(f"Could not execute llama-quantize binary: {quantize_binary}")
1083
-
1084
- if not os.path.isfile(quantized_path):
1085
- raise ConversionError(f"Quantization completed but output file not found: {quantized_path}")
1086
-
1087
- quantized_files.append(f"{quantized_name}.gguf")
1088
- console.print(f"[green]Successfully quantized with {method}: {quantized_name}.gguf")
1089
-
1090
- # Upload to Hugging Face if credentials provided
1091
- if self.username and self.token:
1092
- repo_id = f"{self.username}/{self.model_name}-GGUF"
1093
-
1094
- # Step 1: Create repository
1095
- console.print(f"[bold blue]Step 1: Creating repository {repo_id}")
1096
- self.create_repository(repo_id)
1097
-
1098
- # Step 2: Generate and upload README first
1099
- console.print("[bold blue]Step 2: Uploading README.md")
1100
- readme_content = self.generate_readme(quantized_files)
1101
- self.upload_readme(readme_content, repo_id)
1102
-
1103
- # Step 3: Upload model GGUF files
1104
- console.print("[bold blue]Step 3: Uploading model files")
1105
- if self.split_model:
1106
- split_files = self.split_model(quantized_path, outdir)
1107
- self.upload_split_files(split_files, outdir, repo_id)
1108
- else:
1109
- # Upload single quantized file
1110
- file_name = f"{self.model_name.lower()}-{self.quantization_methods[0].lower()}.gguf"
1111
- console.print(f"[bold green]Uploading quantized model: {file_name}")
1112
- try:
1113
- api.upload_file(
1114
- path_or_fileobj=quantized_path,
1115
- path_in_repo=file_name,
1116
- repo_id=repo_id
1117
- )
1118
- console.print(f"[green]✓ Successfully uploaded: {file_name}")
1119
- except Exception as e:
1120
- console.print(f"[red]✗ Failed to upload {file_name}: {e}")
1121
- raise ConversionError(f"Error uploading quantized model: {e}")
1122
-
1123
- # Step 4: Upload imatrix if generated (optional)
1124
- if imatrix_path:
1125
- console.print("[bold blue]Step 4: Uploading importance matrix")
1126
- console.print("[bold green]Uploading importance matrix: imatrix.dat")
1127
- try:
1128
- api.upload_file(
1129
- path_or_fileobj=imatrix_path,
1130
- path_in_repo="imatrix.dat",
1131
- repo_id=repo_id
1132
- )
1133
- console.print("[green]✓ Successfully uploaded: imatrix.dat")
1134
- except Exception as e:
1135
- console.print(f"[yellow]Warning: Failed to upload imatrix.dat: {e}")
1136
-
1137
- # Final success message
1138
- console.print(f"[bold green]🎉 All files uploaded successfully to {repo_id}!")
1139
- console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
1140
-
1141
- # Initialize CLI with HAI vibes
1142
- app = CLI(
1143
- name="gguf",
1144
- help="Convert HuggingFace models to GGUF format with style! 🔥",
1145
- version="1.0.0"
1146
- )
1147
-
1148
- @app.command(name="convert")
1149
- @option("-m", "--model-id", help="The HuggingFace model ID (e.g., 'OEvortex/HelpingAI-Lite-1.5T')", required=True)
1150
- @option("-u", "--username", help="Your HuggingFace username for uploads", default=None)
1151
- @option("-t", "--token", help="Your HuggingFace API token for uploads", default=None)
1152
- @option("-q", "--quantization", help="Comma-separated quantization methods", default="q4_k_m")
1153
- @option("-i", "--use-imatrix", help="Use importance matrix for quantization", is_flag=True)
1154
- @option("--train-data", help="Training data file for imatrix quantization", default=None)
1155
- @option("-s", "--split-model", help="Split the model into smaller chunks", is_flag=True)
1156
- @option("--split-max-tensors", help="Maximum number of tensors per file when splitting", default=256)
1157
- @option("--split-max-size", help="Maximum file size when splitting (e.g., '256M', '5G')", default=None)
1158
- def convert_command(
1159
- model_id: str,
1160
- username: Optional[str] = None,
1161
- token: Optional[str] = None,
1162
- quantization: str = "q4_k_m",
1163
- use_imatrix: bool = False,
1164
- train_data: Optional[str] = None,
1165
- split_model: bool = False,
1166
- split_max_tensors: int = 256,
1167
- split_max_size: Optional[str] = None
1168
- ) -> None:
1169
- """
1170
- Convert and quantize HuggingFace models to GGUF format! 🚀
1171
-
1172
- Args:
1173
- model_id (str): Your model's HF ID (like 'OEvortex/HelpingAI-Lite-1.5T') 🎯
1174
- username (str, optional): Your HF username for uploads 👤
1175
- token (str, optional): Your HF API token 🔑
1176
- quantization (str): Quantization methods (default: q4_k_m,q5_k_m) 🎮
1177
- use_imatrix (bool): Use importance matrix for quantization 🔍
1178
- train_data (str, optional): Training data file for imatrix quantization 📚
1179
- split_model (bool): Split the model into smaller chunks 🔪
1180
- split_max_tensors (int): Max tensors per file when splitting (default: 256) 📊
1181
- split_max_size (str, optional): Max file size when splitting (e.g., '256M', '5G') 📏
1182
-
1183
- Example:
1184
- >>> python -m webscout.Extra.gguf convert \\
1185
- ... -m "OEvortex/HelpingAI-Lite-1.5T" \\
1186
- ... -q "q4_k_m,q5_k_m"
1187
- """
1188
- try:
1189
- converter = ModelConverter(
1190
- model_id=model_id,
1191
- username=username,
1192
- token=token,
1193
- quantization_methods=quantization,
1194
- use_imatrix=use_imatrix,
1195
- train_data_file=train_data,
1196
- split_model=split_model,
1197
- split_max_tensors=split_max_tensors,
1198
- split_max_size=split_max_size
1199
- )
1200
- converter.convert()
1201
- except (ConversionError, ValueError) as e:
1202
- console.print(f"[red]Error: {str(e)}")
1203
- sys.exit(1)
1204
- except Exception as e:
1205
- console.print(f"[red]Unexpected error: {str(e)}")
1206
- sys.exit(1)
1207
-
1208
- def main() -> None:
1209
- """Fire up the GGUF converter! 🚀"""
1210
- app.run()
1211
-
1212
- if __name__ == "__main__":
1213
- main()
1
+ """
2
+ Convert Hugging Face models to GGUF format with advanced features.
3
+
4
+ 🔥 2025 UPDATE: ALL CMAKE BUILD ERRORS FIXED! 🔥
5
+
6
+ This converter has been completely updated for 2025 compatibility with the latest llama.cpp:
7
+
8
+ CRITICAL FIXES:
9
+ - ✅ Updated all deprecated LLAMA_* flags to GGML_* (LLAMA_CUBLAS → GGML_CUDA)
10
+ - ✅ Fixed CURL dependency error by adding -DLLAMA_CURL=OFF
11
+ - ✅ Disabled optional dependencies (LLAMA_LLGUIDANCE=OFF)
12
+ - ✅ Cross-platform hardware detection (Windows, macOS, Linux)
13
+ - ✅ Robust CMake configuration with multiple fallback strategies
14
+ - ✅ Priority-based acceleration selection (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS)
15
+ - ✅ Enhanced error handling and recovery mechanisms
16
+ - ✅ Platform-specific optimizations and build generators
17
+ - ✅ Automatic build directory cleanup to avoid cached CMake conflicts
18
+
19
+ SUPPORTED ACCELERATION:
20
+ - CUDA: GGML_CUDA=ON (NVIDIA GPUs)
21
+ - Metal: GGML_METAL=ON (Apple Silicon/macOS)
22
+ - Vulkan: GGML_VULKAN=ON (Cross-platform GPU)
23
+ - OpenCL: GGML_OPENCL=ON (Cross-platform GPU)
24
+ - ROCm: GGML_HIPBLAS=ON (AMD GPUs)
25
+ - BLAS: GGML_BLAS=ON (Optimized CPU libraries)
26
+ - Accelerate: GGML_ACCELERATE=ON (Apple Accelerate framework)
27
+
28
+ For detailed documentation, see: webscout/Extra/gguf.md
29
+
30
+ USAGE EXAMPLES:
31
+ >>> python -m webscout.Extra.gguf convert -m "OEvortex/HelpingAI-Lite-1.5T" -q "q4_k_m,q5_k_m"
32
+ >>> # With upload options:
33
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -u "username" -t "token" -q "q4_k_m"
34
+ >>> # With imatrix quantization:
35
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -i -q "iq4_nl" --train-data "train_data.txt"
36
+ >>> # With model splitting:
37
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -s --split-max-tensors 256
38
+ """
39
+
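The CLI examples above can also be exercised programmatically through the `ModelConverter` class defined later in this module. The snippet below is a minimal sketch of that equivalent call; the model ID and method list are illustrative placeholders, and the call runs the full download/convert/quantize pipeline.

```python
# Minimal sketch: programmatic equivalent of the CLI examples above.
# The model ID and quantization methods are placeholders, not recommendations.
from webscout.Extra.gguf import ModelConverter, ConversionError

converter = ModelConverter(
    model_id="OEvortex/HelpingAI-Lite-1.5T",  # HuggingFace repo in 'org/name' form
    quantization_methods="q4_k_m,q5_k_m",     # comma-separated, validated against VALID_METHODS
)
try:
    converter.convert()  # builds llama.cpp, converts to fp16, then quantizes into the current directory
except ConversionError as err:
    print(f"Conversion failed: {err}")
```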
40
+ import subprocess
41
+ import os
42
+ import sys
43
+ import signal
44
+ import tempfile
45
+ import platform
46
+ from pathlib import Path
47
+ from typing import Optional, Dict, List, Any, Union, Literal, TypedDict, Set
48
+
49
+ from huggingface_hub import HfApi
50
+ from webscout.zeroart import figlet_format
51
+ from rich.console import Console
52
+ from rich.panel import Panel
53
+ from rich.table import Table
54
+ from webscout.swiftcli import CLI, option
55
+
56
+ console = Console()
57
+
58
+ class ConversionError(Exception):
59
+ """Custom exception for when things don't go as planned! ⚠️"""
60
+ pass
61
+
62
+ class QuantizationMethod(TypedDict):
63
+ """Type definition for quantization method descriptions."""
64
+ description: str
65
+
66
+ class ModelConverter:
67
+ """Handles the conversion of Hugging Face models to GGUF format."""
68
+
69
+ VALID_METHODS: Dict[str, str] = {
70
+ "fp16": "16-bit floating point - maximum accuracy, largest size",
71
+ "q2_k": "2-bit quantization (smallest size, lowest accuracy)",
72
+ "q3_k_l": "3-bit quantization (large) - balanced for size/accuracy",
73
+ "q3_k_m": "3-bit quantization (medium) - good balance for most use cases",
74
+ "q3_k_s": "3-bit quantization (small) - optimized for speed",
75
+ "q4_0": "4-bit quantization (version 0) - standard 4-bit compression",
76
+ "q4_1": "4-bit quantization (version 1) - improved accuracy over q4_0",
77
+ "q4_k_m": "4-bit quantization (medium) - balanced for most models",
78
+ "q4_k_s": "4-bit quantization (small) - optimized for speed",
79
+ "q5_0": "5-bit quantization (version 0) - high accuracy, larger size",
80
+ "q5_1": "5-bit quantization (version 1) - improved accuracy over q5_0",
81
+ "q5_k_m": "5-bit quantization (medium) - best balance for quality/size",
82
+ "q5_k_s": "5-bit quantization (small) - optimized for speed",
83
+ "q6_k": "6-bit quantization - highest accuracy, largest size",
84
+ "q8_0": "8-bit quantization - maximum accuracy, largest size"
85
+ }
86
+
87
+ VALID_IMATRIX_METHODS: Dict[str, str] = {
88
+ "iq3_m": "3-bit imatrix quantization (medium) - balanced importance-based",
89
+ "iq3_xxs": "3-bit imatrix quantization (extra extra small) - maximum compression",
90
+ "q4_k_m": "4-bit imatrix quantization (medium) - balanced importance-based",
91
+ "q4_k_s": "4-bit imatrix quantization (small) - optimized for speed",
92
+ "iq4_nl": "4-bit imatrix quantization (non-linear) - best accuracy for 4-bit",
93
+ "iq4_xs": "4-bit imatrix quantization (extra small) - maximum compression",
94
+ "q5_k_m": "5-bit imatrix quantization (medium) - balanced importance-based",
95
+ "q5_k_s": "5-bit imatrix quantization (small) - optimized for speed"
96
+ }
97
+
98
+ def __init__(
99
+ self,
100
+ model_id: str,
101
+ username: Optional[str] = None,
102
+ token: Optional[str] = None,
103
+ quantization_methods: str = "q4_k_m",
104
+ use_imatrix: bool = False,
105
+ train_data_file: Optional[str] = None,
106
+ split_model: bool = False,
107
+ split_max_tensors: int = 256,
108
+ split_max_size: Optional[str] = None
109
+ ) -> None:
110
+ self.model_id = model_id
111
+ self.username = username
112
+ self.token = token
113
+ self.quantization_methods = quantization_methods.split(',')
114
+ self.model_name = model_id.split('/')[-1]
115
+ self.workspace = Path(os.getcwd())
116
+ self.use_imatrix = use_imatrix
117
+ self.train_data_file = train_data_file
118
+ self.split_model = split_model
119
+ self.split_max_tensors = split_max_tensors
120
+ self.split_max_size = split_max_size
121
+ self.fp16_only = "fp16" in self.quantization_methods and len(self.quantization_methods) == 1
122
+
123
+ def validate_inputs(self) -> None:
124
+ """Validates all input parameters."""
125
+         if '/' not in self.model_id:
126
+ raise ValueError("Invalid model ID format. Expected format: 'organization/model-name'")
127
+
128
+ if self.use_imatrix:
129
+ invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_IMATRIX_METHODS]
130
+ if invalid_methods:
131
+ raise ValueError(
132
+ f"Invalid imatrix quantization methods: {', '.join(invalid_methods)}.\n"
133
+ f"Valid methods are: {', '.join(self.VALID_IMATRIX_METHODS.keys())}"
134
+ )
135
+ if not self.train_data_file and not os.path.exists("llama.cpp/groups_merged.txt"):
136
+ raise ValueError("Training data file is required for imatrix quantization")
137
+ else:
138
+ invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_METHODS]
139
+ if invalid_methods:
140
+ raise ValueError(
141
+ f"Invalid quantization methods: {', '.join(invalid_methods)}.\n"
142
+ f"Valid methods are: {', '.join(self.VALID_METHODS.keys())}"
143
+ )
144
+
145
+ if bool(self.username) != bool(self.token):
146
+ raise ValueError("Both username and token must be provided for upload, or neither.")
147
+
148
+ if self.split_model and self.split_max_size:
149
+ try:
150
+ size = int(self.split_max_size[:-1])
151
+ unit = self.split_max_size[-1].upper()
152
+ if unit not in ['M', 'G']:
153
+ raise ValueError("Split max size must end with M or G")
154
+ except ValueError:
155
+ raise ValueError("Invalid split max size format. Use format like '256M' or '5G'")
156
+
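To make the rules above concrete, here is a small sketch of inputs that pass and fail `validate_inputs()`; the model IDs, username, and size values are made-up placeholders.

```python
# Illustration only: which constructor inputs validate_inputs() accepts or rejects.
from webscout.Extra.gguf import ModelConverter

ModelConverter("org/model", quantization_methods="q4_k_m,q5_k_m").validate_inputs()   # OK
ModelConverter("org/model", split_model=True, split_max_size="5G").validate_inputs()  # OK: size ends in M or G

try:
    ModelConverter("model-without-org").validate_inputs()            # rejected: no '/' in the model ID
except ValueError as err:
    print(err)

try:
    ModelConverter("org/model", username="alice").validate_inputs()  # rejected: username without token
except ValueError as err:
    print(err)
```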
157
+ @staticmethod
158
+ def check_dependencies() -> Dict[str, bool]:
159
+ """Check if all required dependencies are installed with cross-platform support."""
160
+ system = platform.system()
161
+
162
+ dependencies: Dict[str, str] = {
163
+ 'git': 'Git version control',
164
+ 'cmake': 'CMake build system',
165
+ 'ninja': 'Ninja build system (optional)'
166
+ }
167
+
168
+ # Add platform-specific dependencies
169
+ if system != 'Windows':
170
+ dependencies['pip3'] = 'Python package installer'
171
+ else:
172
+ dependencies['pip'] = 'Python package installer'
173
+
174
+ status: Dict[str, bool] = {}
175
+
176
+ for cmd, desc in dependencies.items():
177
+ try:
178
+ if system == 'Windows':
179
+ # Use 'where' command on Windows
180
+ result = subprocess.run(['where', cmd], capture_output=True, text=True)
181
+ status[cmd] = result.returncode == 0
182
+ else:
183
+ # Use 'which' command on Unix-like systems
184
+ result = subprocess.run(['which', cmd], capture_output=True, text=True)
185
+ status[cmd] = result.returncode == 0
186
+ except (FileNotFoundError, subprocess.SubprocessError):
187
+ status[cmd] = False
188
+
189
+ # Special check for Python - try different variants
190
+ python_variants = ['python3', 'python', 'py'] if system != 'Windows' else ['python', 'py', 'python3']
191
+ status['python'] = False
192
+ for variant in python_variants:
193
+ try:
194
+ if system == 'Windows':
195
+ result = subprocess.run(['where', variant], capture_output=True)
196
+ else:
197
+ result = subprocess.run(['which', variant], capture_output=True)
198
+ if result.returncode == 0:
199
+ status['python'] = True
200
+ break
201
+             except Exception:
202
+ continue
203
+
204
+ # Check for C++ compiler
205
+ cpp_compilers = ['cl', 'g++', 'clang++'] if system == 'Windows' else ['g++', 'clang++']
206
+ status['cpp_compiler'] = False
207
+ for compiler in cpp_compilers:
208
+ try:
209
+ if system == 'Windows':
210
+ result = subprocess.run(['where', compiler], capture_output=True)
211
+ else:
212
+ result = subprocess.run(['which', compiler], capture_output=True)
213
+ if result.returncode == 0:
214
+ status['cpp_compiler'] = True
215
+ break
216
+             except Exception:
217
+ continue
218
+
219
+ dependencies['python'] = 'Python interpreter'
220
+ dependencies['cpp_compiler'] = 'C++ compiler (g++, clang++, or MSVC)'
221
+
222
+ return status
223
+
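The status dictionary returned above is what `convert()` later inspects before doing any work; the sketch below mirrors that consumption pattern (ninja is treated as optional, as in `convert()`).

```python
# Sketch of how the dependency report is consumed, mirroring the check in convert().
from webscout.Extra.gguf import ModelConverter

deps = ModelConverter.check_dependencies()  # e.g. {'git': True, 'cmake': True, 'ninja': False, ...}
missing = [name for name, installed in deps.items() if not installed and name != 'ninja']
if missing:
    print(f"Missing required dependencies: {', '.join(missing)}")
```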
224
+ def detect_hardware(self) -> Dict[str, bool]:
225
+ """Detect available hardware acceleration with improved cross-platform support."""
226
+ hardware: Dict[str, bool] = {
227
+ 'cuda': False,
228
+ 'metal': False,
229
+ 'opencl': False,
230
+ 'vulkan': False,
231
+ 'rocm': False,
232
+ 'blas': False,
233
+ 'accelerate': False
234
+ }
235
+
236
+ system = platform.system()
237
+
238
+ # Check CUDA
239
+ try:
240
+ # Check for nvcc compiler
241
+ if subprocess.run(['nvcc', '--version'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
242
+ hardware['cuda'] = True
243
+ # Also check for nvidia-smi as fallback
244
+ elif subprocess.run(['nvidia-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
245
+ hardware['cuda'] = True
246
+ except (FileNotFoundError, subprocess.SubprocessError):
247
+ # Check for CUDA libraries on Windows
248
+ if system == 'Windows':
249
+ cuda_paths = [
250
+ os.environ.get('CUDA_PATH'),
251
+ 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA',
252
+ 'C:\\Program Files (x86)\\NVIDIA GPU Computing Toolkit\\CUDA'
253
+ ]
254
+ for cuda_path in cuda_paths:
255
+ if cuda_path and os.path.exists(cuda_path):
256
+ hardware['cuda'] = True
257
+ break
258
+
259
+ # Check Metal (macOS)
260
+ if system == 'Darwin':
261
+ try:
262
+ # Check for Xcode command line tools
263
+ if subprocess.run(['xcrun', '--show-sdk-path'], capture_output=True).returncode == 0:
264
+ hardware['metal'] = True
265
+ # Check for Metal framework
266
+ if os.path.exists('/System/Library/Frameworks/Metal.framework'):
267
+ hardware['metal'] = True
268
+ # macOS also supports Accelerate framework
269
+ if os.path.exists('/System/Library/Frameworks/Accelerate.framework'):
270
+ hardware['accelerate'] = True
271
+ except (FileNotFoundError, subprocess.SubprocessError):
272
+ pass
273
+
274
+ # Check OpenCL
275
+ try:
276
+ if system == 'Windows':
277
+ # Check for OpenCL on Windows
278
+ opencl_paths = [
279
+ 'C:\\Windows\\System32\\OpenCL.dll',
280
+ 'C:\\Windows\\SysWOW64\\OpenCL.dll'
281
+ ]
282
+ if any(os.path.exists(path) for path in opencl_paths):
283
+ hardware['opencl'] = True
284
+ else:
285
+ if subprocess.run(['clinfo'], capture_output=True).returncode == 0:
286
+ hardware['opencl'] = True
287
+ except (FileNotFoundError, subprocess.SubprocessError):
288
+ pass
289
+
290
+ # Check Vulkan
291
+ try:
292
+ if system == 'Windows':
293
+ # Check for Vulkan on Windows
294
+ vulkan_paths = [
295
+ 'C:\\Windows\\System32\\vulkan-1.dll',
296
+ 'C:\\Windows\\SysWOW64\\vulkan-1.dll'
297
+ ]
298
+ if any(os.path.exists(path) for path in vulkan_paths):
299
+ hardware['vulkan'] = True
300
+ else:
301
+ if subprocess.run(['vulkaninfo'], capture_output=True).returncode == 0:
302
+ hardware['vulkan'] = True
303
+ except (FileNotFoundError, subprocess.SubprocessError):
304
+ pass
305
+
306
+ # Check ROCm (AMD)
307
+ try:
308
+ if subprocess.run(['rocm-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
309
+ hardware['rocm'] = True
310
+ elif system == 'Linux':
311
+ # Check for ROCm installation
312
+ rocm_paths = ['/opt/rocm', '/usr/lib/x86_64-linux-gnu/librocm-smi64.so']
313
+ if any(os.path.exists(path) for path in rocm_paths):
314
+ hardware['rocm'] = True
315
+ except (FileNotFoundError, subprocess.SubprocessError):
316
+ pass
317
+
318
+ # Check for BLAS libraries
319
+ try:
320
+             import io, contextlib
321
+             import numpy as np
322
+             with contextlib.redirect_stdout(io.StringIO()) as config: np.show_config()  # show_config() prints its report; capture it
323
+             if any(lib in config.getvalue().lower() for lib in ['openblas', 'mkl', 'atlas', 'blis']):
324
+ hardware['blas'] = True
325
+ except (ImportError, AttributeError):
326
+ # Fallback: check for common BLAS libraries
327
+ if system == 'Linux':
328
+ blas_libs = ['/usr/lib/x86_64-linux-gnu/libopenblas.so', '/usr/lib/x86_64-linux-gnu/libblas.so']
329
+ if any(os.path.exists(lib) for lib in blas_libs):
330
+ hardware['blas'] = True
331
+ elif system == 'Windows':
332
+ # Check for Intel MKL or OpenBLAS on Windows
333
+ mkl_paths = ['C:\\Program Files (x86)\\Intel\\oneAPI\\mkl']
334
+ if any(os.path.exists(path) for path in mkl_paths):
335
+ hardware['blas'] = True
336
+
337
+ return hardware
338
+
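The detection result above feeds the priority-ordered backend selection in `setup_llama_cpp()` below (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate). The helper below is an illustrative condensation of that mapping, not part of the module.

```python
# Illustration of the backend priority used by setup_llama_cpp() when choosing a GGML_* CMake flag.
from typing import Dict, Optional

PRIORITY = [
    ("cuda", "-DGGML_CUDA=ON"),
    ("metal", "-DGGML_METAL=ON"),
    ("vulkan", "-DGGML_VULKAN=ON"),
    ("opencl", "-DGGML_OPENCL=ON"),
    ("rocm", "-DGGML_HIPBLAS=ON"),
    ("blas", "-DGGML_BLAS=ON"),
    ("accelerate", "-DGGML_ACCELERATE=ON"),
]

def pick_acceleration_flag(hardware: Dict[str, bool]) -> Optional[str]:
    """Return the first GGML flag whose backend was detected, or None for a CPU-only build."""
    for backend, flag in PRIORITY:
        if hardware.get(backend):
            return flag
    return None
```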
339
+ def setup_llama_cpp(self) -> None:
340
+ """Sets up and builds llama.cpp repository with robust error handling."""
341
+ llama_path = self.workspace / "llama.cpp"
342
+ system = platform.system()
343
+
344
+ with console.status("[bold green]Setting up llama.cpp...") as status:
345
+ # Clone llama.cpp if not exists
346
+ if not llama_path.exists():
347
+ try:
348
+ subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp'], check=True)
349
+ except subprocess.CalledProcessError as e:
350
+ raise ConversionError(f"Failed to clone llama.cpp repository: {e}")
351
+
352
+ original_cwd = os.getcwd()
353
+ try:
354
+ os.chdir(llama_path)
355
+
356
+ # Update to latest version
357
+ try:
358
+ subprocess.run(['git', 'pull'], capture_output=True, check=False)
359
+ except subprocess.CalledProcessError:
360
+ console.print("[yellow]Warning: Could not update llama.cpp repository")
361
+
362
+ # Clean any existing build directory to avoid cached CMake variables
363
+ build_dir = Path('build')
364
+ if build_dir.exists():
365
+ console.print("[yellow]Cleaning existing build directory to avoid CMake cache conflicts...")
366
+ import shutil
367
+ try:
368
+ shutil.rmtree(build_dir)
369
+ console.print("[green]Build directory cleaned successfully")
370
+ except Exception as e:
371
+ console.print(f"[yellow]Warning: Could not clean build directory: {e}")
372
+
373
+ # Check if we're in a Nix environment
374
+ is_nix = system == "Linux" and os.path.exists("/nix/store")
375
+
376
+ if is_nix:
377
+ console.print("[yellow]Detected Nix environment. Using system Python packages...")
378
+ # In Nix, we need to use the system Python packages
379
+ try:
380
+ # Try to import required packages to check if they're available
381
+ import torch # type: ignore
382
+ import numpy # type: ignore
383
+ import sentencepiece # type: ignore
384
+ import transformers # type: ignore
385
+ console.print("[green]Required Python packages are already installed.")
386
+ except ImportError as e:
387
+ console.print("[red]Missing required Python packages in Nix environment.")
388
+ console.print("[yellow]Please install them using:")
389
+ console.print("nix-shell -p python3Packages.torch python3Packages.numpy python3Packages.sentencepiece python3Packages.transformers")
390
+ raise ConversionError("Missing required Python packages in Nix environment")
391
+ else:
392
+ # In non-Nix environments, install requirements if they exist
393
+ if os.path.exists('requirements.txt'):
394
+ try:
395
+ pip_cmd = 'pip' if system == 'Windows' else 'pip3'
396
+ subprocess.run([pip_cmd, 'install', '-r', 'requirements.txt'], check=True)
397
+ except subprocess.CalledProcessError as e:
398
+ if "externally-managed-environment" in str(e):
399
+ console.print("[yellow]Detected externally managed Python environment.")
400
+ console.print("[yellow]Please install the required packages manually:")
401
+ console.print("pip install torch numpy sentencepiece transformers")
402
+ raise ConversionError("Failed to install requirements in externally managed environment")
403
+ else:
404
+ console.print(f"[yellow]Warning: Failed to install requirements: {e}")
405
+
406
+ # Detect available hardware
407
+ hardware = self.detect_hardware()
408
+ console.print("[bold green]Detected hardware acceleration:")
409
+ for hw, available in hardware.items():
410
+ console.print(f" {'✓' if available else '✗'} {hw.upper()}")
411
+
412
+ # Clear any environment variables that might cause conflicts
413
+ env_vars_to_clear = [
414
+ 'LLAMA_CUBLAS', 'LLAMA_CLBLAST', 'LLAMA_HIPBLAS',
415
+ 'LLAMA_METAL', 'LLAMA_ACCELERATE', 'LLAMA_OPENBLAS'
416
+ ]
417
+ for var in env_vars_to_clear:
418
+ if var in os.environ:
419
+ console.print(f"[yellow]Clearing conflicting environment variable: {var}")
420
+ del os.environ[var]
421
+
422
+ # Configure CMake build with robust options
423
+ cmake_args: List[str] = ['cmake', '-B', 'build']
424
+
425
+ # Add basic CMake options with correct LLAMA prefixes
426
+ cmake_args.extend([
427
+ '-DCMAKE_BUILD_TYPE=Release',
428
+ '-DLLAMA_BUILD_TESTS=OFF',
429
+ '-DLLAMA_BUILD_EXAMPLES=ON',
430
+ '-DLLAMA_BUILD_SERVER=OFF',
431
+ # Disable optional dependencies that might cause issues
432
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
433
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
434
+ # Explicitly disable deprecated flags to avoid conflicts
435
+ '-DLLAMA_CUBLAS=OFF',
436
+ '-DLLAMA_CLBLAST=OFF',
437
+ '-DLLAMA_HIPBLAS=OFF'
438
+ ])
439
+
440
+ # Add hardware acceleration options with latest 2025 llama.cpp GGML flags
441
+ # Use priority order: CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate
442
+ acceleration_enabled = False
443
+
444
+ if hardware['cuda']:
445
+ # Latest 2025 GGML CUDA flags (LLAMA_CUBLAS is deprecated)
446
+ cmake_args.extend(['-DGGML_CUDA=ON'])
447
+ console.print("[green]Enabling CUDA acceleration (GGML_CUDA=ON)")
448
+ acceleration_enabled = True
449
+ elif hardware['metal']:
450
+ # Latest 2025 GGML Metal flags for macOS
451
+ cmake_args.extend(['-DGGML_METAL=ON'])
452
+ console.print("[green]Enabling Metal acceleration (GGML_METAL=ON)")
453
+ acceleration_enabled = True
454
+ elif hardware['vulkan']:
455
+ # Latest 2025 GGML Vulkan flags
456
+ cmake_args.extend(['-DGGML_VULKAN=ON'])
457
+ console.print("[green]Enabling Vulkan acceleration (GGML_VULKAN=ON)")
458
+ acceleration_enabled = True
459
+ elif hardware['opencl']:
460
+ # Latest 2025 GGML OpenCL flags (LLAMA_CLBLAST is deprecated)
461
+ cmake_args.extend(['-DGGML_OPENCL=ON'])
462
+ console.print("[green]Enabling OpenCL acceleration (GGML_OPENCL=ON)")
463
+ acceleration_enabled = True
464
+ elif hardware['rocm']:
465
+ # Latest 2025 GGML ROCm/HIP flags
466
+ cmake_args.extend(['-DGGML_HIPBLAS=ON'])
467
+ console.print("[green]Enabling ROCm acceleration (GGML_HIPBLAS=ON)")
468
+ acceleration_enabled = True
469
+ elif hardware['blas']:
470
+ # Latest 2025 GGML BLAS flags with vendor detection
471
+ cmake_args.extend(['-DGGML_BLAS=ON'])
472
+ # Try to detect BLAS vendor for optimal performance
473
+ if system == 'Darwin':
474
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=Accelerate'])
475
+ elif 'mkl' in str(hardware).lower():
476
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=Intel10_64lp'])
477
+ else:
478
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=OpenBLAS'])
479
+ console.print("[green]Enabling BLAS acceleration (GGML_BLAS=ON)")
480
+ acceleration_enabled = True
481
+ elif hardware['accelerate']:
482
+ # Latest 2025 GGML Accelerate framework flags for macOS
483
+ cmake_args.extend(['-DGGML_ACCELERATE=ON'])
484
+ console.print("[green]Enabling Accelerate framework (GGML_ACCELERATE=ON)")
485
+ acceleration_enabled = True
486
+
487
+ if not acceleration_enabled:
488
+ console.print("[yellow]No hardware acceleration available, using CPU only")
489
+ console.print("[cyan]Note: All deprecated LLAMA_* flags have been updated to GGML_* for 2025 compatibility")
490
+
491
+ # Platform-specific optimizations
492
+ if system == 'Windows':
493
+ # Use Visual Studio generator on Windows if available
494
+ try:
495
+ vs_result = subprocess.run(['where', 'msbuild'], capture_output=True)
496
+ if vs_result.returncode == 0:
497
+ cmake_args.extend(['-G', 'Visual Studio 17 2022'])
498
+ else:
499
+ cmake_args.extend(['-G', 'MinGW Makefiles'])
500
+                     except Exception:
501
+ cmake_args.extend(['-G', 'MinGW Makefiles'])
502
+ else:
503
+ # Use Ninja if available on Unix systems
504
+ try:
505
+ ninja_cmd = 'ninja' if system != 'Windows' else 'ninja.exe'
506
+ if subprocess.run(['which', ninja_cmd], capture_output=True).returncode == 0:
507
+ cmake_args.extend(['-G', 'Ninja'])
508
+                     except Exception:
509
+ pass # Fall back to default generator
510
+
511
+ # Configure the build with error handling and multiple fallback strategies
512
+ status.update("[bold green]Configuring CMake build...")
513
+ config_success = False
514
+
515
+ # Try main configuration
516
+ try:
517
+ console.print(f"[cyan]CMake command: {' '.join(cmake_args)}")
518
+ result = subprocess.run(cmake_args, capture_output=True, text=True)
519
+ if result.returncode == 0:
520
+ config_success = True
521
+ console.print("[green]CMake configuration successful!")
522
+ else:
523
+ console.print(f"[red]CMake configuration failed: {result.stderr}")
524
+ except subprocess.CalledProcessError as e:
525
+ console.print(f"[red]CMake execution failed: {e}")
526
+
527
+ # Try fallback without hardware acceleration if main config failed
528
+ if not config_success:
529
+ console.print("[yellow]Attempting fallback configuration without hardware acceleration...")
530
+ console.print("[cyan]Using 2025-compatible LLAMA build flags...")
531
+ fallback_args = [
532
+ 'cmake', '-B', 'build',
533
+ '-DCMAKE_BUILD_TYPE=Release',
534
+ '-DLLAMA_BUILD_TESTS=OFF',
535
+ '-DLLAMA_BUILD_EXAMPLES=ON',
536
+ '-DLLAMA_BUILD_SERVER=OFF',
537
+ # Disable optional dependencies that might cause issues
538
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
539
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
540
+ # Explicitly disable all deprecated flags
541
+ '-DLLAMA_CUBLAS=OFF',
542
+ '-DLLAMA_CLBLAST=OFF',
543
+ '-DLLAMA_HIPBLAS=OFF',
544
+ '-DLLAMA_METAL=OFF',
545
+ # Enable CPU optimizations
546
+ '-DGGML_NATIVE=OFF', # Disable native optimizations for compatibility
547
+ '-DGGML_AVX=ON', # Enable AVX if available
548
+ '-DGGML_AVX2=ON', # Enable AVX2 if available
549
+ '-DGGML_FMA=ON' # Enable FMA if available
550
+ ]
551
+ try:
552
+ console.print(f"[cyan]Fallback CMake command: {' '.join(fallback_args)}")
553
+ result = subprocess.run(fallback_args, capture_output=True, text=True)
554
+ if result.returncode == 0:
555
+ config_success = True
556
+ console.print("[green]Fallback CMake configuration successful!")
557
+ else:
558
+ console.print(f"[red]Fallback CMake configuration failed: {result.stderr}")
559
+ except subprocess.CalledProcessError as e:
560
+ console.print(f"[red]Fallback CMake execution failed: {e}")
561
+
562
+ # Try minimal configuration as last resort
563
+ if not config_success:
564
+ console.print("[yellow]Attempting minimal configuration...")
565
+ minimal_args = [
566
+ 'cmake', '-B', 'build',
567
+ # Disable optional dependencies that might cause issues
568
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
569
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
570
+ '-DLLAMA_BUILD_SERVER=OFF', # Disable server (not needed for conversion)
571
+ '-DLLAMA_BUILD_TESTS=OFF', # Disable tests (not needed for conversion)
572
+ # Explicitly disable ALL deprecated flags to avoid conflicts
573
+ '-DLLAMA_CUBLAS=OFF',
574
+ '-DLLAMA_CLBLAST=OFF',
575
+ '-DLLAMA_HIPBLAS=OFF',
576
+ '-DLLAMA_METAL=OFF',
577
+ '-DLLAMA_ACCELERATE=OFF'
578
+ ]
579
+ try:
580
+ console.print(f"[cyan]Minimal CMake command: {' '.join(minimal_args)}")
581
+ result = subprocess.run(minimal_args, capture_output=True, text=True)
582
+ if result.returncode == 0:
583
+ config_success = True
584
+ console.print("[green]Minimal CMake configuration successful!")
585
+ else:
586
+ console.print(f"[red]Minimal CMake configuration failed: {result.stderr}")
587
+ raise ConversionError(f"All CMake configuration attempts failed. Last error: {result.stderr}")
588
+ except subprocess.CalledProcessError as e:
589
+ raise ConversionError(f"All CMake configuration attempts failed: {e}")
590
+
591
+ if not config_success:
592
+ raise ConversionError("CMake configuration failed with all attempted strategies")
593
+
594
+ # Build the project
595
+ status.update("[bold green]Building llama.cpp...")
596
+ build_cmd = ['cmake', '--build', 'build', '--config', 'Release']
597
+
598
+ # Add parallel build option
599
+ cpu_count = os.cpu_count() or 1
600
+ if system == 'Windows':
601
+ build_cmd.extend(['--parallel', str(cpu_count)])
602
+ else:
603
+ build_cmd.extend(['-j', str(cpu_count)])
604
+
605
+ try:
606
+ result = subprocess.run(build_cmd, capture_output=True, text=True)
607
+ if result.returncode != 0:
608
+ console.print(f"[red]Build failed: {result.stderr}")
609
+ # Try single-threaded build as fallback
610
+ console.print("[yellow]Attempting single-threaded build...")
611
+ fallback_build = ['cmake', '--build', 'build', '--config', 'Release']
612
+ result = subprocess.run(fallback_build, capture_output=True, text=True)
613
+ if result.returncode != 0:
614
+ raise ConversionError(f"Build failed: {result.stderr}")
615
+ except subprocess.CalledProcessError as e:
616
+ raise ConversionError(f"Build failed: {e}")
617
+
618
+ console.print("[green]llama.cpp built successfully!")
619
+
620
+ finally:
621
+ os.chdir(original_cwd)
622
+
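For reference, on a Linux machine where only CUDA is detected and Ninja is installed, the configuration step above assembles arguments along these lines; this is a sketch reconstructed from the code, not output captured from a real build.

```python
# Sketch of the cmake_args list setup_llama_cpp() builds when only CUDA is detected and Ninja is available.
cmake_args = [
    "cmake", "-B", "build",
    "-DCMAKE_BUILD_TYPE=Release",
    "-DLLAMA_BUILD_TESTS=OFF",
    "-DLLAMA_BUILD_EXAMPLES=ON",
    "-DLLAMA_BUILD_SERVER=OFF",
    "-DLLAMA_CURL=OFF",        # CURL not needed for GGUF conversion
    "-DLLAMA_LLGUIDANCE=OFF",  # optional feature disabled
    "-DLLAMA_CUBLAS=OFF",      # deprecated flags explicitly disabled
    "-DLLAMA_CLBLAST=OFF",
    "-DLLAMA_HIPBLAS=OFF",
    "-DGGML_CUDA=ON",          # selected backend
    "-G", "Ninja",
]
# Followed by: cmake --build build --config Release -j <os.cpu_count()>
```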
623
+ def display_config(self) -> None:
624
+ """Displays the current configuration in a formatted table."""
625
+ table = Table(title="Configuration", show_header=True, header_style="bold magenta")
626
+ table.add_column("Setting", style="cyan")
627
+ table.add_column("Value", style="green")
628
+
629
+ table.add_row("Model ID", self.model_id)
630
+ table.add_row("Model Name", self.model_name)
631
+ table.add_row("Username", self.username or "Not provided")
632
+ table.add_row("Token", "****" if self.token else "Not provided")
633
+ table.add_row("Quantization Methods", "\n".join(
634
+             f"{method} ({self.VALID_METHODS.get(method, self.VALID_IMATRIX_METHODS.get(method, ''))})"
635
+ for method in self.quantization_methods
636
+ ))
637
+
638
+ console.print(Panel(table))
639
+
640
+ def get_binary_path(self, binary_name: str) -> str:
641
+ """Get the correct path to llama.cpp binaries based on platform."""
642
+ system = platform.system()
643
+
644
+ # Possible binary locations
645
+ possible_paths = [
646
+ f"./llama.cpp/build/bin/{binary_name}", # Standard build location
647
+ f"./llama.cpp/build/{binary_name}", # Alternative build location
648
+ f"./llama.cpp/{binary_name}", # Root directory
649
+ f"./llama.cpp/build/Release/{binary_name}", # Windows Release build
650
+ f"./llama.cpp/build/Debug/{binary_name}", # Windows Debug build
651
+ ]
652
+
653
+ # Add .exe extension on Windows
654
+ if system == 'Windows':
655
+ possible_paths = [path + '.exe' for path in possible_paths]
656
+
657
+ # Find the first existing binary
658
+ for path in possible_paths:
659
+ if os.path.isfile(path):
660
+ return path
661
+
662
+ # If not found, return the most likely path and let it fail with a clear error
663
+ default_path = f"./llama.cpp/build/bin/{binary_name}"
664
+ if system == 'Windows':
665
+ default_path += '.exe'
666
+ return default_path
667
+
668
+ def generate_importance_matrix(self, model_path: str, train_data_path: str, output_path: str) -> None:
669
+ """Generates importance matrix for quantization with improved error handling."""
670
+ imatrix_binary = self.get_binary_path("llama-imatrix")
671
+
672
+ imatrix_command: List[str] = [
673
+ imatrix_binary,
674
+ "-m", model_path,
675
+ "-f", train_data_path,
676
+ "-ngl", "99",
677
+ "--output-frequency", "10",
678
+ "-o", output_path,
679
+ ]
680
+
681
+ if not os.path.isfile(model_path):
682
+ raise ConversionError(f"Model file not found: {model_path}")
683
+
684
+ if not os.path.isfile(train_data_path):
685
+ raise ConversionError(f"Training data file not found: {train_data_path}")
686
+
687
+ if not os.path.isfile(imatrix_binary):
688
+ raise ConversionError(f"llama-imatrix binary not found at: {imatrix_binary}")
689
+
690
+ console.print("[bold green]Generating importance matrix...")
691
+ console.print(f"[cyan]Command: {' '.join(imatrix_command)}")
692
+
693
+ try:
694
+ process = subprocess.Popen(
695
+ imatrix_command,
696
+ shell=False,
697
+ stdout=subprocess.PIPE,
698
+ stderr=subprocess.PIPE,
699
+ text=True
700
+ )
701
+
702
+ try:
703
+ stdout, stderr = process.communicate(timeout=300) # 5 minute timeout
704
+ if process.returncode != 0:
705
+ raise ConversionError(f"Failed to generate importance matrix: {stderr}")
706
+ except subprocess.TimeoutExpired:
707
+ console.print("[yellow]Imatrix computation timed out. Sending SIGINT...")
708
+ process.send_signal(signal.SIGINT)
709
+ try:
710
+ stdout, stderr = process.communicate(timeout=10)
711
+ except subprocess.TimeoutExpired:
712
+ console.print("[red]Imatrix process still running. Force terminating...")
713
+ process.kill()
714
+ stdout, stderr = process.communicate()
715
+ raise ConversionError(f"Imatrix generation timed out: {stderr}")
716
+ except FileNotFoundError:
717
+ raise ConversionError(f"Could not execute llama-imatrix binary: {imatrix_binary}")
718
+
719
+ console.print("[green]Importance matrix generation completed.")
720
+
721
+     def split_model_file(self, model_path: str, outdir: str) -> List[str]:
722
+         """Splits the model into smaller chunks; named distinctly so it is not shadowed by the boolean attribute self.split_model."""
723
+ split_binary = self.get_binary_path("llama-gguf-split")
724
+
725
+ split_cmd: List[str] = [
726
+ split_binary,
727
+ "--split",
728
+ ]
729
+
730
+ if self.split_max_size:
731
+ split_cmd.extend(["--split-max-size", self.split_max_size])
732
+ else:
733
+ split_cmd.extend(["--split-max-tensors", str(self.split_max_tensors)])
734
+
735
+ model_path_prefix = '.'.join(model_path.split('.')[:-1])
736
+ split_cmd.extend([model_path, model_path_prefix])
737
+
738
+ if not os.path.isfile(model_path):
739
+ raise ConversionError(f"Model file not found: {model_path}")
740
+
741
+ if not os.path.isfile(split_binary):
742
+ raise ConversionError(f"llama-gguf-split binary not found at: {split_binary}")
743
+
744
+ console.print(f"[bold green]Splitting model with command: {' '.join(split_cmd)}")
745
+
746
+ try:
747
+ result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
748
+
749
+ if result.returncode != 0:
750
+ raise ConversionError(f"Error splitting model: {result.stderr}")
751
+ except FileNotFoundError:
752
+ raise ConversionError(f"Could not execute llama-gguf-split binary: {split_binary}")
753
+
754
+ console.print("[green]Model split successfully!")
755
+
756
+ # Get list of split files
757
+ model_file_prefix = os.path.basename(model_path_prefix)
758
+ try:
759
+ split_files = [f for f in os.listdir(outdir)
760
+ if f.startswith(model_file_prefix) and f.endswith(".gguf")]
761
+ except OSError as e:
762
+ raise ConversionError(f"Error reading output directory: {e}")
763
+
764
+ if not split_files:
765
+ raise ConversionError(f"No split files found in {outdir} with prefix {model_file_prefix}")
766
+
767
+ console.print(f"[green]Found {len(split_files)} split files: {', '.join(split_files)}")
768
+ return split_files
769
+
770
+ def upload_split_files(self, split_files: List[str], outdir: str, repo_id: str) -> None:
771
+ """Uploads split model files to Hugging Face."""
772
+ api = HfApi(token=self.token)
773
+
774
+ for file in split_files:
775
+ file_path = os.path.join(outdir, file)
776
+ console.print(f"[bold green]Uploading file: {file}")
777
+ try:
778
+ api.upload_file(
779
+ path_or_fileobj=file_path,
780
+ path_in_repo=file,
781
+ repo_id=repo_id,
782
+ )
783
+ console.print(f"[green]✓ Successfully uploaded: {file}")
784
+ except Exception as e:
785
+ console.print(f"[red]✗ Failed to upload {file}: {e}")
786
+ raise ConversionError(f"Error uploading file {file}: {e}")
787
+
788
+ def generate_readme(self, quantized_files: List[str]) -> str:
789
+ """Generate a README.md file for the Hugging Face Hub."""
790
+ readme = f"""# {self.model_name} GGUF
791
+
792
+ This repository contains GGUF quantized versions of [{self.model_id}](https://huggingface.co/{self.model_id}).
793
+
794
+ ## About
795
+
796
+ This model was converted using [Webscout](https://github.com/Webscout/webscout).
797
+
798
+ ## Quantization Methods
799
+
800
+ The following quantization methods were used:
801
+
802
+ """
803
+ # Add quantization method descriptions
804
+ for method in self.quantization_methods:
805
+ if self.use_imatrix:
806
+ readme += f"- `{method}`: {self.VALID_IMATRIX_METHODS[method]}\n"
807
+ else:
808
+ readme += f"- `{method}`: {self.VALID_METHODS[method]}\n"
809
+
810
+ readme += """
811
+ ## Available Files
812
+
813
+ The following quantized files are available:
814
+
815
+ """
816
+ # Add file information
817
+ for file in quantized_files:
818
+ readme += f"- `{file}`\n"
819
+
820
+ if self.use_imatrix:
821
+ readme += """
822
+ ## Importance Matrix
823
+
824
+ This model was quantized using importance matrix quantization. The `imatrix.dat` file contains the importance matrix used for quantization.
825
+
826
+ """
827
+
828
+         readme += f"""
829
+ ## Usage
830
+
831
+ These GGUF files can be used with [llama.cpp](https://github.com/ggerganov/llama.cpp) and compatible tools.
832
+
833
+ Example usage:
834
+ ```bash
835
+ ./main -m model.gguf -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
836
+ ```
837
+
838
+ ## Conversion Process
839
+
840
+ This model was converted using the following command:
841
+ ```bash
842
+ python -m webscout.Extra.gguf convert \\
843
+ -m "{self.model_id}" \\
844
+ -q "{','.join(self.quantization_methods)}" \\
845
+ {f'-i' if self.use_imatrix else ''} \\
846
+ {f'--train-data "{self.train_data_file}"' if self.train_data_file else ''} \\
847
+ {f'-s' if self.split_model else ''} \\
848
+ {f'--split-max-tensors {self.split_max_tensors}' if self.split_model else ''} \\
849
+ {f'--split-max-size {self.split_max_size}' if self.split_max_size else ''}
850
+ ```
851
+
852
+ ## License
853
+
854
+ This repository is licensed under the same terms as the original model.
855
+ """
856
+ return readme
857
+
858
+ def create_repository(self, repo_id: str) -> None:
859
+ """Create a new repository on Hugging Face Hub if it doesn't exist."""
860
+ api = HfApi(token=self.token)
861
+ try:
862
+ # Check if repository already exists
863
+ try:
864
+ api.repo_info(repo_id=repo_id)
865
+ console.print(f"[green]✓ Repository {repo_id} already exists")
866
+ return
867
+ except Exception:
868
+ # Repository doesn't exist, create it
869
+ pass
870
+
871
+ console.print(f"[bold green]Creating new repository: {repo_id}")
872
+ api.create_repo(
873
+ repo_id=repo_id,
874
+ exist_ok=True,
875
+ private=False,
876
+ repo_type="model"
877
+ )
878
+ console.print(f"[green]✓ Successfully created repository: {repo_id}")
879
+ console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
880
+ except Exception as e:
881
+ console.print(f"[red]✗ Failed to create repository: {e}")
882
+ raise ConversionError(f"Error creating repository {repo_id}: {e}")
883
+
884
+ def upload_readme(self, readme_content: str, repo_id: str) -> None:
885
+ """Upload README.md to Hugging Face Hub."""
886
+ api = HfApi(token=self.token)
887
+ console.print("[bold green]Uploading README.md with model documentation")
888
+ try:
889
+ api.upload_file(
890
+ path_or_fileobj=readme_content.encode(),
891
+ path_in_repo="README.md",
892
+ repo_id=repo_id,
893
+ )
894
+ console.print("[green]✓ Successfully uploaded: README.md")
895
+ except Exception as e:
896
+ console.print(f"[red]✗ Failed to upload README.md: {e}")
897
+ raise ConversionError(f"Error uploading README.md: {e}")
898
+
899
+ def convert(self) -> None:
900
+ """Performs the model conversion process."""
901
+ try:
902
+ # Display banner and configuration
903
+ console.print(f"[bold green]{figlet_format('GGUF Converter')}")
904
+ self.display_config()
905
+
906
+ # Validate inputs
907
+ self.validate_inputs()
908
+
909
+ # Check dependencies
910
+ deps = self.check_dependencies()
911
+ missing = [name for name, installed in deps.items() if not installed and name != 'ninja']
912
+ if missing:
913
+ raise ConversionError(f"Missing required dependencies: {', '.join(missing)}")
914
+
915
+ # Setup llama.cpp
916
+ self.setup_llama_cpp()
917
+
918
+ # Determine if we need temporary directories (only for uploads)
919
+ needs_temp = bool(self.username and self.token)
920
+
921
+ if needs_temp:
922
+ # Use temporary directories for upload case
923
+ with tempfile.TemporaryDirectory() as outdir:
924
+ with tempfile.TemporaryDirectory() as tmpdir:
925
+ self._convert_with_dirs(tmpdir, outdir)
926
+ else:
927
+ # Use current directory for local output
928
+ outdir = os.getcwd()
929
+ tmpdir = os.path.join(outdir, "temp_download")
930
+ os.makedirs(tmpdir, exist_ok=True)
931
+ try:
932
+ self._convert_with_dirs(tmpdir, outdir)
933
+ finally:
934
+ # Clean up temporary download directory
935
+ import shutil
936
+ shutil.rmtree(tmpdir, ignore_errors=True)
937
+
938
+ # Display success message
939
+ console.print(Panel.fit(
940
+ "[bold green]✓[/] Conversion completed successfully!\n\n"
941
+ f"[cyan]Output files can be found in: {self.workspace / self.model_name}[/]",
942
+ title="Success",
943
+ border_style="green"
944
+ ))
945
+
946
+ except Exception as e:
947
+ console.print(Panel.fit(
948
+ f"[bold red]✗[/] {str(e)}",
949
+ title="Error",
950
+ border_style="red"
951
+ ))
952
+ raise
953
+
954
+ def _convert_with_dirs(self, tmpdir: str, outdir: str) -> None:
955
+ """Helper method to perform conversion with given directories."""
956
+ fp16 = str(Path(outdir)/f"{self.model_name}.fp16.gguf")
957
+
958
+ # Download model
959
+ local_dir = Path(tmpdir)/self.model_name
960
+ console.print("[bold green]Downloading model...")
961
+ api = HfApi(token=self.token)
962
+ api.snapshot_download(
963
+ repo_id=self.model_id,
964
+ local_dir=local_dir,
965
+ local_dir_use_symlinks=False
966
+ )
967
+
968
+ # Convert to fp16
969
+ console.print("[bold green]Converting to fp16...")
970
+
971
+ # Find the conversion script
972
+ conversion_scripts = [
973
+ "llama.cpp/convert_hf_to_gguf.py",
974
+ "llama.cpp/convert-hf-to-gguf.py",
975
+ "llama.cpp/convert.py"
976
+ ]
977
+
978
+ conversion_script = None
979
+ for script in conversion_scripts:
980
+ if os.path.isfile(script):
981
+ conversion_script = script
982
+ break
983
+
984
+ if not conversion_script:
985
+ raise ConversionError("Could not find HuggingFace to GGUF conversion script")
986
+
987
+ # Use the appropriate Python executable
988
+ python_cmd = "python" if platform.system() == "Windows" else "python3"
989
+
990
+ convert_cmd = [
991
+ python_cmd, conversion_script,
992
+ str(local_dir),
993
+ "--outtype", "f16",
994
+ "--outfile", fp16
995
+ ]
996
+
997
+ console.print(f"[cyan]Conversion command: {' '.join(convert_cmd)}")
998
+
999
+ try:
1000
+ result = subprocess.run(convert_cmd, capture_output=True, text=True)
1001
+
1002
+ if result.returncode != 0:
1003
+ raise ConversionError(f"Error converting to fp16: {result.stderr}")
1004
+ except FileNotFoundError as e:
1005
+ raise ConversionError(f"Could not execute conversion script: {e}")
1006
+
1007
+ if not os.path.isfile(fp16):
1008
+ raise ConversionError(f"Conversion completed but output file not found: {fp16}")
1009
+
1010
+ console.print("[green]Model converted to fp16 successfully!")
1011
+
1012
+ # If fp16_only is True, we're done after fp16 conversion
1013
+ if self.fp16_only:
1014
+ quantized_files = [f"{self.model_name}.fp16.gguf"]
1015
+ if self.username and self.token:
1016
+ repo_id = f"{self.username}/{self.model_name}-GGUF"
1017
+
1018
+ # Step 1: Create repository
1019
+ self.create_repository(repo_id)
1020
+
1021
+ # Step 2: Upload README first
1022
+ readme_content = self.generate_readme(quantized_files)
1023
+ self.upload_readme(readme_content, repo_id)
1024
+
1025
+ # Step 3: Upload model GGUF file
1026
+ file_name = f"{self.model_name}.fp16.gguf"
1027
+ console.print(f"[bold green]Uploading model file: {file_name}")
1028
+ try:
1029
+ api.upload_file(
1030
+ path_or_fileobj=fp16,
1031
+ path_in_repo=file_name,
1032
+ repo_id=repo_id
1033
+ )
1034
+ console.print(f"[green]✓ Successfully uploaded: {file_name}")
1035
+ except Exception as e:
1036
+ console.print(f"[red]✗ Failed to upload {file_name}: {e}")
1037
+ raise ConversionError(f"Error uploading model file: {e}")
1038
+ return
1039
+
1040
+ # Generate importance matrix if needed
1041
+ imatrix_path: Optional[str] = None
1042
+ if self.use_imatrix:
1043
+ train_data_path = self.train_data_file if self.train_data_file else "llama.cpp/groups_merged.txt"
1044
+ imatrix_path = str(Path(outdir)/"imatrix.dat")
1045
+ self.generate_importance_matrix(fp16, train_data_path, imatrix_path)
1046
+
1047
+ # Quantize model
1048
+ console.print("[bold green]Quantizing model...")
1049
+ quantized_files: List[str] = []
1050
+ quantize_binary = self.get_binary_path("llama-quantize")
1051
+
1052
+ if not os.path.isfile(quantize_binary):
1053
+ raise ConversionError(f"llama-quantize binary not found at: {quantize_binary}")
1054
+
1055
+ for method in self.quantization_methods:
1056
+ quantized_name = f"{self.model_name.lower()}-{method.lower()}"
1057
+ if self.use_imatrix:
1058
+ quantized_name += "-imat"
1059
+ quantized_path = str(Path(outdir)/f"{quantized_name}.gguf")
1060
+
1061
+ console.print(f"[cyan]Quantizing with method: {method}")
1062
+
1063
+ if self.use_imatrix and imatrix_path:
1064
+ quantize_cmd: List[str] = [
1065
+ quantize_binary,
1066
+ "--imatrix", str(imatrix_path),
1067
+ fp16, quantized_path, method
1068
+ ]
1069
+ else:
1070
+ quantize_cmd = [
1071
+ quantize_binary,
1072
+ fp16, quantized_path, method
1073
+ ]
1074
+
1075
+ console.print(f"[cyan]Quantization command: {' '.join(quantize_cmd)}")
1076
+
1077
+ try:
1078
+ result = subprocess.run(quantize_cmd, capture_output=True, text=True)
1079
+ if result.returncode != 0:
1080
+ raise ConversionError(f"Error quantizing with {method}: {result.stderr}")
1081
+ except FileNotFoundError:
1082
+ raise ConversionError(f"Could not execute llama-quantize binary: {quantize_binary}")
1083
+
1084
+ if not os.path.isfile(quantized_path):
1085
+ raise ConversionError(f"Quantization completed but output file not found: {quantized_path}")
1086
+
1087
+ quantized_files.append(f"{quantized_name}.gguf")
1088
+ console.print(f"[green]Successfully quantized with {method}: {quantized_name}.gguf")
1089
+
1090
+ # Upload to Hugging Face if credentials provided
1091
+ if self.username and self.token:
1092
+ repo_id = f"{self.username}/{self.model_name}-GGUF"
1093
+
1094
+ # Step 1: Create repository
1095
+ console.print(f"[bold blue]Step 1: Creating repository {repo_id}")
1096
+ self.create_repository(repo_id)
1097
+
1098
+ # Step 2: Generate and upload README first
1099
+ console.print("[bold blue]Step 2: Uploading README.md")
1100
+ readme_content = self.generate_readme(quantized_files)
1101
+ self.upload_readme(readme_content, repo_id)
1102
+
1103
+ # Step 3: Upload model GGUF files
1104
+ console.print("[bold blue]Step 3: Uploading model files")
1105
+ if self.split_model:
1106
+                 split_files = self.split_model_file(quantized_path, outdir)
1107
+ self.upload_split_files(split_files, outdir, repo_id)
1108
+ else:
1109
+ # Upload single quantized file
1110
+ file_name = f"{self.model_name.lower()}-{self.quantization_methods[0].lower()}.gguf"
1111
+ console.print(f"[bold green]Uploading quantized model: {file_name}")
1112
+ try:
1113
+ api.upload_file(
1114
+ path_or_fileobj=quantized_path,
1115
+ path_in_repo=file_name,
1116
+ repo_id=repo_id
1117
+ )
1118
+ console.print(f"[green]✓ Successfully uploaded: {file_name}")
1119
+ except Exception as e:
1120
+ console.print(f"[red]✗ Failed to upload {file_name}: {e}")
1121
+ raise ConversionError(f"Error uploading quantized model: {e}")
1122
+
1123
+ # Step 4: Upload imatrix if generated (optional)
1124
+ if imatrix_path:
1125
+ console.print("[bold blue]Step 4: Uploading importance matrix")
1126
+ console.print("[bold green]Uploading importance matrix: imatrix.dat")
1127
+ try:
1128
+ api.upload_file(
1129
+ path_or_fileobj=imatrix_path,
1130
+ path_in_repo="imatrix.dat",
1131
+ repo_id=repo_id
1132
+ )
1133
+ console.print("[green]✓ Successfully uploaded: imatrix.dat")
1134
+ except Exception as e:
1135
+ console.print(f"[yellow]Warning: Failed to upload imatrix.dat: {e}")
1136
+
1137
+ # Final success message
1138
+ console.print(f"[bold green]🎉 All files uploaded successfully to {repo_id}!")
1139
+ console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
1140
+
1141
+ # Initialize CLI with HAI vibes
1142
+ app = CLI(
1143
+ name="gguf",
1144
+ help="Convert HuggingFace models to GGUF format with style! 🔥",
1145
+ version="1.0.0"
1146
+ )
1147
+
1148
+ @app.command(name="convert")
1149
+ @option("-m", "--model-id", help="The HuggingFace model ID (e.g., 'OEvortex/HelpingAI-Lite-1.5T')", required=True)
1150
+ @option("-u", "--username", help="Your HuggingFace username for uploads", default=None)
1151
+ @option("-t", "--token", help="Your HuggingFace API token for uploads", default=None)
1152
+ @option("-q", "--quantization", help="Comma-separated quantization methods", default="q4_k_m")
1153
+ @option("-i", "--use-imatrix", help="Use importance matrix for quantization", is_flag=True)
1154
+ @option("--train-data", help="Training data file for imatrix quantization", default=None)
1155
+ @option("-s", "--split-model", help="Split the model into smaller chunks", is_flag=True)
1156
+ @option("--split-max-tensors", help="Maximum number of tensors per file when splitting", default=256)
1157
+ @option("--split-max-size", help="Maximum file size when splitting (e.g., '256M', '5G')", default=None)
1158
+ def convert_command(
1159
+ model_id: str,
1160
+ username: Optional[str] = None,
1161
+ token: Optional[str] = None,
1162
+ quantization: str = "q4_k_m",
1163
+ use_imatrix: bool = False,
1164
+ train_data: Optional[str] = None,
1165
+ split_model: bool = False,
1166
+ split_max_tensors: int = 256,
1167
+ split_max_size: Optional[str] = None
1168
+ ) -> None:
1169
+ """
1170
+ Convert and quantize HuggingFace models to GGUF format! 🚀
1171
+
1172
+ Args:
1173
+ model_id (str): Your model's HF ID (like 'OEvortex/HelpingAI-Lite-1.5T') 🎯
1174
+ username (str, optional): Your HF username for uploads 👤
1175
+ token (str, optional): Your HF API token 🔑
1176
+         quantization (str): Quantization methods (default: q4_k_m) 🎮
1177
+ use_imatrix (bool): Use importance matrix for quantization 🔍
1178
+ train_data (str, optional): Training data file for imatrix quantization 📚
1179
+ split_model (bool): Split the model into smaller chunks 🔪
1180
+ split_max_tensors (int): Max tensors per file when splitting (default: 256) 📊
1181
+ split_max_size (str, optional): Max file size when splitting (e.g., '256M', '5G') 📏
1182
+
1183
+ Example:
1184
+ >>> python -m webscout.Extra.gguf convert \\
1185
+ ... -m "OEvortex/HelpingAI-Lite-1.5T" \\
1186
+ ... -q "q4_k_m,q5_k_m"
1187
+ """
1188
+ try:
1189
+ converter = ModelConverter(
1190
+ model_id=model_id,
1191
+ username=username,
1192
+ token=token,
1193
+ quantization_methods=quantization,
1194
+ use_imatrix=use_imatrix,
1195
+ train_data_file=train_data,
1196
+ split_model=split_model,
1197
+ split_max_tensors=split_max_tensors,
1198
+ split_max_size=split_max_size
1199
+ )
1200
+ converter.convert()
1201
+ except (ConversionError, ValueError) as e:
1202
+ console.print(f"[red]Error: {str(e)}")
1203
+ sys.exit(1)
1204
+ except Exception as e:
1205
+ console.print(f"[red]Unexpected error: {str(e)}")
1206
+ sys.exit(1)
1207
+
1208
+ def main() -> None:
1209
+ """Fire up the GGUF converter! 🚀"""
1210
+ app.run()
1211
+
1212
+ if __name__ == "__main__":
1213
+ main()