webscout 8.3.7__py3-none-any.whl → 2025.10.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (273)
  1. webscout/AIauto.py +250 -250
  2. webscout/AIbase.py +379 -379
  3. webscout/AIutel.py +60 -60
  4. webscout/Bard.py +1012 -1012
  5. webscout/Bing_search.py +417 -417
  6. webscout/DWEBS.py +529 -529
  7. webscout/Extra/Act.md +309 -309
  8. webscout/Extra/GitToolkit/__init__.py +10 -10
  9. webscout/Extra/GitToolkit/gitapi/README.md +110 -110
  10. webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
  11. webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
  12. webscout/Extra/GitToolkit/gitapi/user.py +96 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
  14. webscout/Extra/YTToolkit/README.md +375 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +956 -956
  16. webscout/Extra/YTToolkit/__init__.py +2 -2
  17. webscout/Extra/YTToolkit/transcriber.py +475 -475
  18. webscout/Extra/YTToolkit/ytapi/README.md +44 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
  20. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  21. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  22. webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
  23. webscout/Extra/YTToolkit/ytapi/https.py +88 -88
  24. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  25. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  26. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  27. webscout/Extra/YTToolkit/ytapi/query.py +39 -39
  28. webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
  29. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  30. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  31. webscout/Extra/autocoder/__init__.py +9 -9
  32. webscout/Extra/autocoder/autocoder.py +1105 -1105
  33. webscout/Extra/autocoder/autocoder_utiles.py +332 -332
  34. webscout/Extra/gguf.md +429 -429
  35. webscout/Extra/gguf.py +1213 -1213
  36. webscout/Extra/tempmail/README.md +487 -487
  37. webscout/Extra/tempmail/__init__.py +27 -27
  38. webscout/Extra/tempmail/async_utils.py +140 -140
  39. webscout/Extra/tempmail/base.py +160 -160
  40. webscout/Extra/tempmail/cli.py +186 -186
  41. webscout/Extra/tempmail/emailnator.py +84 -84
  42. webscout/Extra/tempmail/mail_tm.py +360 -360
  43. webscout/Extra/tempmail/temp_mail_io.py +291 -291
  44. webscout/Extra/weather.md +281 -281
  45. webscout/Extra/weather.py +193 -193
  46. webscout/Litlogger/README.md +10 -10
  47. webscout/Litlogger/__init__.py +15 -15
  48. webscout/Litlogger/formats.py +13 -13
  49. webscout/Litlogger/handlers.py +121 -121
  50. webscout/Litlogger/levels.py +13 -13
  51. webscout/Litlogger/logger.py +134 -134
  52. webscout/Provider/AISEARCH/Perplexity.py +332 -332
  53. webscout/Provider/AISEARCH/README.md +279 -279
  54. webscout/Provider/AISEARCH/__init__.py +16 -1
  55. webscout/Provider/AISEARCH/felo_search.py +206 -206
  56. webscout/Provider/AISEARCH/genspark_search.py +323 -323
  57. webscout/Provider/AISEARCH/hika_search.py +185 -185
  58. webscout/Provider/AISEARCH/iask_search.py +410 -410
  59. webscout/Provider/AISEARCH/monica_search.py +219 -219
  60. webscout/Provider/AISEARCH/scira_search.py +316 -316
  61. webscout/Provider/AISEARCH/stellar_search.py +177 -177
  62. webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
  63. webscout/Provider/Aitopia.py +314 -314
  64. webscout/Provider/Apriel.py +306 -0
  65. webscout/Provider/ChatGPTClone.py +236 -236
  66. webscout/Provider/ChatSandbox.py +343 -343
  67. webscout/Provider/Cloudflare.py +324 -324
  68. webscout/Provider/Cohere.py +208 -208
  69. webscout/Provider/Deepinfra.py +370 -366
  70. webscout/Provider/ExaAI.py +260 -260
  71. webscout/Provider/ExaChat.py +308 -308
  72. webscout/Provider/Flowith.py +221 -221
  73. webscout/Provider/GMI.py +293 -0
  74. webscout/Provider/Gemini.py +164 -164
  75. webscout/Provider/GeminiProxy.py +167 -167
  76. webscout/Provider/GithubChat.py +371 -372
  77. webscout/Provider/Groq.py +800 -800
  78. webscout/Provider/HeckAI.py +383 -383
  79. webscout/Provider/Jadve.py +282 -282
  80. webscout/Provider/K2Think.py +307 -307
  81. webscout/Provider/Koboldai.py +205 -205
  82. webscout/Provider/LambdaChat.py +423 -423
  83. webscout/Provider/Nemotron.py +244 -244
  84. webscout/Provider/Netwrck.py +248 -248
  85. webscout/Provider/OLLAMA.py +395 -395
  86. webscout/Provider/OPENAI/Cloudflare.py +393 -393
  87. webscout/Provider/OPENAI/FalconH1.py +451 -451
  88. webscout/Provider/OPENAI/FreeGemini.py +296 -296
  89. webscout/Provider/OPENAI/K2Think.py +431 -431
  90. webscout/Provider/OPENAI/NEMOTRON.py +240 -240
  91. webscout/Provider/OPENAI/PI.py +427 -427
  92. webscout/Provider/OPENAI/README.md +959 -959
  93. webscout/Provider/OPENAI/TogetherAI.py +345 -345
  94. webscout/Provider/OPENAI/TwoAI.py +465 -465
  95. webscout/Provider/OPENAI/__init__.py +33 -18
  96. webscout/Provider/OPENAI/base.py +248 -248
  97. webscout/Provider/OPENAI/chatglm.py +528 -0
  98. webscout/Provider/OPENAI/chatgpt.py +592 -592
  99. webscout/Provider/OPENAI/chatgptclone.py +521 -521
  100. webscout/Provider/OPENAI/chatsandbox.py +202 -202
  101. webscout/Provider/OPENAI/deepinfra.py +318 -314
  102. webscout/Provider/OPENAI/e2b.py +1665 -1665
  103. webscout/Provider/OPENAI/exaai.py +420 -420
  104. webscout/Provider/OPENAI/exachat.py +452 -452
  105. webscout/Provider/OPENAI/friendli.py +232 -232
  106. webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
  107. webscout/Provider/OPENAI/groq.py +364 -364
  108. webscout/Provider/OPENAI/heckai.py +314 -314
  109. webscout/Provider/OPENAI/llmchatco.py +337 -337
  110. webscout/Provider/OPENAI/netwrck.py +355 -355
  111. webscout/Provider/OPENAI/oivscode.py +290 -290
  112. webscout/Provider/OPENAI/opkfc.py +518 -518
  113. webscout/Provider/OPENAI/pydantic_imports.py +1 -1
  114. webscout/Provider/OPENAI/scirachat.py +535 -535
  115. webscout/Provider/OPENAI/sonus.py +308 -308
  116. webscout/Provider/OPENAI/standardinput.py +442 -442
  117. webscout/Provider/OPENAI/textpollinations.py +340 -340
  118. webscout/Provider/OPENAI/toolbaz.py +419 -416
  119. webscout/Provider/OPENAI/typefully.py +362 -362
  120. webscout/Provider/OPENAI/utils.py +295 -295
  121. webscout/Provider/OPENAI/venice.py +436 -436
  122. webscout/Provider/OPENAI/wisecat.py +387 -387
  123. webscout/Provider/OPENAI/writecream.py +166 -166
  124. webscout/Provider/OPENAI/x0gpt.py +378 -378
  125. webscout/Provider/OPENAI/yep.py +389 -389
  126. webscout/Provider/OpenGPT.py +230 -230
  127. webscout/Provider/Openai.py +243 -243
  128. webscout/Provider/PI.py +405 -405
  129. webscout/Provider/Perplexitylabs.py +430 -430
  130. webscout/Provider/QwenLM.py +272 -272
  131. webscout/Provider/STT/__init__.py +16 -1
  132. webscout/Provider/Sambanova.py +257 -257
  133. webscout/Provider/StandardInput.py +309 -309
  134. webscout/Provider/TTI/README.md +82 -82
  135. webscout/Provider/TTI/__init__.py +33 -18
  136. webscout/Provider/TTI/aiarta.py +413 -413
  137. webscout/Provider/TTI/base.py +136 -136
  138. webscout/Provider/TTI/bing.py +243 -243
  139. webscout/Provider/TTI/gpt1image.py +149 -149
  140. webscout/Provider/TTI/imagen.py +196 -196
  141. webscout/Provider/TTI/infip.py +211 -211
  142. webscout/Provider/TTI/magicstudio.py +232 -232
  143. webscout/Provider/TTI/monochat.py +219 -219
  144. webscout/Provider/TTI/piclumen.py +214 -214
  145. webscout/Provider/TTI/pixelmuse.py +232 -232
  146. webscout/Provider/TTI/pollinations.py +232 -232
  147. webscout/Provider/TTI/together.py +288 -288
  148. webscout/Provider/TTI/utils.py +12 -12
  149. webscout/Provider/TTI/venice.py +367 -367
  150. webscout/Provider/TTS/README.md +192 -192
  151. webscout/Provider/TTS/__init__.py +33 -18
  152. webscout/Provider/TTS/parler.py +110 -110
  153. webscout/Provider/TTS/streamElements.py +333 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TeachAnything.py +237 -237
  156. webscout/Provider/TextPollinationsAI.py +310 -310
  157. webscout/Provider/TogetherAI.py +356 -356
  158. webscout/Provider/TwoAI.py +312 -312
  159. webscout/Provider/TypliAI.py +311 -311
  160. webscout/Provider/UNFINISHED/ChatHub.py +208 -208
  161. webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
  162. webscout/Provider/UNFINISHED/GizAI.py +294 -294
  163. webscout/Provider/UNFINISHED/Marcus.py +198 -198
  164. webscout/Provider/UNFINISHED/Qodo.py +477 -477
  165. webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
  166. webscout/Provider/UNFINISHED/XenAI.py +324 -324
  167. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  168. webscout/Provider/UNFINISHED/liner.py +334 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
  170. webscout/Provider/UNFINISHED/puterjs.py +634 -634
  171. webscout/Provider/UNFINISHED/samurai.py +223 -223
  172. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  173. webscout/Provider/Venice.py +250 -250
  174. webscout/Provider/VercelAI.py +256 -256
  175. webscout/Provider/WiseCat.py +231 -231
  176. webscout/Provider/WrDoChat.py +366 -366
  177. webscout/Provider/__init__.py +33 -18
  178. webscout/Provider/ai4chat.py +174 -174
  179. webscout/Provider/akashgpt.py +331 -331
  180. webscout/Provider/cerebras.py +446 -446
  181. webscout/Provider/chatglm.py +394 -301
  182. webscout/Provider/cleeai.py +211 -211
  183. webscout/Provider/elmo.py +282 -282
  184. webscout/Provider/geminiapi.py +208 -208
  185. webscout/Provider/granite.py +261 -261
  186. webscout/Provider/hermes.py +263 -263
  187. webscout/Provider/julius.py +223 -223
  188. webscout/Provider/learnfastai.py +309 -309
  189. webscout/Provider/llama3mitril.py +214 -214
  190. webscout/Provider/llmchat.py +243 -243
  191. webscout/Provider/llmchatco.py +290 -290
  192. webscout/Provider/meta.py +801 -801
  193. webscout/Provider/oivscode.py +309 -309
  194. webscout/Provider/scira_chat.py +383 -383
  195. webscout/Provider/searchchat.py +292 -292
  196. webscout/Provider/sonus.py +258 -258
  197. webscout/Provider/toolbaz.py +370 -367
  198. webscout/Provider/turboseek.py +273 -273
  199. webscout/Provider/typefully.py +207 -207
  200. webscout/Provider/yep.py +372 -372
  201. webscout/__init__.py +30 -31
  202. webscout/__main__.py +5 -5
  203. webscout/auth/api_key_manager.py +189 -189
  204. webscout/auth/config.py +175 -175
  205. webscout/auth/models.py +185 -185
  206. webscout/auth/routes.py +664 -664
  207. webscout/auth/simple_logger.py +236 -236
  208. webscout/cli.py +523 -523
  209. webscout/conversation.py +438 -438
  210. webscout/exceptions.py +361 -361
  211. webscout/litagent/Readme.md +298 -298
  212. webscout/litagent/__init__.py +28 -28
  213. webscout/litagent/agent.py +581 -581
  214. webscout/litagent/constants.py +59 -59
  215. webscout/litprinter/__init__.py +58 -58
  216. webscout/models.py +181 -181
  217. webscout/optimizers.py +419 -419
  218. webscout/prompt_manager.py +288 -288
  219. webscout/sanitize.py +1078 -1078
  220. webscout/scout/README.md +401 -401
  221. webscout/scout/__init__.py +8 -8
  222. webscout/scout/core/__init__.py +6 -6
  223. webscout/scout/core/crawler.py +297 -297
  224. webscout/scout/core/scout.py +706 -706
  225. webscout/scout/core/search_result.py +95 -95
  226. webscout/scout/core/text_analyzer.py +62 -62
  227. webscout/scout/core/text_utils.py +277 -277
  228. webscout/scout/core/web_analyzer.py +51 -51
  229. webscout/scout/element.py +599 -599
  230. webscout/scout/parsers/__init__.py +69 -69
  231. webscout/scout/parsers/html5lib_parser.py +172 -172
  232. webscout/scout/parsers/html_parser.py +236 -236
  233. webscout/scout/parsers/lxml_parser.py +178 -178
  234. webscout/scout/utils.py +37 -37
  235. webscout/swiftcli/Readme.md +323 -323
  236. webscout/swiftcli/__init__.py +95 -95
  237. webscout/swiftcli/core/__init__.py +7 -7
  238. webscout/swiftcli/core/cli.py +308 -308
  239. webscout/swiftcli/core/context.py +104 -104
  240. webscout/swiftcli/core/group.py +241 -241
  241. webscout/swiftcli/decorators/__init__.py +28 -28
  242. webscout/swiftcli/decorators/command.py +221 -221
  243. webscout/swiftcli/decorators/options.py +220 -220
  244. webscout/swiftcli/decorators/output.py +302 -302
  245. webscout/swiftcli/exceptions.py +21 -21
  246. webscout/swiftcli/plugins/__init__.py +9 -9
  247. webscout/swiftcli/plugins/base.py +135 -135
  248. webscout/swiftcli/plugins/manager.py +269 -269
  249. webscout/swiftcli/utils/__init__.py +59 -59
  250. webscout/swiftcli/utils/formatting.py +252 -252
  251. webscout/swiftcli/utils/parsing.py +267 -267
  252. webscout/update_checker.py +117 -117
  253. webscout/version.py +1 -1
  254. webscout/webscout_search.py +1183 -1183
  255. webscout/webscout_search_async.py +649 -649
  256. webscout/yep_search.py +346 -346
  257. webscout/zeroart/README.md +89 -89
  258. webscout/zeroart/__init__.py +134 -134
  259. webscout/zeroart/base.py +66 -66
  260. webscout/zeroart/effects.py +100 -100
  261. webscout/zeroart/fonts.py +1238 -1238
  262. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/METADATA +937 -937
  263. webscout-2025.10.11.dist-info/RECORD +300 -0
  264. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  265. webscout/Provider/OPENAI/Qwen3.py +0 -303
  266. webscout/Provider/OPENAI/qodo.py +0 -630
  267. webscout/Provider/OPENAI/xenai.py +0 -514
  268. webscout/tempid.py +0 -134
  269. webscout-8.3.7.dist-info/RECORD +0 -301
  270. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/WHEEL +0 -0
  271. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/entry_points.txt +0 -0
  272. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/licenses/LICENSE.md +0 -0
  273. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/top_level.txt +0 -0
webscout/Extra/gguf.py CHANGED
@@ -1,1213 +1,1213 @@
1
- """
2
- Convert Hugging Face models to GGUF format with advanced features.
3
-
4
- 🔥 2025 UPDATE: ALL CMAKE BUILD ERRORS FIXED! 🔥
5
-
6
- This converter has been completely updated for 2025 compatibility with the latest llama.cpp:
7
-
8
- CRITICAL FIXES:
9
- - ✅ Updated all deprecated LLAMA_* flags to GGML_* (LLAMA_CUBLAS → GGML_CUDA)
10
- - ✅ Fixed CURL dependency error by adding -DLLAMA_CURL=OFF
11
- - ✅ Disabled optional dependencies (LLAMA_LLGUIDANCE=OFF)
12
- - ✅ Cross-platform hardware detection (Windows, macOS, Linux)
13
- - ✅ Robust CMake configuration with multiple fallback strategies
14
- - ✅ Priority-based acceleration selection (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS)
15
- - ✅ Enhanced error handling and recovery mechanisms
16
- - ✅ Platform-specific optimizations and build generators
17
- - ✅ Automatic build directory cleanup to avoid cached CMake conflicts
18
-
19
- SUPPORTED ACCELERATION:
20
- - CUDA: GGML_CUDA=ON (NVIDIA GPUs)
21
- - Metal: GGML_METAL=ON (Apple Silicon/macOS)
22
- - Vulkan: GGML_VULKAN=ON (Cross-platform GPU)
23
- - OpenCL: GGML_OPENCL=ON (Cross-platform GPU)
24
- - ROCm: GGML_HIPBLAS=ON (AMD GPUs)
25
- - BLAS: GGML_BLAS=ON (Optimized CPU libraries)
26
- - Accelerate: GGML_ACCELERATE=ON (Apple Accelerate framework)
27
-
28
- For detailed documentation, see: webscout/Extra/gguf.md
29
-
30
- USAGE EXAMPLES:
31
- >>> python -m webscout.Extra.gguf convert -m "OEvortex/HelpingAI-Lite-1.5T" -q "q4_k_m,q5_k_m"
32
- >>> # With upload options:
33
- >>> python -m webscout.Extra.gguf convert -m "your-model" -u "username" -t "token" -q "q4_k_m"
34
- >>> # With imatrix quantization:
35
- >>> python -m webscout.Extra.gguf convert -m "your-model" -i -q "iq4_nl" --train-data "train_data.txt"
36
- >>> # With model splitting:
37
- >>> python -m webscout.Extra.gguf convert -m "your-model" -s --split-max-tensors 256
38
- """
39
-
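
For reference, a minimal sketch of the deprecated-to-current flag mapping the docstring above describes. The pairs are taken from the flags named in this docstring and in `setup_llama_cpp()` below; this is not an exhaustive or authoritative list of llama.cpp build options.

```python
# Sketch only: deprecated LLAMA_* CMake flags and the GGML_* flags this module
# passes instead (see the cmake_args construction in setup_llama_cpp below).
DEPRECATED_TO_CURRENT = {
    "LLAMA_CUBLAS": "GGML_CUDA",      # NVIDIA CUDA
    "LLAMA_METAL": "GGML_METAL",      # Apple Metal
    "LLAMA_CLBLAST": "GGML_OPENCL",   # OpenCL
    "LLAMA_HIPBLAS": "GGML_HIPBLAS",  # AMD ROCm/HIP
}
```
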
40
- import subprocess
41
- import os
42
- import sys
43
- import signal
44
- import tempfile
45
- import platform
46
- from pathlib import Path
47
- from typing import Optional, Dict, List, Any, Union, Literal, TypedDict, Set
48
-
49
- from huggingface_hub import HfApi
50
- from webscout.zeroart import figlet_format
51
- from rich.console import Console
52
- from rich.panel import Panel
53
- from rich.table import Table
54
- from webscout.swiftcli import CLI, option
55
-
56
- console = Console()
57
-
58
- class ConversionError(Exception):
59
- """Custom exception for when things don't go as planned! ⚠️"""
60
- pass
61
-
62
- class QuantizationMethod(TypedDict):
63
- """Type definition for quantization method descriptions."""
64
- description: str
65
-
66
- class ModelConverter:
67
- """Handles the conversion of Hugging Face models to GGUF format."""
68
-
69
- VALID_METHODS: Dict[str, str] = {
70
- "fp16": "16-bit floating point - maximum accuracy, largest size",
71
- "q2_k": "2-bit quantization (smallest size, lowest accuracy)",
72
- "q3_k_l": "3-bit quantization (large) - balanced for size/accuracy",
73
- "q3_k_m": "3-bit quantization (medium) - good balance for most use cases",
74
- "q3_k_s": "3-bit quantization (small) - optimized for speed",
75
- "q4_0": "4-bit quantization (version 0) - standard 4-bit compression",
76
- "q4_1": "4-bit quantization (version 1) - improved accuracy over q4_0",
77
- "q4_k_m": "4-bit quantization (medium) - balanced for most models",
78
- "q4_k_s": "4-bit quantization (small) - optimized for speed",
79
- "q5_0": "5-bit quantization (version 0) - high accuracy, larger size",
80
- "q5_1": "5-bit quantization (version 1) - improved accuracy over q5_0",
81
- "q5_k_m": "5-bit quantization (medium) - best balance for quality/size",
82
- "q5_k_s": "5-bit quantization (small) - optimized for speed",
83
- "q6_k": "6-bit quantization - highest accuracy, largest size",
84
- "q8_0": "8-bit quantization - maximum accuracy, largest size"
85
- }
86
-
87
- VALID_IMATRIX_METHODS: Dict[str, str] = {
88
- "iq3_m": "3-bit imatrix quantization (medium) - balanced importance-based",
89
- "iq3_xxs": "3-bit imatrix quantization (extra extra small) - maximum compression",
90
- "q4_k_m": "4-bit imatrix quantization (medium) - balanced importance-based",
91
- "q4_k_s": "4-bit imatrix quantization (small) - optimized for speed",
92
- "iq4_nl": "4-bit imatrix quantization (non-linear) - best accuracy for 4-bit",
93
- "iq4_xs": "4-bit imatrix quantization (extra small) - maximum compression",
94
- "q5_k_m": "5-bit imatrix quantization (medium) - balanced importance-based",
95
- "q5_k_s": "5-bit imatrix quantization (small) - optimized for speed"
96
- }
97
-
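
A minimal sketch of how a comma-separated `-q` value is checked against the two tables above; it mirrors the logic in `validate_inputs()` below and assumes the class-level tables as defined here.

```python
# Sketch: mirrors the validation performed in validate_inputs() below.
requested = "q4_k_m,q5_k_m".split(",")
use_imatrix = False  # the -i flag switches validation to VALID_IMATRIX_METHODS
table = ModelConverter.VALID_IMATRIX_METHODS if use_imatrix else ModelConverter.VALID_METHODS
invalid = [m for m in requested if m not in table]
if invalid:
    raise ValueError(f"Invalid quantization methods: {', '.join(invalid)}")
```
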
98
- def __init__(
99
- self,
100
- model_id: str,
101
- username: Optional[str] = None,
102
- token: Optional[str] = None,
103
- quantization_methods: str = "q4_k_m",
104
- use_imatrix: bool = False,
105
- train_data_file: Optional[str] = None,
106
- split_model: bool = False,
107
- split_max_tensors: int = 256,
108
- split_max_size: Optional[str] = None
109
- ) -> None:
110
- self.model_id = model_id
111
- self.username = username
112
- self.token = token
113
- self.quantization_methods = quantization_methods.split(',')
114
- self.model_name = model_id.split('/')[-1]
115
- self.workspace = Path(os.getcwd())
116
- self.use_imatrix = use_imatrix
117
- self.train_data_file = train_data_file
118
- self.split_model = split_model
119
- self.split_max_tensors = split_max_tensors
120
- self.split_max_size = split_max_size
121
- self.fp16_only = "fp16" in self.quantization_methods and len(self.quantization_methods) == 1
122
-
123
- def validate_inputs(self) -> None:
124
- """Validates all input parameters."""
125
- if not '/' in self.model_id:
126
- raise ValueError("Invalid model ID format. Expected format: 'organization/model-name'")
127
-
128
- if self.use_imatrix:
129
- invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_IMATRIX_METHODS]
130
- if invalid_methods:
131
- raise ValueError(
132
- f"Invalid imatrix quantization methods: {', '.join(invalid_methods)}.\n"
133
- f"Valid methods are: {', '.join(self.VALID_IMATRIX_METHODS.keys())}"
134
- )
135
- if not self.train_data_file and not os.path.exists("llama.cpp/groups_merged.txt"):
136
- raise ValueError("Training data file is required for imatrix quantization")
137
- else:
138
- invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_METHODS]
139
- if invalid_methods:
140
- raise ValueError(
141
- f"Invalid quantization methods: {', '.join(invalid_methods)}.\n"
142
- f"Valid methods are: {', '.join(self.VALID_METHODS.keys())}"
143
- )
144
-
145
- if bool(self.username) != bool(self.token):
146
- raise ValueError("Both username and token must be provided for upload, or neither.")
147
-
148
- if self.split_model and self.split_max_size:
149
- try:
150
- size = int(self.split_max_size[:-1])
151
- unit = self.split_max_size[-1].upper()
152
- if unit not in ['M', 'G']:
153
- raise ValueError("Split max size must end with M or G")
154
- except ValueError:
155
- raise ValueError("Invalid split max size format. Use format like '256M' or '5G'")
156
-
157
- @staticmethod
158
- def check_dependencies() -> Dict[str, bool]:
159
- """Check if all required dependencies are installed with cross-platform support."""
160
- system = platform.system()
161
-
162
- dependencies: Dict[str, str] = {
163
- 'git': 'Git version control',
164
- 'cmake': 'CMake build system',
165
- 'ninja': 'Ninja build system (optional)'
166
- }
167
-
168
- # Add platform-specific dependencies
169
- if system != 'Windows':
170
- dependencies['pip3'] = 'Python package installer'
171
- else:
172
- dependencies['pip'] = 'Python package installer'
173
-
174
- status: Dict[str, bool] = {}
175
-
176
- for cmd, desc in dependencies.items():
177
- try:
178
- if system == 'Windows':
179
- # Use 'where' command on Windows
180
- result = subprocess.run(['where', cmd], capture_output=True, text=True)
181
- status[cmd] = result.returncode == 0
182
- else:
183
- # Use 'which' command on Unix-like systems
184
- result = subprocess.run(['which', cmd], capture_output=True, text=True)
185
- status[cmd] = result.returncode == 0
186
- except (FileNotFoundError, subprocess.SubprocessError):
187
- status[cmd] = False
188
-
189
- # Special check for Python - try different variants
190
- python_variants = ['python3', 'python', 'py'] if system != 'Windows' else ['python', 'py', 'python3']
191
- status['python'] = False
192
- for variant in python_variants:
193
- try:
194
- if system == 'Windows':
195
- result = subprocess.run(['where', variant], capture_output=True)
196
- else:
197
- result = subprocess.run(['which', variant], capture_output=True)
198
- if result.returncode == 0:
199
- status['python'] = True
200
- break
201
- except:
202
- continue
203
-
204
- # Check for C++ compiler
205
- cpp_compilers = ['cl', 'g++', 'clang++'] if system == 'Windows' else ['g++', 'clang++']
206
- status['cpp_compiler'] = False
207
- for compiler in cpp_compilers:
208
- try:
209
- if system == 'Windows':
210
- result = subprocess.run(['where', compiler], capture_output=True)
211
- else:
212
- result = subprocess.run(['which', compiler], capture_output=True)
213
- if result.returncode == 0:
214
- status['cpp_compiler'] = True
215
- break
216
- except:
217
- continue
218
-
219
- dependencies['python'] = 'Python interpreter'
220
- dependencies['cpp_compiler'] = 'C++ compiler (g++, clang++, or MSVC)'
221
-
222
- return status
223
-
224
- def detect_hardware(self) -> Dict[str, bool]:
225
- """Detect available hardware acceleration with improved cross-platform support."""
226
- hardware: Dict[str, bool] = {
227
- 'cuda': False,
228
- 'metal': False,
229
- 'opencl': False,
230
- 'vulkan': False,
231
- 'rocm': False,
232
- 'blas': False,
233
- 'accelerate': False
234
- }
235
-
236
- system = platform.system()
237
-
238
- # Check CUDA
239
- try:
240
- # Check for nvcc compiler
241
- if subprocess.run(['nvcc', '--version'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
242
- hardware['cuda'] = True
243
- # Also check for nvidia-smi as fallback
244
- elif subprocess.run(['nvidia-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
245
- hardware['cuda'] = True
246
- except (FileNotFoundError, subprocess.SubprocessError):
247
- # Check for CUDA libraries on Windows
248
- if system == 'Windows':
249
- cuda_paths = [
250
- os.environ.get('CUDA_PATH'),
251
- 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA',
252
- 'C:\\Program Files (x86)\\NVIDIA GPU Computing Toolkit\\CUDA'
253
- ]
254
- for cuda_path in cuda_paths:
255
- if cuda_path and os.path.exists(cuda_path):
256
- hardware['cuda'] = True
257
- break
258
-
259
- # Check Metal (macOS)
260
- if system == 'Darwin':
261
- try:
262
- # Check for Xcode command line tools
263
- if subprocess.run(['xcrun', '--show-sdk-path'], capture_output=True).returncode == 0:
264
- hardware['metal'] = True
265
- # Check for Metal framework
266
- if os.path.exists('/System/Library/Frameworks/Metal.framework'):
267
- hardware['metal'] = True
268
- # macOS also supports Accelerate framework
269
- if os.path.exists('/System/Library/Frameworks/Accelerate.framework'):
270
- hardware['accelerate'] = True
271
- except (FileNotFoundError, subprocess.SubprocessError):
272
- pass
273
-
274
- # Check OpenCL
275
- try:
276
- if system == 'Windows':
277
- # Check for OpenCL on Windows
278
- opencl_paths = [
279
- 'C:\\Windows\\System32\\OpenCL.dll',
280
- 'C:\\Windows\\SysWOW64\\OpenCL.dll'
281
- ]
282
- if any(os.path.exists(path) for path in opencl_paths):
283
- hardware['opencl'] = True
284
- else:
285
- if subprocess.run(['clinfo'], capture_output=True).returncode == 0:
286
- hardware['opencl'] = True
287
- except (FileNotFoundError, subprocess.SubprocessError):
288
- pass
289
-
290
- # Check Vulkan
291
- try:
292
- if system == 'Windows':
293
- # Check for Vulkan on Windows
294
- vulkan_paths = [
295
- 'C:\\Windows\\System32\\vulkan-1.dll',
296
- 'C:\\Windows\\SysWOW64\\vulkan-1.dll'
297
- ]
298
- if any(os.path.exists(path) for path in vulkan_paths):
299
- hardware['vulkan'] = True
300
- else:
301
- if subprocess.run(['vulkaninfo'], capture_output=True).returncode == 0:
302
- hardware['vulkan'] = True
303
- except (FileNotFoundError, subprocess.SubprocessError):
304
- pass
305
-
306
- # Check ROCm (AMD)
307
- try:
308
- if subprocess.run(['rocm-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
309
- hardware['rocm'] = True
310
- elif system == 'Linux':
311
- # Check for ROCm installation
312
- rocm_paths = ['/opt/rocm', '/usr/lib/x86_64-linux-gnu/librocm-smi64.so']
313
- if any(os.path.exists(path) for path in rocm_paths):
314
- hardware['rocm'] = True
315
- except (FileNotFoundError, subprocess.SubprocessError):
316
- pass
317
-
318
- # Check for BLAS libraries
319
- try:
320
- import numpy as np
321
- # Check if numpy is linked with optimized BLAS
322
- config = np.__config__.show()
323
- if any(lib in str(config).lower() for lib in ['openblas', 'mkl', 'atlas', 'blis']):
324
- hardware['blas'] = True
325
- except (ImportError, AttributeError):
326
- # Fallback: check for common BLAS libraries
327
- if system == 'Linux':
328
- blas_libs = ['/usr/lib/x86_64-linux-gnu/libopenblas.so', '/usr/lib/x86_64-linux-gnu/libblas.so']
329
- if any(os.path.exists(lib) for lib in blas_libs):
330
- hardware['blas'] = True
331
- elif system == 'Windows':
332
- # Check for Intel MKL or OpenBLAS on Windows
333
- mkl_paths = ['C:\\Program Files (x86)\\Intel\\oneAPI\\mkl']
334
- if any(os.path.exists(path) for path in mkl_paths):
335
- hardware['blas'] = True
336
-
337
- return hardware
338
-
339
- def setup_llama_cpp(self) -> None:
340
- """Sets up and builds llama.cpp repository with robust error handling."""
341
- llama_path = self.workspace / "llama.cpp"
342
- system = platform.system()
343
-
344
- with console.status("[bold green]Setting up llama.cpp...") as status:
345
- # Clone llama.cpp if not exists
346
- if not llama_path.exists():
347
- try:
348
- subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp'], check=True)
349
- except subprocess.CalledProcessError as e:
350
- raise ConversionError(f"Failed to clone llama.cpp repository: {e}")
351
-
352
- original_cwd = os.getcwd()
353
- try:
354
- os.chdir(llama_path)
355
-
356
- # Update to latest version
357
- try:
358
- subprocess.run(['git', 'pull'], capture_output=True, check=False)
359
- except subprocess.CalledProcessError:
360
- console.print("[yellow]Warning: Could not update llama.cpp repository")
361
-
362
- # Clean any existing build directory to avoid cached CMake variables
363
- build_dir = Path('build')
364
- if build_dir.exists():
365
- console.print("[yellow]Cleaning existing build directory to avoid CMake cache conflicts...")
366
- import shutil
367
- try:
368
- shutil.rmtree(build_dir)
369
- console.print("[green]Build directory cleaned successfully")
370
- except Exception as e:
371
- console.print(f"[yellow]Warning: Could not clean build directory: {e}")
372
-
373
- # Check if we're in a Nix environment
374
- is_nix = system == "Linux" and os.path.exists("/nix/store")
375
-
376
- if is_nix:
377
- console.print("[yellow]Detected Nix environment. Using system Python packages...")
378
- # In Nix, we need to use the system Python packages
379
- try:
380
- # Try to import required packages to check if they're available
381
- import torch # type: ignore
382
- import numpy # type: ignore
383
- import sentencepiece # type: ignore
384
- import transformers # type: ignore
385
- console.print("[green]Required Python packages are already installed.")
386
- except ImportError as e:
387
- console.print("[red]Missing required Python packages in Nix environment.")
388
- console.print("[yellow]Please install them using:")
389
- console.print("nix-shell -p python3Packages.torch python3Packages.numpy python3Packages.sentencepiece python3Packages.transformers")
390
- raise ConversionError("Missing required Python packages in Nix environment")
391
- else:
392
- # In non-Nix environments, install requirements if they exist
393
- if os.path.exists('requirements.txt'):
394
- try:
395
- pip_cmd = 'pip' if system == 'Windows' else 'pip3'
396
- subprocess.run([pip_cmd, 'install', '-r', 'requirements.txt'], check=True)
397
- except subprocess.CalledProcessError as e:
398
- if "externally-managed-environment" in str(e):
399
- console.print("[yellow]Detected externally managed Python environment.")
400
- console.print("[yellow]Please install the required packages manually:")
401
- console.print("pip install torch numpy sentencepiece transformers")
402
- raise ConversionError("Failed to install requirements in externally managed environment")
403
- else:
404
- console.print(f"[yellow]Warning: Failed to install requirements: {e}")
405
-
406
- # Detect available hardware
407
- hardware = self.detect_hardware()
408
- console.print("[bold green]Detected hardware acceleration:")
409
- for hw, available in hardware.items():
410
- console.print(f" {'✓' if available else '✗'} {hw.upper()}")
411
-
412
- # Clear any environment variables that might cause conflicts
413
- env_vars_to_clear = [
414
- 'LLAMA_CUBLAS', 'LLAMA_CLBLAST', 'LLAMA_HIPBLAS',
415
- 'LLAMA_METAL', 'LLAMA_ACCELERATE', 'LLAMA_OPENBLAS'
416
- ]
417
- for var in env_vars_to_clear:
418
- if var in os.environ:
419
- console.print(f"[yellow]Clearing conflicting environment variable: {var}")
420
- del os.environ[var]
421
-
422
- # Configure CMake build with robust options
423
- cmake_args: List[str] = ['cmake', '-B', 'build']
424
-
425
- # Add basic CMake options with correct LLAMA prefixes
426
- cmake_args.extend([
427
- '-DCMAKE_BUILD_TYPE=Release',
428
- '-DLLAMA_BUILD_TESTS=OFF',
429
- '-DLLAMA_BUILD_EXAMPLES=ON',
430
- '-DLLAMA_BUILD_SERVER=OFF',
431
- # Disable optional dependencies that might cause issues
432
- '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
433
- '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
434
- # Explicitly disable deprecated flags to avoid conflicts
435
- '-DLLAMA_CUBLAS=OFF',
436
- '-DLLAMA_CLBLAST=OFF',
437
- '-DLLAMA_HIPBLAS=OFF'
438
- ])
439
-
440
- # Add hardware acceleration options with latest 2025 llama.cpp GGML flags
441
- # Use priority order: CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate
442
- acceleration_enabled = False
443
-
444
- if hardware['cuda']:
445
- # Latest 2025 GGML CUDA flags (LLAMA_CUBLAS is deprecated)
446
- cmake_args.extend(['-DGGML_CUDA=ON'])
447
- console.print("[green]Enabling CUDA acceleration (GGML_CUDA=ON)")
448
- acceleration_enabled = True
449
- elif hardware['metal']:
450
- # Latest 2025 GGML Metal flags for macOS
451
- cmake_args.extend(['-DGGML_METAL=ON'])
452
- console.print("[green]Enabling Metal acceleration (GGML_METAL=ON)")
453
- acceleration_enabled = True
454
- elif hardware['vulkan']:
455
- # Latest 2025 GGML Vulkan flags
456
- cmake_args.extend(['-DGGML_VULKAN=ON'])
457
- console.print("[green]Enabling Vulkan acceleration (GGML_VULKAN=ON)")
458
- acceleration_enabled = True
459
- elif hardware['opencl']:
460
- # Latest 2025 GGML OpenCL flags (LLAMA_CLBLAST is deprecated)
461
- cmake_args.extend(['-DGGML_OPENCL=ON'])
462
- console.print("[green]Enabling OpenCL acceleration (GGML_OPENCL=ON)")
463
- acceleration_enabled = True
464
- elif hardware['rocm']:
465
- # Latest 2025 GGML ROCm/HIP flags
466
- cmake_args.extend(['-DGGML_HIPBLAS=ON'])
467
- console.print("[green]Enabling ROCm acceleration (GGML_HIPBLAS=ON)")
468
- acceleration_enabled = True
469
- elif hardware['blas']:
470
- # Latest 2025 GGML BLAS flags with vendor detection
471
- cmake_args.extend(['-DGGML_BLAS=ON'])
472
- # Try to detect BLAS vendor for optimal performance
473
- if system == 'Darwin':
474
- cmake_args.extend(['-DGGML_BLAS_VENDOR=Accelerate'])
475
- elif 'mkl' in str(hardware).lower():
476
- cmake_args.extend(['-DGGML_BLAS_VENDOR=Intel10_64lp'])
477
- else:
478
- cmake_args.extend(['-DGGML_BLAS_VENDOR=OpenBLAS'])
479
- console.print("[green]Enabling BLAS acceleration (GGML_BLAS=ON)")
480
- acceleration_enabled = True
481
- elif hardware['accelerate']:
482
- # Latest 2025 GGML Accelerate framework flags for macOS
483
- cmake_args.extend(['-DGGML_ACCELERATE=ON'])
484
- console.print("[green]Enabling Accelerate framework (GGML_ACCELERATE=ON)")
485
- acceleration_enabled = True
486
-
487
- if not acceleration_enabled:
488
- console.print("[yellow]No hardware acceleration available, using CPU only")
489
- console.print("[cyan]Note: All deprecated LLAMA_* flags have been updated to GGML_* for 2025 compatibility")
490
-
491
- # Platform-specific optimizations
492
- if system == 'Windows':
493
- # Use Visual Studio generator on Windows if available
494
- try:
495
- vs_result = subprocess.run(['where', 'msbuild'], capture_output=True)
496
- if vs_result.returncode == 0:
497
- cmake_args.extend(['-G', 'Visual Studio 17 2022'])
498
- else:
499
- cmake_args.extend(['-G', 'MinGW Makefiles'])
500
- except:
501
- cmake_args.extend(['-G', 'MinGW Makefiles'])
502
- else:
503
- # Use Ninja if available on Unix systems
504
- try:
505
- ninja_cmd = 'ninja' if system != 'Windows' else 'ninja.exe'
506
- if subprocess.run(['which', ninja_cmd], capture_output=True).returncode == 0:
507
- cmake_args.extend(['-G', 'Ninja'])
508
- except:
509
- pass # Fall back to default generator
510
-
511
- # Configure the build with error handling and multiple fallback strategies
512
- status.update("[bold green]Configuring CMake build...")
513
- config_success = False
514
-
515
- # Try main configuration
516
- try:
517
- console.print(f"[cyan]CMake command: {' '.join(cmake_args)}")
518
- result = subprocess.run(cmake_args, capture_output=True, text=True)
519
- if result.returncode == 0:
520
- config_success = True
521
- console.print("[green]CMake configuration successful!")
522
- else:
523
- console.print(f"[red]CMake configuration failed: {result.stderr}")
524
- except subprocess.CalledProcessError as e:
525
- console.print(f"[red]CMake execution failed: {e}")
526
-
527
- # Try fallback without hardware acceleration if main config failed
528
- if not config_success:
529
- console.print("[yellow]Attempting fallback configuration without hardware acceleration...")
530
- console.print("[cyan]Using 2025-compatible LLAMA build flags...")
531
- fallback_args = [
532
- 'cmake', '-B', 'build',
533
- '-DCMAKE_BUILD_TYPE=Release',
534
- '-DLLAMA_BUILD_TESTS=OFF',
535
- '-DLLAMA_BUILD_EXAMPLES=ON',
536
- '-DLLAMA_BUILD_SERVER=OFF',
537
- # Disable optional dependencies that might cause issues
538
- '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
539
- '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
540
- # Explicitly disable all deprecated flags
541
- '-DLLAMA_CUBLAS=OFF',
542
- '-DLLAMA_CLBLAST=OFF',
543
- '-DLLAMA_HIPBLAS=OFF',
544
- '-DLLAMA_METAL=OFF',
545
- # Enable CPU optimizations
546
- '-DGGML_NATIVE=OFF', # Disable native optimizations for compatibility
547
- '-DGGML_AVX=ON', # Enable AVX if available
548
- '-DGGML_AVX2=ON', # Enable AVX2 if available
549
- '-DGGML_FMA=ON' # Enable FMA if available
550
- ]
551
- try:
552
- console.print(f"[cyan]Fallback CMake command: {' '.join(fallback_args)}")
553
- result = subprocess.run(fallback_args, capture_output=True, text=True)
554
- if result.returncode == 0:
555
- config_success = True
556
- console.print("[green]Fallback CMake configuration successful!")
557
- else:
558
- console.print(f"[red]Fallback CMake configuration failed: {result.stderr}")
559
- except subprocess.CalledProcessError as e:
560
- console.print(f"[red]Fallback CMake execution failed: {e}")
561
-
562
- # Try minimal configuration as last resort
563
- if not config_success:
564
- console.print("[yellow]Attempting minimal configuration...")
565
- minimal_args = [
566
- 'cmake', '-B', 'build',
567
- # Disable optional dependencies that might cause issues
568
- '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
569
- '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
570
- '-DLLAMA_BUILD_SERVER=OFF', # Disable server (not needed for conversion)
571
- '-DLLAMA_BUILD_TESTS=OFF', # Disable tests (not needed for conversion)
572
- # Explicitly disable ALL deprecated flags to avoid conflicts
573
- '-DLLAMA_CUBLAS=OFF',
574
- '-DLLAMA_CLBLAST=OFF',
575
- '-DLLAMA_HIPBLAS=OFF',
576
- '-DLLAMA_METAL=OFF',
577
- '-DLLAMA_ACCELERATE=OFF'
578
- ]
579
- try:
580
- console.print(f"[cyan]Minimal CMake command: {' '.join(minimal_args)}")
581
- result = subprocess.run(minimal_args, capture_output=True, text=True)
582
- if result.returncode == 0:
583
- config_success = True
584
- console.print("[green]Minimal CMake configuration successful!")
585
- else:
586
- console.print(f"[red]Minimal CMake configuration failed: {result.stderr}")
587
- raise ConversionError(f"All CMake configuration attempts failed. Last error: {result.stderr}")
588
- except subprocess.CalledProcessError as e:
589
- raise ConversionError(f"All CMake configuration attempts failed: {e}")
590
-
591
- if not config_success:
592
- raise ConversionError("CMake configuration failed with all attempted strategies")
593
-
594
- # Build the project
595
- status.update("[bold green]Building llama.cpp...")
596
- build_cmd = ['cmake', '--build', 'build', '--config', 'Release']
597
-
598
- # Add parallel build option
599
- cpu_count = os.cpu_count() or 1
600
- if system == 'Windows':
601
- build_cmd.extend(['--parallel', str(cpu_count)])
602
- else:
603
- build_cmd.extend(['-j', str(cpu_count)])
604
-
605
- try:
606
- result = subprocess.run(build_cmd, capture_output=True, text=True)
607
- if result.returncode != 0:
608
- console.print(f"[red]Build failed: {result.stderr}")
609
- # Try single-threaded build as fallback
610
- console.print("[yellow]Attempting single-threaded build...")
611
- fallback_build = ['cmake', '--build', 'build', '--config', 'Release']
612
- result = subprocess.run(fallback_build, capture_output=True, text=True)
613
- if result.returncode != 0:
614
- raise ConversionError(f"Build failed: {result.stderr}")
615
- except subprocess.CalledProcessError as e:
616
- raise ConversionError(f"Build failed: {e}")
617
-
618
- console.print("[green]llama.cpp built successfully!")
619
-
620
- finally:
621
- os.chdir(original_cwd)
622
-
623
- def display_config(self) -> None:
624
- """Displays the current configuration in a formatted table."""
625
- table = Table(title="Configuration", show_header=True, header_style="bold magenta")
626
- table.add_column("Setting", style="cyan")
627
- table.add_column("Value", style="green")
628
-
629
- table.add_row("Model ID", self.model_id)
630
- table.add_row("Model Name", self.model_name)
631
- table.add_row("Username", self.username or "Not provided")
632
- table.add_row("Token", "****" if self.token else "Not provided")
633
- table.add_row("Quantization Methods", "\n".join(
634
- f"{method} ({self.VALID_METHODS[method]})"
635
- for method in self.quantization_methods
636
- ))
637
-
638
- console.print(Panel(table))
639
-
640
- def get_binary_path(self, binary_name: str) -> str:
641
- """Get the correct path to llama.cpp binaries based on platform."""
642
- system = platform.system()
643
-
644
- # Possible binary locations
645
- possible_paths = [
646
- f"./llama.cpp/build/bin/{binary_name}", # Standard build location
647
- f"./llama.cpp/build/{binary_name}", # Alternative build location
648
- f"./llama.cpp/{binary_name}", # Root directory
649
- f"./llama.cpp/build/Release/{binary_name}", # Windows Release build
650
- f"./llama.cpp/build/Debug/{binary_name}", # Windows Debug build
651
- ]
652
-
653
- # Add .exe extension on Windows
654
- if system == 'Windows':
655
- possible_paths = [path + '.exe' for path in possible_paths]
656
-
657
- # Find the first existing binary
658
- for path in possible_paths:
659
- if os.path.isfile(path):
660
- return path
661
-
662
- # If not found, return the most likely path and let it fail with a clear error
663
- default_path = f"./llama.cpp/build/bin/{binary_name}"
664
- if system == 'Windows':
665
- default_path += '.exe'
666
- return default_path
667
-
668
- def generate_importance_matrix(self, model_path: str, train_data_path: str, output_path: str) -> None:
669
- """Generates importance matrix for quantization with improved error handling."""
670
- imatrix_binary = self.get_binary_path("llama-imatrix")
671
-
672
- imatrix_command: List[str] = [
673
- imatrix_binary,
674
- "-m", model_path,
675
- "-f", train_data_path,
676
- "-ngl", "99",
677
- "--output-frequency", "10",
678
- "-o", output_path,
679
- ]
680
-
681
- if not os.path.isfile(model_path):
682
- raise ConversionError(f"Model file not found: {model_path}")
683
-
684
- if not os.path.isfile(train_data_path):
685
- raise ConversionError(f"Training data file not found: {train_data_path}")
686
-
687
- if not os.path.isfile(imatrix_binary):
688
- raise ConversionError(f"llama-imatrix binary not found at: {imatrix_binary}")
689
-
690
- console.print("[bold green]Generating importance matrix...")
691
- console.print(f"[cyan]Command: {' '.join(imatrix_command)}")
692
-
693
- try:
694
- process = subprocess.Popen(
695
- imatrix_command,
696
- shell=False,
697
- stdout=subprocess.PIPE,
698
- stderr=subprocess.PIPE,
699
- text=True
700
- )
701
-
702
- try:
703
- stdout, stderr = process.communicate(timeout=300) # 5 minute timeout
704
- if process.returncode != 0:
705
- raise ConversionError(f"Failed to generate importance matrix: {stderr}")
706
- except subprocess.TimeoutExpired:
707
- console.print("[yellow]Imatrix computation timed out. Sending SIGINT...")
708
- process.send_signal(signal.SIGINT)
709
- try:
710
- stdout, stderr = process.communicate(timeout=10)
711
- except subprocess.TimeoutExpired:
712
- console.print("[red]Imatrix process still running. Force terminating...")
713
- process.kill()
714
- stdout, stderr = process.communicate()
715
- raise ConversionError(f"Imatrix generation timed out: {stderr}")
716
- except FileNotFoundError:
717
- raise ConversionError(f"Could not execute llama-imatrix binary: {imatrix_binary}")
718
-
719
- console.print("[green]Importance matrix generation completed.")
720
-
721
- def split_model(self, model_path: str, outdir: str) -> List[str]:
722
- """Splits the model into smaller chunks with improved error handling."""
723
- split_binary = self.get_binary_path("llama-gguf-split")
724
-
725
- split_cmd: List[str] = [
726
- split_binary,
727
- "--split",
728
- ]
729
-
730
- if self.split_max_size:
731
- split_cmd.extend(["--split-max-size", self.split_max_size])
732
- else:
733
- split_cmd.extend(["--split-max-tensors", str(self.split_max_tensors)])
734
-
735
- model_path_prefix = '.'.join(model_path.split('.')[:-1])
736
- split_cmd.extend([model_path, model_path_prefix])
737
-
738
- if not os.path.isfile(model_path):
739
- raise ConversionError(f"Model file not found: {model_path}")
740
-
741
- if not os.path.isfile(split_binary):
742
- raise ConversionError(f"llama-gguf-split binary not found at: {split_binary}")
743
-
744
- console.print(f"[bold green]Splitting model with command: {' '.join(split_cmd)}")
745
-
746
- try:
747
- result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
748
-
749
- if result.returncode != 0:
750
- raise ConversionError(f"Error splitting model: {result.stderr}")
751
- except FileNotFoundError:
752
- raise ConversionError(f"Could not execute llama-gguf-split binary: {split_binary}")
753
-
754
- console.print("[green]Model split successfully!")
755
-
756
- # Get list of split files
757
- model_file_prefix = os.path.basename(model_path_prefix)
758
- try:
759
- split_files = [f for f in os.listdir(outdir)
760
- if f.startswith(model_file_prefix) and f.endswith(".gguf")]
761
- except OSError as e:
762
- raise ConversionError(f"Error reading output directory: {e}")
763
-
764
- if not split_files:
765
- raise ConversionError(f"No split files found in {outdir} with prefix {model_file_prefix}")
766
-
767
- console.print(f"[green]Found {len(split_files)} split files: {', '.join(split_files)}")
768
- return split_files
769
-
770
- def upload_split_files(self, split_files: List[str], outdir: str, repo_id: str) -> None:
771
- """Uploads split model files to Hugging Face."""
772
- api = HfApi(token=self.token)
773
-
774
- for file in split_files:
775
- file_path = os.path.join(outdir, file)
776
- console.print(f"[bold green]Uploading file: {file}")
777
- try:
778
- api.upload_file(
779
- path_or_fileobj=file_path,
780
- path_in_repo=file,
781
- repo_id=repo_id,
782
- )
783
- console.print(f"[green]✓ Successfully uploaded: {file}")
784
- except Exception as e:
785
- console.print(f"[red]✗ Failed to upload {file}: {e}")
786
- raise ConversionError(f"Error uploading file {file}: {e}")
787
-
788
- def generate_readme(self, quantized_files: List[str]) -> str:
789
- """Generate a README.md file for the Hugging Face Hub."""
790
- readme = f"""# {self.model_name} GGUF
791
-
792
- This repository contains GGUF quantized versions of [{self.model_id}](https://huggingface.co/{self.model_id}).
793
-
794
- ## About
795
-
796
- This model was converted using [Webscout](https://github.com/Webscout/webscout).
797
-
798
- ## Quantization Methods
799
-
800
- The following quantization methods were used:
801
-
802
- """
803
- # Add quantization method descriptions
804
- for method in self.quantization_methods:
805
- if self.use_imatrix:
806
- readme += f"- `{method}`: {self.VALID_IMATRIX_METHODS[method]}\n"
807
- else:
808
- readme += f"- `{method}`: {self.VALID_METHODS[method]}\n"
809
-
810
- readme += """
811
- ## Available Files
812
-
813
- The following quantized files are available:
814
-
815
- """
816
- # Add file information
817
- for file in quantized_files:
818
- readme += f"- `{file}`\n"
819
-
820
- if self.use_imatrix:
821
- readme += """
822
- ## Importance Matrix
823
-
824
- This model was quantized using importance matrix quantization. The `imatrix.dat` file contains the importance matrix used for quantization.
825
-
826
- """
827
-
828
- readme += """
829
- ## Usage
830
-
831
- These GGUF files can be used with [llama.cpp](https://github.com/ggerganov/llama.cpp) and compatible tools.
832
-
833
- Example usage:
834
- ```bash
835
- ./main -m model.gguf -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
836
- ```
837
-
838
- ## Conversion Process
839
-
840
- This model was converted using the following command:
841
- ```bash
842
- python -m webscout.Extra.gguf convert \\
843
- -m "{self.model_id}" \\
844
- -q "{','.join(self.quantization_methods)}" \\
845
- {f'-i' if self.use_imatrix else ''} \\
846
- {f'--train-data "{self.train_data_file}"' if self.train_data_file else ''} \\
847
- {f'-s' if self.split_model else ''} \\
848
- {f'--split-max-tensors {self.split_max_tensors}' if self.split_model else ''} \\
849
- {f'--split-max-size {self.split_max_size}' if self.split_max_size else ''}
850
- ```
851
-
852
- ## License
853
-
854
- This repository is licensed under the same terms as the original model.
855
- """
856
- return readme
857
-
858
- def create_repository(self, repo_id: str) -> None:
859
- """Create a new repository on Hugging Face Hub if it doesn't exist."""
860
- api = HfApi(token=self.token)
861
- try:
862
- # Check if repository already exists
863
- try:
864
- api.repo_info(repo_id=repo_id)
865
- console.print(f"[green]✓ Repository {repo_id} already exists")
866
- return
867
- except Exception:
868
- # Repository doesn't exist, create it
869
- pass
870
-
871
- console.print(f"[bold green]Creating new repository: {repo_id}")
872
- api.create_repo(
873
- repo_id=repo_id,
874
- exist_ok=True,
875
- private=False,
876
- repo_type="model"
877
- )
878
- console.print(f"[green]✓ Successfully created repository: {repo_id}")
879
- console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
880
- except Exception as e:
881
- console.print(f"[red]✗ Failed to create repository: {e}")
882
- raise ConversionError(f"Error creating repository {repo_id}: {e}")
883
-
884
- def upload_readme(self, readme_content: str, repo_id: str) -> None:
885
- """Upload README.md to Hugging Face Hub."""
886
- api = HfApi(token=self.token)
887
- console.print("[bold green]Uploading README.md with model documentation")
888
- try:
889
- api.upload_file(
890
- path_or_fileobj=readme_content.encode(),
891
- path_in_repo="README.md",
892
- repo_id=repo_id,
893
- )
894
- console.print("[green]✓ Successfully uploaded: README.md")
895
- except Exception as e:
896
- console.print(f"[red]✗ Failed to upload README.md: {e}")
897
- raise ConversionError(f"Error uploading README.md: {e}")
898
-
899
- def convert(self) -> None:
900
- """Performs the model conversion process."""
901
- try:
902
- # Display banner and configuration
903
- console.print(f"[bold green]{figlet_format('GGUF Converter')}")
904
- self.display_config()
905
-
906
- # Validate inputs
907
- self.validate_inputs()
908
-
909
- # Check dependencies
910
- deps = self.check_dependencies()
911
- missing = [name for name, installed in deps.items() if not installed and name != 'ninja']
912
- if missing:
913
- raise ConversionError(f"Missing required dependencies: {', '.join(missing)}")
914
-
915
- # Setup llama.cpp
916
- self.setup_llama_cpp()
917
-
918
- # Determine if we need temporary directories (only for uploads)
919
- needs_temp = bool(self.username and self.token)
920
-
921
- if needs_temp:
922
- # Use temporary directories for upload case
923
- with tempfile.TemporaryDirectory() as outdir:
924
- with tempfile.TemporaryDirectory() as tmpdir:
925
- self._convert_with_dirs(tmpdir, outdir)
926
- else:
927
- # Use current directory for local output
928
- outdir = os.getcwd()
929
- tmpdir = os.path.join(outdir, "temp_download")
930
- os.makedirs(tmpdir, exist_ok=True)
931
- try:
932
- self._convert_with_dirs(tmpdir, outdir)
933
- finally:
934
- # Clean up temporary download directory
935
- import shutil
936
- shutil.rmtree(tmpdir, ignore_errors=True)
937
-
938
- # Display success message
939
- console.print(Panel.fit(
940
- "[bold green]✓[/] Conversion completed successfully!\n\n"
941
- f"[cyan]Output files can be found in: {self.workspace / self.model_name}[/]",
942
- title="Success",
943
- border_style="green"
944
- ))
945
-
946
- except Exception as e:
947
- console.print(Panel.fit(
948
- f"[bold red]✗[/] {str(e)}",
949
- title="Error",
950
- border_style="red"
951
- ))
952
- raise
953
-
954
- def _convert_with_dirs(self, tmpdir: str, outdir: str) -> None:
955
- """Helper method to perform conversion with given directories."""
956
- fp16 = str(Path(outdir)/f"{self.model_name}.fp16.gguf")
957
-
958
- # Download model
959
- local_dir = Path(tmpdir)/self.model_name
960
- console.print("[bold green]Downloading model...")
961
- api = HfApi(token=self.token)
962
- api.snapshot_download(
963
- repo_id=self.model_id,
964
- local_dir=local_dir,
965
- local_dir_use_symlinks=False
966
- )
967
-
968
- # Convert to fp16
969
- console.print("[bold green]Converting to fp16...")
970
-
971
- # Find the conversion script
972
- conversion_scripts = [
973
- "llama.cpp/convert_hf_to_gguf.py",
974
- "llama.cpp/convert-hf-to-gguf.py",
975
- "llama.cpp/convert.py"
976
- ]
977
-
978
- conversion_script = None
979
- for script in conversion_scripts:
980
- if os.path.isfile(script):
981
- conversion_script = script
982
- break
983
-
984
- if not conversion_script:
985
- raise ConversionError("Could not find HuggingFace to GGUF conversion script")
986
-
987
- # Use the appropriate Python executable
988
- python_cmd = "python" if platform.system() == "Windows" else "python3"
989
-
990
- convert_cmd = [
991
- python_cmd, conversion_script,
992
- str(local_dir),
993
- "--outtype", "f16",
994
- "--outfile", fp16
995
- ]
996
-
997
- console.print(f"[cyan]Conversion command: {' '.join(convert_cmd)}")
998
-
999
- try:
1000
- result = subprocess.run(convert_cmd, capture_output=True, text=True)
1001
-
1002
- if result.returncode != 0:
1003
- raise ConversionError(f"Error converting to fp16: {result.stderr}")
1004
- except FileNotFoundError as e:
1005
- raise ConversionError(f"Could not execute conversion script: {e}")
1006
-
1007
- if not os.path.isfile(fp16):
1008
- raise ConversionError(f"Conversion completed but output file not found: {fp16}")
1009
-
1010
- console.print("[green]Model converted to fp16 successfully!")
1011
-
1012
- # If fp16_only is True, we're done after fp16 conversion
1013
- if self.fp16_only:
1014
- quantized_files = [f"{self.model_name}.fp16.gguf"]
1015
- if self.username and self.token:
1016
- repo_id = f"{self.username}/{self.model_name}-GGUF"
1017
-
1018
- # Step 1: Create repository
1019
- self.create_repository(repo_id)
1020
-
1021
- # Step 2: Upload README first
1022
- readme_content = self.generate_readme(quantized_files)
1023
- self.upload_readme(readme_content, repo_id)
1024
-
1025
- # Step 3: Upload model GGUF file
1026
- file_name = f"{self.model_name}.fp16.gguf"
1027
- console.print(f"[bold green]Uploading model file: {file_name}")
1028
- try:
1029
- api.upload_file(
1030
- path_or_fileobj=fp16,
1031
- path_in_repo=file_name,
1032
- repo_id=repo_id
1033
- )
1034
- console.print(f"[green]✓ Successfully uploaded: {file_name}")
1035
- except Exception as e:
1036
- console.print(f"[red]✗ Failed to upload {file_name}: {e}")
1037
- raise ConversionError(f"Error uploading model file: {e}")
1038
- return
1039
-
1040
- # Generate importance matrix if needed
1041
- imatrix_path: Optional[str] = None
1042
- if self.use_imatrix:
1043
- train_data_path = self.train_data_file if self.train_data_file else "llama.cpp/groups_merged.txt"
1044
- imatrix_path = str(Path(outdir)/"imatrix.dat")
1045
- self.generate_importance_matrix(fp16, train_data_path, imatrix_path)
1046
-
1047
- # Quantize model
1048
- console.print("[bold green]Quantizing model...")
1049
- quantized_files: List[str] = []
1050
- quantize_binary = self.get_binary_path("llama-quantize")
1051
-
1052
- if not os.path.isfile(quantize_binary):
1053
- raise ConversionError(f"llama-quantize binary not found at: {quantize_binary}")
1054
-
1055
- for method in self.quantization_methods:
1056
- quantized_name = f"{self.model_name.lower()}-{method.lower()}"
1057
- if self.use_imatrix:
1058
- quantized_name += "-imat"
1059
- quantized_path = str(Path(outdir)/f"{quantized_name}.gguf")
1060
-
1061
- console.print(f"[cyan]Quantizing with method: {method}")
1062
-
1063
- if self.use_imatrix and imatrix_path:
1064
- quantize_cmd: List[str] = [
1065
- quantize_binary,
1066
- "--imatrix", str(imatrix_path),
1067
- fp16, quantized_path, method
1068
- ]
1069
- else:
1070
- quantize_cmd = [
1071
- quantize_binary,
1072
- fp16, quantized_path, method
1073
- ]
1074
-
1075
- console.print(f"[cyan]Quantization command: {' '.join(quantize_cmd)}")
1076
-
1077
- try:
1078
- result = subprocess.run(quantize_cmd, capture_output=True, text=True)
1079
- if result.returncode != 0:
1080
- raise ConversionError(f"Error quantizing with {method}: {result.stderr}")
1081
- except FileNotFoundError:
1082
- raise ConversionError(f"Could not execute llama-quantize binary: {quantize_binary}")
1083
-
1084
- if not os.path.isfile(quantized_path):
1085
- raise ConversionError(f"Quantization completed but output file not found: {quantized_path}")
1086
-
1087
- quantized_files.append(f"{quantized_name}.gguf")
1088
- console.print(f"[green]Successfully quantized with {method}: {quantized_name}.gguf")
1089
-
1090
- # Upload to Hugging Face if credentials provided
1091
- if self.username and self.token:
1092
- repo_id = f"{self.username}/{self.model_name}-GGUF"
1093
-
1094
- # Step 1: Create repository
1095
- console.print(f"[bold blue]Step 1: Creating repository {repo_id}")
1096
- self.create_repository(repo_id)
1097
-
1098
- # Step 2: Generate and upload README first
1099
- console.print("[bold blue]Step 2: Uploading README.md")
1100
- readme_content = self.generate_readme(quantized_files)
1101
- self.upload_readme(readme_content, repo_id)
1102
-
1103
- # Step 3: Upload model GGUF files
1104
- console.print("[bold blue]Step 3: Uploading model files")
1105
- if self.split_model:
1106
- split_files = self.split_model(quantized_path, outdir)
1107
- self.upload_split_files(split_files, outdir, repo_id)
1108
- else:
1109
- # Upload single quantized file
1110
- file_name = f"{self.model_name.lower()}-{self.quantization_methods[0].lower()}.gguf"
1111
- console.print(f"[bold green]Uploading quantized model: {file_name}")
1112
- try:
1113
- api.upload_file(
1114
- path_or_fileobj=quantized_path,
1115
- path_in_repo=file_name,
1116
- repo_id=repo_id
1117
- )
1118
- console.print(f"[green]✓ Successfully uploaded: {file_name}")
1119
- except Exception as e:
1120
- console.print(f"[red]✗ Failed to upload {file_name}: {e}")
1121
- raise ConversionError(f"Error uploading quantized model: {e}")
1122
-
1123
- # Step 4: Upload imatrix if generated (optional)
1124
- if imatrix_path:
1125
- console.print("[bold blue]Step 4: Uploading importance matrix")
1126
- console.print("[bold green]Uploading importance matrix: imatrix.dat")
1127
- try:
1128
- api.upload_file(
1129
- path_or_fileobj=imatrix_path,
1130
- path_in_repo="imatrix.dat",
1131
- repo_id=repo_id
1132
- )
1133
- console.print("[green]✓ Successfully uploaded: imatrix.dat")
1134
- except Exception as e:
1135
- console.print(f"[yellow]Warning: Failed to upload imatrix.dat: {e}")
1136
-
1137
- # Final success message
1138
- console.print(f"[bold green]🎉 All files uploaded successfully to {repo_id}!")
1139
- console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
1140
-
1141
- # Initialize CLI with HAI vibes
1142
- app = CLI(
1143
- name="gguf",
1144
- help="Convert HuggingFace models to GGUF format with style! 🔥",
1145
- version="1.0.0"
1146
- )
1147
-
1148
- @app.command(name="convert")
1149
- @option("-m", "--model-id", help="The HuggingFace model ID (e.g., 'OEvortex/HelpingAI-Lite-1.5T')", required=True)
1150
- @option("-u", "--username", help="Your HuggingFace username for uploads", default=None)
1151
- @option("-t", "--token", help="Your HuggingFace API token for uploads", default=None)
1152
- @option("-q", "--quantization", help="Comma-separated quantization methods", default="q4_k_m")
1153
- @option("-i", "--use-imatrix", help="Use importance matrix for quantization", is_flag=True)
1154
- @option("--train-data", help="Training data file for imatrix quantization", default=None)
1155
- @option("-s", "--split-model", help="Split the model into smaller chunks", is_flag=True)
1156
- @option("--split-max-tensors", help="Maximum number of tensors per file when splitting", default=256)
1157
- @option("--split-max-size", help="Maximum file size when splitting (e.g., '256M', '5G')", default=None)
1158
- def convert_command(
1159
- model_id: str,
1160
- username: Optional[str] = None,
1161
- token: Optional[str] = None,
1162
- quantization: str = "q4_k_m",
1163
- use_imatrix: bool = False,
1164
- train_data: Optional[str] = None,
1165
- split_model: bool = False,
1166
- split_max_tensors: int = 256,
1167
- split_max_size: Optional[str] = None
1168
- ) -> None:
1169
- """
1170
- Convert and quantize HuggingFace models to GGUF format! 🚀
1171
-
1172
- Args:
1173
- model_id (str): Your model's HF ID (like 'OEvortex/HelpingAI-Lite-1.5T') 🎯
1174
- username (str, optional): Your HF username for uploads 👤
1175
- token (str, optional): Your HF API token 🔑
1176
- quantization (str): Quantization methods (default: q4_k_m,q5_k_m) 🎮
1177
- use_imatrix (bool): Use importance matrix for quantization 🔍
1178
- train_data (str, optional): Training data file for imatrix quantization 📚
1179
- split_model (bool): Split the model into smaller chunks 🔪
1180
- split_max_tensors (int): Max tensors per file when splitting (default: 256) 📊
1181
- split_max_size (str, optional): Max file size when splitting (e.g., '256M', '5G') 📏
1182
-
1183
- Example:
1184
- >>> python -m webscout.Extra.gguf convert \\
1185
- ... -m "OEvortex/HelpingAI-Lite-1.5T" \\
1186
- ... -q "q4_k_m,q5_k_m"
1187
- """
1188
- try:
1189
- converter = ModelConverter(
1190
- model_id=model_id,
1191
- username=username,
1192
- token=token,
1193
- quantization_methods=quantization,
1194
- use_imatrix=use_imatrix,
1195
- train_data_file=train_data,
1196
- split_model=split_model,
1197
- split_max_tensors=split_max_tensors,
1198
- split_max_size=split_max_size
1199
- )
1200
- converter.convert()
1201
- except (ConversionError, ValueError) as e:
1202
- console.print(f"[red]Error: {str(e)}")
1203
- sys.exit(1)
1204
- except Exception as e:
1205
- console.print(f"[red]Unexpected error: {str(e)}")
1206
- sys.exit(1)
1207
-
1208
- def main() -> None:
1209
- """Fire up the GGUF converter! 🚀"""
1210
- app.run()
1211
-
1212
- if __name__ == "__main__":
1213
- main()
1
+ """
2
+ Convert Hugging Face models to GGUF format with advanced features.
3
+
4
+ 🔥 2025 UPDATE: ALL CMAKE BUILD ERRORS FIXED! 🔥
5
+
6
+ This converter has been completely updated for 2025 compatibility with the latest llama.cpp:
7
+
8
+ CRITICAL FIXES:
9
+ - ✅ Updated all deprecated LLAMA_* flags to GGML_* (LLAMA_CUBLAS → GGML_CUDA)
10
+ - ✅ Fixed CURL dependency error by adding -DLLAMA_CURL=OFF
11
+ - ✅ Disabled optional dependencies (LLAMA_LLGUIDANCE=OFF)
12
+ - ✅ Cross-platform hardware detection (Windows, macOS, Linux)
13
+ - ✅ Robust CMake configuration with multiple fallback strategies
14
+ - ✅ Priority-based acceleration selection (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate)
15
+ - ✅ Enhanced error handling and recovery mechanisms
16
+ - ✅ Platform-specific optimizations and build generators
17
+ - ✅ Automatic build directory cleanup to avoid cached CMake conflicts
18
+
19
+ SUPPORTED ACCELERATION:
20
+ - CUDA: GGML_CUDA=ON (NVIDIA GPUs)
21
+ - Metal: GGML_METAL=ON (Apple Silicon/macOS)
22
+ - Vulkan: GGML_VULKAN=ON (Cross-platform GPU)
23
+ - OpenCL: GGML_OPENCL=ON (Cross-platform GPU)
24
+ - ROCm: GGML_HIPBLAS=ON (AMD GPUs)
25
+ - BLAS: GGML_BLAS=ON (Optimized CPU libraries)
26
+ - Accelerate: GGML_ACCELERATE=ON (Apple Accelerate framework)
27
+
28
+ For detailed documentation, see: webscout/Extra/gguf.md
29
+
30
+ USAGE EXAMPLES:
31
+ >>> python -m webscout.Extra.gguf convert -m "OEvortex/HelpingAI-Lite-1.5T" -q "q4_k_m,q5_k_m"
32
+ >>> # With upload options:
33
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -u "username" -t "token" -q "q4_k_m"
34
+ >>> # With imatrix quantization:
35
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -i -q "iq4_nl" --train-data "train_data.txt"
36
+ >>> # With model splitting:
37
+ >>> python -m webscout.Extra.gguf convert -m "your-model" -s --split-max-tensors 256
38
+ """
39
+
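The examples above drive the converter through the CLI. For completeness, a minimal programmatic sketch using the `ModelConverter` class defined later in this module; the model ID is the same illustrative one used in the docstring, and running it performs the download/convert/quantize flow locally (no upload):

```python
# Minimal programmatic sketch (no upload), using the ModelConverter class
# defined below in webscout/Extra/gguf.py. The model ID is illustrative.
from webscout.Extra.gguf import ModelConverter

converter = ModelConverter(
    model_id="OEvortex/HelpingAI-Lite-1.5T",
    quantization_methods="q4_k_m,q5_k_m",  # comma-separated, parsed in __init__
)
converter.convert()  # clones/builds llama.cpp, converts to fp16, then quantizes
```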
40
+ import subprocess
41
+ import os
42
+ import sys
43
+ import signal
44
+ import tempfile
45
+ import platform
46
+ from pathlib import Path
47
+ from typing import Optional, Dict, List, Any, Union, Literal, TypedDict, Set
48
+
49
+ from huggingface_hub import HfApi
50
+ from webscout.zeroart import figlet_format
51
+ from rich.console import Console
52
+ from rich.panel import Panel
53
+ from rich.table import Table
54
+ from webscout.swiftcli import CLI, option
55
+
56
+ console = Console()
57
+
58
+ class ConversionError(Exception):
59
+ """Custom exception for when things don't go as planned! ⚠️"""
60
+ pass
61
+
62
+ class QuantizationMethod(TypedDict):
63
+ """Type definition for quantization method descriptions."""
64
+ description: str
65
+
66
+ class ModelConverter:
67
+ """Handles the conversion of Hugging Face models to GGUF format."""
68
+
69
+ VALID_METHODS: Dict[str, str] = {
70
+ "fp16": "16-bit floating point - maximum accuracy, largest size",
71
+ "q2_k": "2-bit quantization (smallest size, lowest accuracy)",
72
+ "q3_k_l": "3-bit quantization (large) - balanced for size/accuracy",
73
+ "q3_k_m": "3-bit quantization (medium) - good balance for most use cases",
74
+ "q3_k_s": "3-bit quantization (small) - optimized for speed",
75
+ "q4_0": "4-bit quantization (version 0) - standard 4-bit compression",
76
+ "q4_1": "4-bit quantization (version 1) - improved accuracy over q4_0",
77
+ "q4_k_m": "4-bit quantization (medium) - balanced for most models",
78
+ "q4_k_s": "4-bit quantization (small) - optimized for speed",
79
+ "q5_0": "5-bit quantization (version 0) - high accuracy, larger size",
80
+ "q5_1": "5-bit quantization (version 1) - improved accuracy over q5_0",
81
+ "q5_k_m": "5-bit quantization (medium) - best balance for quality/size",
82
+ "q5_k_s": "5-bit quantization (small) - optimized for speed",
83
+ "q6_k": "6-bit quantization - highest accuracy, largest size",
84
+ "q8_0": "8-bit quantization - maximum accuracy, largest size"
85
+ }
86
+
87
+ VALID_IMATRIX_METHODS: Dict[str, str] = {
88
+ "iq3_m": "3-bit imatrix quantization (medium) - balanced importance-based",
89
+ "iq3_xxs": "3-bit imatrix quantization (extra extra small) - maximum compression",
90
+ "q4_k_m": "4-bit imatrix quantization (medium) - balanced importance-based",
91
+ "q4_k_s": "4-bit imatrix quantization (small) - optimized for speed",
92
+ "iq4_nl": "4-bit imatrix quantization (non-linear) - best accuracy for 4-bit",
93
+ "iq4_xs": "4-bit imatrix quantization (extra small) - maximum compression",
94
+ "q5_k_m": "5-bit imatrix quantization (medium) - balanced importance-based",
95
+ "q5_k_s": "5-bit imatrix quantization (small) - optimized for speed"
96
+ }
97
+
98
+ def __init__(
99
+ self,
100
+ model_id: str,
101
+ username: Optional[str] = None,
102
+ token: Optional[str] = None,
103
+ quantization_methods: str = "q4_k_m",
104
+ use_imatrix: bool = False,
105
+ train_data_file: Optional[str] = None,
106
+ split_model: bool = False,
107
+ split_max_tensors: int = 256,
108
+ split_max_size: Optional[str] = None
109
+ ) -> None:
110
+ self.model_id = model_id
111
+ self.username = username
112
+ self.token = token
113
+ self.quantization_methods = quantization_methods.split(',')
114
+ self.model_name = model_id.split('/')[-1]
115
+ self.workspace = Path(os.getcwd())
116
+ self.use_imatrix = use_imatrix
117
+ self.train_data_file = train_data_file
118
+ self.split_model = split_model
119
+ self.split_max_tensors = split_max_tensors
120
+ self.split_max_size = split_max_size
121
+ self.fp16_only = "fp16" in self.quantization_methods and len(self.quantization_methods) == 1
122
+
123
+ def validate_inputs(self) -> None:
124
+ """Validates all input parameters."""
125
+ if '/' not in self.model_id:
126
+ raise ValueError("Invalid model ID format. Expected format: 'organization/model-name'")
127
+
128
+ if self.use_imatrix:
129
+ invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_IMATRIX_METHODS]
130
+ if invalid_methods:
131
+ raise ValueError(
132
+ f"Invalid imatrix quantization methods: {', '.join(invalid_methods)}.\n"
133
+ f"Valid methods are: {', '.join(self.VALID_IMATRIX_METHODS.keys())}"
134
+ )
135
+ if not self.train_data_file and not os.path.exists("llama.cpp/groups_merged.txt"):
136
+ raise ValueError("Training data file is required for imatrix quantization")
137
+ else:
138
+ invalid_methods = [m for m in self.quantization_methods if m not in self.VALID_METHODS]
139
+ if invalid_methods:
140
+ raise ValueError(
141
+ f"Invalid quantization methods: {', '.join(invalid_methods)}.\n"
142
+ f"Valid methods are: {', '.join(self.VALID_METHODS.keys())}"
143
+ )
144
+
145
+ if bool(self.username) != bool(self.token):
146
+ raise ValueError("Both username and token must be provided for upload, or neither.")
147
+
148
+ if self.split_model and self.split_max_size:
149
+ unit = self.split_max_size[-1].upper()
+ if unit not in ['M', 'G']:
+ raise ValueError("Split max size must end with M or G (e.g. '256M' or '5G')")
+ try:
+ int(self.split_max_size[:-1])
+ except ValueError:
+ raise ValueError("Invalid split max size format. Use format like '256M' or '5G'")
156
+
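To make the validation rules above concrete, a small illustrative sketch (assuming `ModelConverter` has been imported from this module, `webscout.Extra.gguf`):

```python
from webscout.Extra.gguf import ModelConverter

ok = ModelConverter(model_id="org/model", quantization_methods="q4_k_m,q5_k_m")
ok.validate_inputs()  # passes: both methods are listed in VALID_METHODS

bad = ModelConverter(model_id="model-without-org", quantization_methods="q4_k_m")
try:
    bad.validate_inputs()
except ValueError as exc:
    print(exc)  # Invalid model ID format. Expected format: 'organization/model-name'
```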
157
+ @staticmethod
158
+ def check_dependencies() -> Dict[str, bool]:
159
+ """Check if all required dependencies are installed with cross-platform support."""
160
+ system = platform.system()
161
+
162
+ dependencies: Dict[str, str] = {
163
+ 'git': 'Git version control',
164
+ 'cmake': 'CMake build system',
165
+ 'ninja': 'Ninja build system (optional)'
166
+ }
167
+
168
+ # Add platform-specific dependencies
169
+ if system != 'Windows':
170
+ dependencies['pip3'] = 'Python package installer'
171
+ else:
172
+ dependencies['pip'] = 'Python package installer'
173
+
174
+ status: Dict[str, bool] = {}
175
+
176
+ for cmd, desc in dependencies.items():
177
+ try:
178
+ if system == 'Windows':
179
+ # Use 'where' command on Windows
180
+ result = subprocess.run(['where', cmd], capture_output=True, text=True)
181
+ status[cmd] = result.returncode == 0
182
+ else:
183
+ # Use 'which' command on Unix-like systems
184
+ result = subprocess.run(['which', cmd], capture_output=True, text=True)
185
+ status[cmd] = result.returncode == 0
186
+ except (FileNotFoundError, subprocess.SubprocessError):
187
+ status[cmd] = False
188
+
189
+ # Special check for Python - try different variants
190
+ python_variants = ['python3', 'python', 'py'] if system != 'Windows' else ['python', 'py', 'python3']
191
+ status['python'] = False
192
+ for variant in python_variants:
193
+ try:
194
+ if system == 'Windows':
195
+ result = subprocess.run(['where', variant], capture_output=True)
196
+ else:
197
+ result = subprocess.run(['which', variant], capture_output=True)
198
+ if result.returncode == 0:
199
+ status['python'] = True
200
+ break
201
+ except (FileNotFoundError, subprocess.SubprocessError):
202
+ continue
203
+
204
+ # Check for C++ compiler
205
+ cpp_compilers = ['cl', 'g++', 'clang++'] if system == 'Windows' else ['g++', 'clang++']
206
+ status['cpp_compiler'] = False
207
+ for compiler in cpp_compilers:
208
+ try:
209
+ if system == 'Windows':
210
+ result = subprocess.run(['where', compiler], capture_output=True)
211
+ else:
212
+ result = subprocess.run(['which', compiler], capture_output=True)
213
+ if result.returncode == 0:
214
+ status['cpp_compiler'] = True
215
+ break
216
+ except (FileNotFoundError, subprocess.SubprocessError):
217
+ continue
218
+
219
+ dependencies['python'] = 'Python interpreter'
220
+ dependencies['cpp_compiler'] = 'C++ compiler (g++, clang++, or MSVC)'
221
+
222
+ return status
223
+
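The dependency probe above shells out to `where`/`which` once per tool. The standard library's `shutil.which` performs the same lookup portably; a condensed sketch of the same idea (not the package's implementation):

```python
# Condensed dependency probe built on shutil.which (stdlib, cross-platform).
import shutil
from typing import Dict

def quick_dependency_check() -> Dict[str, bool]:
    status = {tool: shutil.which(tool) is not None for tool in ("git", "cmake", "ninja")}
    status["python"] = any(shutil.which(v) for v in ("python3", "python", "py"))
    status["cpp_compiler"] = any(shutil.which(c) for c in ("g++", "clang++", "cl"))
    return status

print(quick_dependency_check())  # e.g. {'git': True, 'cmake': True, 'ninja': False, ...}
```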
224
+ def detect_hardware(self) -> Dict[str, bool]:
225
+ """Detect available hardware acceleration with improved cross-platform support."""
226
+ hardware: Dict[str, bool] = {
227
+ 'cuda': False,
228
+ 'metal': False,
229
+ 'opencl': False,
230
+ 'vulkan': False,
231
+ 'rocm': False,
232
+ 'blas': False,
233
+ 'accelerate': False
234
+ }
235
+
236
+ system = platform.system()
237
+
238
+ # Check CUDA
239
+ try:
240
+ # Check for nvcc compiler
241
+ if subprocess.run(['nvcc', '--version'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
242
+ hardware['cuda'] = True
243
+ # Also check for nvidia-smi as fallback
244
+ elif subprocess.run(['nvidia-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
245
+ hardware['cuda'] = True
246
+ except (FileNotFoundError, subprocess.SubprocessError):
247
+ # Check for CUDA libraries on Windows
248
+ if system == 'Windows':
249
+ cuda_paths = [
250
+ os.environ.get('CUDA_PATH'),
251
+ 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA',
252
+ 'C:\\Program Files (x86)\\NVIDIA GPU Computing Toolkit\\CUDA'
253
+ ]
254
+ for cuda_path in cuda_paths:
255
+ if cuda_path and os.path.exists(cuda_path):
256
+ hardware['cuda'] = True
257
+ break
258
+
259
+ # Check Metal (macOS)
260
+ if system == 'Darwin':
261
+ try:
262
+ # Check for Xcode command line tools
263
+ if subprocess.run(['xcrun', '--show-sdk-path'], capture_output=True).returncode == 0:
264
+ hardware['metal'] = True
265
+ # Check for Metal framework
266
+ if os.path.exists('/System/Library/Frameworks/Metal.framework'):
267
+ hardware['metal'] = True
268
+ # macOS also supports Accelerate framework
269
+ if os.path.exists('/System/Library/Frameworks/Accelerate.framework'):
270
+ hardware['accelerate'] = True
271
+ except (FileNotFoundError, subprocess.SubprocessError):
272
+ pass
273
+
274
+ # Check OpenCL
275
+ try:
276
+ if system == 'Windows':
277
+ # Check for OpenCL on Windows
278
+ opencl_paths = [
279
+ 'C:\\Windows\\System32\\OpenCL.dll',
280
+ 'C:\\Windows\\SysWOW64\\OpenCL.dll'
281
+ ]
282
+ if any(os.path.exists(path) for path in opencl_paths):
283
+ hardware['opencl'] = True
284
+ else:
285
+ if subprocess.run(['clinfo'], capture_output=True).returncode == 0:
286
+ hardware['opencl'] = True
287
+ except (FileNotFoundError, subprocess.SubprocessError):
288
+ pass
289
+
290
+ # Check Vulkan
291
+ try:
292
+ if system == 'Windows':
293
+ # Check for Vulkan on Windows
294
+ vulkan_paths = [
295
+ 'C:\\Windows\\System32\\vulkan-1.dll',
296
+ 'C:\\Windows\\SysWOW64\\vulkan-1.dll'
297
+ ]
298
+ if any(os.path.exists(path) for path in vulkan_paths):
299
+ hardware['vulkan'] = True
300
+ else:
301
+ if subprocess.run(['vulkaninfo'], capture_output=True).returncode == 0:
302
+ hardware['vulkan'] = True
303
+ except (FileNotFoundError, subprocess.SubprocessError):
304
+ pass
305
+
306
+ # Check ROCm (AMD)
307
+ try:
308
+ if subprocess.run(['rocm-smi'], capture_output=True, shell=(system == 'Windows')).returncode == 0:
309
+ hardware['rocm'] = True
310
+ elif system == 'Linux':
311
+ # Check for ROCm installation
312
+ rocm_paths = ['/opt/rocm', '/usr/lib/x86_64-linux-gnu/librocm-smi64.so']
313
+ if any(os.path.exists(path) for path in rocm_paths):
314
+ hardware['rocm'] = True
315
+ except (FileNotFoundError, subprocess.SubprocessError):
316
+ pass
317
+
318
+ # Check for BLAS libraries
319
+ try:
320
+ import io
+ import contextlib
+ import numpy as np
+ # np.show_config() prints BLAS/LAPACK build info to stdout, so capture it
+ buffer = io.StringIO()
+ with contextlib.redirect_stdout(buffer):
+ np.show_config()
+ if any(lib in buffer.getvalue().lower() for lib in ['openblas', 'mkl', 'atlas', 'blis']):
+ hardware['blas'] = True
325
+ except (ImportError, AttributeError):
326
+ # Fallback: check for common BLAS libraries
327
+ if system == 'Linux':
328
+ blas_libs = ['/usr/lib/x86_64-linux-gnu/libopenblas.so', '/usr/lib/x86_64-linux-gnu/libblas.so']
329
+ if any(os.path.exists(lib) for lib in blas_libs):
330
+ hardware['blas'] = True
331
+ elif system == 'Windows':
332
+ # Check for Intel MKL or OpenBLAS on Windows
333
+ mkl_paths = ['C:\\Program Files (x86)\\Intel\\oneAPI\\mkl']
334
+ if any(os.path.exists(path) for path in mkl_paths):
335
+ hardware['blas'] = True
336
+
337
+ return hardware
338
+
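`setup_llama_cpp()` below turns this hardware map into exactly one GGML_* CMake flag using a fixed priority order (CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate). A compact sketch of that selection logic:

```python
# Sketch of the priority-based backend selection used by setup_llama_cpp().
from typing import Dict, List, Tuple

PRIORITY: List[Tuple[str, str]] = [
    ("cuda", "-DGGML_CUDA=ON"),
    ("metal", "-DGGML_METAL=ON"),
    ("vulkan", "-DGGML_VULKAN=ON"),
    ("opencl", "-DGGML_OPENCL=ON"),
    ("rocm", "-DGGML_HIPBLAS=ON"),
    ("blas", "-DGGML_BLAS=ON"),
    ("accelerate", "-DGGML_ACCELERATE=ON"),
]

def pick_acceleration_flags(hardware: Dict[str, bool]) -> List[str]:
    """Return the CMake flag for the highest-priority available backend, else none (CPU build)."""
    for name, flag in PRIORITY:
        if hardware.get(name):
            return [flag]
    return []
```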
339
+ def setup_llama_cpp(self) -> None:
340
+ """Sets up and builds llama.cpp repository with robust error handling."""
341
+ llama_path = self.workspace / "llama.cpp"
342
+ system = platform.system()
343
+
344
+ with console.status("[bold green]Setting up llama.cpp...") as status:
345
+ # Clone llama.cpp if not exists
346
+ if not llama_path.exists():
347
+ try:
348
+ subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp'], check=True)
349
+ except subprocess.CalledProcessError as e:
350
+ raise ConversionError(f"Failed to clone llama.cpp repository: {e}")
351
+
352
+ original_cwd = os.getcwd()
353
+ try:
354
+ os.chdir(llama_path)
355
+
356
+ # Update to latest version
357
+ try:
358
+ subprocess.run(['git', 'pull'], capture_output=True, check=False)
359
+ except subprocess.CalledProcessError:
360
+ console.print("[yellow]Warning: Could not update llama.cpp repository")
361
+
362
+ # Clean any existing build directory to avoid cached CMake variables
363
+ build_dir = Path('build')
364
+ if build_dir.exists():
365
+ console.print("[yellow]Cleaning existing build directory to avoid CMake cache conflicts...")
366
+ import shutil
367
+ try:
368
+ shutil.rmtree(build_dir)
369
+ console.print("[green]Build directory cleaned successfully")
370
+ except Exception as e:
371
+ console.print(f"[yellow]Warning: Could not clean build directory: {e}")
372
+
373
+ # Check if we're in a Nix environment
374
+ is_nix = system == "Linux" and os.path.exists("/nix/store")
375
+
376
+ if is_nix:
377
+ console.print("[yellow]Detected Nix environment. Using system Python packages...")
378
+ # In Nix, we need to use the system Python packages
379
+ try:
380
+ # Try to import required packages to check if they're available
381
+ import torch # type: ignore
382
+ import numpy # type: ignore
383
+ import sentencepiece # type: ignore
384
+ import transformers # type: ignore
385
+ console.print("[green]Required Python packages are already installed.")
386
+ except ImportError as e:
387
+ console.print("[red]Missing required Python packages in Nix environment.")
388
+ console.print("[yellow]Please install them using:")
389
+ console.print("nix-shell -p python3Packages.torch python3Packages.numpy python3Packages.sentencepiece python3Packages.transformers")
390
+ raise ConversionError("Missing required Python packages in Nix environment")
391
+ else:
392
+ # In non-Nix environments, install requirements if they exist
393
+ if os.path.exists('requirements.txt'):
394
+ try:
395
+ pip_cmd = 'pip' if system == 'Windows' else 'pip3'
396
+ subprocess.run([pip_cmd, 'install', '-r', 'requirements.txt'], check=True)
397
+ except subprocess.CalledProcessError as e:
398
+ if "externally-managed-environment" in str(e):
399
+ console.print("[yellow]Detected externally managed Python environment.")
400
+ console.print("[yellow]Please install the required packages manually:")
401
+ console.print("pip install torch numpy sentencepiece transformers")
402
+ raise ConversionError("Failed to install requirements in externally managed environment")
403
+ else:
404
+ console.print(f"[yellow]Warning: Failed to install requirements: {e}")
405
+
406
+ # Detect available hardware
407
+ hardware = self.detect_hardware()
408
+ console.print("[bold green]Detected hardware acceleration:")
409
+ for hw, available in hardware.items():
410
+ console.print(f" {'✓' if available else '✗'} {hw.upper()}")
411
+
412
+ # Clear any environment variables that might cause conflicts
413
+ env_vars_to_clear = [
414
+ 'LLAMA_CUBLAS', 'LLAMA_CLBLAST', 'LLAMA_HIPBLAS',
415
+ 'LLAMA_METAL', 'LLAMA_ACCELERATE', 'LLAMA_OPENBLAS'
416
+ ]
417
+ for var in env_vars_to_clear:
418
+ if var in os.environ:
419
+ console.print(f"[yellow]Clearing conflicting environment variable: {var}")
420
+ del os.environ[var]
421
+
422
+ # Configure CMake build with robust options
423
+ cmake_args: List[str] = ['cmake', '-B', 'build']
424
+
425
+ # Add basic CMake options with correct LLAMA prefixes
426
+ cmake_args.extend([
427
+ '-DCMAKE_BUILD_TYPE=Release',
428
+ '-DLLAMA_BUILD_TESTS=OFF',
429
+ '-DLLAMA_BUILD_EXAMPLES=ON',
430
+ '-DLLAMA_BUILD_SERVER=OFF',
431
+ # Disable optional dependencies that might cause issues
432
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
433
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
434
+ # Explicitly disable deprecated flags to avoid conflicts
435
+ '-DLLAMA_CUBLAS=OFF',
436
+ '-DLLAMA_CLBLAST=OFF',
437
+ '-DLLAMA_HIPBLAS=OFF'
438
+ ])
439
+
440
+ # Add hardware acceleration options with latest 2025 llama.cpp GGML flags
441
+ # Use priority order: CUDA > Metal > Vulkan > OpenCL > ROCm > BLAS > Accelerate
442
+ acceleration_enabled = False
443
+
444
+ if hardware['cuda']:
445
+ # Latest 2025 GGML CUDA flags (LLAMA_CUBLAS is deprecated)
446
+ cmake_args.extend(['-DGGML_CUDA=ON'])
447
+ console.print("[green]Enabling CUDA acceleration (GGML_CUDA=ON)")
448
+ acceleration_enabled = True
449
+ elif hardware['metal']:
450
+ # Latest 2025 GGML Metal flags for macOS
451
+ cmake_args.extend(['-DGGML_METAL=ON'])
452
+ console.print("[green]Enabling Metal acceleration (GGML_METAL=ON)")
453
+ acceleration_enabled = True
454
+ elif hardware['vulkan']:
455
+ # Latest 2025 GGML Vulkan flags
456
+ cmake_args.extend(['-DGGML_VULKAN=ON'])
457
+ console.print("[green]Enabling Vulkan acceleration (GGML_VULKAN=ON)")
458
+ acceleration_enabled = True
459
+ elif hardware['opencl']:
460
+ # Latest 2025 GGML OpenCL flags (LLAMA_CLBLAST is deprecated)
461
+ cmake_args.extend(['-DGGML_OPENCL=ON'])
462
+ console.print("[green]Enabling OpenCL acceleration (GGML_OPENCL=ON)")
463
+ acceleration_enabled = True
464
+ elif hardware['rocm']:
465
+ # Latest 2025 GGML ROCm/HIP flags
466
+ cmake_args.extend(['-DGGML_HIPBLAS=ON'])
467
+ console.print("[green]Enabling ROCm acceleration (GGML_HIPBLAS=ON)")
468
+ acceleration_enabled = True
469
+ elif hardware['blas']:
470
+ # Latest 2025 GGML BLAS flags with vendor detection
471
+ cmake_args.extend(['-DGGML_BLAS=ON'])
472
+ # Try to detect BLAS vendor for optimal performance
473
+ if system == 'Darwin':
474
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=Accelerate'])
475
+ elif os.environ.get('MKLROOT'): # Intel MKL installations export MKLROOT
476
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=Intel10_64lp'])
477
+ else:
478
+ cmake_args.extend(['-DGGML_BLAS_VENDOR=OpenBLAS'])
479
+ console.print("[green]Enabling BLAS acceleration (GGML_BLAS=ON)")
480
+ acceleration_enabled = True
481
+ elif hardware['accelerate']:
482
+ # Latest 2025 GGML Accelerate framework flags for macOS
483
+ cmake_args.extend(['-DGGML_ACCELERATE=ON'])
484
+ console.print("[green]Enabling Accelerate framework (GGML_ACCELERATE=ON)")
485
+ acceleration_enabled = True
486
+
487
+ if not acceleration_enabled:
488
+ console.print("[yellow]No hardware acceleration available, using CPU only")
489
+ console.print("[cyan]Note: All deprecated LLAMA_* flags have been updated to GGML_* for 2025 compatibility")
490
+
491
+ # Platform-specific optimizations
492
+ if system == 'Windows':
493
+ # Use Visual Studio generator on Windows if available
494
+ try:
495
+ vs_result = subprocess.run(['where', 'msbuild'], capture_output=True)
496
+ if vs_result.returncode == 0:
497
+ cmake_args.extend(['-G', 'Visual Studio 17 2022'])
498
+ else:
499
+ cmake_args.extend(['-G', 'MinGW Makefiles'])
500
+ except (FileNotFoundError, subprocess.SubprocessError):
501
+ cmake_args.extend(['-G', 'MinGW Makefiles'])
502
+ else:
503
+ # Use Ninja if available on Unix systems
504
+ try:
505
+ ninja_cmd = 'ninja' if system != 'Windows' else 'ninja.exe'
506
+ if subprocess.run(['which', ninja_cmd], capture_output=True).returncode == 0:
507
+ cmake_args.extend(['-G', 'Ninja'])
508
+ except (FileNotFoundError, subprocess.SubprocessError):
509
+ pass # Fall back to default generator
510
+
511
+ # Configure the build with error handling and multiple fallback strategies
512
+ status.update("[bold green]Configuring CMake build...")
513
+ config_success = False
514
+
515
+ # Try main configuration
516
+ try:
517
+ console.print(f"[cyan]CMake command: {' '.join(cmake_args)}")
518
+ result = subprocess.run(cmake_args, capture_output=True, text=True)
519
+ if result.returncode == 0:
520
+ config_success = True
521
+ console.print("[green]CMake configuration successful!")
522
+ else:
523
+ console.print(f"[red]CMake configuration failed: {result.stderr}")
524
+ except subprocess.CalledProcessError as e:
525
+ console.print(f"[red]CMake execution failed: {e}")
526
+
527
+ # Try fallback without hardware acceleration if main config failed
528
+ if not config_success:
529
+ console.print("[yellow]Attempting fallback configuration without hardware acceleration...")
530
+ console.print("[cyan]Using 2025-compatible LLAMA build flags...")
531
+ fallback_args = [
532
+ 'cmake', '-B', 'build',
533
+ '-DCMAKE_BUILD_TYPE=Release',
534
+ '-DLLAMA_BUILD_TESTS=OFF',
535
+ '-DLLAMA_BUILD_EXAMPLES=ON',
536
+ '-DLLAMA_BUILD_SERVER=OFF',
537
+ # Disable optional dependencies that might cause issues
538
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
539
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
540
+ # Explicitly disable all deprecated flags
541
+ '-DLLAMA_CUBLAS=OFF',
542
+ '-DLLAMA_CLBLAST=OFF',
543
+ '-DLLAMA_HIPBLAS=OFF',
544
+ '-DLLAMA_METAL=OFF',
545
+ # Enable CPU optimizations
546
+ '-DGGML_NATIVE=OFF', # Disable native optimizations for compatibility
547
+ '-DGGML_AVX=ON', # Enable AVX if available
548
+ '-DGGML_AVX2=ON', # Enable AVX2 if available
549
+ '-DGGML_FMA=ON' # Enable FMA if available
550
+ ]
551
+ try:
552
+ console.print(f"[cyan]Fallback CMake command: {' '.join(fallback_args)}")
553
+ result = subprocess.run(fallback_args, capture_output=True, text=True)
554
+ if result.returncode == 0:
555
+ config_success = True
556
+ console.print("[green]Fallback CMake configuration successful!")
557
+ else:
558
+ console.print(f"[red]Fallback CMake configuration failed: {result.stderr}")
559
+ except subprocess.CalledProcessError as e:
560
+ console.print(f"[red]Fallback CMake execution failed: {e}")
561
+
562
+ # Try minimal configuration as last resort
563
+ if not config_success:
564
+ console.print("[yellow]Attempting minimal configuration...")
565
+ minimal_args = [
566
+ 'cmake', '-B', 'build',
567
+ # Disable optional dependencies that might cause issues
568
+ '-DLLAMA_CURL=OFF', # Disable CURL (not needed for GGUF conversion)
569
+ '-DLLAMA_LLGUIDANCE=OFF', # Disable LLGuidance (optional feature)
570
+ '-DLLAMA_BUILD_SERVER=OFF', # Disable server (not needed for conversion)
571
+ '-DLLAMA_BUILD_TESTS=OFF', # Disable tests (not needed for conversion)
572
+ # Explicitly disable ALL deprecated flags to avoid conflicts
573
+ '-DLLAMA_CUBLAS=OFF',
574
+ '-DLLAMA_CLBLAST=OFF',
575
+ '-DLLAMA_HIPBLAS=OFF',
576
+ '-DLLAMA_METAL=OFF',
577
+ '-DLLAMA_ACCELERATE=OFF'
578
+ ]
579
+ try:
580
+ console.print(f"[cyan]Minimal CMake command: {' '.join(minimal_args)}")
581
+ result = subprocess.run(minimal_args, capture_output=True, text=True)
582
+ if result.returncode == 0:
583
+ config_success = True
584
+ console.print("[green]Minimal CMake configuration successful!")
585
+ else:
586
+ console.print(f"[red]Minimal CMake configuration failed: {result.stderr}")
587
+ raise ConversionError(f"All CMake configuration attempts failed. Last error: {result.stderr}")
588
+ except subprocess.CalledProcessError as e:
589
+ raise ConversionError(f"All CMake configuration attempts failed: {e}")
590
+
591
+ if not config_success:
592
+ raise ConversionError("CMake configuration failed with all attempted strategies")
593
+
594
+ # Build the project
595
+ status.update("[bold green]Building llama.cpp...")
596
+ build_cmd = ['cmake', '--build', 'build', '--config', 'Release']
597
+
598
+ # Add parallel build option
599
+ cpu_count = os.cpu_count() or 1
600
+ if system == 'Windows':
601
+ build_cmd.extend(['--parallel', str(cpu_count)])
602
+ else:
603
+ build_cmd.extend(['-j', str(cpu_count)])
604
+
605
+ try:
606
+ result = subprocess.run(build_cmd, capture_output=True, text=True)
607
+ if result.returncode != 0:
608
+ console.print(f"[red]Build failed: {result.stderr}")
609
+ # Try single-threaded build as fallback
610
+ console.print("[yellow]Attempting single-threaded build...")
611
+ fallback_build = ['cmake', '--build', 'build', '--config', 'Release']
612
+ result = subprocess.run(fallback_build, capture_output=True, text=True)
613
+ if result.returncode != 0:
614
+ raise ConversionError(f"Build failed: {result.stderr}")
615
+ except subprocess.CalledProcessError as e:
616
+ raise ConversionError(f"Build failed: {e}")
617
+
618
+ console.print("[green]llama.cpp built successfully!")
619
+
620
+ finally:
621
+ os.chdir(original_cwd)
622
+
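Stripped of the hardware detection and fallback layers, the build above reduces to two CMake invocations. A bare-bones sketch, assuming llama.cpp has already been cloned into `./llama.cpp`:

```python
# Bare-bones configure + build (no acceleration flags, no fallbacks).
import os
import subprocess

configure = [
    "cmake", "-B", "build",
    "-DCMAKE_BUILD_TYPE=Release",
    "-DLLAMA_CURL=OFF",
    "-DLLAMA_BUILD_TESTS=OFF",
    "-DLLAMA_BUILD_SERVER=OFF",
]
build = ["cmake", "--build", "build", "--config", "Release",
         "--parallel", str(os.cpu_count() or 1)]

subprocess.run(configure, cwd="llama.cpp", check=True)
subprocess.run(build, cwd="llama.cpp", check=True)
```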
623
+ def display_config(self) -> None:
624
+ """Displays the current configuration in a formatted table."""
625
+ table = Table(title="Configuration", show_header=True, header_style="bold magenta")
626
+ table.add_column("Setting", style="cyan")
627
+ table.add_column("Value", style="green")
628
+
629
+ table.add_row("Model ID", self.model_id)
630
+ table.add_row("Model Name", self.model_name)
631
+ table.add_row("Username", self.username or "Not provided")
632
+ table.add_row("Token", "****" if self.token else "Not provided")
633
+ table.add_row("Quantization Methods", "\n".join(
634
+ f"{method} ({self.VALID_METHODS[method]})"
635
+ for method in self.quantization_methods
636
+ ))
637
+
638
+ console.print(Panel(table))
639
+
640
+ def get_binary_path(self, binary_name: str) -> str:
641
+ """Get the correct path to llama.cpp binaries based on platform."""
642
+ system = platform.system()
643
+
644
+ # Possible binary locations
645
+ possible_paths = [
646
+ f"./llama.cpp/build/bin/{binary_name}", # Standard build location
647
+ f"./llama.cpp/build/{binary_name}", # Alternative build location
648
+ f"./llama.cpp/{binary_name}", # Root directory
649
+ f"./llama.cpp/build/Release/{binary_name}", # Windows Release build
650
+ f"./llama.cpp/build/Debug/{binary_name}", # Windows Debug build
651
+ ]
652
+
653
+ # Add .exe extension on Windows
654
+ if system == 'Windows':
655
+ possible_paths = [path + '.exe' for path in possible_paths]
656
+
657
+ # Find the first existing binary
658
+ for path in possible_paths:
659
+ if os.path.isfile(path):
660
+ return path
661
+
662
+ # If not found, return the most likely path and let it fail with a clear error
663
+ default_path = f"./llama.cpp/build/bin/{binary_name}"
664
+ if system == 'Windows':
665
+ default_path += '.exe'
666
+ return default_path
667
+
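Once the build has finished, the helper above resolves tool paths regardless of where the platform's generator placed them; a short illustrative call:

```python
# Illustrative: resolve the quantize binary after llama.cpp has been built.
from webscout.Extra.gguf import ModelConverter

converter = ModelConverter(model_id="org/model")
print(converter.get_binary_path("llama-quantize"))
# e.g. ./llama.cpp/build/bin/llama-quantize  (with .exe appended on Windows)
```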
668
+ def generate_importance_matrix(self, model_path: str, train_data_path: str, output_path: str) -> None:
669
+ """Generates importance matrix for quantization with improved error handling."""
670
+ imatrix_binary = self.get_binary_path("llama-imatrix")
671
+
672
+ imatrix_command: List[str] = [
673
+ imatrix_binary,
674
+ "-m", model_path,
675
+ "-f", train_data_path,
676
+ "-ngl", "99",
677
+ "--output-frequency", "10",
678
+ "-o", output_path,
679
+ ]
680
+
681
+ if not os.path.isfile(model_path):
682
+ raise ConversionError(f"Model file not found: {model_path}")
683
+
684
+ if not os.path.isfile(train_data_path):
685
+ raise ConversionError(f"Training data file not found: {train_data_path}")
686
+
687
+ if not os.path.isfile(imatrix_binary):
688
+ raise ConversionError(f"llama-imatrix binary not found at: {imatrix_binary}")
689
+
690
+ console.print("[bold green]Generating importance matrix...")
691
+ console.print(f"[cyan]Command: {' '.join(imatrix_command)}")
692
+
693
+ try:
694
+ process = subprocess.Popen(
695
+ imatrix_command,
696
+ shell=False,
697
+ stdout=subprocess.PIPE,
698
+ stderr=subprocess.PIPE,
699
+ text=True
700
+ )
701
+
702
+ try:
703
+ stdout, stderr = process.communicate(timeout=300) # 5 minute timeout
704
+ if process.returncode != 0:
705
+ raise ConversionError(f"Failed to generate importance matrix: {stderr}")
706
+ except subprocess.TimeoutExpired:
707
+ console.print("[yellow]Imatrix computation timed out. Sending SIGINT...")
708
+ process.send_signal(signal.SIGINT)
709
+ try:
710
+ stdout, stderr = process.communicate(timeout=10)
711
+ except subprocess.TimeoutExpired:
712
+ console.print("[red]Imatrix process still running. Force terminating...")
713
+ process.kill()
714
+ stdout, stderr = process.communicate()
715
+ raise ConversionError(f"Imatrix generation timed out: {stderr}")
716
+ except FileNotFoundError:
717
+ raise ConversionError(f"Could not execute llama-imatrix binary: {imatrix_binary}")
718
+
719
+ console.print("[green]Importance matrix generation completed.")
720
+
721
+ def split_model_file(self, model_path: str, outdir: str) -> List[str]:
722
+ """Splits the model into smaller chunks with improved error handling."""
723
+ split_binary = self.get_binary_path("llama-gguf-split")
724
+
725
+ split_cmd: List[str] = [
726
+ split_binary,
727
+ "--split",
728
+ ]
729
+
730
+ if self.split_max_size:
731
+ split_cmd.extend(["--split-max-size", self.split_max_size])
732
+ else:
733
+ split_cmd.extend(["--split-max-tensors", str(self.split_max_tensors)])
734
+
735
+ model_path_prefix = '.'.join(model_path.split('.')[:-1])
736
+ split_cmd.extend([model_path, model_path_prefix])
737
+
738
+ if not os.path.isfile(model_path):
739
+ raise ConversionError(f"Model file not found: {model_path}")
740
+
741
+ if not os.path.isfile(split_binary):
742
+ raise ConversionError(f"llama-gguf-split binary not found at: {split_binary}")
743
+
744
+ console.print(f"[bold green]Splitting model with command: {' '.join(split_cmd)}")
745
+
746
+ try:
747
+ result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
748
+
749
+ if result.returncode != 0:
750
+ raise ConversionError(f"Error splitting model: {result.stderr}")
751
+ except FileNotFoundError:
752
+ raise ConversionError(f"Could not execute llama-gguf-split binary: {split_binary}")
753
+
754
+ console.print("[green]Model split successfully!")
755
+
756
+ # Get list of split files
757
+ model_file_prefix = os.path.basename(model_path_prefix)
758
+ try:
759
+ split_files = [f for f in os.listdir(outdir)
760
+ if f.startswith(model_file_prefix) and f.endswith(".gguf")]
761
+ except OSError as e:
762
+ raise ConversionError(f"Error reading output directory: {e}")
763
+
764
+ if not split_files:
765
+ raise ConversionError(f"No split files found in {outdir} with prefix {model_file_prefix}")
766
+
767
+ console.print(f"[green]Found {len(split_files)} split files: {', '.join(split_files)}")
768
+ return split_files
769
+
770
+ def upload_split_files(self, split_files: List[str], outdir: str, repo_id: str) -> None:
771
+ """Uploads split model files to Hugging Face."""
772
+ api = HfApi(token=self.token)
773
+
774
+ for file in split_files:
775
+ file_path = os.path.join(outdir, file)
776
+ console.print(f"[bold green]Uploading file: {file}")
777
+ try:
778
+ api.upload_file(
779
+ path_or_fileobj=file_path,
780
+ path_in_repo=file,
781
+ repo_id=repo_id,
782
+ )
783
+ console.print(f"[green]✓ Successfully uploaded: {file}")
784
+ except Exception as e:
785
+ console.print(f"[red]✗ Failed to upload {file}: {e}")
786
+ raise ConversionError(f"Error uploading file {file}: {e}")
787
+
788
+ def generate_readme(self, quantized_files: List[str]) -> str:
789
+ """Generate a README.md file for the Hugging Face Hub."""
790
+ readme = f"""# {self.model_name} GGUF
791
+
792
+ This repository contains GGUF quantized versions of [{self.model_id}](https://huggingface.co/{self.model_id}).
793
+
794
+ ## About
795
+
796
+ This model was converted using [Webscout](https://github.com/Webscout/webscout).
797
+
798
+ ## Quantization Methods
799
+
800
+ The following quantization methods were used:
801
+
802
+ """
803
+ # Add quantization method descriptions
804
+ for method in self.quantization_methods:
805
+ if self.use_imatrix:
806
+ readme += f"- `{method}`: {self.VALID_IMATRIX_METHODS[method]}\n"
807
+ else:
808
+ readme += f"- `{method}`: {self.VALID_METHODS[method]}\n"
809
+
810
+ readme += """
811
+ ## Available Files
812
+
813
+ The following quantized files are available:
814
+
815
+ """
816
+ # Add file information
817
+ for file in quantized_files:
818
+ readme += f"- `{file}`\n"
819
+
820
+ if self.use_imatrix:
821
+ readme += """
822
+ ## Importance Matrix
823
+
824
+ This model was quantized using importance matrix quantization. The `imatrix.dat` file contains the importance matrix used for quantization.
825
+
826
+ """
827
+
828
+ readme += """
829
+ ## Usage
830
+
831
+ These GGUF files can be used with [llama.cpp](https://github.com/ggerganov/llama.cpp) and compatible tools.
832
+
833
+ Example usage:
834
+ ```bash
835
+ ./main -m model.gguf -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
836
+ ```
837
+
838
+ ## Conversion Process
839
+
840
+ This model was converted using the following command:
841
+ ```bash
842
+ python -m webscout.Extra.gguf convert \\
843
+ -m "{self.model_id}" \\
844
+ -q "{','.join(self.quantization_methods)}" \\
845
+ {f'-i' if self.use_imatrix else ''} \\
846
+ {f'--train-data "{self.train_data_file}"' if self.train_data_file else ''} \\
847
+ {f'-s' if self.split_model else ''} \\
848
+ {f'--split-max-tensors {self.split_max_tensors}' if self.split_model else ''} \\
849
+ {f'--split-max-size {self.split_max_size}' if self.split_max_size else ''}
850
+ ```
851
+
852
+ ## License
853
+
854
+ This repository is licensed under the same terms as the original model.
855
+ """
856
+ return readme
857
+
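The model card is plain Markdown, so it can be previewed locally before any upload; an illustrative sketch:

```python
# Illustrative: write the generated model card to disk for a local preview.
from webscout.Extra.gguf import ModelConverter

converter = ModelConverter(model_id="org/model", quantization_methods="q4_k_m")
card = converter.generate_readme(["model-q4_k_m.gguf"])
with open("README.preview.md", "w", encoding="utf-8") as fh:
    fh.write(card)
```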
858
+ def create_repository(self, repo_id: str) -> None:
859
+ """Create a new repository on Hugging Face Hub if it doesn't exist."""
860
+ api = HfApi(token=self.token)
861
+ try:
862
+ # Check if repository already exists
863
+ try:
864
+ api.repo_info(repo_id=repo_id)
865
+ console.print(f"[green]✓ Repository {repo_id} already exists")
866
+ return
867
+ except Exception:
868
+ # Repository doesn't exist, create it
869
+ pass
870
+
871
+ console.print(f"[bold green]Creating new repository: {repo_id}")
872
+ api.create_repo(
873
+ repo_id=repo_id,
874
+ exist_ok=True,
875
+ private=False,
876
+ repo_type="model"
877
+ )
878
+ console.print(f"[green]✓ Successfully created repository: {repo_id}")
879
+ console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
880
+ except Exception as e:
881
+ console.print(f"[red]✗ Failed to create repository: {e}")
882
+ raise ConversionError(f"Error creating repository {repo_id}: {e}")
883
+
884
+ def upload_readme(self, readme_content: str, repo_id: str) -> None:
885
+ """Upload README.md to Hugging Face Hub."""
886
+ api = HfApi(token=self.token)
887
+ console.print("[bold green]Uploading README.md with model documentation")
888
+ try:
889
+ api.upload_file(
890
+ path_or_fileobj=readme_content.encode(),
891
+ path_in_repo="README.md",
892
+ repo_id=repo_id,
893
+ )
894
+ console.print("[green]✓ Successfully uploaded: README.md")
895
+ except Exception as e:
896
+ console.print(f"[red]✗ Failed to upload README.md: {e}")
897
+ raise ConversionError(f"Error uploading README.md: {e}")
898
+
899
+ def convert(self) -> None:
900
+ """Performs the model conversion process."""
901
+ try:
902
+ # Display banner and configuration
903
+ console.print(f"[bold green]{figlet_format('GGUF Converter')}")
904
+ self.display_config()
905
+
906
+ # Validate inputs
907
+ self.validate_inputs()
908
+
909
+ # Check dependencies
910
+ deps = self.check_dependencies()
911
+ missing = [name for name, installed in deps.items() if not installed and name != 'ninja']
912
+ if missing:
913
+ raise ConversionError(f"Missing required dependencies: {', '.join(missing)}")
914
+
915
+ # Setup llama.cpp
916
+ self.setup_llama_cpp()
917
+
918
+ # Determine if we need temporary directories (only for uploads)
919
+ needs_temp = bool(self.username and self.token)
920
+
921
+ if needs_temp:
922
+ # Use temporary directories for upload case
923
+ with tempfile.TemporaryDirectory() as outdir:
924
+ with tempfile.TemporaryDirectory() as tmpdir:
925
+ self._convert_with_dirs(tmpdir, outdir)
926
+ else:
927
+ # Use current directory for local output
928
+ outdir = os.getcwd()
929
+ tmpdir = os.path.join(outdir, "temp_download")
930
+ os.makedirs(tmpdir, exist_ok=True)
931
+ try:
932
+ self._convert_with_dirs(tmpdir, outdir)
933
+ finally:
934
+ # Clean up temporary download directory
935
+ import shutil
936
+ shutil.rmtree(tmpdir, ignore_errors=True)
937
+
938
+ # Display success message
939
+ console.print(Panel.fit(
940
+ "[bold green]✓[/] Conversion completed successfully!\n\n"
941
+ f"[cyan]Output files can be found in: {self.workspace / self.model_name}[/]",
942
+ title="Success",
943
+ border_style="green"
944
+ ))
945
+
946
+ except Exception as e:
947
+ console.print(Panel.fit(
948
+ f"[bold red]✗[/] {str(e)}",
949
+ title="Error",
950
+ border_style="red"
951
+ ))
952
+ raise
953
+
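When a username and token are supplied, `convert()` additionally creates `<username>/<model>-GGUF`, uploads the README, and pushes the GGUF files from temporary working directories. A sketch with placeholder credentials:

```python
# Sketch: full pipeline with upload enabled. Username and token are placeholders.
from webscout.Extra.gguf import ModelConverter

converter = ModelConverter(
    model_id="OEvortex/HelpingAI-Lite-1.5T",
    username="your-hf-username",   # placeholder
    token="hf_xxxxxxxxxxxxxxxx",   # placeholder Hugging Face API token
    quantization_methods="q4_k_m",
)
converter.convert()  # work happens in temp dirs; artifacts end up on the Hub
```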
954
+ def _convert_with_dirs(self, tmpdir: str, outdir: str) -> None:
955
+ """Helper method to perform conversion with given directories."""
956
+ fp16 = str(Path(outdir)/f"{self.model_name}.fp16.gguf")
957
+
958
+ # Download model
959
+ local_dir = Path(tmpdir)/self.model_name
960
+ console.print("[bold green]Downloading model...")
961
+ api = HfApi(token=self.token)
962
+ api.snapshot_download(
963
+ repo_id=self.model_id,
964
+ local_dir=local_dir,
965
+ local_dir_use_symlinks=False
966
+ )
967
+
968
+ # Convert to fp16
969
+ console.print("[bold green]Converting to fp16...")
970
+
971
+ # Find the conversion script
972
+ conversion_scripts = [
973
+ "llama.cpp/convert_hf_to_gguf.py",
974
+ "llama.cpp/convert-hf-to-gguf.py",
975
+ "llama.cpp/convert.py"
976
+ ]
977
+
978
+ conversion_script = None
979
+ for script in conversion_scripts:
980
+ if os.path.isfile(script):
981
+ conversion_script = script
982
+ break
983
+
984
+ if not conversion_script:
985
+ raise ConversionError("Could not find HuggingFace to GGUF conversion script")
986
+
987
+ # Use the appropriate Python executable
988
+ python_cmd = "python" if platform.system() == "Windows" else "python3"
989
+
990
+ convert_cmd = [
991
+ python_cmd, conversion_script,
992
+ str(local_dir),
993
+ "--outtype", "f16",
994
+ "--outfile", fp16
995
+ ]
996
+
997
+ console.print(f"[cyan]Conversion command: {' '.join(convert_cmd)}")
998
+
999
+ try:
1000
+ result = subprocess.run(convert_cmd, capture_output=True, text=True)
1001
+
1002
+ if result.returncode != 0:
1003
+ raise ConversionError(f"Error converting to fp16: {result.stderr}")
1004
+ except FileNotFoundError as e:
1005
+ raise ConversionError(f"Could not execute conversion script: {e}")
1006
+
1007
+ if not os.path.isfile(fp16):
1008
+ raise ConversionError(f"Conversion completed but output file not found: {fp16}")
1009
+
1010
+ console.print("[green]Model converted to fp16 successfully!")
1011
+
1012
+ # If fp16_only is True, we're done after fp16 conversion
1013
+ if self.fp16_only:
1014
+ quantized_files = [f"{self.model_name}.fp16.gguf"]
1015
+ if self.username and self.token:
1016
+ repo_id = f"{self.username}/{self.model_name}-GGUF"
1017
+
1018
+ # Step 1: Create repository
1019
+ self.create_repository(repo_id)
1020
+
1021
+ # Step 2: Upload README first
1022
+ readme_content = self.generate_readme(quantized_files)
1023
+ self.upload_readme(readme_content, repo_id)
1024
+
1025
+ # Step 3: Upload model GGUF file
1026
+ file_name = f"{self.model_name}.fp16.gguf"
1027
+ console.print(f"[bold green]Uploading model file: {file_name}")
1028
+ try:
1029
+ api.upload_file(
1030
+ path_or_fileobj=fp16,
1031
+ path_in_repo=file_name,
1032
+ repo_id=repo_id
1033
+ )
1034
+ console.print(f"[green]✓ Successfully uploaded: {file_name}")
1035
+ except Exception as e:
1036
+ console.print(f"[red]✗ Failed to upload {file_name}: {e}")
1037
+ raise ConversionError(f"Error uploading model file: {e}")
1038
+ return
1039
+
1040
+ # Generate importance matrix if needed
1041
+ imatrix_path: Optional[str] = None
1042
+ if self.use_imatrix:
1043
+ train_data_path = self.train_data_file if self.train_data_file else "llama.cpp/groups_merged.txt"
1044
+ imatrix_path = str(Path(outdir)/"imatrix.dat")
1045
+ self.generate_importance_matrix(fp16, train_data_path, imatrix_path)
1046
+
1047
+ # Quantize model
1048
+ console.print("[bold green]Quantizing model...")
1049
+ quantized_files: List[str] = []
1050
+ quantize_binary = self.get_binary_path("llama-quantize")
1051
+
1052
+ if not os.path.isfile(quantize_binary):
1053
+ raise ConversionError(f"llama-quantize binary not found at: {quantize_binary}")
1054
+
1055
+ for method in self.quantization_methods:
1056
+ quantized_name = f"{self.model_name.lower()}-{method.lower()}"
1057
+ if self.use_imatrix:
1058
+ quantized_name += "-imat"
1059
+ quantized_path = str(Path(outdir)/f"{quantized_name}.gguf")
1060
+
1061
+ console.print(f"[cyan]Quantizing with method: {method}")
1062
+
1063
+ if self.use_imatrix and imatrix_path:
1064
+ quantize_cmd: List[str] = [
1065
+ quantize_binary,
1066
+ "--imatrix", str(imatrix_path),
1067
+ fp16, quantized_path, method
1068
+ ]
1069
+ else:
1070
+ quantize_cmd = [
1071
+ quantize_binary,
1072
+ fp16, quantized_path, method
1073
+ ]
1074
+
1075
+ console.print(f"[cyan]Quantization command: {' '.join(quantize_cmd)}")
1076
+
1077
+ try:
1078
+ result = subprocess.run(quantize_cmd, capture_output=True, text=True)
1079
+ if result.returncode != 0:
1080
+ raise ConversionError(f"Error quantizing with {method}: {result.stderr}")
1081
+ except FileNotFoundError:
1082
+ raise ConversionError(f"Could not execute llama-quantize binary: {quantize_binary}")
1083
+
1084
+ if not os.path.isfile(quantized_path):
1085
+ raise ConversionError(f"Quantization completed but output file not found: {quantized_path}")
1086
+
1087
+ quantized_files.append(f"{quantized_name}.gguf")
1088
+ console.print(f"[green]Successfully quantized with {method}: {quantized_name}.gguf")
1089
+
1090
+ # Upload to Hugging Face if credentials provided
1091
+ if self.username and self.token:
1092
+ repo_id = f"{self.username}/{self.model_name}-GGUF"
1093
+
1094
+ # Step 1: Create repository
1095
+ console.print(f"[bold blue]Step 1: Creating repository {repo_id}")
1096
+ self.create_repository(repo_id)
1097
+
1098
+ # Step 2: Generate and upload README first
1099
+ console.print("[bold blue]Step 2: Uploading README.md")
1100
+ readme_content = self.generate_readme(quantized_files)
1101
+ self.upload_readme(readme_content, repo_id)
1102
+
1103
+ # Step 3: Upload model GGUF files
1104
+ console.print("[bold blue]Step 3: Uploading model files")
1105
+ if self.split_model:
1106
+ split_files = self.split_model_file(quantized_path, outdir)
1107
+ self.upload_split_files(split_files, outdir, repo_id)
1108
+ else:
+ # Upload each quantized GGUF file produced above under its own name
+ for file_name in quantized_files:
+ file_path = str(Path(outdir)/file_name)
+ console.print(f"[bold green]Uploading quantized model: {file_name}")
+ try:
+ api.upload_file(
+ path_or_fileobj=file_path,
+ path_in_repo=file_name,
+ repo_id=repo_id
+ )
+ console.print(f"[green]✓ Successfully uploaded: {file_name}")
+ except Exception as e:
+ console.print(f"[red]✗ Failed to upload {file_name}: {e}")
+ raise ConversionError(f"Error uploading quantized model: {e}")
1122
+
1123
+ # Step 4: Upload imatrix if generated (optional)
1124
+ if imatrix_path:
1125
+ console.print("[bold blue]Step 4: Uploading importance matrix")
1126
+ console.print("[bold green]Uploading importance matrix: imatrix.dat")
1127
+ try:
1128
+ api.upload_file(
1129
+ path_or_fileobj=imatrix_path,
1130
+ path_in_repo="imatrix.dat",
1131
+ repo_id=repo_id
1132
+ )
1133
+ console.print("[green]✓ Successfully uploaded: imatrix.dat")
1134
+ except Exception as e:
1135
+ console.print(f"[yellow]Warning: Failed to upload imatrix.dat: {e}")
1136
+
1137
+ # Final success message
1138
+ console.print(f"[bold green]🎉 All files uploaded successfully to {repo_id}!")
1139
+ console.print(f"[cyan]Repository URL: https://huggingface.co/{repo_id}")
1140
+
1141
+ # Initialize CLI with HAI vibes
1142
+ app = CLI(
1143
+ name="gguf",
1144
+ help="Convert HuggingFace models to GGUF format with style! 🔥",
1145
+ version="1.0.0"
1146
+ )
1147
+
1148
+ @app.command(name="convert")
1149
+ @option("-m", "--model-id", help="The HuggingFace model ID (e.g., 'OEvortex/HelpingAI-Lite-1.5T')", required=True)
1150
+ @option("-u", "--username", help="Your HuggingFace username for uploads", default=None)
1151
+ @option("-t", "--token", help="Your HuggingFace API token for uploads", default=None)
1152
+ @option("-q", "--quantization", help="Comma-separated quantization methods", default="q4_k_m")
1153
+ @option("-i", "--use-imatrix", help="Use importance matrix for quantization", is_flag=True)
1154
+ @option("--train-data", help="Training data file for imatrix quantization", default=None)
1155
+ @option("-s", "--split-model", help="Split the model into smaller chunks", is_flag=True)
1156
+ @option("--split-max-tensors", help="Maximum number of tensors per file when splitting", default=256)
1157
+ @option("--split-max-size", help="Maximum file size when splitting (e.g., '256M', '5G')", default=None)
1158
+ def convert_command(
1159
+ model_id: str,
1160
+ username: Optional[str] = None,
1161
+ token: Optional[str] = None,
1162
+ quantization: str = "q4_k_m",
1163
+ use_imatrix: bool = False,
1164
+ train_data: Optional[str] = None,
1165
+ split_model: bool = False,
1166
+ split_max_tensors: int = 256,
1167
+ split_max_size: Optional[str] = None
1168
+ ) -> None:
1169
+ """
1170
+ Convert and quantize HuggingFace models to GGUF format! 🚀
1171
+
1172
+ Args:
1173
+ model_id (str): Your model's HF ID (like 'OEvortex/HelpingAI-Lite-1.5T') 🎯
1174
+ username (str, optional): Your HF username for uploads 👤
1175
+ token (str, optional): Your HF API token 🔑
1176
+ quantization (str): Comma-separated quantization methods (default: q4_k_m) 🎮
1177
+ use_imatrix (bool): Use importance matrix for quantization 🔍
1178
+ train_data (str, optional): Training data file for imatrix quantization 📚
1179
+ split_model (bool): Split the model into smaller chunks 🔪
1180
+ split_max_tensors (int): Max tensors per file when splitting (default: 256) 📊
1181
+ split_max_size (str, optional): Max file size when splitting (e.g., '256M', '5G') 📏
1182
+
1183
+ Example:
1184
+ >>> python -m webscout.Extra.gguf convert \\
1185
+ ... -m "OEvortex/HelpingAI-Lite-1.5T" \\
1186
+ ... -q "q4_k_m,q5_k_m"
1187
+ """
1188
+ try:
1189
+ converter = ModelConverter(
1190
+ model_id=model_id,
1191
+ username=username,
1192
+ token=token,
1193
+ quantization_methods=quantization,
1194
+ use_imatrix=use_imatrix,
1195
+ train_data_file=train_data,
1196
+ split_model=split_model,
1197
+ split_max_tensors=split_max_tensors,
1198
+ split_max_size=split_max_size
1199
+ )
1200
+ converter.convert()
1201
+ except (ConversionError, ValueError) as e:
1202
+ console.print(f"[red]Error: {str(e)}")
1203
+ sys.exit(1)
1204
+ except Exception as e:
1205
+ console.print(f"[red]Unexpected error: {str(e)}")
1206
+ sys.exit(1)
1207
+
1208
+ def main() -> None:
1209
+ """Fire up the GGUF converter! 🚀"""
1210
+ app.run()
1211
+
1212
+ if __name__ == "__main__":
1213
+ main()