webscout-8.2.4-py3-none-any.whl → webscout-8.2.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout might be problematic.

Files changed (110)
  1. webscout/AIauto.py +112 -22
  2. webscout/AIutel.py +240 -344
  3. webscout/Extra/autocoder/autocoder.py +66 -5
  4. webscout/Extra/gguf.py +2 -0
  5. webscout/Provider/AISEARCH/scira_search.py +3 -5
  6. webscout/Provider/Aitopia.py +75 -51
  7. webscout/Provider/AllenAI.py +64 -67
  8. webscout/Provider/ChatGPTClone.py +33 -34
  9. webscout/Provider/ChatSandbox.py +342 -0
  10. webscout/Provider/Cloudflare.py +79 -32
  11. webscout/Provider/Deepinfra.py +69 -56
  12. webscout/Provider/ElectronHub.py +48 -39
  13. webscout/Provider/ExaChat.py +36 -20
  14. webscout/Provider/GPTWeb.py +24 -18
  15. webscout/Provider/GithubChat.py +52 -49
  16. webscout/Provider/GizAI.py +285 -0
  17. webscout/Provider/Glider.py +39 -28
  18. webscout/Provider/Groq.py +48 -20
  19. webscout/Provider/HeckAI.py +18 -36
  20. webscout/Provider/Jadve.py +30 -37
  21. webscout/Provider/LambdaChat.py +36 -59
  22. webscout/Provider/MCPCore.py +18 -21
  23. webscout/Provider/Marcus.py +23 -14
  24. webscout/Provider/Nemotron.py +218 -0
  25. webscout/Provider/Netwrck.py +35 -26
  26. webscout/Provider/OPENAI/__init__.py +1 -1
  27. webscout/Provider/OPENAI/exachat.py +4 -0
  28. webscout/Provider/OPENAI/scirachat.py +3 -4
  29. webscout/Provider/OPENAI/textpollinations.py +20 -22
  30. webscout/Provider/OPENAI/toolbaz.py +1 -0
  31. webscout/Provider/PI.py +22 -13
  32. webscout/Provider/StandardInput.py +42 -30
  33. webscout/Provider/TeachAnything.py +24 -12
  34. webscout/Provider/TextPollinationsAI.py +78 -76
  35. webscout/Provider/TwoAI.py +120 -88
  36. webscout/Provider/TypliAI.py +305 -0
  37. webscout/Provider/Venice.py +24 -22
  38. webscout/Provider/VercelAI.py +31 -12
  39. webscout/Provider/WiseCat.py +1 -1
  40. webscout/Provider/WrDoChat.py +370 -0
  41. webscout/Provider/__init__.py +11 -13
  42. webscout/Provider/ai4chat.py +5 -3
  43. webscout/Provider/akashgpt.py +59 -66
  44. webscout/Provider/asksteve.py +53 -44
  45. webscout/Provider/cerebras.py +77 -31
  46. webscout/Provider/chatglm.py +47 -37
  47. webscout/Provider/elmo.py +38 -32
  48. webscout/Provider/freeaichat.py +57 -43
  49. webscout/Provider/granite.py +24 -21
  50. webscout/Provider/hermes.py +27 -20
  51. webscout/Provider/learnfastai.py +25 -20
  52. webscout/Provider/llmchatco.py +48 -78
  53. webscout/Provider/multichat.py +13 -3
  54. webscout/Provider/scira_chat.py +50 -30
  55. webscout/Provider/scnet.py +27 -21
  56. webscout/Provider/searchchat.py +16 -24
  57. webscout/Provider/sonus.py +37 -39
  58. webscout/Provider/toolbaz.py +24 -46
  59. webscout/Provider/turboseek.py +37 -41
  60. webscout/Provider/typefully.py +30 -22
  61. webscout/Provider/typegpt.py +47 -51
  62. webscout/Provider/uncovr.py +46 -40
  63. webscout/__init__.py +0 -1
  64. webscout/cli.py +256 -0
  65. webscout/conversation.py +305 -448
  66. webscout/exceptions.py +3 -0
  67. webscout/swiftcli/__init__.py +80 -794
  68. webscout/swiftcli/core/__init__.py +7 -0
  69. webscout/swiftcli/core/cli.py +297 -0
  70. webscout/swiftcli/core/context.py +104 -0
  71. webscout/swiftcli/core/group.py +241 -0
  72. webscout/swiftcli/decorators/__init__.py +28 -0
  73. webscout/swiftcli/decorators/command.py +221 -0
  74. webscout/swiftcli/decorators/options.py +220 -0
  75. webscout/swiftcli/decorators/output.py +252 -0
  76. webscout/swiftcli/exceptions.py +21 -0
  77. webscout/swiftcli/plugins/__init__.py +9 -0
  78. webscout/swiftcli/plugins/base.py +135 -0
  79. webscout/swiftcli/plugins/manager.py +262 -0
  80. webscout/swiftcli/utils/__init__.py +59 -0
  81. webscout/swiftcli/utils/formatting.py +252 -0
  82. webscout/swiftcli/utils/parsing.py +267 -0
  83. webscout/version.py +1 -1
  84. {webscout-8.2.4.dist-info → webscout-8.2.6.dist-info}/METADATA +166 -45
  85. {webscout-8.2.4.dist-info → webscout-8.2.6.dist-info}/RECORD +89 -89
  86. {webscout-8.2.4.dist-info → webscout-8.2.6.dist-info}/WHEEL +1 -1
  87. webscout-8.2.6.dist-info/entry_points.txt +3 -0
  88. {webscout-8.2.4.dist-info → webscout-8.2.6.dist-info}/top_level.txt +0 -1
  89. inferno/__init__.py +0 -6
  90. inferno/__main__.py +0 -9
  91. inferno/cli.py +0 -6
  92. inferno/lol.py +0 -589
  93. webscout/LLM.py +0 -442
  94. webscout/Local/__init__.py +0 -12
  95. webscout/Local/__main__.py +0 -9
  96. webscout/Local/api.py +0 -576
  97. webscout/Local/cli.py +0 -516
  98. webscout/Local/config.py +0 -75
  99. webscout/Local/llm.py +0 -287
  100. webscout/Local/model_manager.py +0 -253
  101. webscout/Local/server.py +0 -721
  102. webscout/Local/utils.py +0 -93
  103. webscout/Provider/Chatify.py +0 -175
  104. webscout/Provider/PizzaGPT.py +0 -228
  105. webscout/Provider/askmyai.py +0 -158
  106. webscout/Provider/gaurish.py +0 -244
  107. webscout/Provider/promptrefine.py +0 -193
  108. webscout/Provider/tutorai.py +0 -270
  109. webscout-8.2.4.dist-info/entry_points.txt +0 -5
  110. {webscout-8.2.4.dist-info → webscout-8.2.6.dist-info}/licenses/LICENSE.md +0 -0
inferno/lol.py DELETED
@@ -1,589 +0,0 @@
- import os
- import requests
- import math
- from pathlib import Path
- from typing import Dict, Optional, Union, List, Tuple
-
- def estimate_gguf_ram_requirements(model_path: str, verbose: bool = True) -> Dict[str, float]:
-     """
-     Estimate RAM requirements to run a GGUF model.
-
-     Args:
-         model_path: Path to the GGUF model file or URL
-         verbose: Whether to print detailed information
-
-     Returns:
-         Dictionary with RAM requirements for different quantization levels
-     """
-     # Get model size in bytes
-     file_size_bytes = get_model_size(model_path)
-     if file_size_bytes is None:
-         print(f"Couldn't determine the size of the model at {model_path}")
-         return {}
-
-     file_size_gb = file_size_bytes / (1024**3) # Convert to GB
-
-     if verbose:
-         print(f"Model size: {file_size_gb:.2f} GB")
-
-     # Estimate RAM requirements based on model size and quantization
-     # These multipliers are based on empirical observations
-     ram_requirements = {}
-
-     # Comprehensive list of all GGUF quantization levels and their typical RAM multipliers
-     # From lowest precision (Q2) to highest (F16/FP16)
-     quantization_multipliers = {
-         # 2-bit quantization
-         "Q2_K": 1.15, # Q2_K (2-bit quantization with K-quants)
-         "Q2_K_S": 1.18, # Q2_K_S (2-bit quantization with K-quants, small)
-
-         # 3-bit quantization
-         "Q3_K_S": 1.25, # Q3_K_S (3-bit quantization with K-quants, small)
-         "Q3_K_M": 1.28, # Q3_K_M (3-bit quantization with K-quants, medium)
-         "Q3_K_L": 1.30, # Q3_K_L (3-bit quantization with K-quants, large)
-
-         # 4-bit quantization
-         "Q4_0": 1.33, # Q4_0 (4-bit quantization, version 0)
-         "Q4_1": 1.35, # Q4_1 (4-bit quantization, version 1)
-         "Q4_K_S": 1.38, # Q4_K_S (4-bit quantization with K-quants, small)
-         "Q4_K_M": 1.40, # Q4_K_M (4-bit quantization with K-quants, medium)
-         "Q4_K_L": 1.43, # Q4_K_L (4-bit quantization with K-quants, large)
-
-         # 5-bit quantization
-         "Q5_0": 1.50, # Q5_0 (5-bit quantization, version 0)
-         "Q5_1": 1.55, # Q5_1 (5-bit quantization, version 1)
-         "Q5_K_S": 1.60, # Q5_K_S (5-bit quantization with K-quants, small)
-         "Q5_K_M": 1.65, # Q5_K_M (5-bit quantization with K-quants, medium)
-         "Q5_K_L": 1.70, # Q5_K_L (5-bit quantization with K-quants, large)
-
-         # 6-bit quantization
-         "Q6_K": 1.80, # Q6_K (6-bit quantization with K-quants)
-
-         # 8-bit quantization
-         "Q8_0": 2.00, # Q8_0 (8-bit quantization, version 0)
-         "Q8_K": 2.10, # Q8_K (8-bit quantization with K-quants)
-
-         # Floating point formats
-         "F16": 2.80, # F16 (16-bit float, same as FP16)
-         "FP16": 2.80, # FP16 (16-bit float)
-     }
-
-     # Calculate RAM requirements for each quantization level
-     for quant_name, multiplier in quantization_multipliers.items():
-         ram_requirements[quant_name] = file_size_gb * multiplier
-
-     # For context generation, add additional overhead based on context length
-     context_lengths = [2048, 4096, 8192, 16384, 32768, 65536, 131072]
-     context_ram = {}
-
-     # Formula for estimating KV cache size based on model size and context length
-     # This formula is approximate and based on empirical observations
-     model_params_billions = estimate_params_from_file_size(file_size_bytes, quant="Q4_K_M")
-
-     for ctx_len in context_lengths:
-         # KV cache formula: 2 (K&V) * num_layers * hidden_dim * context_length * bytes_per_token
-         # We estimate based on model parameters
-         estimated_layers = min(max(int(model_params_billions * 0.8), 24), 80) # Estimate number of layers
-         estimated_hidden_dim = min(max(int(model_params_billions * 30), 1024), 8192) # Estimate hidden dimension
-         bytes_per_token = 2 # 2 bytes for half-precision (FP16) KV cache
-
-         kv_cache_size_gb = (2 * estimated_layers * estimated_hidden_dim * ctx_len * bytes_per_token) / (1024**3)
-         context_ram[f"Context {ctx_len}"] = kv_cache_size_gb
-
-     ram_requirements["context_overhead"] = context_ram
-
-     if verbose:
-         print("\nEstimated RAM requirements for running the model:")
-         for quant, ram in sorted(
-             [(q, r) for q, r in ram_requirements.items() if q != "context_overhead"],
-             key=lambda x: x[1] # Sort by RAM requirement
-         ):
-             print(f"- {quant}: {ram:.2f} GB base RAM")
-
-         print("\nEstimated model parameters: ~{:.1f}B".format(model_params_billions))
-         print("\nAdditional RAM for KV cache at different context lengths:")
-         for ctx, ram in context_ram.items():
-             print(f"- {ctx}: +{ram:.2f} GB")
-
-         print("\nTotal RAM examples (sorted by increasing RAM usage):")
-         # Show examples for a few representative quantization levels
-         example_quants = ["Q2_K", "Q4_K_M", "Q8_0", "F16"]
-         for ctx, kv_ram in list(context_ram.items())[:4]: # Show first 4 context lengths only
-             ctx_length = int(ctx.split(" ")[1])
-             print(f"\nWith {ctx_length} context length:")
-             for quant in example_quants:
-                 total = ram_requirements[quant] + kv_ram
-                 print(f"- {quant}: {total:.2f} GB")
-
-     return ram_requirements
-
- def estimate_params_from_file_size(file_size_bytes: int, quant: str = "Q4_K_M") -> float:
-     """
-     Estimate the number of parameters (in billions) from model file size.
-
-     Args:
-         file_size_bytes: Size of the model file in bytes
-         quant: Quantization type
-
-     Returns:
-         Estimated number of parameters in billions
-     """
-     # Bits per parameter for different quantization types
-     bits_per_param = {
-         "Q2_K": 2.5, # ~2-2.5 bits per param
-         "Q3_K_M": 3.5, # ~3-3.5 bits per param
-         "Q4_K_M": 4.5, # ~4-4.5 bits per param
-         "Q5_K_M": 5.5, # ~5-5.5 bits per param
-         "Q6_K": 6.5, # ~6-6.5 bits per param
-         "Q8_0": 8.5, # ~8-8.5 bits per param
-         "F16": 16.0, # 16 bits per param
-     }
-
-     # Default to Q4_K_M if the specified quant is not in the dictionary
-     bits = bits_per_param.get(quant, 4.5)
-
-     # Convert bits to bytes for calculation
-     bytes_per_param = bits / 8
-
-     # Calculate number of parameters
-     params = file_size_bytes / bytes_per_param
-
-     # Convert to billions
-     params_billions = params / 1e9
-
-     return params_billions
-
- def get_model_size(model_path: str) -> Optional[int]:
-     """
-     Get the size of a model file in bytes.
-     Works for both local files and remote URLs.
-
-     Args:
-         model_path: Path to the model file or URL
-
-     Returns:
-         Size in bytes or None if size can't be determined
-     """
-     if os.path.exists(model_path):
-         # Local file
-         return os.path.getsize(model_path)
-
-     elif model_path.startswith(('http://', 'https://')):
-         # Remote file - try to get size from HTTP headers
-         try:
-             response = requests.head(model_path, allow_redirects=True)
-             if response.status_code == 200 and 'content-length' in response.headers:
-                 return int(response.headers['content-length'])
-             else:
-                 print(f"Couldn't get Content-Length header for {model_path}")
-                 return None
-         except Exception as e:
-             print(f"Error getting file size from URL: {e}")
-             return None
-     else:
-         print(f"Path {model_path} is neither a valid file nor URL")
-         return None
-
- def suggest_hardware(ram_required: float) -> str:
-     """
-     Suggest hardware based on RAM requirements.
-
-     Args:
-         ram_required: RAM required in GB
-
-     Returns:
-         Hardware recommendation
-     """
-     if ram_required <= 4:
-         return "Entry-level desktop/laptop with 8GB RAM should work"
-     elif ram_required <= 8:
-         return "Standard desktop/laptop with 16GB RAM recommended"
-     elif ram_required <= 16:
-         return "High-end desktop/laptop with 32GB RAM recommended"
-     elif ram_required <= 32:
-         return "Workstation with 64GB RAM recommended"
-     elif ram_required <= 64:
-         return "High-end workstation with 128GB RAM recommended"
-     else:
-         return f"Server-grade hardware with at least {math.ceil(ram_required*1.5)}GB RAM recommended"
-
- def detect_gpu_vram():
-     """
-     Detect available GPU VRAM if possible.
-     Requires optional dependencies (nvidia-ml-py or pynvml).
-
-     Returns:
-         Dict mapping GPU index to available VRAM in GB, or empty dict if detection fails
-     """
-     try:
-         import pynvml # type: ignore[import]
-         pynvml.nvmlInit()
-
-         vram_info = {}
-         device_count = pynvml.nvmlDeviceGetCount()
-
-         for i in range(device_count):
-             handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-             info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-             vram_total_gb = info.total / (1024**3)
-             vram_free_gb = info.free / (1024**3)
-             vram_info[i] = {
-                 "total": vram_total_gb,
-                 "free": vram_free_gb,
-                 "name": pynvml.nvmlDeviceGetName(handle).decode('utf-8')
-             }
-
-         pynvml.nvmlShutdown()
-         return vram_info
-
-     except ImportError:
-         print("GPU VRAM detection requires pynvml. Install with: pip install nvidia-ml-py")
-         return {}
-     except Exception as e:
-         print(f"Error detecting GPUs: {e}")
-         return {}
-
-
- def detect_quantization_from_filename(filename: str) -> Optional[str]:
-     """
-     Try to detect the quantization type from the filename.
-
-     Args:
-         filename: Name of the model file
-
-     Returns:
-         Detected quantization type or None if not detected
-     """
-     filename = filename.lower()
-
-     # Common quantization naming patterns
-     patterns = [
-         ('q2k', 'Q2_K'),
-         ('q2_k', 'Q2_K'),
-         ('q3k', 'Q3_K_M'),
-         ('q3_k', 'Q3_K_M'),
-         ('q3_k_m', 'Q3_K_M'),
-         ('q3_k_s', 'Q3_K_S'),
-         ('q3_k_l', 'Q3_K_L'),
-         ('q4_0', 'Q4_0'),
-         ('q4_1', 'Q4_1'),
-         ('q4k', 'Q4_K_M'),
-         ('q4_k', 'Q4_K_M'),
-         ('q4_k_m', 'Q4_K_M'),
-         ('q4_k_s', 'Q4_K_S'),
-         ('q4_k_l', 'Q4_K_L'),
-         ('q5_0', 'Q5_0'),
-         ('q5_1', 'Q5_1'),
-         ('q5k', 'Q5_K_M'),
-         ('q5_k', 'Q5_K_M'),
-         ('q5_k_m', 'Q5_K_M'),
-         ('q5_k_s', 'Q5_K_S'),
-         ('q5_k_l', 'Q5_K_L'),
-         ('q6k', 'Q6_K'),
-         ('q6_k', 'Q6_K'),
-         ('q8_0', 'Q8_0'),
-         ('q8k', 'Q8_K'),
-         ('q8_k', 'Q8_K'),
-         ('f16', 'F16'),
-         ('fp16', 'FP16')
-     ]
-
-     for pattern, quant_type in patterns:
-         if pattern in filename:
-             return quant_type
-
-     return None
-
- def estimate_from_huggingface_repo(repo_id: str, branch: str = "main") -> Dict[str, float]:
-     """
-     Estimate RAM requirements for a model from a Hugging Face repository.
-
-     Args:
-         repo_id: Hugging Face repository ID (e.g., 'TheBloke/Llama-2-7B-GGUF')
-         branch: Repository branch
-
-     Returns:
-         Dictionary with RAM requirements
-     """
-     api_url = f"https://huggingface.co/api/models/{repo_id}/tree/{branch}"
-     try:
-         response = requests.get(api_url)
-         if response.status_code != 200:
-             print(f"Error accessing repository: HTTP {response.status_code}")
-             return {}
-
-         files = response.json()
-         gguf_files = [f for f in files if f.get('path', '').endswith('.gguf')]
-
-         if not gguf_files:
-             print(f"No GGUF files found in repository {repo_id}")
-             return {}
-
-         print(f"Found {len(gguf_files)} GGUF files in repository")
-
-         # Group files by quantization type
-         quant_groups = {}
-
-         for file in gguf_files:
-             file_path = file.get('path', '')
-             filename = os.path.basename(file_path)
-             size_bytes = file.get('size', 0)
-             size_gb = size_bytes / (1024**3)
-
-             quant_type = detect_quantization_from_filename(filename)
-             if quant_type:
-                 if quant_type not in quant_groups:
-                     quant_groups[quant_type] = []
-                 quant_groups[quant_type].append((filename, size_gb, size_bytes))
-
-         print("\nAvailable quantizations in repository:")
-         for quant, files in quant_groups.items():
-             for filename, size_gb, _ in files:
-                 print(f"- {quant}: {filename} ({size_gb:.2f} GB)")
-
-         # Find a representative file for RAM estimation
-         # Prefer Q4_K_M as it's common, or pick the largest file
-         if not quant_groups:
-             # If quantization detection failed, just use the largest file
-             largest_file = max(gguf_files, key=lambda x: x.get('size', 0))
-             size_bytes = largest_file.get('size', 0)
-             file_path = largest_file.get('path', '')
-             print(f"\nUsing largest GGUF model for estimation: {file_path} ({size_bytes / (1024**3):.2f} GB)")
-             return estimate_gguf_ram_requirements(file_path, verbose=False)
-
-         # Choose a representative model
-         chosen_quant = None
-         chosen_file = None
-
-         # Preference order
-         preferred_quants = ["Q4_K_M", "Q5_K_M", "Q6_K", "Q8_0", "F16"]
-
-         for quant in preferred_quants:
-             if quant in quant_groups:
-                 chosen_quant = quant
-                 # Choose the latest version if multiple files with same quant
-                 chosen_file = max(quant_groups[quant], key=lambda x: x[1]) # Sort by size
-                 break
-
-         if not chosen_quant:
-             # Just choose the first available quantization
-             chosen_quant = list(quant_groups.keys())[0]
-             chosen_file = quant_groups[chosen_quant][0]
-
-         filename, size_gb, size_bytes = chosen_file
-         print(f"\nUsing {chosen_quant} model for estimation: {filename} ({size_gb:.2f} GB)")
-
-         # Create RAM estimation using the file size
-         ram_requirements = {}
-
-         # Use the same estimation logic as the main function
-         # Comprehensive list of all GGUF quantization levels
-         quantization_multipliers = {
-             # 2-bit quantization
-             "Q2_K": 1.15,
-             "Q2_K_S": 1.18,
-
-             # 3-bit quantization
-             "Q3_K_S": 1.25,
-             "Q3_K_M": 1.28,
-             "Q3_K_L": 1.30,
-
-             # 4-bit quantization
-             "Q4_0": 1.33,
-             "Q4_1": 1.35,
-             "Q4_K_S": 1.38,
-             "Q4_K_M": 1.40,
-             "Q4_K_L": 1.43,
-
-             # 5-bit quantization
-             "Q5_0": 1.50,
-             "Q5_1": 1.55,
-             "Q5_K_S": 1.60,
-             "Q5_K_M": 1.65,
-             "Q5_K_L": 1.70,
-
-             # 6-bit quantization
-             "Q6_K": 1.80,
-
-             # 8-bit quantization
-             "Q8_0": 2.00,
-             "Q8_K": 2.10,
-
-             # Floating point formats
-             "F16": 2.80,
-             "FP16": 2.80,
-         }
-
-         # Estimate base size from the chosen quantization
-         base_size_gb = size_bytes / (1024**3)
-         model_params_billions = estimate_params_from_file_size(size_bytes, chosen_quant)
-
-         # Calculate RAM estimates for all quantizations by scaling from the chosen one
-         chosen_multiplier = quantization_multipliers[chosen_quant]
-         base_model_size = base_size_gb / chosen_multiplier # Theoretical unquantized size
-
-         for quant_name, multiplier in quantization_multipliers.items():
-             ram_requirements[quant_name] = base_model_size * multiplier
-
-         # For context generation, add additional overhead
-         context_lengths = [2048, 4096, 8192, 16384, 32768, 65536]
-         context_ram = {}
-
-         # KV cache formula
-         estimated_layers = min(max(int(model_params_billions * 0.8), 24), 80)
-         estimated_hidden_dim = min(max(int(model_params_billions * 30), 1024), 8192)
-         bytes_per_token = 2 # 2 bytes for half-precision KV cache
-
-         for ctx_len in context_lengths:
-             kv_cache_size_gb = (2 * estimated_layers * estimated_hidden_dim * ctx_len * bytes_per_token) / (1024**3)
-             context_ram[f"Context {ctx_len}"] = kv_cache_size_gb
-
-         ram_requirements["context_overhead"] = context_ram
-         ram_requirements["model_params_billions"] = model_params_billions
-
-         return ram_requirements
-
-     except Exception as e:
-         print(f"Error accessing Hugging Face repository: {e}")
-         return {}
-
- def print_gpu_compatibility(ram_requirements: Dict[str, float], vram_info: Dict):
-     """
-     Print GPU compatibility information based on RAM requirements.
-
-     Args:
-         ram_requirements: Dictionary with RAM requirements
-         vram_info: Dictionary with GPU VRAM information
-     """
-     if not vram_info:
-         print("\nNo GPU information available")
-         return
-
-     print("\n=== GPU Compatibility Analysis ===")
-
-     # Context lengths to analyze
-     context_lengths = [2048, 4096, 8192, 16384]
-
-     # Quantization levels to check (arranged from most efficient to highest quality)
-     quant_levels = ["Q2_K", "Q3_K_M", "Q4_K_M", "Q5_K_M", "Q6_K", "Q8_0", "F16"]
-
-     # Get context RAM overhead
-     context_ram = ram_requirements.get("context_overhead", {})
-
-     # For each GPU
-     for gpu_idx, gpu_data in vram_info.items():
-         gpu_name = gpu_data.get("name", f"GPU {gpu_idx}")
-         vram_total = gpu_data.get("total", 0)
-         vram_free = gpu_data.get("free", 0)
-
-         print(f"\n{gpu_name}: {vram_total:.2f} GB total VRAM, {vram_free:.2f} GB free")
-
-         # Check compatibility for each combination
-         print("\nCompatibility matrix (✓: fits, ✗: doesn't fit):")
-
-         # Print header row with context lengths
-         header = "Quantization | "
-         for ctx_len in context_lengths:
-             header += f"{ctx_len:6d} | "
-         print(header)
-         print("-" * len(header))
-
-         # Print compatibility for each quantization level
-         for quant in quant_levels:
-             if quant not in ram_requirements:
-                 continue
-
-             base_ram = ram_requirements[quant]
-             row = f"{quant:11s} | "
-
-             for ctx_len in context_lengths:
-                 ctx_key = f"Context {ctx_len}"
-                 if ctx_key in context_ram:
-                     ctx_overhead = context_ram[ctx_key]
-                     total_ram = base_ram + ctx_overhead
-
-                     # Check if it fits in VRAM
-                     fits = total_ram <= vram_free
-                     row += f"{'✓':6s} | " if fits else f"{'✗':6s} | "
-                 else:
-                     row += f"{'?':6s} | "
-
-             print(row)
-
-     print("\nRecommendations:")
-
-     # Find best quantization/context combination that fits
-     best_quant = None
-     best_ctx = None
-
-     # Start with highest quality and largest context
-     for quant in reversed(quant_levels):
-         if quant not in ram_requirements:
-             continue
-
-         base_ram = ram_requirements[quant]
-
-         for ctx_len in reversed(context_lengths):
-             ctx_key = f"Context {ctx_len}"
-             if ctx_key in context_ram:
-                 ctx_overhead = context_ram[ctx_key]
-                 total_ram = base_ram + ctx_overhead
-
-                 # Check if any GPU can run this configuration
-                 for _, gpu_data in vram_info.items():
-                     vram_free = gpu_data.get("free", 0)
-                     if total_ram <= vram_free:
-                         best_quant = quant
-                         best_ctx = ctx_len
-                         break
-
-                 if best_quant:
-                     break
-
-         if best_quant:
-             break
-
-     if best_quant:
-         print(f"- Recommended configuration: {best_quant} with context length {best_ctx}")
-     else:
-         print("- Your GPU(s) may not have enough VRAM to run this model efficiently.")
-         print("- Consider using a smaller model or a lower quantization level.")
-
- # Example usage
- if __name__ == "__main__":
-     # Example 1: Estimate from local file
-     print("==== Local GGUF Model Example ====")
-     model_path = "path/to/your/model.gguf" # Change this to your model path
-
-     if os.path.exists(model_path):
-         ram_reqs = estimate_gguf_ram_requirements(model_path)
-         if "Q4_K_M" in ram_reqs:
-             q4_ram = ram_reqs["Q4_K_M"]
-             print(f"\nHardware suggestion: {suggest_hardware(q4_ram)}")
-     else:
-         print(f"Model file {model_path} not found. Skipping local example.")
-
-     # Example 2: Estimate from Hugging Face repository
-     print("\n==== Hugging Face Model Example ====")
-     repo_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # Example repository
-     ram_reqs = estimate_from_huggingface_repo(repo_id)
-
-     if ram_reqs:
-         print("\nEstimated RAM requirements:")
-         for quant in ["Q2_K", "Q4_K_M", "Q8_0", "F16"]:
-             if quant in ram_reqs:
-                 ram = ram_reqs[quant]
-                 print(f"- {quant}: {ram:.2f} GB")
-
-         print(f"\nEstimated parameters: ~{ram_reqs.get('model_params_billions', 0):.1f}B")
-
-         if "Q4_K_M" in ram_reqs:
-             q4_ram = ram_reqs["Q4_K_M"]
-             print(f"\nHardware suggestion: {suggest_hardware(q4_ram)}")
-
-     # Example 3: Check GPU VRAM and compatibility
-     print("\n==== GPU VRAM Detection ====")
-     vram_info = detect_gpu_vram()
-
-     if ram_reqs and vram_info:
-         print_gpu_compatibility(ram_reqs, vram_info)
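
For reference, the KV-cache overhead that the deleted inferno/lol.py added on top of the base model RAM reduces to a single expression. The following is a minimal sketch of that calculation, reusing the same layer-count and hidden-dimension heuristics shown in the diff above; the helper name kv_cache_gb is illustrative and is not part of webscout.

# Sketch of the removed module's KV-cache estimate (heuristics copied from the code above).
def kv_cache_gb(model_params_billions: float, ctx_len: int, bytes_per_token: int = 2) -> float:
    layers = min(max(int(model_params_billions * 0.8), 24), 80)         # heuristic layer count
    hidden_dim = min(max(int(model_params_billions * 30), 1024), 8192)  # heuristic hidden size
    # 2 (K and V) * layers * hidden_dim * context length * bytes per element, converted to GB
    return (2 * layers * hidden_dim * ctx_len * bytes_per_token) / (1024 ** 3)

print(kv_cache_gb(7, 8192))  # ~0.75 GB for a ~7B model at 8K context under these heuristics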