thonny-codemate 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thonny_codemate-0.1.0.dist-info/METADATA +307 -0
- thonny_codemate-0.1.0.dist-info/RECORD +27 -0
- thonny_codemate-0.1.0.dist-info/WHEEL +5 -0
- thonny_codemate-0.1.0.dist-info/licenses/LICENSE +21 -0
- thonny_codemate-0.1.0.dist-info/top_level.txt +1 -0
- thonnycontrib/__init__.py +1 -0
- thonnycontrib/thonny_codemate/__init__.py +397 -0
- thonnycontrib/thonny_codemate/api.py +154 -0
- thonnycontrib/thonny_codemate/context_manager.py +296 -0
- thonnycontrib/thonny_codemate/external_providers.py +714 -0
- thonnycontrib/thonny_codemate/i18n.py +506 -0
- thonnycontrib/thonny_codemate/llm_client.py +841 -0
- thonnycontrib/thonny_codemate/message_virtualization.py +136 -0
- thonnycontrib/thonny_codemate/model_manager.py +515 -0
- thonnycontrib/thonny_codemate/performance_monitor.py +141 -0
- thonnycontrib/thonny_codemate/prompts.py +102 -0
- thonnycontrib/thonny_codemate/ui/__init__.py +1 -0
- thonnycontrib/thonny_codemate/ui/chat_view.py +687 -0
- thonnycontrib/thonny_codemate/ui/chat_view_html.py +1299 -0
- thonnycontrib/thonny_codemate/ui/custom_prompt_dialog.py +175 -0
- thonnycontrib/thonny_codemate/ui/markdown_renderer.py +484 -0
- thonnycontrib/thonny_codemate/ui/model_download_dialog.py +355 -0
- thonnycontrib/thonny_codemate/ui/settings_dialog.py +1218 -0
- thonnycontrib/thonny_codemate/utils/__init__.py +25 -0
- thonnycontrib/thonny_codemate/utils/constants.py +138 -0
- thonnycontrib/thonny_codemate/utils/error_messages.py +92 -0
- thonnycontrib/thonny_codemate/utils/unified_error_handler.py +310 -0
@@ -0,0 +1,841 @@
+"""
+LLM client
+Provides an interface to local LLM models using llama-cpp-python
+"""
+import os
+import logging
+import threading
+import queue
+import platform
+import traceback
+from pathlib import Path
+from typing import Optional, Iterator, Dict, Any, List
+from dataclasses import dataclass
+
+# Use a safe logger
+try:
+    from . import get_safe_logger
+    logger = get_safe_logger(__name__)
+except ImportError:
+    logger = logging.getLogger(__name__)
+    logger.addHandler(logging.NullHandler())
+
+
+def detect_gpu_availability() -> int:
+    """
+    Detect GPU availability and return the recommended number of GPU layers
+
+    Returns:
+        int: number of GPU layers to use (0 = CPU only, -1 = offload all layers to the GPU)
+    """
+    try:
+        # Check the CUDA environment variable
+        cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
+        if cuda_visible_devices == '-1':
+            logger.info("CUDA_VISIBLE_DEVICES=-1: GPU disabled by environment")
+            return 0
+
+        # Check the platform
+        system = platform.system()
+
+        if system == "Windows" or system == "Linux":
+            # Check for the NVIDIA driver
+            try:
+                import subprocess
+                result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
+                if result.returncode == 0:
+                    logger.info("NVIDIA GPU detected via nvidia-smi")
+                    return -1  # offload all layers to the GPU
+            except (FileNotFoundError, subprocess.SubprocessError):
+                pass
+
+        elif system == "Darwin":  # macOS
+            # Metal is available on Apple Silicon
+            try:
+                import subprocess
+                result = subprocess.run(['system_profiler', 'SPDisplaysDataType'],
+                                        capture_output=True, text=True)
+                if 'Metal' in result.stdout or 'M1' in result.stdout or 'M2' in result.stdout or 'M3' in result.stdout:
+                    logger.info("Apple Silicon GPU detected")
+                    return -1  # offload all layers to the GPU
+            except Exception:
+                pass
+
+        # Check the llama-cpp-python build information
+        try:
+            from llama_cpp import llama_cpp
+            if hasattr(llama_cpp, 'GGML_USE_CUBLAS') and llama_cpp.GGML_USE_CUBLAS:
+                logger.info("llama-cpp-python built with CUDA support")
+                return -1
+            elif hasattr(llama_cpp, 'GGML_USE_METAL') and llama_cpp.GGML_USE_METAL:
+                logger.info("llama-cpp-python built with Metal support")
+                return -1
+        except Exception:
+            pass
+
+    except Exception as e:
+        logger.debug(f"Error detecting GPU: {e}")
+        logger.debug(f"GPU detection stack trace:\n{traceback.format_exc()}")
+
+    logger.info("No GPU detected, using CPU")
+    return 0
+
+
+@dataclass
+class ModelConfig:
+    """Model configuration"""
+    model_path: str
+    n_ctx: int = 4096  # context size
+    n_gpu_layers: int = -2  # number of GPU layers (-2 = auto-detect, -1 = all, 0 = CPU only)
+    temperature: float = 0.3
+    max_tokens: int = 2048
+    top_p: float = 0.95
+    top_k: int = 40
+    repeat_penalty: float = 1.1
+    n_threads: Optional[int] = None  # None = auto
+
+
+class LLMClient:
+    """
+    Client that manages communication with a local LLM
+    Supports lazy loading and streaming responses
+    Also supports external providers (ChatGPT, Ollama, OpenRouter)
+    """
+
+    def __init__(self, config: Optional[ModelConfig] = None):
+        self._model = None
+        self._config = config
+        self._loading = False
+        self._load_lock = threading.Lock()
+        self._load_error: Optional[Exception] = None
+        self._load_thread = None
+        self._shutdown = False
+
+        # Queue for streaming responses
+        self._response_queue: queue.Queue = queue.Queue()
+        self._streaming = False
+
+        # External provider
+        self._external_provider = None
+        self._current_provider = None  # track the currently configured provider
+
+        # Default system prompt (unified version)
+        self.default_system_prompt = """You are an expert programming assistant integrated into Thonny IDE.
+
+Core principles:
+- Be concise and direct in your responses
+- Provide code examples without lengthy explanations unless asked
+- Focus on solving the immediate problem
+- Adapt complexity to user's skill level
+- Detect and work with the programming language being used
+
+When generating code:
+- Write clean, readable code following the language's best practices
+- Include only essential comments
+- Handle edge cases appropriately
+
+When explaining:
+- Keep explanations brief and to the point
+- Use simple language for beginners
+- Provide more detail only when specifically requested
+
+Remember: Prioritize clarity and brevity. Get straight to the solution."""
+
+        # Use the default prompt
+        self.system_prompt = self.default_system_prompt
+
+    @property
+    def is_loaded(self) -> bool:
+        """Whether the model is loaded (always True for external providers)"""
+        if self._external_provider:
+            return True
+        return self._model is not None
+
+    @property
+    def is_loading(self) -> bool:
+        """Whether the model is currently being loaded"""
+        return self._loading
+
+    def get_config(self) -> ModelConfig:
+        """Get the current configuration"""
+        # Check the provider on every call
+        from thonny import get_workbench
+        workbench = get_workbench()
+        provider = workbench.get_option("llm.provider", "local")
+
+        # Reconfigure if the provider has changed
+        if provider != self._current_provider:
+            self._current_provider = provider
+            self._external_provider = None  # clear the old provider
+            if provider != "local":
+                self._setup_external_provider(provider)
+
+        if self._config is None:
+
+            model_path = workbench.get_option("llm.model_path", "")
+            if not model_path and provider == "local":
+                # Look for the first GGUF file in the models directory
+                models_dir = Path(__file__).parent.parent.parent / "models"
+                if models_dir.exists():
+                    gguf_files = list(models_dir.glob("*.gguf"))
+                    if gguf_files:
+                        model_path = str(gguf_files[0])
+
+            self._config = ModelConfig(
+                model_path=model_path,
+                n_ctx=workbench.get_option("llm.context_size", 4096),
+                temperature=workbench.get_option("llm.temperature", 0.3),
+                max_tokens=workbench.get_option("llm.max_tokens", 2048),
+                repeat_penalty=workbench.get_option("llm.repeat_penalty", 1.1),
+            )
+
+            # Apply the prompt type
+            prompt_type = workbench.get_option("llm.prompt_type", "default")
+
+            if prompt_type == "custom":
+                custom_prompt = workbench.get_option("llm.custom_prompt", "")
+                if custom_prompt:
+                    self.set_system_prompt(custom_prompt)
+            else:
+                # Use the default prompt
+                self.use_default_prompt()
+
+        return self._config
+
+    def _detect_programming_language(self) -> str:
+        """Detect the programming language from the current editor file"""
+        try:
+            from thonny import get_workbench
+            workbench = get_workbench()
+            editor = workbench.get_editor_notebook().get_current_editor()
+
+            if editor:
+                filename = editor.get_filename()
+                if filename:
+                    file_ext = Path(filename).suffix.lower()
+                    # Determine the language from the file extension
+                    from .utils.constants import LANGUAGE_EXTENSIONS
+                    return LANGUAGE_EXTENSIONS.get(file_ext, 'Python')  # default is Python
+        except Exception:
+            pass
+
+        return 'Python'  # default on error
+
+    def _get_language_instruction(self) -> str:
+        """Get the instruction that matches the output-language setting"""
+        from thonny import get_workbench
+        workbench = get_workbench()
+
+        output_language = workbench.get_option("llm.output_language", "auto")
+
+        if output_language == "auto":
+            # Follow Thonny's language setting
+            thonny_language = workbench.get_option("general.language", None)
+            if thonny_language and thonny_language.startswith("ja"):
+                return "\nPlease respond in Japanese (日本語で回答してください)."
+            elif thonny_language and thonny_language.startswith("zh"):
+                if "TW" in thonny_language or "HK" in thonny_language:
+                    return "\nPlease respond in Traditional Chinese (請用繁體中文回答)."
+                else:
+                    return "\nPlease respond in Simplified Chinese (请用简体中文回答)."
+            else:
+                return ""  # English is the default, so no instruction is needed
+        elif output_language == "ja":
+            return "\nPlease respond in Japanese (日本語で回答してください)."
+        elif output_language == "en":
+            return ""  # English is the default, so no instruction is needed
+        elif output_language == "zh-CN":
+            return "\nPlease respond in Simplified Chinese (请用简体中文回答)."
+        elif output_language == "zh-TW":
+            return "\nPlease respond in Traditional Chinese (請用繁體中文回答)."
+        elif output_language == "other":
+            custom_code = workbench.get_option("llm.custom_language_code", "")
+            if custom_code:
+                return f"\nPlease respond in {custom_code}."
+
+        return ""
+
+    def _build_system_prompt(self) -> str:
+        """Build a system prompt that includes the language setting and skill level"""
+        from thonny import get_workbench
+        workbench = get_workbench()
+
+        # Get the base system prompt
+        base_prompt = self.system_prompt
+
+        # Detect the programming language
+        prog_language = self._detect_programming_language()
+
+        # Check the prompt type
+        prompt_type = workbench.get_option("llm.prompt_type", "default")
+        skill_level = workbench.get_option("llm.skill_level", "beginner")
+        output_language = workbench.get_option("llm.output_language", "auto")
+
+        if prompt_type == "custom":
+            # For a custom prompt, substitute the template variables
+            enhanced_prompt = base_prompt
+
+            # Detailed descriptions of the skill levels
+            skill_level_descriptions = {
+                "beginner": "beginner (new to programming, needs detailed explanations, simple examples, and encouragement)",
+                "intermediate": "intermediate (familiar with basics, can understand technical terms, needs guidance on best practices)",
+                "advanced": "advanced (experienced developer, prefers concise technical explanations, interested in optimization and design patterns)"
+            }
+
+            # Substitute the variables
+            enhanced_prompt = enhanced_prompt.replace("{skill_level}", skill_level_descriptions.get(skill_level, skill_level))
+            enhanced_prompt = enhanced_prompt.replace("{language}", output_language if output_language != "auto" else "the user's language")
+
+            # Add the programming language
+            enhanced_prompt += f"\n\nCurrent programming language: {prog_language}"
+
+            # Add the output-language instruction
+            language_instruction = self._get_language_instruction()
+            if language_instruction:
+                enhanced_prompt += language_instruction
+
+            return enhanced_prompt
+
+        # For the default prompt, combine the skill level, programming language, and output language
+
+        # Add the programming-language instruction
+        enhanced_prompt = base_prompt + f"\n\nCurrent programming language: {prog_language}"
+
+        # Add detailed skill-level instructions
+        skill_instructions = {
+            "beginner": """\n\nIMPORTANT: The user is a BEGINNER programmer. Follow these guidelines:
+- Use simple, everyday language and avoid technical jargon
+- Explain concepts step-by-step with clear examples
+- Provide encouragement and positive reinforcement
+- Anticipate common mistakes and explain how to avoid them
+- Use analogies to relate programming concepts to real-world scenarios
+- Keep code examples short and well-commented
+- Explain what each line of code does""",
+            "intermediate": """\n\nIMPORTANT: The user has INTERMEDIATE programming knowledge. Follow these guidelines:
+- Balance technical accuracy with clarity
+- Introduce best practices and coding standards
+- Explain the 'why' behind recommendations
+- Provide multiple solution approaches when relevant
+- Include error handling and edge cases
+- Reference documentation and useful resources
+- Encourage exploration of advanced features""",
+            "advanced": """\n\nIMPORTANT: The user is an ADVANCED programmer. Follow these guidelines:
+- Be concise and technically precise
+- Focus on optimization, performance, and design patterns
+- Discuss trade-offs and architectural decisions
+- Assume familiarity with programming concepts
+- Include advanced techniques and idioms
+- Reference relevant specifications and standards
+- Skip basic explanations unless specifically asked"""
+        }
+
+        # Add the skill-level instruction
+        enhanced_prompt += skill_instructions.get(skill_level, "")
+
+        # Add the output-language instruction
+        language_instruction = self._get_language_instruction()
+        if language_instruction:
+            enhanced_prompt += language_instruction
+
+        return enhanced_prompt
+
+    def _setup_external_provider(self, provider: str):
+        """Set up an external provider"""
+        from thonny import get_workbench
+        from .external_providers import ChatGPTProvider, OllamaProvider, OpenRouterProvider
+
+        workbench = get_workbench()
+
+        if provider == "chatgpt":
+            self._external_provider = ChatGPTProvider(
+                api_key=workbench.get_option("llm.chatgpt_api_key", ""),
+                model=workbench.get_option("llm.external_model", "gpt-3.5-turbo")
+            )
+        elif provider == "ollama":
+            self._external_provider = OllamaProvider(
+                base_url=workbench.get_option("llm.base_url", "http://localhost:11434"),
+                model=workbench.get_option("llm.external_model", "llama3")
+            )
+        elif provider == "openrouter":
+            self._external_provider = OpenRouterProvider(
+                api_key=workbench.get_option("llm.openrouter_api_key", ""),
+                model=workbench.get_option("llm.external_model", "meta-llama/llama-3.2-3b-instruct:free")
+            )
+
+    def set_config(self, config: ModelConfig):
+        """Update the configuration (requires reloading the model)"""
+        self._config = config
+        if self._model is not None:
+            # Unload the existing model
+            self.unload_model()
+
+    def load_model(self, force: bool = False) -> bool:
+        """
+        Load the model synchronously
+
+        Args:
+            force: reload even if a model is already loaded
+
+        Returns:
+            True if the model was loaded successfully
+        """
+        # External providers do not require loading a model
+        if self._external_provider:
+            return True
+
+        if self._model is not None and not force:
+            return True
+
+        with self._load_lock:
+            if self._loading:
+                return False
+
+            self._loading = True
+            self._load_error = None
+
+        try:
+            config = self.get_config()
+            if not config.model_path or not Path(config.model_path).exists():
+                raise FileNotFoundError(f"Model file not found: {config.model_path}")
+
+            logger.info(f"Loading model from: {config.model_path}")
+
+            try:
+                from llama_cpp import Llama
+            except ImportError:
+                raise ImportError(
+                    "llama-cpp-python is not installed. "
+                    "Please run: uv pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
+                )
+
+            # Auto-detect the GPU
+            n_gpu_layers = config.n_gpu_layers
+            if n_gpu_layers == -2:  # auto-detect
+                n_gpu_layers = detect_gpu_availability()
+                logger.info(f"Auto-detected GPU layers: {n_gpu_layers}")
+
+            # Load the model
+            self._model = Llama(
+                model_path=config.model_path,
+                n_ctx=config.n_ctx,
+                n_gpu_layers=n_gpu_layers,
+                n_threads=config.n_threads,
+                verbose=False,
+            )
+
+            # Log the number of GPU layers actually in use
+            if n_gpu_layers > 0:
+                logger.info(f"Model loaded with {n_gpu_layers} GPU layers")
+            elif n_gpu_layers == -1:
+                logger.info("Model loaded with all layers on GPU")
+            else:
+                logger.info("Model loaded on CPU")
+
+            logger.info("Model loaded successfully")
+            return True
+
+        except ImportError as e:
+            error_msg = f"Failed to import llama-cpp-python: {e}"
+            logger.error(error_msg)
+            logger.error(f"Import error stack trace:\n{traceback.format_exc()}")
+            self._load_error = e
+            return False
+        except FileNotFoundError as e:
+            error_msg = f"Model file not found at {config.model_path}: {e}"
+            logger.error(error_msg)
+            self._load_error = e
+            return False
+        except Exception as e:
+            error_msg = f"Failed to load model from {config.model_path}: {e}"
+            logger.error(error_msg)
+            logger.error(f"Model loading stack trace:\n{traceback.format_exc()}")
+            logger.error(f"Model config: n_ctx={config.n_ctx}, n_gpu_layers={n_gpu_layers}")
+            self._load_error = e
+            return False
+        finally:
+            self._loading = False
+
+    def load_model_async(self, callback=None):
+        """
+        Load the model asynchronously
+
+        Args:
+            callback: called when loading completes (success: bool, error: Optional[Exception])
+        """
+        def _load():
+            if self._shutdown:
+                return
+            success = self.load_model()
+            if callback and not self._shutdown:
+                callback(success, self._load_error)
+
+        self._load_thread = threading.Thread(target=_load, daemon=True)
+        self._load_thread.start()
+
+    def unload_model(self):
+        """Unload the model"""
+        if self._model is not None:
+            del self._model
+            self._model = None
+            logger.info("Model unloaded")
+
+    def generate(self, prompt: str, **kwargs) -> str:
+        """
+        Generate a response to the prompt (synchronous)
+
+        Args:
+            prompt: the input prompt
+            **kwargs: overrides for the generation parameters
+
+        Returns:
+            The generated text
+        """
+        # When using an external provider
+        if self._external_provider:
+            # Build the system prompt
+            system_content = self._build_system_prompt()
+
+            messages = [
+                {"role": "system", "content": system_content},
+                {"role": "user", "content": prompt}
+            ]
+            return self._external_provider.generate(
+                prompt=prompt,
+                messages=messages,
+                temperature=kwargs.get("temperature", self.get_config().temperature),
+                max_tokens=kwargs.get("max_tokens", self.get_config().max_tokens)
+            )
+
+        # When using the local model
+        if self._model is None:
+            if not self.load_model():
+                raise RuntimeError(f"Failed to load model: {self._load_error}")
+
+        config = self.get_config()
+
+        # Merge the parameters
+        params = {
+            "max_tokens": config.max_tokens,
+            "temperature": config.temperature,
+            "top_p": config.top_p,
+            "top_k": config.top_k,
+            "repeat_penalty": config.repeat_penalty,
+            "stop": ["</s>", "\n\n\n"],
+        }
+        # Exclude the messages parameter (not used by the local model)
+        kwargs_without_messages = {k: v for k, v in kwargs.items() if k != "messages"}
+        params.update(kwargs_without_messages)
+
+        # Build the full prompt
+        full_prompt = self._format_prompt(prompt)
+
+        # Generate
+        response = self._model(full_prompt, **params)
+        return response["choices"][0]["text"].strip()
+
+    def generate_stream(self, prompt: str, **kwargs) -> Iterator[str]:
+        """
+        Generate a streaming response to the prompt
+
+        Args:
+            prompt: the input prompt
+            **kwargs: overrides for the generation parameters
+
+        Yields:
+            Chunks of the generated text
+        """
+        # When using an external provider
+        if self._external_provider:
+            # Build the system prompt
+            system_content = self._build_system_prompt()
+
+            # Build a message list that includes the conversation history
+            messages = [{"role": "system", "content": system_content}]
+
+            # Add any existing conversation history
+            if "messages" in kwargs:
+                for msg in kwargs["messages"]:
+                    # Skip system messages (already added)
+                    if msg.get("role") != "system":
+                        messages.append(msg)
+
+            # Add the current user message
+            messages.append({"role": "user", "content": prompt})
+
+            for token in self._external_provider.generate_stream(
+                prompt=prompt,
+                messages=messages,
+                temperature=kwargs.get("temperature", self.get_config().temperature),
+                max_tokens=kwargs.get("max_tokens", self.get_config().max_tokens)
+            ):
+                yield token
+            return
+
+        # When using the local model
+        if self._model is None:
+            if not self.load_model():
+                raise RuntimeError(f"Failed to load model: {self._load_error}")
+
+        config = self.get_config()
+
+        # Merge the parameters
+        params = {
+            "max_tokens": config.max_tokens,
+            "temperature": config.temperature,
+            "top_p": config.top_p,
+            "top_k": config.top_k,
+            "repeat_penalty": config.repeat_penalty,
+            "stop": ["</s>", "\n\n\n"],
+            "stream": True,
+        }
+        # Exclude the messages parameter (not used by the local model)
+        kwargs_without_messages = {k: v for k, v in kwargs.items() if k != "messages"}
+        params.update(kwargs_without_messages)
+
+        # If there is conversation history, use the chat completion API
+        if "messages" in kwargs:
+            # Build the message list
+            messages = []
+
+            # Add the system prompt
+            system_prompt = self._build_system_prompt()
+            messages.append({"role": "system", "content": system_prompt})
+
+            # Add the existing conversation history
+            for msg in kwargs["messages"]:
+                if msg.get("role") != "system":  # the system prompt was already added
+                    messages.append(msg)
+
+            # Add the current message
+            messages.append({"role": "user", "content": prompt})
+
+            # Use the chat completion API (if llama-cpp-python supports it)
+            try:
+                # Check whether the create_chat_completion method is available
+                if hasattr(self._model, 'create_chat_completion'):
+                    # Adjust the stop parameter (for chat completion)
+                    params_for_chat = params.copy()
+                    params_for_chat['stop'] = ["</s>"]  # stop tokens for chat completion
+
+                    for output in self._model.create_chat_completion(messages, **params_for_chat):
+                        if "choices" in output and len(output["choices"]) > 0:
+                            delta = output["choices"][0].get("delta", {})
+                            content = delta.get("content", "")
+                            if content:
+                                yield content
+                else:
+                    # Fallback: build the prompt the traditional way
+                    full_prompt = self._format_messages_as_prompt(messages)
+                    for output in self._model(full_prompt, **params):
+                        token = output["choices"][0]["text"]
+                        if token:
+                            yield token
+            except Exception as e:
+                logger.warning(f"Chat completion failed, falling back to text completion: {e}")
+                # Fallback on error
+                full_prompt = self._format_messages_as_prompt(messages)
+                for output in self._model(full_prompt, **params):
+                    token = output["choices"][0]["text"]
+                    if token:
+                        yield token
+        else:
+            # Traditional single-prompt format
+            full_prompt = self._format_prompt(prompt)
+            for output in self._model(full_prompt, **params):
+                token = output["choices"][0]["text"]
+                if token:
+                    yield token
+
+    def _format_messages_as_prompt(self, messages: list) -> str:
+        """
+        Convert an OpenAI-style message list into a prompt string
+
+        Args:
+            messages: [{"role": "system/user/assistant", "content": "..."}, ...]
+
+        Returns:
+            The formatted prompt string
+        """
+        parts = []
+        for msg in messages:
+            role = msg.get("role", "")
+            content = msg.get("content", "")
+
+            if role == "system":
+                parts.append(content)
+            elif role == "user":
+                parts.append(f"\n\nHuman: {content}")
+            elif role == "assistant":
+                parts.append(f"\n\nAssistant: {content}")
+
+        # Finally, prompt the assistant to respond
+        parts.append("\n\nAssistant:")
+
+        return "".join(parts)
+
+    def set_system_prompt(self, prompt: str):
+        """Set a custom system prompt"""
+        self.system_prompt = prompt
+
+    def use_default_prompt(self):
+        """Use the default system prompt"""
+        self.system_prompt = self.default_system_prompt
+
+    def get_current_system_prompt(self) -> str:
+        """Get the current system prompt"""
+        return self.system_prompt
+
+    def explain_code(self, code: str, skill_level: str = "beginner") -> str:
+        """
+        Specialized method for explaining code
+
+        Args:
+            code: the code to explain
+            skill_level: the user's skill level (beginner/intermediate/advanced)
+
+        Returns:
+            An explanation of the code
+        """
+        skill_descriptions = {
+            "beginner": "a complete beginner who is just learning programming",
+            "intermediate": "someone with basic programming knowledge",
+            "advanced": "an experienced programmer"
+        }
+
+        # Detect the programming language
+        prog_language = self._detect_programming_language()
+        lang_lower = prog_language.lower()
+
+        prompt = f"""Explain this {prog_language} code for {skill_descriptions.get(skill_level, skill_descriptions['beginner'])}:
+
+```{lang_lower}
+{code}
+```
+
+Be concise. Focus on what the code does and key concepts."""
+
+        return self.generate(prompt, temperature=0.3)  # lower temperature for consistent explanations
+
+    def fix_error(self, code: str, error_message: str) -> str:
+        """
+        Generate a suggestion for fixing an error
+
+        Args:
+            code: the code that produced the error
+            error_message: the error message
+
+        Returns:
+            The fix suggestion
+        """
+        # Detect the programming language
+        prog_language = self._detect_programming_language()
+        lang_lower = prog_language.lower()
+
+        prompt = f"""Fix this {prog_language} error:
+
+```{lang_lower}
+{code}
+```
+
+Error:
+```
+{error_message}
+```
+
+Provide:
+1. Brief explanation of the error
+2. Corrected code
+3. What changed"""
+
+        return self.generate(prompt, temperature=0.3)
+
+    def generate_with_context(self, prompt: str, context: str, **kwargs) -> str:
+        """
+        Generate a response with project context
+
+        Args:
+            prompt: the user prompt
+            context: the project context
+            **kwargs: generation parameters
+
+        Returns:
+            The generated text
+        """
+        # Build a prompt that includes the context
+        full_prompt = f"""Here is the context from the current project:
+
+{context}
+
+Based on this context, {prompt}"""
+
+        return self.generate(full_prompt, **kwargs)
+
+    def _format_prompt(self, user_prompt: str) -> str:
+        """Format the prompt for the model"""
+        # Build the system prompt
+        system_content = self._build_system_prompt()
+
+        # Simple ChatML-style format (may need adjustment per model)
+        return f"""<|system|>
+{system_content}
+<|user|>
+{user_prompt}
+<|assistant|>
+"""
+
+    def test_connection(self) -> Dict[str, Any]:
+        """
+        Test the connection and the model
+
+        Returns:
+            A dictionary of test results
+        """
+        # For external providers
+        if self._external_provider:
+            return self._external_provider.test_connection()
+
+        # For the local model
+        result = {
+            "model_loaded": self.is_loaded,
+            "model_path": self.get_config().model_path,
+            "error": None,
+            "test_response": None,
+            "success": False
+        }
+
+        try:
+            if not self.is_loaded:
+                self.load_model()
+
+            # Simple test prompt
+            test_response = self.generate(
+                "Say 'Hello from LLM!' in exactly 5 words.",
+                max_tokens=20,
+                temperature=0.1
+            )
+            result["test_response"] = test_response
+            result["success"] = True
+
+        except Exception as e:
+            result["error"] = str(e)
+
+        return result
+
+    def shutdown(self):
+        """Shut down the client and cleanly terminate all threads"""
+        self._shutdown = True
+        self._stop_generation = True
+
+        # Wait for the loading thread to finish
+        if self._load_thread and self._load_thread.is_alive():
+            self._load_thread.join(timeout=5.0)
+
+        # Unload the model
+        self.unload_model()
+
+        # Clear the queue
+        while not self._response_queue.empty():
+            try:
+                self._response_queue.get_nowait()
+            except queue.Empty:
+                break