spec2function 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
# Public package surface for spec2function: re-export the model-manager
# singleton, the GPT inference client, PubMed search, the configuration
# object, and the spectrum parsing/preprocessing utilities.
from .model_manager import Spec2FunctionManager, manager
from .gpt_inference import GPTInference
from .pubmed import PubMedSearcher
from .config import Spec2FunctionConfig, config
from .utils import parse_mgf, parse_msp, preprocess_spectrum

# Names exported via `from spec2function import *`.
__all__ = [
    "Spec2FunctionManager",
    "manager",
    "GPTInference",
    "PubMedSearcher",
    "Spec2FunctionConfig",
    "config",
    "parse_mgf",
    "parse_msp",
    "preprocess_spectrum",
]
@@ -0,0 +1,74 @@
1
from __future__ import annotations

from pathlib import Path
import os
from typing import Iterable, Optional

# Relative paths (under the resolved assets root) that must all exist for
# the package to run: the trained model checkpoint plus its config, and the
# HMDB text corpus with its precomputed embeddings.
REQUIRED_ASSETS = (
    "models/best_model.pth",
    "models/config.json",
    "data/hmdb_subsections_WITH_NAME.jsonl",
    "data/all_jsonl_embeddings.pt",
)
13
+
14
+
15
+ def _default_cache_dir() -> Path:
16
+ if os.name == "nt":
17
+ base = os.getenv("LOCALAPPDATA") or os.getenv("APPDATA") or str(Path.home())
18
+ return Path(base) / "Spec2Function"
19
+ xdg_cache = os.getenv("XDG_CACHE_HOME")
20
+ if xdg_cache:
21
+ return Path(xdg_cache) / "Spec2Function"
22
+ return Path.home() / ".cache" / "Spec2Function"
23
+
24
+
25
def _has_required_files(root: Path, files: Iterable[str] = REQUIRED_ASSETS) -> bool:
    """Return True iff every relative path in *files* exists under *root*."""
    for rel in files:
        if not (root / rel).exists():
            return False
    return True
27
+
28
+
29
def _download_assets(root: Path, repo_id: str, files: Iterable[str]) -> None:
    """Download any missing asset *files* from the HF Hub repo into *root*.

    Files already present locally are left untouched. Raises RuntimeError
    if huggingface_hub is not installed.
    """
    try:
        from huggingface_hub import hf_hub_download
    except Exception as exc:
        raise RuntimeError(
            "huggingface_hub is required to download Spec2Function assets. "
            "Install with `pip install huggingface_hub` or set MS2FUNCTION_ASSET_DIR."
        ) from exc

    root.mkdir(parents=True, exist_ok=True)
    # Only fetch what is not already on disk.
    for rel in (r for r in files if not (root / r).exists()):
        (root / rel).parent.mkdir(parents=True, exist_ok=True)
        hf_hub_download(
            repo_id=repo_id,
            filename=rel,
            local_dir=root,
            local_dir_use_symlinks=False,
        )
50
+
51
+
52
def resolve_assets_root(project_root: Optional[Path]) -> Path:
    """Locate (or download) the directory holding all required assets.

    Resolution order:
      1. ``MS2FUNCTION_ASSET_DIR`` env var — must already contain every
         required file, else FileNotFoundError.
      2. *project_root*, if given and complete.
      3. The per-user cache dir, populated from the Hub repo named by
         ``MS2FUNCTION_ASSET_REPO`` (download on demand).
    """
    env_dir = os.getenv("MS2FUNCTION_ASSET_DIR")
    if env_dir:
        root = Path(env_dir)
        if _has_required_files(root):
            return root
        raise FileNotFoundError(
            f"Spec2Function assets not found in MS2FUNCTION_ASSET_DIR: {root}"
        )

    if project_root:
        candidate = Path(project_root)
        if _has_required_files(candidate):
            return candidate

    repo_id = os.getenv("MS2FUNCTION_ASSET_REPO", "cgxjdzz/spec2function-assets")
    cache_root = _default_cache_dir()
    _download_assets(cache_root, repo_id, REQUIRED_ASSETS)
    if _has_required_files(cache_root):
        return cache_root
    raise FileNotFoundError(
        f"Failed to download Spec2Function assets from {repo_id} into {cache_root}"
    )
@@ -0,0 +1,380 @@
1
+ import torch
2
+ from torch.utils.data import Dataset
3
+ import pandas as pd
4
+ import re
5
+ from typing import Dict, Optional, Callable, List, Union, Any, Tuple
6
+ from abc import ABC, abstractmethod
7
+
8
+
9
class BioTextProcessor(ABC):
    """Abstract base class for biological-text processors.

    Subclasses implement :meth:`process`, which maps raw per-molecule text
    to a processed representation. Instances are also directly callable,
    delegating to :meth:`process`.
    """

    def __init__(self, fields_to_keep: Union[List[str], str] = "all"):
        """
        Args:
            fields_to_keep: either the literal string ``"all"`` or an
                explicit list of field names to retain in the output.
        """
        self.fields_to_keep = fields_to_keep

    @abstractmethod
    def process(self, biotext_data: Dict, meta_data: Optional[pd.DataFrame] = None) -> Dict:
        """Transform raw biotext into processed output.

        Args:
            biotext_data: raw text keyed by molecule id,
                ``{molecule_id: text_str}``.
            meta_data: optional metadata DataFrame with per-molecule columns.

        Returns:
            ``{molecule_id: processed_value}``.
        """

    def __call__(self, biotext_data: Dict, meta_data: Optional[pd.DataFrame] = None) -> Dict:
        """Make the processor usable as a plain callable; see :meth:`process`."""
        return self.process(biotext_data, meta_data)
50
+
51
class HMDBProcessor(BioTextProcessor):
    """Processor for HMDB biotext records, emitting dict or string output.

    The raw input per molecule is a text blob containing ``=== section ===``
    headers followed by small JSON objects; additional information (SMILES,
    synonyms, taxonomy, tissue/cellular locations) comes from an optional
    metadata DataFrame. Any field that cannot be filled is replaced by a
    fixed default sentence.

    Fixes relative to the previous revision:
      * ``process`` restores the base class's ``meta_data=None`` default,
        so ``p.process(data)`` works like ``p(data)`` instead of raising
        TypeError.
      * Synonym-count lookup now guards for a missing ``HMDB.ID`` column
        (previously a KeyError on DataFrames indexed by molecule id).
    """

    # JSON key that holds the sentence for each text-derived field.
    _SENTENCE_KEYS = {
        "molecular_function": "biological_function_sentence",
        "enzymes_proteins_pathways": "enzymes_proteins_pathways_sentence",
        "toxicity_or_benefit": "toxicity_or_benefit_sentence",
        "disease_association": "disease_association_sentence",
    }

    def __init__(self, fields_to_keep: Union[List[str], str] = "all",
                 return_type: str = "dict",
                 max_synonyms: int = 5,
                 delimiter: str = "; "):
        """
        Args:
            fields_to_keep: ``"all"`` or a list of field names to retain.
            return_type: ``"dict"`` for structured output, ``"str"`` for a
                single concatenated text per molecule.
            max_synonyms: maximum number of synonyms kept per molecule.
            delimiter: replacement for the raw data's ``"{}"`` separator.
        """
        super().__init__(fields_to_keep)
        # Every field this processor knows how to produce.
        self.all_fields = [
            "molecular_function",
            "enzymes_proteins_pathways",
            "toxicity_or_benefit",
            "disease_association",
            "distribution",
            "smiles_synonyms",
            "kingdom"
        ]

        # "all" expands to the full field list.
        if self.fields_to_keep == "all":
            self.fields_to_keep = self.all_fields

        if return_type not in ["dict", "str"]:
            raise ValueError("return_type必须是'dict'或'str'")
        self.return_type = return_type

        self.max_synonyms = max_synonyms
        self.delimiter = delimiter

        # Default sentence templates used when a field is missing.
        self.default_sentences = {
            "molecular_function": "No known molecular function or biological role has been reported.",
            "enzymes_proteins_pathways": "No specific enzymes, proteins or pathways associated with this compound have been documented.",
            "toxicity_or_benefit": "No information regarding toxicity or health benefits is available for this compound.",
            "disease_association": "No disease associations have been reported for this compound.",
            "distribution": "The distribution of this compound in biological systems is not well characterized.",
            "smiles_synonyms": "No synonyms or SMILES structure are available for this compound.",
            "kingdom": "The taxonomic classification of this compound is not available."
        }

    def process(self, biotext_data: Dict, meta_data: Optional[pd.DataFrame] = None) -> Dict:
        """Process HMDB biotext records.

        Args:
            biotext_data: raw text per molecule, ``{molecule_id: text_str}``.
            meta_data: optional metadata DataFrame (synonyms, SMILES,
                taxonomy, locations). Defaults to None, matching the base
                class contract.

        Returns:
            ``{molecule_id: dict}`` (kept fields only) when
            ``return_type == "dict"``, ``{molecule_id: str}`` when
            ``return_type == "str"``.
        """
        processed_data = {}

        for molecule_id, text_data in biotext_data.items():
            processed_dict = {field: None for field in self.all_fields}

            # Fields parsed out of the raw text blob.
            if isinstance(text_data, str) and text_data.strip():
                for field, json_key in self._SENTENCE_KEYS.items():
                    if field in self.fields_to_keep:
                        processed_dict[field] = self._extract_sentence(
                            text_data, field, json_key
                        )

            # Fields derived from the metadata table.
            if meta_data is not None:
                meta_row = self._find_meta_row(meta_data, molecule_id)
                if meta_row is not None:
                    if "smiles_synonyms" in self.fields_to_keep:
                        smiles_synonyms = self._extract_smiles_synonyms(meta_row)
                        if smiles_synonyms is not None:
                            processed_dict["smiles_synonyms"] = smiles_synonyms

                    if "kingdom" in self.fields_to_keep:
                        kingdom_info = self._extract_kingdom(meta_row)
                        if kingdom_info:
                            processed_dict["kingdom"] = kingdom_info

                    if "distribution" in self.fields_to_keep:
                        distribution_info = self._extract_distribution(meta_row)
                        if distribution_info:
                            processed_dict["distribution"] = distribution_info

            # Render every kept field as an English sentence.
            processed_dict = self._convert_to_sentences(processed_dict, molecule_id, meta_data)

            if self.return_type == "str":
                processed_data[molecule_id] = self._dict_to_text(processed_dict)
            else:
                processed_data[molecule_id] = {
                    k: v for k, v in processed_dict.items() if k in self.fields_to_keep
                }

        return processed_data

    @staticmethod
    def _extract_sentence(text: str, section: str, key: str) -> Optional[str]:
        """Pull the ``"<key>": "..."`` sentence out of a ``=== <section> ===``
        block; return None when absent or empty."""
        match = re.search(
            rf'=== {section} ===\s*{{.*?"{key}":\s*"(.*?)"\s*}}',
            text, re.DOTALL
        )
        if match and match.group(1).strip():
            return match.group(1).strip()
        return None

    @staticmethod
    def _find_meta_row(meta_data: pd.DataFrame, molecule_id: str):
        """Locate the metadata row for *molecule_id* via the 'HMDB.ID'
        column, falling back to the DataFrame index; None if not found."""
        if 'HMDB.ID' in meta_data.columns and molecule_id in meta_data['HMDB.ID'].values:
            return meta_data[meta_data['HMDB.ID'] == molecule_id].iloc[0]
        if molecule_id in meta_data.index:
            return meta_data.loc[molecule_id]
        return None

    def _extract_smiles_synonyms(self, meta_row) -> Optional[Dict]:
        """Build ``{'smiles', 'common_names'}`` from a metadata row; None
        when neither is available."""
        synonyms_dict = {'smiles': None, 'common_names': []}

        if 'SMILES.ID' in meta_row and not pd.isna(meta_row['SMILES.ID']):
            smiles = meta_row['SMILES.ID']
            if isinstance(smiles, str) and smiles.strip():
                synonyms_dict['smiles'] = smiles

        if 'Synonyms' in meta_row and not pd.isna(meta_row['Synonyms']):
            synonyms = meta_row['Synonyms']
            if isinstance(synonyms, str):
                # '{}' is the raw separator; normalize it, then split on ';'.
                synonyms_list = synonyms.replace('{}', self.delimiter).split(';')
                synonyms_list = [s.strip() for s in synonyms_list if s.strip()]
                synonyms_dict['common_names'] = synonyms_list[:self.max_synonyms]
            elif isinstance(synonyms, list):
                synonyms_list = [s.replace('{}', self.delimiter) for s in synonyms if s.strip()]
                synonyms_dict['common_names'] = synonyms_list[:self.max_synonyms]

        if synonyms_dict['smiles'] is not None or synonyms_dict['common_names']:
            return synonyms_dict
        return None

    @staticmethod
    def _extract_kingdom(meta_row) -> Dict:
        """Collect the taxonomy levels present in the row (lower-cased keys)."""
        kingdom_info = {}
        for category in ['Kingdom', 'Super_class', 'Class', 'Sub_class']:
            if category in meta_row and not pd.isna(meta_row[category]):
                kingdom_info[category.lower()] = meta_row[category]
        return kingdom_info

    def _extract_distribution(self, meta_row) -> Dict:
        """Collect biospecimen/cellular/tissue location lists from the row."""
        distribution_info = {}
        for location_type in ['Biospecimen_locations', 'Cellular_locations', 'Tissue_locations']:
            if location_type in meta_row and not pd.isna(meta_row[location_type]):
                key = location_type.replace('_locations', '').lower()
                locations = meta_row[location_type]
                if isinstance(locations, str):
                    distribution_info[key] = [
                        loc.replace('{}', self.delimiter) for loc in locations.split(';')
                    ]
                elif isinstance(locations, list):
                    distribution_info[key] = [
                        loc.replace('{}', self.delimiter) for loc in locations
                    ]
        return distribution_info

    def _convert_to_sentences(self, data_dict: Dict, molecule_id: str,
                              meta_data: Optional[pd.DataFrame]) -> Dict:
        """Render each kept field of *data_dict* as an English sentence,
        substituting the default sentence for missing values.

        Args:
            data_dict: extracted raw values per field.
            molecule_id: molecule id, used to look up the total synonym count.
            meta_data: optional metadata DataFrame (may be None).

        Returns:
            ``{field: sentence}`` restricted to the kept fields.
        """
        result_dict = {}

        for field in self.all_fields:
            if field not in self.fields_to_keep:
                continue

            value = data_dict.get(field)
            if value is None:
                result_dict[field] = self.default_sentences[field]
                continue

            if field == "smiles_synonyms":
                result_dict[field] = self._smiles_synonyms_sentence(value, molecule_id, meta_data)
            elif field == "kingdom":
                result_dict[field] = self._kingdom_sentence(value)
            elif field == "distribution":
                result_dict[field] = self._distribution_sentence(value)
            else:
                # Text-derived fields are already sentences.
                result_dict[field] = value

        return result_dict

    def _smiles_synonyms_sentence(self, value, molecule_id: str,
                                  meta_data: Optional[pd.DataFrame]) -> str:
        """Render the SMILES/synonyms dict as one identifier sentence."""
        if not isinstance(value, dict):
            return self.default_sentences["smiles_synonyms"]

        smiles = value.get('smiles')
        common_names = value.get('common_names', [])

        sentence_parts = []
        if smiles:
            sentence_parts.append(f"SMILES structure: {smiles}")

        if common_names:
            if len(common_names) == 1:
                sentence_parts.append(f"Also known as: {common_names[0]}")
            else:
                names_str = self.delimiter.join(common_names)
                sentence_parts.append(f"Common names include: {names_str}")

            # Report how many synonyms were truncated by max_synonyms.
            total_count = len(common_names)
            # Guard every lookup: meta_data may be None, and the DataFrame
            # may lack the 'HMDB.ID' column (previous code crashed here).
            if (meta_data is not None
                    and 'Synonyms' in meta_data.columns
                    and 'HMDB.ID' in meta_data.columns
                    and molecule_id in meta_data['HMDB.ID'].values):
                meta_row = meta_data[meta_data['HMDB.ID'] == molecule_id].iloc[0]
                if 'Synonyms' in meta_row and not pd.isna(meta_row['Synonyms']):
                    synonyms = meta_row['Synonyms']
                    if isinstance(synonyms, str):
                        total_count = len(synonyms.split(';'))
                    elif isinstance(synonyms, list):
                        total_count = len(synonyms)

            if total_count > len(common_names):
                sentence_parts.append(f"({total_count - len(common_names)} additional synonyms not shown)")

        if sentence_parts:
            return f"This compound has the following identifiers: {'. '.join(sentence_parts)}."
        return self.default_sentences["smiles_synonyms"]

    def _kingdom_sentence(self, value) -> str:
        """Render the taxonomy dict as one classification sentence."""
        if not (isinstance(value, dict) and value):
            return self.default_sentences["kingdom"]

        labels = [("kingdom", "Kingdom"), ("super_class", "Super class"),
                  ("class", "Class"), ("sub_class", "Sub class")]
        parts = [f"{label}: {value[key]}" for key, label in labels if key in value]
        if parts:
            return f"The taxonomic classification of this compound is: {'; '.join(parts)}."
        return self.default_sentences["kingdom"]

    def _distribution_sentence(self, value) -> str:
        """Render the distribution dict as one locations sentence."""
        if not (isinstance(value, dict) and value):
            return self.default_sentences["distribution"]

        labels = [("biospecimen", "Biospecimen locations"),
                  ("cellular", "Cellular locations"),
                  ("tissue", "Tissue locations")]
        parts = [f"{label}: {self.delimiter.join(value[key])}"
                 for key, label in labels if value.get(key)]
        if parts:
            return f"This compound is distributed in the following locations: {'; '.join(parts)}."
        return self.default_sentences["distribution"]

    def _dict_to_text(self, data_dict: Dict) -> str:
        """Join the kept fields into ``=== field ===`` sections separated
        by blank lines."""
        text_parts = [f"=== {field} ===\n{data_dict[field]}"
                      for field in self.fields_to_keep if field in data_dict]
        return "\n\n".join(text_parts)
376
+
377
+
378
class KEGGProcessor(BioTextProcessor):
    """Placeholder processor for KEGG biotext; not implemented yet."""

    def process(self, biotext_data: Dict, meta_data: Optional[pd.DataFrame] = None) -> Dict:
        # NOTE: stub — raises until a KEGG-specific parser is written.
        # (Message reads "KEGGProcessor's process method is not yet implemented".)
        raise NotImplementedError("KEGGProcessor 的 process 方法尚未实现")
@@ -0,0 +1,118 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Spec2Function Backend Configuration
4
+ """
5
+ from pathlib import Path
6
+ import os
7
+ from typing import Optional
8
+ from .assets import resolve_assets_root
9
+
10
class Spec2FunctionConfig:
    """Global configuration manager for the Spec2Function backend.

    Resolves the assets root (given explicitly or via
    ``resolve_assets_root``), derives model/data paths from it, and reads
    LLM, PubMed, and MSBERT settings from the environment.
    """

    def __init__(self, project_root: Optional[Path] = None):
        """
        Args:
            project_root: project root directory. When None, the assets
                root is resolved automatically (env var / local files /
                Hub download).
        """
        self.project_root = (
            Path(project_root) if project_root is not None
            else resolve_assets_root(None)
        )

        # Model artifacts.
        self.model_dir = self.project_root / "models"
        self.model_checkpoint = self.model_dir / "best_model.pth"
        self.model_config = self.model_dir / "config.json"

        # Data artifacts — names must match the files shipped locally.
        self.data_dir = self.project_root / "data"
        self.femdb_jsonl = self.data_dir / "hmdb_subsections_WITH_NAME.jsonl"
        self.femdb_embeddings = self.data_dir / "all_jsonl_embeddings.pt"
        # Backwards-compatible alias.
        self.hmdb_metadata_path = self.femdb_jsonl

        # SPECTER embedding model.
        self.specter_model_name = "allenai/specter"
        self.specter_cache_dir = self.model_dir / "specter_cache"

        # Retrieval parameters.
        self.single_retrieval_top_k = 5
        self.set_retrieval_top_k = 20
        self.min_similarity = 0.65

        # OpenAI / GPT parameters.
        self.openai_api_key = os.getenv("OPENAI_API_KEY", "")
        self.gpt_model = "gpt-4o"
        self.gpt_max_tokens = 2000  # mode 2 produces longer answers
        self.gpt_temperature = 0.4

        # Generic LLM provider parameters, all overridable via env vars.
        self.llm_provider = os.getenv("LLM_PROVIDER", "openai").lower()
        self.llm_api_key = os.getenv("LLM_API_KEY", "")
        self.llm_base_url = os.getenv("LLM_BASE_URL", "")
        self.llm_model = os.getenv("LLM_MODEL", "")
        self.gemini_api_key = os.getenv("GEMINI_API_KEY", "")
        self.siliconflow_api_key = os.getenv("SILICONFLOW_API_KEY", "")

        # Provider-specific fallbacks when nothing was set explicitly.
        if not self.llm_model:
            self.llm_model = (
                "deepseek-ai/DeepSeek-V3.2"
                if self.llm_provider == "siliconflow"
                else self.gpt_model
            )
        if not self.llm_base_url and self.llm_provider == "siliconflow":
            self.llm_base_url = "https://api.siliconflow.cn/v1/chat/completions"

        # PubMed parameters.
        self.pubmed_email = os.getenv("PUBMED_EMAIL", "your_email@example.com")
        self.pubmed_max_results = 5

        # MSBERT architecture parameters.
        self.msbert_vocab_size = 100002
        self.msbert_hidden_size = 512
        self.msbert_num_layers = 6
        self.msbert_num_heads = 16
        self.msbert_dropout = 0
        self.msbert_max_len = 100
        self.msbert_kernel_size = 3

    def validate(self):
        """Raise FileNotFoundError if required model files are missing.

        Data files are deliberately not checked — they may not have been
        downloaded yet. Prints a warning when no LLM API key is configured.
        """
        checks = [
            (self.model_checkpoint, "Model checkpoint"),
            (self.model_config, "Model config"),
        ]
        missing = [f"{label}: {path}" for path, label in checks if not path.exists()]

        if missing:
            raise FileNotFoundError(
                "Missing required files:\n" + "\n".join(f"  - {m}" for m in missing)
            )

        if not self.resolve_llm_api_key():
            print("Warning: LLM API key not set. GPT inference will fail.")

        return True

    def resolve_llm_api_key(self) -> str:
        """Return the API key for the active provider, falling back to LLM_API_KEY."""
        provider = (self.llm_provider or "openai").lower()
        provider_keys = {
            "gemini": self.gemini_api_key,
            "siliconflow": self.siliconflow_api_key,
        }
        return provider_keys.get(provider, self.openai_api_key) or self.llm_api_key
115
+
116
+
117
# Module-level singleton; constructed at import time, which may trigger
# asset-root resolution (env var lookup / local files / Hub download).
config = Spec2FunctionConfig()