wikontic 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. wikontic/__init__.py +16 -0
  2. wikontic/create_ontological_triplets_db.py +193 -0
  3. wikontic/create_triplets_db.py +259 -0
  4. wikontic/create_wikidata_ontology_db.py +555 -0
  5. wikontic/utils/__init__.py +7 -0
  6. wikontic/utils/base_inference_with_db.py +329 -0
  7. wikontic/utils/dynamic_aligner.py +281 -0
  8. wikontic/utils/inference_with_db.py +224 -0
  9. wikontic/utils/ontology_mappings/entity_hierarchy.json +1 -0
  10. wikontic/utils/ontology_mappings/entity_names.json +1 -0
  11. wikontic/utils/ontology_mappings/entity_type2aliases.json +1 -0
  12. wikontic/utils/ontology_mappings/entity_type2hierarchy.json +1 -0
  13. wikontic/utils/ontology_mappings/entity_type2label.json +1 -0
  14. wikontic/utils/ontology_mappings/enum_entity_ids.json +1 -0
  15. wikontic/utils/ontology_mappings/enum_prop_ids.json +1 -0
  16. wikontic/utils/ontology_mappings/label2entity.json +1 -0
  17. wikontic/utils/ontology_mappings/obj_constraint2prop.json +1 -0
  18. wikontic/utils/ontology_mappings/prop2aliases.json +1 -0
  19. wikontic/utils/ontology_mappings/prop2constraints.json +1 -0
  20. wikontic/utils/ontology_mappings/prop2data_type.json +1 -0
  21. wikontic/utils/ontology_mappings/prop2label.json +1 -0
  22. wikontic/utils/ontology_mappings/propid2enum.json +1 -0
  23. wikontic/utils/ontology_mappings/subj_constraint2prop.json +1 -0
  24. wikontic/utils/ontology_mappings/subject_object_constraints.json +1 -0
  25. wikontic/utils/openai_utils.py +517 -0
  26. wikontic/utils/prompts/name_refinement/prompt_choose_relation_wo_entity_types.txt +17 -0
  27. wikontic/utils/prompts/name_refinement/prompt_choose_relation_wo_entity_types_dialog_bench.txt +18 -0
  28. wikontic/utils/prompts/name_refinement/rank_object_names.txt +17 -0
  29. wikontic/utils/prompts/name_refinement/rank_object_names_dialog_bench.txt +18 -0
  30. wikontic/utils/prompts/name_refinement/rank_object_qualifiers.txt +20 -0
  31. wikontic/utils/prompts/name_refinement/rank_subject_names.txt +18 -0
  32. wikontic/utils/prompts/name_refinement/rank_subject_names_dialog_bench.txt +20 -0
  33. wikontic/utils/prompts/ontology_refinement/prompt_choose_entity_types.txt +26 -0
  34. wikontic/utils/prompts/ontology_refinement/prompt_choose_relation.txt +24 -0
  35. wikontic/utils/prompts/ontology_refinement/prompt_choose_relation_and_types.txt +28 -0
  36. wikontic/utils/prompts/qa/prompt_choose_relevant_entities_for_question.txt +17 -0
  37. wikontic/utils/prompts/qa/prompt_choose_relevant_entities_for_question_wo_types.txt +16 -0
  38. wikontic/utils/prompts/qa/prompt_entity_extraction_from_question.txt +3 -0
  39. wikontic/utils/prompts/qa/prompt_is_answered.txt +43 -0
  40. wikontic/utils/prompts/qa/qa_collapsing_prompt.txt +22 -0
  41. wikontic/utils/prompts/qa/qa_prompt.txt +5 -0
  42. wikontic/utils/prompts/qa/qa_prompt_hotpot.txt +6 -0
  43. wikontic/utils/prompts/qa/question_decomposition_1.txt +7 -0
  44. wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench.txt +75 -0
  45. wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench_in_russian.txt +78 -0
  46. wikontic/utils/prompts/triplet_extraction/propmt_1_types_qualifiers.txt +91 -0
  47. wikontic/utils/structured_aligner.py +606 -0
  48. wikontic/utils/structured_inference_with_db.py +561 -0
  49. wikontic-0.0.3.dist-info/METADATA +111 -0
  50. wikontic-0.0.3.dist-info/RECORD +53 -0
  51. wikontic-0.0.3.dist-info/WHEEL +5 -0
  52. wikontic-0.0.3.dist-info/licenses/LICENSE +19 -0
  53. wikontic-0.0.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,517 @@
1
+ import openai
2
+ import os
3
+ from dotenv import load_dotenv, find_dotenv
4
+ from tenacity import (
5
+ retry,
6
+ wait_random_exponential,
7
+ before_sleep_log,
8
+ stop_after_attempt,
9
+ )
10
+ import logging
11
+ import sys
12
+ import json
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Dict, List, Union, Optional
16
+ import tenacity
17
+ import httpx
18
+
19
# Logging setup: basicConfig targets stderr at WARNING, while this module's own
# logger is restricted to ERROR and above.
logging.basicConfig(level=logging.WARNING, stream=sys.stderr)
logger = logging.getLogger("OpenAIUtils")
logger.setLevel(logging.ERROR)
# Raise the threshold of the "httpx" logger to WARNING.
logging.getLogger("httpx").setLevel(logging.WARNING)

# Load variables from the discovered .env file (if any) into the environment.
_ = load_dotenv(find_dotenv())

# Shared OpenAI client; the API key is read from the ``KEY`` environment variable.
client = openai.OpenAI(api_key=os.getenv("KEY"))

# Default attempt budget: used by the tenacity decorators below and as the
# default ``max_attempts`` of LLMTripletExtractor.
MAX_ATTEMPTS = 1
30
+
31
+
32
+ class LLMTripletExtractor:
33
+ """A class for extracting and processing knowledge graph triplets using OpenAI's LLMs."""
34
+
35
+ MODEL_PRICES = {
36
+ "gpt-4o": {"input": 2.5, "output": 10},
37
+ "gpt-4o-mini": {"input": 0.15, "output": 0.6},
38
+ "gpt-4.1-mini": {"input": 0.4, "output": 1.6},
39
+ "gpt-4.1": {"input": 2.0, "output": 8.0},
40
+ "Meta-llama/Llama-3.3-70B-Instruct": {"input": 0.04, "output": 0.12},
41
+ "qwen/qwen3-32b": {"input": 0.05, "output": 0.2},
42
+ }
43
+
44
+ def __init__(
45
+ self,
46
+ prompt_folder_path: str = str(Path(__file__).parent / "prompts"),
47
+ system_prompt_paths: Optional[Dict[str, str]] = None,
48
+ model: str = "gpt-4o",
49
+ max_attempts=MAX_ATTEMPTS,
50
+ ):
51
+ """
52
+ Initialize the LLMTripletExtractor.
53
+
54
+ Args:
55
+ prompt_folder_path: Path to folder containing prompt files
56
+ system_prompt_paths: Dictionary mapping prompt types to file paths
57
+ model: Name of the OpenAI model to use
58
+ """
59
+ if system_prompt_paths is None:
60
+ system_prompt_paths = {
61
+ "triplet_extraction": "triplet_extraction/propmt_1_types_qualifiers.txt",
62
+ # 'triplet_extraction': 'triplet_extraction/prompt_1_types_qualifiers_dialog_bench.txt',
63
+ "relation_entity_types_ranker": "ontology_refinement/prompt_choose_relation_and_types.txt",
64
+ "relation_ranker": "ontology_refinement/prompt_choose_relation.txt",
65
+ "entity_types_ranker": "ontology_refinement/prompt_choose_entity_types.txt",
66
+ "relation_ranker_wo_entity_types": "name_refinement/prompt_choose_relation_wo_entity_types.txt",
67
+ # 'relation_ranker_wo_entity_types': 'name_refinement/prompt_choose_relation_wo_entity_types_dialog_bench.txt',
68
+ # 'subject_ranker': 'name_refinement/rank_subject_names_dialog_bench.txt',
69
+ "subject_ranker": "name_refinement/rank_subject_names.txt",
70
+ # 'object_ranker': 'name_refinement/rank_object_names_dialog_bench.txt',
71
+ "object_ranker": "name_refinement/rank_object_names.txt",
72
+ "quailfier_object_ranker": "name_refinement/rank_object_qualifiers.txt",
73
+ "question_entity_extractor": "qa/prompt_entity_extraction_from_question.txt",
74
+ "question_entity_ranker": "qa/prompt_choose_relevant_entities_for_question.txt",
75
+ "question_entity_ranker_wo_types": "qa/prompt_choose_relevant_entities_for_question_wo_types.txt",
76
+ # 'qa': 'qa_prompt_hotpot.txt'
77
+ "question_decomposition_1": "qa/question_decomposition_1.txt",
78
+ "qa_collapsing": "qa/qa_collapsing_prompt.txt",
79
+ "qa_is_answered": "qa/prompt_is_answered.txt",
80
+ "qa": "qa/qa_prompt.txt",
81
+ }
82
+
83
+ # Load all prompts
84
+ prompt_folder = Path(prompt_folder_path)
85
+ self.prompts = {}
86
+ for prompt_type, filename in system_prompt_paths.items():
87
+ with open(prompt_folder / filename) as f:
88
+ self.prompts[prompt_type] = f.read()
89
+
90
+ self.model = model
91
+ self.messages = []
92
+ self.prompt_tokens_num = 0
93
+ self.completion_tokens_num = 0
94
+ self.current_cost = 0
95
+
96
+ self._refine_attempt = 0
97
+ self._prev_error = None # store previous exception
98
+ self.MAX_ATTEMPTS = max_attempts
99
+
100
+ # Set pricing
101
+ if model not in self.MODEL_PRICES:
102
+ raise ValueError(f"Unknown model: {model}")
103
+ self.input_price = self.MODEL_PRICES[model]["input"]
104
+ self.output_price = self.MODEL_PRICES[model]["output"]
105
+
106
+ def extract_json(self, text: str) -> Union[dict, list, str]:
107
+ """Extract JSON from text, handling both code blocks and inline JSON."""
108
+ patterns = [
109
+ r"```json\s*(\{.*?\}|\[.*?\])\s*```", # JSON in code blocks
110
+ r"(\{.*?\}|\[.*?\])", # Inline JSON
111
+ ]
112
+
113
+ try:
114
+ return json.loads(text)
115
+ except json.JSONDecodeError:
116
+ pass
117
+
118
+ for pattern in patterns:
119
+ match = re.search(pattern, text, re.DOTALL)
120
+ if match:
121
+ try:
122
+ return json.loads(match.group(1))
123
+ except json.JSONDecodeError:
124
+ logging.ERROR(f"Failed to parse JSON: {text}")
125
+
126
+ return text
127
+
128
+ @retry(
129
+ wait=wait_random_exponential(multiplier=1, max=60),
130
+ before_sleep=before_sleep_log(logger, logging.ERROR),
131
+ stop=stop_after_attempt(5),
132
+ )
133
+ def get_completion(
134
+ self, system_prompt: str, user_prompt: str, transform_to_json: bool = True
135
+ ) -> Union[dict, list, str]:
136
+ """Get completion from OpenAI API with retry logic."""
137
+ if self.model == "qwen/qwen3-32b":
138
+ user_prompt = "/no_think \n" + user_prompt
139
+ messages = [
140
+ {"role": "system", "content": system_prompt},
141
+ {"role": "user", "content": user_prompt},
142
+ ]
143
+
144
+ response = client.chat.completions.create(
145
+ model=self.model, messages=messages, temperature=0
146
+ )
147
+ self.completion_tokens_num += response.usage.completion_tokens
148
+ self.prompt_tokens_num += response.usage.prompt_tokens
149
+ self.current_cost += (
150
+ response.usage.completion_tokens * self.output_price
151
+ + response.usage.prompt_tokens * self.input_price
152
+ )
153
+
154
+ content = response.choices[0].message.content.strip()
155
+ logger.debug("Output content: %s\n%s", str(content), "-" * 100)
156
+ output = self.extract_json(content) if transform_to_json else content
157
+
158
+ self.messages = messages + [{"role": "assistant", "content": output}]
159
+ return output
160
+
161
+ @tenacity.retry(stop=tenacity.stop_after_attempt(MAX_ATTEMPTS), reraise=True)
162
+ def extract_triplets_from_text(self, text: str) -> dict:
163
+ """Extract knowledge graph triplets from text."""
164
+
165
+ self._refine_attempt += 1
166
+ attempt = self._refine_attempt
167
+ logger.log(
168
+ logging.DEBUG,
169
+ "Attempt of a function call extract_triplets_from_text: %s",
170
+ attempt,
171
+ )
172
+ system_prompt = self.prompts["triplet_extraction"]
173
+ if attempt > 1:
174
+ prev_error = self._prev_error
175
+ system_prompt += f"\n(Previous attempt #{attempt-1} failed with error: {prev_error}. Please adjust your answer!)"
176
+ logger.log(logging.ERROR, "System prompt: %s", system_prompt)
177
+
178
+ try:
179
+ return self.get_completion(
180
+ system_prompt=system_prompt, user_prompt=f'Text: "{text}"'
181
+ )
182
+ except Exception as e:
183
+ self._prev_error = e
184
+ # if json from output is broken after 3 attempts - raise an exception
185
+ logger.log(logging.ERROR, str(e))
186
+ if attempt > self.MAX_ATTEMPTS:
187
+ raise e
188
+
189
+ @tenacity.retry(stop=tenacity.stop_after_attempt(MAX_ATTEMPTS), reraise=True)
190
+ def refine_entity_types(
191
+ self,
192
+ text: str,
193
+ triplet: dict,
194
+ candidate_subject_types: List[str],
195
+ candidate_object_types: List[str],
196
+ ) -> dict:
197
+ """Refine relations and entity types using candidate backbone triplets."""
198
+ triplet_filtered = {
199
+ k: triplet[k]
200
+ for k in ["subject", "relation", "object", "subject_type", "object_type"]
201
+ }
202
+
203
+ candidates_subject_types_str = json.dumps(candidate_subject_types)
204
+ candidates_object_types_str = json.dumps(candidate_object_types)
205
+ logger.log(
206
+ logging.DEBUG,
207
+ "candidates subject types: %s\n%s",
208
+ str(candidates_subject_types_str),
209
+ "-" * 100,
210
+ )
211
+ logger.log(
212
+ logging.DEBUG,
213
+ "candidates object types: %s\n%s",
214
+ str(candidates_object_types_str),
215
+ "-" * 100,
216
+ )
217
+
218
+ self._refine_attempt += 1
219
+ attempt = self._refine_attempt
220
+ logger.log(
221
+ logging.DEBUG, "Attempt of a function call refine_entity_types: %s", attempt
222
+ )
223
+ system_prompt = self.prompts["entity_types_ranker"]
224
+ if attempt > 1:
225
+ prev_error = self._prev_error
226
+ system_prompt += f"\n(Previous attempt #{attempt-1} failed with error: {prev_error}. Please adjust your answer!)"
227
+ logger.log(logging.ERROR, "System prompt: %s", system_prompt)
228
+
229
+ try:
230
+ output = self.get_completion(
231
+ system_prompt=system_prompt,
232
+ user_prompt=f'Text: "{text}\nExtracted Triplet: {json.dumps(triplet_filtered)}\n'
233
+ f"Candidate Subject Types: {candidates_subject_types_str}\n"
234
+ f"Candidate Object Types: {candidates_object_types_str}",
235
+ )
236
+ except Exception as e:
237
+ self._prev_error = e
238
+ logger.log(logging.ERROR, str(e))
239
+ # if json from output is broken after 3 attempts - raise an exception
240
+ if attempt > self.MAX_ATTEMPTS:
241
+ raise e
242
+
243
+ logger.log(
244
+ logging.DEBUG,
245
+ "refined subject type: %s\n%s",
246
+ str(output["subject_type"]),
247
+ "-" * 100,
248
+ )
249
+ logger.log(
250
+ logging.DEBUG,
251
+ "refined object type: %s\n%s",
252
+ str(output["object_type"]),
253
+ "-" * 100,
254
+ )
255
+
256
+ try:
257
+ assert (
258
+ output["subject_type"] in candidate_subject_types
259
+ ), "Refined subject type is not in candidate subject types"
260
+ assert (
261
+ output["object_type"] in candidate_object_types
262
+ ), "Refined object type is not in candidate object types"
263
+ except Exception as e:
264
+ self._prev_error = e
265
+ logger.log(logging.ERROR, str(e))
266
+ # do not raise an exception - save triplet in ontology filtered collection
267
+ return output
268
+
269
+ @tenacity.retry(stop=tenacity.stop_after_attempt(MAX_ATTEMPTS), reraise=True)
270
+ def refine_relation(
271
+ self, text: str, triplet: dict, candidate_relations: List[dict]
272
+ ) -> dict:
273
+ """Refine relation using candidate relations."""
274
+ triplet_filtered = {
275
+ k: triplet[k]
276
+ for k in ["subject", "relation", "object", "subject_type", "object_type"]
277
+ }
278
+
279
+ candidates_str = json.dumps(candidate_relations, ensure_ascii=False)
280
+ logger.log(
281
+ logging.DEBUG,
282
+ "candidates relations: %s\n%s",
283
+ str(candidates_str),
284
+ "-" * 100,
285
+ )
286
+ self._refine_attempt += 1
287
+ attempt = self._refine_attempt
288
+
289
+ logger.log(
290
+ logging.DEBUG, "Attempt of a function call refine_relation: %s", attempt
291
+ )
292
+ system_prompt = self.prompts["relation_ranker"]
293
+
294
+ if attempt > 1:
295
+ prev_error = self._prev_error
296
+ system_prompt += f"\n(Previous attempt #{attempt-1} failed with error {prev_error}. Please adjust your answer!)"
297
+ logger.log(logging.ERROR, "System prompt: %s", system_prompt)
298
+ try:
299
+ output = self.get_completion(
300
+ system_prompt=system_prompt,
301
+ user_prompt=f'Text: "{text}\nExtracted Triplet: {json.dumps(triplet_filtered, ensure_ascii=False)}\n'
302
+ f"Candidate relations: {candidates_str}",
303
+ transform_to_json=True,
304
+ )
305
+ except Exception as e:
306
+ self._prev_error = e
307
+ logger.log(logging.ERROR, str(e))
308
+ # if json from output is broken after 3 attempts - raise an exception
309
+ if attempt > self.MAX_ATTEMPTS:
310
+ raise e
311
+
312
+ logger.log(
313
+ logging.DEBUG,
314
+ "refined relation: %s\n%s",
315
+ str(output["relation"]),
316
+ "-" * 100,
317
+ )
318
+
319
+ try:
320
+ assert (
321
+ output["relation"] in candidate_relations
322
+ ), "Refined relation is not in candidate relations"
323
+ except Exception as e:
324
+ self._prev_error = e
325
+ logger.log(logging.ERROR, str(e))
326
+ # do not raise an exception - save triplet in ontology filtered collection
327
+
328
+ return output
329
+
330
+ @tenacity.retry(stop=tenacity.stop_after_attempt(MAX_ATTEMPTS), reraise=True)
331
+ def refine_relation_wo_entity_types(
332
+ self, text: str, triplet: dict, candidate_relations: List[dict]
333
+ ) -> dict:
334
+ """Refine relation using candidate relations."""
335
+ triplet_filtered = {k: triplet[k] for k in ["subject", "relation", "object"]}
336
+ candidates_str = json.dumps(candidate_relations, ensure_ascii=False)
337
+ logger.log(
338
+ logging.DEBUG,
339
+ "candidates relations: %s\n%s",
340
+ str(candidates_str),
341
+ "-" * 100,
342
+ )
343
+
344
+ attempt = self._refine_attempt
345
+
346
+ logger.log(
347
+ logging.DEBUG,
348
+ "Attempt of a function call refine_relation_wo_entity_types: %s",
349
+ attempt,
350
+ )
351
+ self._refine_attempt += 1
352
+ system_prompt = self.prompts["relation_ranker_wo_entity_types"]
353
+
354
+ if attempt > 1:
355
+ prev_error = self._prev_error
356
+ system_prompt += f"\n(Previous attempt #{attempt-1} failed with error {prev_error}. Please adjust your answer!)"
357
+ logger.log(logging.ERROR, "System prompt: %s", system_prompt)
358
+ try:
359
+ return self.get_completion(
360
+ system_prompt=system_prompt,
361
+ user_prompt=f'Text: "{text}\nExtracted Triplet: {json.dumps(triplet_filtered, ensure_ascii=False)}\n'
362
+ f"Candidate relations: {candidates_str}",
363
+ transform_to_json=False,
364
+ )
365
+ except Exception as e:
366
+ self._prev_error = e
367
+ logger.log(logging.ERROR, str(e))
368
+ # if json from output is broken after 3 attempts - raise an exception
369
+ if self._refine_attempt > self.MAX_ATTEMPTS:
370
+ raise e
371
+
372
+ def refine_relation_and_entity_types(
373
+ self, text: str, triplet: dict, candidate_triplets: List[dict]
374
+ ) -> dict:
375
+ """Refine relations and entity types using candidate backbone triplets."""
376
+ triplet_filtered = {
377
+ k: triplet[k]
378
+ for k in ["subject", "relation", "object", "subject_type", "object_type"]
379
+ }
380
+
381
+ candidates_str = "".join(f"{json.dumps(c)}\n" for c in candidate_triplets)
382
+
383
+ return self.get_completion(
384
+ system_prompt=self.prompts["relation_entity_types_ranker"],
385
+ user_prompt=f'Text: "{text}\nExtracted Triplet: {json.dumps(triplet_filtered)}\n'
386
+ f"Candidate Triplets: {candidates_str}",
387
+ )
388
+
389
+ def refine_entity(
390
+ self,
391
+ text: str,
392
+ triplet: dict,
393
+ candidates: List[str],
394
+ is_object: bool = False,
395
+ role: str = "user",
396
+ ) -> dict:
397
+ """Refine subject/object names using candidate options from pre-built KG."""
398
+
399
+ triplet_filtered = {k: triplet[k] for k in ["subject", "relation", "object"]}
400
+ original_name = triplet_filtered["object" if is_object else "subject"]
401
+
402
+ self._refine_attempt += 1
403
+ attempt = self._refine_attempt
404
+
405
+ logger.log(
406
+ logging.DEBUG, "Attempt of a function call refine_entity: %s", attempt
407
+ )
408
+ prompt_key = "object_ranker" if is_object else "subject_ranker"
409
+ entity_type = "Object" if is_object else "Subject"
410
+ system_prompt = self.prompts[prompt_key]
411
+
412
+ if attempt > 1:
413
+ prev_error = self._prev_error
414
+ system_prompt += f"\n(Previous attempt #{attempt-1} failed with error: {prev_error}. Please adjust your answer!)"
415
+ logger.log(logging.ERROR, "System prompt: %s", system_prompt)
416
+
417
+ try:
418
+ return self.get_completion(
419
+ system_prompt=system_prompt,
420
+ user_prompt=f'Text: "{text}\nRole: {role}\nExtracted Triplet: {json.dumps(triplet_filtered, ensure_ascii=False)}\n'
421
+ f"Original {entity_type}: {original_name}\n"
422
+ f'Candidate {entity_type}s: {json.dumps(candidates, ensure_ascii=False)}"',
423
+ transform_to_json=False,
424
+ )
425
+ except Exception as e:
426
+ self._prev_error = e
427
+ logger.log(logging.ERROR, str(e))
428
+ # if json from output is broken after 3 attempts - raise an exception
429
+ if attempt > self.MAX_ATTEMPTS:
430
+ raise e
431
+
432
+ def extract_entities_from_question(self, question: str) -> dict:
433
+ """Extract entities from a question."""
434
+ return self.get_completion(
435
+ system_prompt=self.prompts["question_entity_extractor"],
436
+ user_prompt=f"Question: {question}",
437
+ )
438
+
439
+ def identify_relevant_entities(
440
+ self, question: str, entity_list: List[str]
441
+ ) -> List[str]:
442
+ """Identify entities relevant to a question."""
443
+ return self.get_completion(
444
+ system_prompt=self.prompts["question_entity_ranker"],
445
+ user_prompt=f"Question: {question}\nEntities: {entity_list}",
446
+ )
447
+
448
+ def identify_relevant_entities_wo_types(
449
+ self, question: str, entity_list: List[str]
450
+ ) -> List[str]:
451
+ """Identify entities relevant to a question."""
452
+ return self.get_completion(
453
+ system_prompt=self.prompts["question_entity_ranker_wo_types"],
454
+ user_prompt=f"Question: {question}\nEntities: {entity_list}",
455
+ )
456
+
457
+ def answer_question(self, question: str, triplets: List[dict]) -> str:
458
+ """Answer a question using knowledge graph triplets."""
459
+ return self.get_completion(
460
+ system_prompt=self.prompts["qa"],
461
+ user_prompt=f'Question: {question}\n\nTriplets: "{triplets}"',
462
+ transform_to_json=False,
463
+ )
464
+
465
+ def collapse_question(
466
+ self, original_question: str, question: str, answer: str
467
+ ) -> str:
468
+ """Collapse a question using knowledge graph triplets."""
469
+ return self.get_completion(
470
+ system_prompt=self.prompts["qa_collapsing"],
471
+ user_prompt=f"Original multi-hop question: {original_question}\n\Answered sub-question: {question}\n\Answer: {answer}",
472
+ transform_to_json=True,
473
+ )
474
+
475
+ def decompose_question(self, question: str) -> str:
476
+ """Decompose a question using knowledge graph triplets."""
477
+ return self.get_completion(
478
+ system_prompt=self.prompts["question_decomposition_1"],
479
+ user_prompt=f"Question: {question}",
480
+ transform_to_json=False,
481
+ )
482
+
483
+ def check_if_question_is_answered(
484
+ self, question: str, subquestions: List[str], answers: List[str]
485
+ ) -> str:
486
+ """Check if a question is answered."""
487
+ user_prompt = (
488
+ f"Original multi-hop question: {question}\nQuestion->answer sequence:\n"
489
+ )
490
+ for question, answer in zip(subquestions, answers):
491
+ user_prompt += f"{question} -> {answer}\n"
492
+ return self.get_completion(
493
+ system_prompt=self.prompts["qa_is_answered"],
494
+ user_prompt=user_prompt,
495
+ transform_to_json=False,
496
+ )
497
+
498
+ def calculate_cost(self) -> float:
499
+ """Calculate the total cost of API usage."""
500
+ return self.current_cost / 1e6
501
+
502
+ def calculate_used_tokens(self) -> int:
503
+ """Calculate the total # of used tokens for generation"""
504
+ return self.prompt_tokens_num, self.completion_tokens_num
505
+
506
+ def reset_tokens(self):
507
+ """Reset the total # of used tokens for generation"""
508
+ self.prompt_tokens_num = 0
509
+ self.completion_tokens_num = 0
510
+
511
+ def reset_messages(self):
512
+ """Reset the messages"""
513
+ self.messages = []
514
+
515
+ def reset_error_state(self):
516
+ self._prev_error = None
517
+ self._refine_attempt = 0
@@ -0,0 +1,17 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ Using semantic similarity, we linked relation name with top similar exact names from the knowledge graph built from previously seen texts.
4
+
5
+ You will be provided with the following:
6
+
7
+ Text: The original sentence or passage from which the triplet was extracted.
8
+ Extracted Triplet: A structured representation in the format { "subject": "...", "relation": "...", "object": "..." }.
9
+ Original relation: A relation (or in other words property) name from triplet that needs refinement.
10
+ Candidate relations: A list of possible relation (property) names from previously seen texts.
11
+
12
+ Your Task:
13
+ Select the most contextually appropriate relation name from the Candidate relations list that best matches relation from extracted triplet and context of the given Text.
14
+
15
+ - If an exact or semantically appropriate match for relation name is found, return the corresponding name exactly as it appears in the list.
16
+ - If no suitable match for relation exists, return the string "None".
17
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
@@ -0,0 +1,18 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text of user-assistant dialog.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ Using semantic similarity, we linked relation name with top similar exact names from the knowledge graph built from previously seen texts.
4
+
5
+ You will be provided with the following:
6
+
7
+ Text: The original sentence or passage from user-assistant dialog from which the triplet was extracted.
8
+ Extracted Triplet: A structured representation in the format { "subject": "...", "relation": "...", "object": "..." }.
9
+ Original relation: A relation (or in other words property) name from triplet that needs refinement.
10
+ Candidate relations: A list of possible relation (property) names from previously seen texts.
11
+
12
+ Your Task:
13
+ Select the most contextually appropriate relation name from the Candidate relations list that best matches relation from extracted triplet and context of the given Text.
14
+
15
+ - If an exact or semantically appropriate match for relation name is found, return the corresponding name exactly as it appears in the list.
16
+ - If no suitable match for relation exists, return the string "None".
17
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
18
+ - Preserve language of the original text (particularly, Russian) for the name of relation!
@@ -0,0 +1,17 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ Using semantic similarity, we linked object name with top similar exact names from the knowledge graph built from previously seen texts.
4
+
5
+ You will be provided with the following:
6
+
7
+ Text: The original sentence or passage from which the triplet was extracted.
8
+ Extracted Triplet: A structured representation in the format { "subject": "...", "relation": "...", "object": "..." }.
9
+ Original Object: An object name that needs refinement.
10
+ Candidate Objects: A list of possible entity names from previously seen texts.
11
+
12
+ Your Task:
13
+ Select the most contextually appropriate object name from the Candidate Object list that best matches object from extracted triplet and context of the given Text.
14
+
15
+ - If an exact or semantically appropriate match for object name is found, return the corresponding name exactly as it appears in the list.
16
+ - If no suitable match for object exists, return the string "None".
17
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
@@ -0,0 +1,18 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text of user-assistant dialog.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ Using semantic similarity, we linked object name with top similar exact names from the knowledge graph built from previously seen texts.
4
+
5
+ You will be provided with the following:
6
+
7
+ Text: The original sentence or passage from user-assistant dialog from which the triplet was extracted.
8
+ Extracted Triplet: A structured representation in the format { "subject": "...", "relation": "...", "object": "..." }.
9
+ Original Object: An object name that needs refinement.
10
+ Candidate Objects: A list of possible entity names from previously seen texts.
11
+
12
+ Your Task:
13
+ Select the most contextually appropriate object name from the Candidate Object list that best matches object from extracted triplet and context of the given Text.
14
+
15
+ - If an exact or semantically appropriate match for object name is found, return the corresponding name exactly as it appears in the list.
16
+ - If no suitable match for object exists, return the string "None".
17
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
18
+ - Preserve language of the original text (particularly, Russian) for the name of object!
@@ -0,0 +1,20 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ A triplet may also have **qualifiers** that provide more context (e.g., date, place, or other attributes). A qualifier has a relation and an object like a triplet does, but instead of a subject, its relation connects that object to the triplet the qualifier belongs to. **Qualifiers must always be attached to a triplet** and never exist as standalone triplets.
4
+
5
+ Using semantic similarity, we linked object name from qualifier with top similar exact names from the knowledge graph built from previously seen texts.
6
+
7
+ You will be provided with the following:
8
+
9
+ Text: The original sentence or passage from which the triplet was extracted.
10
+ Extracted Triplet: A structured representation in the format:
11
+ {"subject": "...", "relation": "...", "object": "...", "qualifier": {"relation": "...", "object": "..."}}.
12
+ Original Object: An object name that needs refinement.
13
+ Candidate Objects: A list of possible entity names from previously seen texts.
14
+
15
+ Your Task:
16
+ Select the most contextually appropriate object name from the Candidate Object list that best matches object from extracted triplet and context of the given Text.
17
+
18
+ - If an exact or semantically appropriate match for object name is found, return the corresponding name exactly as it appears in the list.
19
+ - If no suitable match for object exists, return the string "None".
20
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
@@ -0,0 +1,18 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ Using semantic similarity, we linked subject name with top similar exact names from the knowledge graph built from previously seen texts.
4
+
5
+ You will be provided with the following:
6
+
7
+ Text: The original sentence or passage from which the triplet was extracted.
8
+ Extracted Triplet: A structured representation in the format { "subject": "...", "relation": "...", "object": "..." }.
9
+ Original Subject: A subject name that needs refinement.
10
+ Candidate Subjects: A list of possible entity names from previously seen texts.
11
+
12
+ Your Task:
13
+ Select the most contextually appropriate subject name from the Candidate Subjects list that best matches subject from extracted triplet and context of the given Text.
14
+
15
+ - If an exact or semantically appropriate match is found, return the corresponding name exactly as it appears in the list.
16
+ - If no suitable match exists, return the string "None".
17
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
18
+
@@ -0,0 +1,20 @@
1
+ In the previous step, there was extracted a triplet akin to one in Wikidata knowledge graph from the text of user-assistant dialog.
2
+ Triplet contains two entities (subject and object) and one relation that connects these subject and object.
3
+ Using semantic similarity, we linked subject name with top similar exact names from the knowledge graph built from previously seen texts.
4
+
5
+ You will be provided with the following:
6
+
7
+ Text: The original sentence or passage from user-assistant dialog from which the triplet was extracted.
8
+ Extracted Triplet: A structured representation in the format { "subject": "...", "relation": "...", "object": "..." }.
9
+ Original Subject: A subject name that needs refinement.
10
+ Candidate Subjects: A list of possible entity names from previously seen texts.
11
+
12
+ Your Task:
13
+ Select the most contextually appropriate subject name from the Candidate Subjects list that best matches subject from extracted triplet and context of the given Text.
14
+
15
+ - If an exact or semantically appropriate match is found, return the corresponding name exactly as it appears in the list.
16
+ - If no suitable match exists, return the string "None".
17
+ - Do not modify the name from the candidate list in case of a match, add explanations, or provide any additional text.
18
+ - Preserve language of the original text (particularly, Russian) for the name of subject!
19
+
20
+