PyPI - wikontic - Versions diffs - 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl - Mend

wikontic 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

wikontic/create_wikidata_ontology_db.py CHANGED Viewed

@@ -13,7 +13,9 @@ import logging
 import os
 from pathlib import Path
 import torch
+from dotenv import load_dotenv, find_dotenv
+_ = load_dotenv(find_dotenv())
 # Configure logging
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -21,10 +23,21 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-tokenizer = AutoTokenizer.from_pretrained("facebook/contriever")
-model = AutoModel.from_pretrained("facebook/contriever", use_safetensors=True).to(
-    device
-)
+# Check for local model first, then fall back to remote
+model_name = "facebook/contriever"
+# local_model_path = os.getenv("HF_MODEL_PATH") or str(
+#     Path(__file__).parent.parent.parent.parent / "models" / "facebook--contriever"
+# )
+# if os.path.exists(local_model_path) and os.path.isdir(local_model_path):
+#     model_path = local_model_path
+# else:
+model_path = model_name
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModel.from_pretrained(model_path, use_safetensors=True).to(device)
+# model = AutoModel.from_pretrained(model_path).to(device)
 class EntityType(BaseModel):

wikontic/utils/base_inference_with_db.py CHANGED Viewed

@@ -113,6 +113,8 @@ class BaseInferenceWithDB:
         Returns:
             A list of dictionaries with the subject, relation, object, and qualifiers that correspond to the 1-hop supporting triplets for the given entities.
         """
+        if len(entities4search) == 0:
+            return []
         or_conditions = []
         for ent in entities4search:
             or_conditions.append({"$and": [{"subject": ent}]})
@@ -187,21 +189,23 @@ class BaseInferenceWithDB:
         supporting_triplets = []
         for _ in range(hop_depth):
-            new_entities4search = []
+            new_entities4search = set()
             new_supporting_triplets = self.get_1_hop_supporting_triplets(
                 entities4search, sample_id, use_qualifiers, use_filtered_triplets
             )
-            supporting_triplets.extend(new_supporting_triplets)
+            for triplet in new_supporting_triplets:
+                if triplet not in supporting_triplets:
+                    supporting_triplets.append(triplet)
             for doc in supporting_triplets:
                 if doc["subject"] not in entities4search:
-                    new_entities4search.append(doc["subject"])
+                    new_entities4search.add(doc["subject"])
                 if doc["object"] not in entities4search:
-                    new_entities4search.append(doc["object"])
+                    new_entities4search.add(doc["object"])
                 if use_qualifiers:
                     for q in doc["qualifiers"]:
                         if q["object"] not in entities4search:
-                            new_entities4search.append(q["object"])
+                            new_entities4search.add(q["object"])
             entities4search = list(set(new_entities4search))

wikontic/utils/inference_with_db.py CHANGED Viewed

@@ -29,6 +29,7 @@ class InferenceWithDB(BaseInferenceWithDB):
         self.get_1_hop_supporting_triplets_tool = tool(
             self.get_1_hop_supporting_triplets
         )
+        self.answer_question_with_llm_tool = tool(self.answer_question_with_llm)
     def sanitize_string(self, s):
         s = str(s).strip().replace('\\"', "")

wikontic/utils/openai_utils.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import openai
-import os
-from dotenv import load_dotenv, find_dotenv
+# import os
+# from dotenv import load_dotenv, find_dotenv
 from tenacity import (
     retry,
     wait_random_exponential,
@@ -19,13 +20,11 @@ import httpx
 # Configure logging
 logging.basicConfig(stream=sys.stderr, level=logging.WARNING)
 logger = logging.getLogger("OpenAIUtils")
-logger.setLevel(logging.ERROR)
-logging.getLogger("httpx").setLevel(logging.WARNING)
+logger.setLevel(logging.DEBUG)
+logging.getLogger("httpx").setLevel(logging.ERROR)
-_ = load_dotenv(find_dotenv())
+# _ = load_dotenv(find_dotenv())
 # OpenAI
-client = openai.OpenAI(api_key=os.getenv("KEY"))
 MAX_ATTEMPTS = 1
@@ -43,11 +42,19 @@ class LLMTripletExtractor:
     def __init__(
         self,
+        api_key: str,
         prompt_folder_path: str = str(Path(__file__).parent / "prompts"),
         system_prompt_paths: Optional[Dict[str, str]] = None,
         model: str = "gpt-4o",
         max_attempts=MAX_ATTEMPTS,
+        proxy: str = None,
     ):
+        if proxy:
+            http_client = httpx.Client(proxy=proxy)
+            self.client = openai.OpenAI(api_key=api_key, http_client=http_client)
+        else:
+            self.client = openai.OpenAI(api_key=api_key)
         """
         Initialize the LLMTripletExtractor.
@@ -141,7 +148,7 @@ class LLMTripletExtractor:
             {"role": "user", "content": user_prompt},
         ]
-        response = client.chat.completions.create(
+        response = self.client.chat.completions.create(
             model=self.model, messages=messages, temperature=0
         )
         self.completion_tokens_num += response.usage.completion_tokens

wikontic/utils/structured_aligner.py CHANGED Viewed

@@ -6,6 +6,7 @@ from pymongo import MongoClient, UpdateOne
 import torch
 from dotenv import load_dotenv, find_dotenv
 import os
+from pathlib import Path
 # os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 _ = load_dotenv(find_dotenv())
@@ -47,12 +48,30 @@ class Aligner:
         self.entities_vector_index_name = "entity_aliases"
         self.device = torch.device(device)
-        # self.tokenizer = AutoTokenizer.from_pretrained('facebook/contriever', token=os.getenv("HF_KEY"))
-        self.tokenizer = AutoTokenizer.from_pretrained("facebook/contriever")
-        # self.model = AutoModel.from_pretrained('facebook/contriever', token=os.getenv("HF_KEY")).to(self.device)
-        self.model = AutoModel.from_pretrained(
-            "facebook/contriever", use_safetensors=True
-        ).to(self.device)
+        # self.tokenizer = AutoTokenizer.from_pretrained(
+        #     "facebook/contriever", token=os.getenv("HF_KEY")
+        # )
+        # self.model = AutoModel.from_pretrained(
+        #     "facebook/contriever", token=os.getenv("HF_KEY"), use_safetensors=True
+        # ).to(self.device)
+        # Check for local model first, then fall back to remote
+        model_name = "facebook/contriever"
+        # local_model_path = os.getenv("HF_MODEL_PATH") or str(
+        #     Path(__file__).parent.parent.parent.parent
+        #     / "models"
+        #     / "facebook--contriever"
+        # )
+        # if os.path.exists(local_model_path) and os.path.isdir(local_model_path):
+        #     model_path = local_model_path
+        # else:
+        model_path = model_name
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = AutoModel.from_pretrained(model_path, use_safetensors=True).to(
+            self.device
+        )
+        # self.model = AutoModel.from_pretrained(model_path).to(self.device)
     def get_embedding(self, text):

wikontic/utils/structured_inference_with_db.py CHANGED Viewed

@@ -33,6 +33,7 @@ class StructuredInferenceWithDB(BaseInferenceWithDB):
         self.get_1_hop_supporting_triplets_tool = tool(
             self.get_1_hop_supporting_triplets
         )
+        self.answer_question_with_llm_tool = tool(self.answer_question_with_llm)
         # 1st step extraction without database
     def _refine_entity_types(self, text, triplet):

{wikontic-0.0.3.dist-info → wikontic-0.0.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wikontic
-Version: 0.0.3
+Version: 0.0.4
 Summary: Extract a knowledge graph with LLM from texts and perform QA over the resulted KG
 Author-email: Alla Chepurova <chepurova.data@gmail.com>
 License-Expression: MIT
@@ -27,6 +27,7 @@ Requires-Dist: dataclasses
 Requires-Dist: pydantic
 Requires-Dist: accelerate
 Requires-Dist: langchain
+Requires-Dist: langchain_openai
 Dynamic: license-file
 ![Wikontic logo](/media/wikontic.png)
@@ -78,9 +79,9 @@ Knowledge Graphs (KGs) provide structured, verifiable representations of knowled
   -  `Aligner` class: entity and relation name refinement
 ### Evaluation:
-- `inference_and_eval`
+- `inference_and_eval/`
 	- Scripts for building KGs for MuSiQue and HotPot datasets and evaluation of QA performance
-- `analysis`
+- `analysis/`
   - Notebooks with downstream analysis of the resulted KG
 ### Use Wikontic as a service:

{wikontic-0.0.3.dist-info → wikontic-0.0.4.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
 wikontic/__init__.py,sha256=9zw-dHDIyJ49TJ0PI3vHuW4wucW7_EhSyX98XE3_rys,483
 wikontic/create_ontological_triplets_db.py,sha256=yz2Nc1kxbtAagZuovKpxc2P3OH4qBalQNd7m7s9kpWo,6764
 wikontic/create_triplets_db.py,sha256=MsrNQmzkk6wxwy8gMV19FdRFCMiFmF13Z3bn2P9ZAQQ,8845
-wikontic/create_wikidata_ontology_db.py,sha256=O_dDMtqTlVGhaLCDy9yIbFUhQXCdg5pGohR6MmONkAI,18071
+wikontic/create_wikidata_ontology_db.py,sha256=hDI1prU4eU6DBe90EA_dN0ZqyWNyQOHlVRTxOipzq64,18559
 wikontic/utils/__init__.py,sha256=U41kQFNPpfYV6KJpMnkqgqLkozqXiG4tgV6rj8IW1BU,7
-wikontic/utils/base_inference_with_db.py,sha256=Jv8HxHwg2mBtqDHZTCzQdYY3Jjv8jDMr8nMR9FI6rWc,12965
+wikontic/utils/base_inference_with_db.py,sha256=utG-ykcM88Y6JYCbbgOtc7HUOqtC3O3gSmKGY4WN_5E,13118
 wikontic/utils/dynamic_aligner.py,sha256=xKw0spAHn6lxNRX_9xuLY5FtWEHAoIfwB2HYaAEgKJY,9616
-wikontic/utils/inference_with_db.py,sha256=iuvF08DQ16SThovSGLCLyApme-AidLLWO9DPP0ozM3c,8824
-wikontic/utils/openai_utils.py,sha256=93f9w7V-zAVub4b79_zFvRWnyPyjqR4qBYRPUv6fVE8,20462
-wikontic/utils/structured_aligner.py,sha256=WzX_J0MaAUip1w3nXGXF8AZJ2n6UZraKBVgLa69Br9A,22093
-wikontic/utils/structured_inference_with_db.py,sha256=kbDC1q87cdqqtWEnJmopJCvG9Da75R_Xj2EOt0USAtA,22916
+wikontic/utils/inference_with_db.py,sha256=E6xUNfzOHS7mKbo_20dXiy6zgpVnMxegKi3zfw-p2vI,8905
+wikontic/utils/openai_utils.py,sha256=zq9Z87FEbuqo3upqn4GSEkzggm7Yy69JDTQxfnLkTwo,20695
+wikontic/utils/structured_aligner.py,sha256=ixRKKgtcLcsRkoLkfsXmYhDATiV0DonDBpPc9pbDPcM,22728
+wikontic/utils/structured_inference_with_db.py,sha256=3vE3sZI2aVunRzr08S2IwFfb_MXmXYYBiksq-VD2DwM,22997
 wikontic/utils/ontology_mappings/entity_hierarchy.json,sha256=QG-uGxDlgw_wHI40Y57wTUUbd5fPeKYtDsYGzdwP448,951258
 wikontic/utils/ontology_mappings/entity_names.json,sha256=if_hnOL4RGgZyYgDmgKL41GL6zq0rG6WZ6EKIf6UXr0,144317
 wikontic/utils/ontology_mappings/entity_type2aliases.json,sha256=r0QVK70KXAWVfDJ5ii0oKfyK59oy5YNzCu5LPiM8jZI,219271
@@ -46,8 +46,8 @@ wikontic/utils/prompts/qa/question_decomposition_1.txt,sha256=lDhkPRugox4zlqJxjr
 wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench.txt,sha256=1cnJxjTXvOMKIplJKaUgx-tLxe1hpZJtwNbcNxX8fuw,3885
 wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench_in_russian.txt,sha256=CqoM-9iMcbihwqkh38lVdZF3We1GYpju6u2cyKsb_AU,5425
 wikontic/utils/prompts/triplet_extraction/propmt_1_types_qualifiers.txt,sha256=Nq940rMcrNZmVTOxMKL3xYB0bDhxINVWi2ShJkJ2xRo,4034
-wikontic-0.0.3.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
-wikontic-0.0.3.dist-info/METADATA,sha256=DEQ-rt1sOOLzqOAvvmBFsufOOcnDzzDWO893P9KIApo,3312
-wikontic-0.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-wikontic-0.0.3.dist-info/top_level.txt,sha256=VkTVWaTtu5zD7QL2iF2cS4LOQAiPp_P0pssCxETRB_o,9
-wikontic-0.0.3.dist-info/RECORD,,
+wikontic-0.0.4.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+wikontic-0.0.4.dist-info/METADATA,sha256=Mx2aQ__vpco8NL6QnqQd68DpE4WclKKqO2Xu-1ZOS94,3346
+wikontic-0.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+wikontic-0.0.4.dist-info/top_level.txt,sha256=VkTVWaTtu5zD7QL2iF2cS4LOQAiPp_P0pssCxETRB_o,9
+wikontic-0.0.4.dist-info/RECORD,,

{wikontic-0.0.3.dist-info → wikontic-0.0.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{wikontic-0.0.3.dist-info → wikontic-0.0.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{wikontic-0.0.3.dist-info → wikontic-0.0.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

wikontic 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

wikontic 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl