wikontic 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wikontic/__init__.py +16 -0
- wikontic/create_ontological_triplets_db.py +193 -0
- wikontic/create_triplets_db.py +259 -0
- wikontic/create_wikidata_ontology_db.py +555 -0
- wikontic/utils/__init__.py +7 -0
- wikontic/utils/base_inference_with_db.py +329 -0
- wikontic/utils/dynamic_aligner.py +281 -0
- wikontic/utils/inference_with_db.py +224 -0
- wikontic/utils/ontology_mappings/entity_hierarchy.json +1 -0
- wikontic/utils/ontology_mappings/entity_names.json +1 -0
- wikontic/utils/ontology_mappings/entity_type2aliases.json +1 -0
- wikontic/utils/ontology_mappings/entity_type2hierarchy.json +1 -0
- wikontic/utils/ontology_mappings/entity_type2label.json +1 -0
- wikontic/utils/ontology_mappings/enum_entity_ids.json +1 -0
- wikontic/utils/ontology_mappings/enum_prop_ids.json +1 -0
- wikontic/utils/ontology_mappings/label2entity.json +1 -0
- wikontic/utils/ontology_mappings/obj_constraint2prop.json +1 -0
- wikontic/utils/ontology_mappings/prop2aliases.json +1 -0
- wikontic/utils/ontology_mappings/prop2constraints.json +1 -0
- wikontic/utils/ontology_mappings/prop2data_type.json +1 -0
- wikontic/utils/ontology_mappings/prop2label.json +1 -0
- wikontic/utils/ontology_mappings/propid2enum.json +1 -0
- wikontic/utils/ontology_mappings/subj_constraint2prop.json +1 -0
- wikontic/utils/ontology_mappings/subject_object_constraints.json +1 -0
- wikontic/utils/openai_utils.py +517 -0
- wikontic/utils/prompts/name_refinement/prompt_choose_relation_wo_entity_types.txt +17 -0
- wikontic/utils/prompts/name_refinement/prompt_choose_relation_wo_entity_types_dialog_bench.txt +18 -0
- wikontic/utils/prompts/name_refinement/rank_object_names.txt +17 -0
- wikontic/utils/prompts/name_refinement/rank_object_names_dialog_bench.txt +18 -0
- wikontic/utils/prompts/name_refinement/rank_object_qualifiers.txt +20 -0
- wikontic/utils/prompts/name_refinement/rank_subject_names.txt +18 -0
- wikontic/utils/prompts/name_refinement/rank_subject_names_dialog_bench.txt +20 -0
- wikontic/utils/prompts/ontology_refinement/prompt_choose_entity_types.txt +26 -0
- wikontic/utils/prompts/ontology_refinement/prompt_choose_relation.txt +24 -0
- wikontic/utils/prompts/ontology_refinement/prompt_choose_relation_and_types.txt +28 -0
- wikontic/utils/prompts/qa/prompt_choose_relevant_entities_for_question.txt +17 -0
- wikontic/utils/prompts/qa/prompt_choose_relevant_entities_for_question_wo_types.txt +16 -0
- wikontic/utils/prompts/qa/prompt_entity_extraction_from_question.txt +3 -0
- wikontic/utils/prompts/qa/prompt_is_answered.txt +43 -0
- wikontic/utils/prompts/qa/qa_collapsing_prompt.txt +22 -0
- wikontic/utils/prompts/qa/qa_prompt.txt +5 -0
- wikontic/utils/prompts/qa/qa_prompt_hotpot.txt +6 -0
- wikontic/utils/prompts/qa/question_decomposition_1.txt +7 -0
- wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench.txt +75 -0
- wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench_in_russian.txt +78 -0
- wikontic/utils/prompts/triplet_extraction/propmt_1_types_qualifiers.txt +91 -0
- wikontic/utils/structured_aligner.py +606 -0
- wikontic/utils/structured_inference_with_db.py +561 -0
- wikontic-0.0.3.dist-info/METADATA +111 -0
- wikontic-0.0.3.dist-info/RECORD +53 -0
- wikontic-0.0.3.dist-info/WHEEL +5 -0
- wikontic-0.0.3.dist-info/licenses/LICENSE +19 -0
- wikontic-0.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
You are given a factual triplet extracted from text. The triplet follows the format (subject, relation, object), where:
|
|
2
|
+
|
|
3
|
+
Subject: A named entity or concept that represents a person, group, event, or abstract entity serving as the source of the relation.
|
|
4
|
+
Relation: A Wikidata-style predicate that defines the connection between the subject and the object.
|
|
5
|
+
Object: A named entity or concept that represents a person, group, event, or abstract entity related to the subject.
|
|
6
|
+
Subject_type: a class that describes the subject
|
|
7
|
+
Object_type: a class that describes the object
|
|
8
|
+
|
|
9
|
+
The extracted entity types of both subject and object were mapped to a set of similar Wikidata-style entity types based on semantic similarity.
|
|
10
|
+
|
|
11
|
+
Your Task
|
|
12
|
+
You will be provided with the following:
|
|
13
|
+
|
|
14
|
+
Text: The original sentence or passage from which the triplet was extracted.
|
|
15
|
+
Extracted Triplet: The factual triplet derived from the text.
|
|
16
|
+
Candidate subject types: similar entity types for subject type of extracted triplet retrieved from Wikidata
|
|
17
|
+
Candidate object types: similar entity types for object type of extracted triplet retrieved from Wikidata.
|
|
18
|
+
|
|
19
|
+
Instructions
|
|
20
|
+
Select the most appropriate candidate entity types for both subject and object from the provided candidates that best match the meaning of previously extracted triplet and original text.
|
|
21
|
+
|
|
22
|
+
Provide ONLY an answer in JSON format with the following keys:
|
|
23
|
+
"subject_type": Selected subject type candidate.
|
|
24
|
+
"object_type": Selected object type candidate.
|
|
25
|
+
|
|
26
|
+
NO additional text, NO "```" in json, ONLY the answer in JSON format.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
You are given a factual triplet extracted from text. The triplet follows the format (subject, relation, object), where:
|
|
2
|
+
|
|
3
|
+
Subject: A named entity or concept that represents a person, group, event, or abstract entity serving as the source of the relation.
|
|
4
|
+
Relation: A Wikidata-style predicate that defines the connection between the subject and the object.
|
|
5
|
+
Object: A named entity or concept that represents a person, group, event, or abstract entity related to the subject.
|
|
6
|
+
Subject_type: a class that describes the subject
|
|
7
|
+
Object_type: a class that describes the object
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
The extracted relation has been mapped to a set of similar Wikidata-style relations based on semantic similarity and the entity types they can connect.
|
|
11
|
+
|
|
12
|
+
Your Task
|
|
13
|
+
You will be provided with the following:
|
|
14
|
+
|
|
15
|
+
Text: The original sentence or passage from which the triplet was extracted.
|
|
16
|
+
Extracted Triplet: The factual triplet derived from the text.
|
|
17
|
+
Candidate relations: list of relation (or in other words property) names similar to the extracted relation from triplet retrieved from Wikidata.
|
|
18
|
+
|
|
19
|
+
Instructions
|
|
20
|
+
Select the most appropriate relation candidate from the provided candidate relations that best matches the meaning of the previously extracted triplet and the original text.
|
|
21
|
+
Provide only an answer in JSON format with the following keys:
|
|
22
|
+
|
|
23
|
+
"relation": Relation for the selected triplet.
|
|
24
|
+
NO additional text, NO "```" in json, ONLY the answer in JSON format.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
You are given a factual triplet extracted from text. The triplet follows the format (subject, relation, object), where:
|
|
2
|
+
|
|
3
|
+
Subject: A named entity or concept that represents a person, group, event, or abstract entity serving as the source of the relation.
|
|
4
|
+
Relation: A Wikidata-style predicate that defines the connection between the subject and the object.
|
|
5
|
+
Object: A named entity or concept that represents a person, group, event, or abstract entity related to the subject.
|
|
6
|
+
Subject_type: a class that describes the subject
|
|
7
|
+
Object_type: a class that describes the object
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
The extracted relation has been mapped to a set of similar Wikidata-style relations based on semantic similarity and the entity types they connect.
|
|
11
|
+
|
|
12
|
+
Your Task
|
|
13
|
+
You will be provided with the following:
|
|
14
|
+
|
|
15
|
+
Text: The original sentence or passage from which the triplet was extracted.
|
|
16
|
+
Extracted Triplet: The factual triplet derived from the text.
|
|
17
|
+
Candidate Triplets: triplets with subject, object, modified relation and options for subject and object types from Wikidata. Please keep in mind that subject and object, as well as subject and object types, can be switched if the linked relation has an inverse meaning to the original one (e.g. "created" vs "created by").
|
|
18
|
+
|
|
19
|
+
Instructions
|
|
20
|
+
Select the most appropriate triplet candidate with a single entity type for both subject and object from the provided candidate triplets that best match the meaning of previously extracted triplet and original text.
|
|
21
|
+
Choose the relevant triplet not only based on the best-matching relation, but also on the subject-object order, to preserve the original text's semantic meaning.
|
|
22
|
+
Preserve the directionality of the selected triplet, i.e. use the subject and object, as well as the subject and object types, exactly as they appear in the candidate triplet that best matches the original meaning of the text.
|
|
23
|
+
Provide only an answer in JSON format with the following keys:
|
|
24
|
+
"subject": Subject from a selected triplet.
|
|
25
|
+
"relation": Relation from the selected triplet.
|
|
26
|
+
"object": Object from a selected triplet.
|
|
27
|
+
"subject_type": Selected subject type, considering directionality of a chosen relation.
|
|
28
|
+
"object_type": Selected object type, considering directionality of a chosen relation.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Task: Identify relevant entities from a pre-constructed knowledge graph that might help to answer a provided question.
|
|
2
|
+
|
|
3
|
+
Input Structure:
|
|
4
|
+
The question will be labeled as "Question:".
|
|
5
|
+
A list of entities and their corresponding types from the knowledge graph will be labeled as "Entities:".
|
|
6
|
+
|
|
7
|
+
Selection Criteria:
|
|
8
|
+
Relevance means an entity is directly or indirectly useful for answering the question.
|
|
9
|
+
Look for names, events, dates, and other related concepts or entities that match or connect to key concepts in the question.
|
|
10
|
+
Do not ignore possible indirect relevance (e.g., if the question asks about a competition, teams or winners of that competition may be useful).
|
|
11
|
+
|
|
12
|
+
Response Format:
|
|
13
|
+
Always return at least one relevant entity. It is guaranteed that there is at least one.
|
|
14
|
+
The output must be a JSON list of dictionaries, where each dictionary contains:
|
|
15
|
+
"entity": the name of the entity
|
|
16
|
+
"entity_type": the type of the entity
|
|
17
|
+
Do not return an empty list. Select the best possible options.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Task: Identify relevant entities from a pre-constructed knowledge graph that might help to answer a provided question.
|
|
2
|
+
|
|
3
|
+
Input Structure:
|
|
4
|
+
The question will be labeled as "Question:".
|
|
5
|
+
A list of entities from the knowledge graph will be labeled as "Entities:".
|
|
6
|
+
|
|
7
|
+
Selection Criteria:
|
|
8
|
+
Relevance means an entity is directly or indirectly useful for answering the question.
|
|
9
|
+
Look for names, events, dates, and other related concepts or entities that match or connect to key concepts in the question.
|
|
10
|
+
Do not ignore possible indirect relevance (e.g., if the question asks about a competition, teams or winners of that competition may be useful).
|
|
11
|
+
|
|
12
|
+
Response Format:
|
|
13
|
+
Always return at least one relevant entity. It is guaranteed that there is at least one.
|
|
14
|
+
The output must be a JSON list of dictionaries, where each dictionary contains:
|
|
15
|
+
"entity": the name of the chosen relevant entity
|
|
16
|
+
Do not return an empty list. Select the best possible options.
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
Extract wikidata-like entities from the question below. It is guaranteed that there is at least one mentioned entity.
|
|
2
|
+
Extract any entity, whether a named entity or an abstract entity, that might help retrieve the information to answer the question.
|
|
3
|
+
Provide output in json format, no additional symbols. Output should be represented as a LIST of extracted entities' names.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
You are a reasoning assistant for multi-hop question answering.
|
|
2
|
+
|
|
3
|
+
Your task: Decide whether a list of subquestions and their answers fully resolves the original multi-hop question.
|
|
4
|
+
|
|
5
|
+
Input format:
|
|
6
|
+
|
|
7
|
+
Original multi-hop question: [text]
|
|
8
|
+
|
|
9
|
+
Question->answer sequence: [a list of subquestions and their answers, ending with the most recent one]
|
|
10
|
+
|
|
11
|
+
Output rules:
|
|
12
|
+
|
|
13
|
+
If the sequence of subquestions and answers completely and directly resolves the original multi-hop question, output only the final answer to the original multi-hop question (not just the last subanswer, i.e. answer the original question).
|
|
14
|
+
|
|
15
|
+
If the sequence is not sufficient and more reasoning or hops are needed, output exactly:
|
|
16
|
+
|
|
17
|
+
NOT FINAL
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
Do not include any prefixes like "Final answer:", "Answer:", suffixes, formatting, original questions or explanations.
|
|
21
|
+
|
|
22
|
+
Output must be a single line: either string with the final answer to the original multi-hop question or the exact string NOT FINAL.
|
|
23
|
+
|
|
24
|
+
<example>
|
|
25
|
+
Original multi-hop question: Who was the spouse of the person who wrote The Iron Heel?
|
|
26
|
+
|
|
27
|
+
Question->answer sequence:
|
|
28
|
+
Who wrote The Iron Heel? → Jack London
|
|
29
|
+
Who was the spouse of Jack London? → Charmian London
|
|
30
|
+
|
|
31
|
+
Expected output:
|
|
32
|
+
Charmian London
|
|
33
|
+
</example>
|
|
34
|
+
|
|
35
|
+
<example>
|
|
36
|
+
Original multi-hop question: Which country’s capital is closest to the birthplace of Nikola Tesla?
|
|
37
|
+
|
|
38
|
+
Question->answer sequence:
|
|
39
|
+
Where was Nikola Tesla born? → Smiljan, Croatia
|
|
40
|
+
|
|
41
|
+
Expected output:
|
|
42
|
+
NOT FINAL
|
|
43
|
+
</example>
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
You are an assistant for stepwise question decomposition.
|
|
2
|
+
|
|
3
|
+
You will be given three inputs:
|
|
4
|
+
1. An original multi-hop question.
|
|
5
|
+
2. A 1-hop sub-question that has already been answered.
|
|
6
|
+
3. The answer to that 1-hop sub-question.
|
|
7
|
+
|
|
8
|
+
Your task:
|
|
9
|
+
- Reformulate the original multi-hop question by integrating obtained answer from sub-question, so the new question has (n-1) hops.
|
|
10
|
+
|
|
11
|
+
Rules:
|
|
12
|
+
- Only perform one reasoning hop at a time. Do not generate additional reasoning steps beyond this hop.
|
|
13
|
+
- Do not include explanations or text, just reformulated question.
|
|
14
|
+
|
|
15
|
+
<example>
|
|
16
|
+
Original multi-hop question: "How many times did the plague occur in the birth city of the composer of Scanderbeg?"
|
|
17
|
+
Answered sub-question: "Who composed Scanderbeg?"
|
|
18
|
+
Answer: "Antonio Vivaldi"
|
|
19
|
+
|
|
20
|
+
Output:
|
|
21
|
+
"How many times did the plague occur in the birth city of Antonio Vivaldi?"
|
|
22
|
+
</example>
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
Your task is to answer a question based on facts written in the form of triplets (subject, relation, object, qualifiers) from a knowledge graph.
|
|
2
|
+
|
|
3
|
+
The input question and triplet facts are denoted as "Question:" and "Triplets:" correspondingly.
|
|
4
|
+
Be concise and provide an answer to the specified question based on the provided triplets.
|
|
5
|
+
In case you cannot formulate answer to the provided question based on the triplets, return None.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
Your task is to answer a question based on facts written in a form of triplet (subject, relation, object) from knowledge graph.
|
|
2
|
+
|
|
3
|
+
The input question and triplet facts are denoted as "Question:" and "Triplets:" correspondingly.
|
|
4
|
+
Be as concise as possible and provide an answer to the specified question ONLY from the triplets.
|
|
5
|
+
It is not necessary to formulate a detailed answer. It is enough to specify the entity name/phrase from triplets that is the answer. If it is a yes/no question, just answer yes or no.
|
|
6
|
+
In case there is no answer among triplets, return an empty string.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
You are an assistant for question decomposition.
|
|
2
|
+
Your task is to take a multi-hop question and generate only the first 1-hop sub-question,
|
|
3
|
+
leaving the remaining reasoning for later.
|
|
4
|
+
|
|
5
|
+
Example:
|
|
6
|
+
Multi-hop question: "How many times did the plague occur in the birth city of the composer of Scanderbeg?"
|
|
7
|
+
Output: "Who composed Scanderbeg?"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
You are an algorithm designed to extract structured knowledge from dialog texts to build a Wikidata-like knowledge graph from user-assistant interaction. A knowledge graph consists of **triplets** in the format (subject, relation, object), where:
|
|
2
|
+
|
|
3
|
+
- **Subject**: A named entity or a concept that describes a group of people, events, or any abstract objects that serves as the source of the relation.
|
|
4
|
+
- **Relation**: A Wikidata-style predicate that connects the subject and object.
|
|
5
|
+
- **Object**: A named entity or a concept that describes a group of people, events, or any abstract objects that is related to the subject.
|
|
6
|
+
|
|
7
|
+
Additionally, some triplets may have **qualifiers** that provide more context (e.g., date, place, or other attributes). Qualifiers have a relation and an object like triplets do, but instead of a subject, their relation connects the object to the triplet the qualifier belongs to. **Qualifiers must always be attached to a triplet** and never exist as standalone triplets.
|
|
8
|
+
|
|
9
|
+
You will receive a text labeled **"Text:"**. Your task is to extract meaningful triplets that represent factual relationships.
|
|
10
|
+
|
|
11
|
+
### Output Format
|
|
12
|
+
Return only triplets in **JSON format** as a list of dictionaries, where each dictionary contains:
|
|
13
|
+
- "subject": Subject entity.
|
|
14
|
+
- "relation": Relation connecting subject and object.
|
|
15
|
+
- "object": Object entity.
|
|
16
|
+
- "qualifiers": List of dictionaries, where each dictionary contains:
|
|
17
|
+
- "relation": Relation connecting triplet and object,
|
|
18
|
+
- "object": Object entity connected to the main triplet
|
|
19
|
+
|
|
20
|
+
NO additional text, NO "```" in json, ONLY triplets in JSON format.
|
|
21
|
+
Preserve language of the original text (particularly, Russian) for the names of subject, relation and object in extracted triplets!
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
<example>
|
|
25
|
+
### Input:
|
|
26
|
+
Text: Мария Кюри (7 ноября 1867 — 4 июля 1934) была физиком и химиком, которая проводила исследования в области радиоактивности. Она получила Нобелевскую премию по физике в 1903 году и Нобелевскую премию по химии в 1911 году.
|
|
27
|
+
### Output:
|
|
28
|
+
[
|
|
29
|
+
{
|
|
30
|
+
"subject":"Мария Кюри",
|
|
31
|
+
"relation":"дата рождения",
|
|
32
|
+
"object":"7 ноября 1867 года",
|
|
33
|
+
"qualifiers": [],
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"subject":"Мария Кюри",
|
|
37
|
+
"relation":"дата смерти",
|
|
38
|
+
"object":"4 июля 1934",
|
|
39
|
+
"qualifiers": [],
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"subject":"Мария Кюри",
|
|
43
|
+
"relation":"профессия",
|
|
44
|
+
"object":"физик",
|
|
45
|
+
"qualifiers": [],
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"subject":"Мария Кюри",
|
|
49
|
+
"relation":"профессия",
|
|
50
|
+
"object":"химик",
|
|
51
|
+
"qualifiers": [],
|
|
52
|
+
},
|
|
53
|
+
|
|
54
|
+
{
|
|
55
|
+
"subject":"Мария Кюри",
|
|
56
|
+
"relation":"область исследований,
|
|
57
|
+
"object":"радиоактивность",
|
|
58
|
+
"qualifiers": [],
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"subject":"Мария Кюри",
|
|
62
|
+
"relation":"полученная награда",
|
|
63
|
+
"object":"Нобелевская премия по физике",
|
|
64
|
+
"qualifiers": [{"relation":"момент времени","object": "1903"}],
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"subject":"Мария Кюри",
|
|
68
|
+
"relation":"полученная награда",
|
|
69
|
+
"object":"Нобелевская премия по химии",
|
|
70
|
+
"qualifiers": [{“relation":"момент времени","object":"1911"}],
|
|
71
|
+
}
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
</example>
|
wikontic/utils/prompts/triplet_extraction/prompt_1_types_qualifiers_dialog_bench_in_russian.txt
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Ты - алгоритм, предназначенный для извлечения структурированных знаний из текстов диалога с целью построения графа знаний, подобного Wikidata. Граф знаний состоит из **триплетов** в формате (субъект, отношение, объект), где:
|
|
2
|
+
|
|
3
|
+
- **Субъект** (subject): именованная сущность или концепция, описывающая пользователя, группу людей, событие или любой абстрактный объект, которая служит источником отношения.
|
|
4
|
+
|
|
5
|
+
- **Отношение** (relation): предикат, который связывает субъект и объект.
|
|
6
|
+
|
|
7
|
+
- **Объект** (object): именованная сущность или концепция, описывающая группу людей, пользователя, событие или любой абстрактный объект, связанный с субъектом.
|
|
8
|
+
|
|
9
|
+
Кроме того, некоторые триплеты могут иметь **квалификаторы** (qualifiers), которые предоставляют дополнительный контекст (например, дату, место или другие атрибуты). Квалификаторы должны иметь отношение и объект, как и триплеты, но вместо субъекта их отношение связывает объект и триплет, к которому принадлежит квалификатор. **Квалификаторы всегда привязаны к триплету** и никогда не существуют как самостоятельные триплеты.
|
|
10
|
+
|
|
11
|
+
Ты получишь текст с пометкой **"Text:"**. Твоя задача — извлечь триплеты, которые представляют факты, которые в дальнейшем могут использоваться для ответов на вопросы пользователя.
|
|
12
|
+
|
|
13
|
+
### Формат вывода
|
|
14
|
+
Возвращай только триплеты в **формате JSON** в виде списка словарей, где каждый словарь содержит:
|
|
15
|
+
- «subject»: сущность-субъект.
|
|
16
|
+
- «relation»: отношение, связывающее субъект и объект.
|
|
17
|
+
- «object»: сущность-объект.
|
|
18
|
+
- «qualifiers»: список словарей, где каждый словарь содержит:
|
|
19
|
+
- «relation»: отношение, соединяющее триплет и объект,
|
|
20
|
+
- «object»: объект, связанный с основным триплетом
|
|
21
|
+
|
|
22
|
+
Вывод должен быть БЕЗ дополнительного текста, БЕЗ «```» в json, ТОЛЬКО триплеты в формате JSON.
|
|
23
|
+
|
|
24
|
+
Сохраняй язык оригинального текста (в частности, русский) в наименованиях субъектов, объектов и отношений.
|
|
25
|
+
|
|
26
|
+
<пример>
|
|
27
|
+
### Входные данные:
|
|
28
|
+
Текст: "Мария Кюри (7 ноября 1867 — 4 июля 1934) была физиком и химиком, которая проводила исследования в области радиоактивности. Она получила Нобелевскую премию по физике в 1903 году и Нобелевскую премию по химии в 1911 году."
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
### Вывод:
|
|
32
|
+
[
|
|
33
|
+
{
|
|
34
|
+
"subject":"Мария Кюри",
|
|
35
|
+
"relation":"дата рождения",
|
|
36
|
+
"object":"7 ноября 1867 года",
|
|
37
|
+
"qualifiers": [],
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"subject":"Мария Кюри",
|
|
41
|
+
"relation":"дата смерти",
|
|
42
|
+
"object":"4 июля 1934",
|
|
43
|
+
"qualifiers": [],
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"subject":"Мария Кюри",
|
|
47
|
+
"relation": профессия,
|
|
48
|
+
"object":"физик",
|
|
49
|
+
"qualifiers": [],
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"subject":"Мария Кюри",
|
|
53
|
+
"relation":"профессия",
|
|
54
|
+
"object":"химик",
|
|
55
|
+
"qualifiers": [],
|
|
56
|
+
},
|
|
57
|
+
|
|
58
|
+
{
|
|
59
|
+
"subject":"Мария Кюри",
|
|
60
|
+
"relation":"область исследований,
|
|
61
|
+
"object":"радиоактивность",
|
|
62
|
+
"qualifiers": [],
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"subject":"Мария Кюри",
|
|
66
|
+
"relation":"полученная награда",
|
|
67
|
+
"object":"Нобелевская премия по физике",
|
|
68
|
+
"qualifiers": [{"relation":"момент времени","object": "1903"}],
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"subject":"Мария Кюри",
|
|
72
|
+
"relation":"полученная награда",
|
|
73
|
+
"object":"Нобелевская премия по химии",
|
|
74
|
+
"qualifiers": [{“relation":"момент времени","object":"1911"}],
|
|
75
|
+
}
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
</пример>
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
You are an algorithm designed to extract structured knowledge from texts to build a Wikidata-like knowledge graph. A knowledge graph consists of **triplets** in the format (subject, relation, object), where:
|
|
2
|
+
|
|
3
|
+
- **Subject**: A named entity or a concept that describes a group of people, events, or any abstract objects that serves as the source of the relation.
|
|
4
|
+
- **Relation**: A Wikidata-style predicate that connects the subject and object.
|
|
5
|
+
- **Object**: A named entity or a concept that describes a group of people, events, or any abstract objects that is related to the subject.
|
|
6
|
+
|
|
7
|
+
Additionally, some triplets may have **qualifiers** that provide more context (e.g., date, place, or other attributes). Qualifiers have a relation and an object like triplets do, but instead of a subject, their relation connects the object to the triplet the qualifier belongs to. **Qualifiers must always be attached to a triplet** and never exist as standalone triplets.
|
|
8
|
+
|
|
9
|
+
You will receive a text labeled **"Text:"**. Your task is to extract meaningful triplets that represent factual relationships.
|
|
10
|
+
|
|
11
|
+
### Output Format
|
|
12
|
+
Return only triplets in **JSON format** as a list of dictionaries, where each dictionary contains:
|
|
13
|
+
- "subject": Subject entity.
|
|
14
|
+
- "relation": Relation connecting subject and object.
|
|
15
|
+
- "object": Object entity.
|
|
16
|
+
- "qualifiers": List of dictionaries, where each dictionary contains:
|
|
17
|
+
- "relation": Relation connecting triplet and object,
|
|
18
|
+
- "object": Object entity connected to the main triplet
|
|
19
|
+
- "subject_type": a class that describes the subject
|
|
20
|
+
- "object_type": a class that describes the object
|
|
21
|
+
|
|
22
|
+
NO additional text, NO "```" in json, ONLY triplets in JSON format.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
<example>
|
|
26
|
+
### Input:
|
|
27
|
+
Text: "Marie Curie (7 November 1867 - 4 July 1934) was a physicist and chemist who conducted pioneering research on radioactivity. She received the Nobel Prize in Physics in 1903 and the Nobel Prize in Chemistry in 1911.
|
|
28
|
+
### Output:
|
|
29
|
+
{
|
|
30
|
+
"triplets":
|
|
31
|
+
[
|
|
32
|
+
{
|
|
33
|
+
"subject": "Marie Curie",
|
|
34
|
+
"relation": "date of birth",
|
|
35
|
+
"object": "7 November 1867",
|
|
36
|
+
"qualifiers": [],
|
|
37
|
+
"subject_type": "human",
|
|
38
|
+
"object_type": "date"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"subject": "Marie Curie",
|
|
42
|
+
"relation": "date of death",
|
|
43
|
+
"object": "4 July 1934",
|
|
44
|
+
"qualifiers": [],
|
|
45
|
+
"subject_type": "human",
|
|
46
|
+
"object_type": "date"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"subject": "Marie Curie",
|
|
50
|
+
"relation": "occupation",
|
|
51
|
+
"object": "physicist",
|
|
52
|
+
"qualifiers": [],
|
|
53
|
+
"subject_type": "human",
|
|
54
|
+
"object_type": "profession"
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"subject": "Marie Curie",
|
|
58
|
+
"relation": "occupation",
|
|
59
|
+
"object": "chemist",
|
|
60
|
+
"qualifiers": [],
|
|
61
|
+
"subject_type": "human",
|
|
62
|
+
"object_type": "profession"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"subject": "Marie Curie",
|
|
66
|
+
"relation": "field of work",
|
|
67
|
+
"object": "radioactivity",
|
|
68
|
+
"qualifiers": [],
|
|
69
|
+
"subject_type": "human",
|
|
70
|
+
"object_type": "physical phenomenon"
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"subject": "Marie Curie",
|
|
74
|
+
"relation": "award received",
|
|
75
|
+
"object": "Nobel Prize in Physics",
|
|
76
|
+
"qualifiers": [{"relation": "point in time", "object": '1903'}],
|
|
77
|
+
"subject_type": "human",
|
|
78
|
+
"object_type": "award"
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"subject": "Marie Curie",
|
|
82
|
+
"relation": "award received",
|
|
83
|
+
"object": "Nobel Prize in Chemistry",
|
|
84
|
+
"qualifiers": [{"relation": "point in time", "object": "1911"}],
|
|
85
|
+
"subject_type": "human",
|
|
86
|
+
"object_type": "award"
|
|
87
|
+
}
|
|
88
|
+
]
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
</example>
|