syscred 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syscred/__init__.py +41 -0
- syscred/api_clients.py +560 -0
- syscred/backend_app.py +363 -0
- syscred/config.py +275 -0
- syscred/database.py +54 -0
- syscred/debug_factcheck.py +43 -0
- syscred/debug_graph_json.py +58 -0
- syscred/debug_init.py +33 -0
- syscred/debug_local_server.py +25 -0
- syscred/diagnose_imports.py +37 -0
- syscred/eval_metrics.py +349 -0
- syscred/graph_rag.py +171 -0
- syscred/ir_engine.py +410 -0
- syscred/ontology_manager.py +509 -0
- syscred/run_benchmark.py +135 -0
- syscred/seo_analyzer.py +610 -0
- syscred/setup.py +65 -0
- syscred/test_graphrag.py +87 -0
- syscred/test_phase1.py +28 -0
- syscred/test_phase2.py +55 -0
- syscred/test_suite.py +64 -0
- syscred/verification_system.py +765 -0
- syscred-2.2.0.dist-info/METADATA +259 -0
- syscred-2.2.0.dist-info/RECORD +28 -0
- syscred-2.2.0.dist-info/WHEEL +5 -0
- syscred-2.2.0.dist-info/entry_points.txt +3 -0
- syscred-2.2.0.dist-info/licenses/LICENSE +21 -0
- syscred-2.2.0.dist-info/top_level.txt +1 -0
syscred/ontology_manager.py
ADDED

@@ -0,0 +1,509 @@
# -*- coding: utf-8 -*-
"""
Ontology Manager Module - SysCRED
==================================
Manages the RDF ontology for the credibility verification system.
Handles reading, writing, and querying of semantic triplets.

(c) Dominique S. Loyer - PhD Thesis Prototype
Citation Key: loyerModelingHybridSystem2025
"""

from typing import Optional, List, Dict, Any
from datetime import datetime
from dataclasses import dataclass
import os

# RDFLib imports with fallback
try:
    from rdflib import Graph, Namespace, Literal, URIRef, BNode
    from rdflib.namespace import RDF, RDFS, OWL, XSD
    HAS_RDFLIB = True
except ImportError:
    HAS_RDFLIB = False
    print("Warning: rdflib not installed. Run: pip install rdflib")


@dataclass
class EvaluationRecord:
    """Represents a stored evaluation from the ontology."""
    evaluation_id: str
    url_or_text: str
    score: float
    level: str
    timestamp: str
    fact_checks: List[str]


class OntologyManager:
    """
    Manages the credibility ontology using RDFLib.

    Handles:
    - Loading base ontology
    - Adding evaluation triplets
    - Querying historical data
    - Exporting enriched ontology
    """

    # Namespace for the credibility ontology
    CRED_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"

    def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
        """
        Initialize the ontology manager.

        Args:
            base_ontology_path: Path to the base ontology TTL file
            data_path: Path to store/load accumulated data triplets
        """
        if not HAS_RDFLIB:
            raise ImportError("rdflib is required. Install with: pip install rdflib")

        self.base_path = base_ontology_path
        self.data_path = data_path

        # Create namespace
        self.cred = Namespace(self.CRED_NS)

        # Initialize graphs
        self.base_graph = Graph()
        self.data_graph = Graph()

        # Bind prefixes for nicer serialization
        self._bind_prefixes(self.base_graph)
        self._bind_prefixes(self.data_graph)

        # Load ontology files if they exist
        if base_ontology_path and os.path.exists(base_ontology_path):
            self.load_base_ontology(base_ontology_path)

        if data_path and os.path.exists(data_path):
            self.load_data_graph(data_path)

        # Counter for generating unique IDs
        self._evaluation_counter = 0

    def _bind_prefixes(self, graph: Graph):
        """Bind common prefixes to a graph."""
        graph.bind("cred", self.cred)
        graph.bind("owl", OWL)
        graph.bind("rdf", RDF)
        graph.bind("rdfs", RDFS)
        graph.bind("xsd", XSD)

    def load_base_ontology(self, path: str) -> bool:
        """Load the base ontology from a TTL file."""
        try:
            self.base_graph.parse(path, format='turtle')
            print(f"[OntologyManager] Loaded base ontology: {len(self.base_graph)} triples")
            return True
        except Exception as e:
            print(f"[OntologyManager] Error loading base ontology: {e}")
            return False

    def load_data_graph(self, path: str) -> bool:
        """Load accumulated data triplets."""
        try:
            self.data_graph.parse(path, format='turtle')
            print(f"[OntologyManager] Loaded data graph: {len(self.data_graph)} triples")
            return True
        except Exception as e:
            print(f"[OntologyManager] Error loading data graph: {e}")
            return False

    def add_evaluation_triplets(self, report: Dict[str, Any]) -> str:
        """
        Add triplets for a new credibility evaluation.

        Args:
            report: The evaluation report dictionary from CredibilityVerificationSystem

        Returns:
            The URI of the created RapportEvaluation individual
        """
        timestamp = datetime.now()
        timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
        self._evaluation_counter += 1

        # Create URIs for new individuals
        report_uri = self.cred[f"Report_{timestamp_str}_{self._evaluation_counter}"]
        request_uri = self.cred[f"Request_{timestamp_str}_{self._evaluation_counter}"]
        info_uri = self.cred[f"Info_{timestamp_str}_{self._evaluation_counter}"]

        # Get data from report
        score = report.get('scoreCredibilite', 0.5)
        input_data = report.get('informationEntree', '')
        summary = report.get('resumeAnalyse', '')

        # Determine credibility level based on score
        if score >= 0.7:
            level_uri = self.cred.Niveau_Haut
            info_class = self.cred.InformationHauteCredibilite
        elif score >= 0.4:
            level_uri = self.cred.Niveau_Moyen
            info_class = self.cred.InformationMoyenneCredibilite
        else:
            level_uri = self.cred.Niveau_Bas
            info_class = self.cred.InformationFaibleCredibilite

        # Add Information triplets
        self.data_graph.add((info_uri, RDF.type, self.cred.InformationSoumise))
        self.data_graph.add((info_uri, RDF.type, info_class))
        self.data_graph.add((info_uri, self.cred.informationContent,
                             Literal(input_data[:500], datatype=XSD.string)))

        # Check if it's a URL
        if input_data.startswith('http'):
            self.data_graph.add((info_uri, self.cred.informationURL,
                                 Literal(input_data, datatype=XSD.anyURI)))

        # Add Request triplets
        self.data_graph.add((request_uri, RDF.type, self.cred.RequeteEvaluation))
        self.data_graph.add((request_uri, self.cred.concernsInformation, info_uri))
        self.data_graph.add((request_uri, self.cred.submissionTimestamp,
                             Literal(timestamp.isoformat(), datatype=XSD.dateTime)))
        self.data_graph.add((request_uri, self.cred.requestStatus,
                             Literal("Completed", datatype=XSD.string)))

        # Add Report triplets
        self.data_graph.add((report_uri, RDF.type, self.cred.RapportEvaluation))
        self.data_graph.add((report_uri, self.cred.isReportOf, request_uri))
        self.data_graph.add((report_uri, self.cred.credibilityScoreValue,
                             Literal(float(score), datatype=XSD.float)))
        self.data_graph.add((report_uri, self.cred.assignsCredibilityLevel, level_uri))
        self.data_graph.add((report_uri, self.cred.completionTimestamp,
                             Literal(timestamp.isoformat(), datatype=XSD.dateTime)))
        self.data_graph.add((report_uri, self.cred.reportSummary,
                             Literal(summary, datatype=XSD.string)))

        # Add NLP results if available
        nlp_results = report.get('analyseNLP', {})
        if nlp_results:
            nlp_result_uri = self.cred[f"NLPResult_{timestamp_str}_{self._evaluation_counter}"]
            self.data_graph.add((nlp_result_uri, RDF.type, self.cred.ResultatNLP))
            self.data_graph.add((report_uri, self.cred.includesNLPResult, nlp_result_uri))

            sentiment = nlp_results.get('sentiment', {})
            if sentiment:
                self.data_graph.add((nlp_result_uri, self.cred.sentimentScore,
                                     Literal(float(sentiment.get('score', 0.5)), datatype=XSD.float)))

            coherence = nlp_results.get('coherence_score')
            if coherence is not None:
                self.data_graph.add((nlp_result_uri, self.cred.coherenceScore,
                                     Literal(float(coherence), datatype=XSD.float)))

        # Add source analysis if available
        rules = report.get('reglesAppliquees', {})
        source_analysis = rules.get('source_analysis', {})
        if source_analysis:
            source_uri = self.cred[f"SourceAnalysis_{timestamp_str}_{self._evaluation_counter}"]
            self.data_graph.add((source_uri, RDF.type, self.cred.InfoSourceAnalyse))
            self.data_graph.add((report_uri, self.cred.includesSourceAnalysis, source_uri))

            reputation = source_analysis.get('reputation', 'Unknown')
            self.data_graph.add((source_uri, self.cred.sourceAnalyzedReputation,
                                 Literal(reputation, datatype=XSD.string)))

            domain_age = source_analysis.get('domain_age_days')
            if domain_age is not None:
                self.data_graph.add((source_uri, self.cred.sourceMentionsCount,
                                     Literal(int(domain_age), datatype=XSD.integer)))

        # Add fact check results
        fact_checks = rules.get('fact_checking', [])
        for i, fc in enumerate(fact_checks):
            evidence_uri = self.cred[f"Evidence_{timestamp_str}_{self._evaluation_counter}_{i}"]
            self.data_graph.add((evidence_uri, RDF.type, self.cred.PreuveFactuelle))
            self.data_graph.add((report_uri, self.cred.basedOnEvidence, evidence_uri))

            self.data_graph.add((evidence_uri, self.cred.evidenceClaim,
                                 Literal(fc.get('claim', ''), datatype=XSD.string)))
            self.data_graph.add((evidence_uri, self.cred.evidenceVerdict,
                                 Literal(fc.get('rating', ''), datatype=XSD.string)))
            self.data_graph.add((evidence_uri, self.cred.evidenceSource,
                                 Literal(fc.get('publisher', ''), datatype=XSD.string)))
            if fc.get('url'):
                self.data_graph.add((evidence_uri, self.cred.evidenceURL,
                                     Literal(fc.get('url', ''), datatype=XSD.anyURI)))

        # [NEW] Link similar claims found by GraphRAG
        similar_uris = report.get('similar_claims_uris', [])
        for sim_uri_str in similar_uris:
            try:
                sim_uri = URIRef(sim_uri_str)
                self.data_graph.add((report_uri, RDFS.seeAlso, sim_uri))
            except Exception as e:
                print(f"[Ontology] Error linking similar URI {sim_uri_str}: {e}")

        print(f"[OntologyManager] Added evaluation triplets. Report: {report_uri}")
        return str(report_uri)
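
    # Illustrative sketch only: for a URL input scored at 0.72, the data graph
    # serialized as Turtle would contain individuals roughly like the following
    # (timestamps and the example.org URL are hypothetical placeholders):
    #
    #   cred:Info_20250101_120000_1 a cred:InformationSoumise, cred:InformationHauteCredibilite ;
    #       cred:informationContent "https://example.org/article"^^xsd:string ;
    #       cred:informationURL "https://example.org/article"^^xsd:anyURI .
    #   cred:Request_20250101_120000_1 a cred:RequeteEvaluation ;
    #       cred:concernsInformation cred:Info_20250101_120000_1 .
    #   cred:Report_20250101_120000_1 a cred:RapportEvaluation ;
    #       cred:isReportOf cred:Request_20250101_120000_1 ;
    #       cred:credibilityScoreValue "0.72"^^xsd:float ;
    #       cred:assignsCredibilityLevel cred:Niveau_Haut .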

    def query_source_history(self, url: str) -> List[EvaluationRecord]:
        """
        Query all previous evaluations for a URL/domain.

        Args:
            url: URL to search for

        Returns:
            List of EvaluationRecord for this source
        """
        results = []

        # SPARQL query to find all evaluations for this URL
        # (the cred prefix must match CRED_NS, the namespace the triples are written with)
        query = """
        PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        SELECT ?report ?score ?level ?timestamp ?content
        WHERE {
            ?info cred:informationURL ?url .
            ?request cred:concernsInformation ?info .
            ?report cred:isReportOf ?request .
            ?report cred:credibilityScoreValue ?score .
            ?report cred:assignsCredibilityLevel ?level .
            ?report cred:completionTimestamp ?timestamp .
            ?info cred:informationContent ?content .
            FILTER(CONTAINS(STR(?url), "%s"))
        }
        ORDER BY DESC(?timestamp)
        """ % url

        try:
            # Query combined graph (base + data)
            combined = self.base_graph + self.data_graph
            for row in combined.query(query):
                results.append(EvaluationRecord(
                    evaluation_id=str(row.report),
                    url_or_text=str(row.content) if row.content else url,
                    score=float(row.score),
                    level=str(row.level).split('#')[-1],
                    timestamp=str(row.timestamp),
                    fact_checks=[]
                ))
        except Exception as e:
            print(f"[OntologyManager] Query error: {e}")

        return results
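
    # Minimal usage sketch, assuming evaluations for lemonde.fr were stored earlier
    # and "manager" is an OntologyManager instance:
    #
    #   history = manager.query_source_history("lemonde.fr")
    #   for rec in history:
    #       print(rec.score, rec.level, rec.timestamp)
    #
    # Each entry is an EvaluationRecord; fact_checks is left empty by this query.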

    def get_statistics(self) -> Dict[str, Any]:
        """Get statistics about the ontology data."""
        stats = {
            'base_triples': len(self.base_graph),
            'data_triples': len(self.data_graph),
            'total_triples': len(self.base_graph) + len(self.data_graph),
        }

        # Count evaluations (prefix must match CRED_NS)
        query = """
        PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
        SELECT (COUNT(?report) as ?count) WHERE {
            ?report a cred:RapportEvaluation .
        }
        """
        try:
            for row in self.data_graph.query(query):
                stats['total_evaluations'] = int(row.count)
        except Exception:
            stats['total_evaluations'] = 0

        return stats
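
    # Shape of the returned dictionary, for illustration (values are hypothetical):
    #
    #   {'base_triples': 1024, 'data_triples': 87,
    #    'total_triples': 1111, 'total_evaluations': 5}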
314
|
+
def get_graph_json(self) -> Dict[str, List]:
|
|
315
|
+
"""
|
|
316
|
+
Convert ontology data into D3.js JSON format (Nodes & Links).
|
|
317
|
+
"""
|
|
318
|
+
nodes = []
|
|
319
|
+
links = []
|
|
320
|
+
added_nodes = set()
|
|
321
|
+
|
|
322
|
+
# Get the latest report ID
|
|
323
|
+
latest_query = """
|
|
324
|
+
PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
|
|
325
|
+
SELECT ?report ?timestamp WHERE {
|
|
326
|
+
?report a cred:RapportEvaluation .
|
|
327
|
+
?report cred:completionTimestamp ?timestamp .
|
|
328
|
+
}
|
|
329
|
+
ORDER BY DESC(?timestamp)
|
|
330
|
+
LIMIT 1
|
|
331
|
+
"""
|
|
332
|
+
latest_report = None
|
|
333
|
+
try:
|
|
334
|
+
for row in self.data_graph.query(latest_query):
|
|
335
|
+
latest_report = row.report
|
|
336
|
+
except:
|
|
337
|
+
pass
|
|
338
|
+
|
|
339
|
+
if not latest_report:
|
|
340
|
+
return {'nodes': [], 'links': []}
|
|
341
|
+
|
|
342
|
+
# Helper to add node if unique
|
|
343
|
+
def add_node(uri, label, type_class, group):
|
|
344
|
+
if str(uri) not in added_nodes:
|
|
345
|
+
nodes.append({
|
|
346
|
+
'id': str(uri),
|
|
347
|
+
'name': str(label),
|
|
348
|
+
'group': group,
|
|
349
|
+
'type': str(type_class).split('#')[-1]
|
|
350
|
+
})
|
|
351
|
+
added_nodes.add(str(uri))
|
|
352
|
+
|
|
353
|
+
# Add Central Node (Report)
|
|
354
|
+
add_node(latest_report, "Latest Report", "cred:RapportEvaluation", 1)
|
|
355
|
+
|
|
356
|
+
# Query triples related to this report (Level 1)
|
|
357
|
+
related_query = """
|
|
358
|
+
PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
|
|
359
|
+
SELECT ?p ?o ?oType ?oLabel WHERE {
|
|
360
|
+
<%s> ?p ?o .
|
|
361
|
+
OPTIONAL { ?o a ?oType } .
|
|
362
|
+
OPTIONAL { ?o cred:evidenceSnippet ?oLabel } .
|
|
363
|
+
OPTIONAL { ?o cred:sourceAnalyzedReputation ?oLabel } .
|
|
364
|
+
}
|
|
365
|
+
""" % str(latest_report)
|
|
366
|
+
|
|
367
|
+
try:
|
|
368
|
+
# Level 1: Report -> Components
|
|
369
|
+
for row in self.data_graph.query(related_query):
|
|
370
|
+
p = row.p
|
|
371
|
+
o = row.o
|
|
372
|
+
|
|
373
|
+
# Skip generic system triples like rdf:type, but allow rdfs:seeAlso
|
|
374
|
+
if str(p) == str(RDF.type): continue
|
|
375
|
+
if 'Literal' in str(type(o)): continue # Skip basic literals
|
|
376
|
+
|
|
377
|
+
# Determine Group/Color
|
|
378
|
+
o_type = str(row.oType) if row.oType else "Unknown"
|
|
379
|
+
group = 2 # Default gray
|
|
380
|
+
if 'High' in o_type or 'Supporting' in o_type: group = 3 # Green (Positive)
|
|
381
|
+
if 'Low' in o_type or 'Refuting' in o_type: group = 4 # Red (Negative)
|
|
382
|
+
if 'Rapport' in o_type: group = 1 # Purple (Hub)
|
|
383
|
+
if 'SourceAnalysis' in o_type: group = 5 # Blue (Source)
|
|
384
|
+
if str(p) == str(RDFS.seeAlso): group = 7 # Orange for similar claims
|
|
385
|
+
|
|
386
|
+
# Add Target Node (Level 1)
|
|
387
|
+
o_label = row.oLabel if row.oLabel else str(o).split('#')[-1]
|
|
388
|
+
add_node(o, o_label, o_type, group)
|
|
389
|
+
|
|
390
|
+
# Add Link L1
|
|
391
|
+
link_type = 'primary'
|
|
392
|
+
if str(p) == str(RDFS.seeAlso):
|
|
393
|
+
link_type = 'similar' # Special dash style for similar claims?
|
|
394
|
+
|
|
395
|
+
links.append({
|
|
396
|
+
'source': str(latest_report),
|
|
397
|
+
'target': str(o),
|
|
398
|
+
'value': 2,
|
|
399
|
+
'type': link_type
|
|
400
|
+
})
|
|
401
|
+
|
|
402
|
+
# Level 2: Component -> Details (Recursive enrich)
|
|
403
|
+
# Specifically for SourceAnalysis and Evidence
|
|
404
|
+
l2_query = """
|
|
405
|
+
SELECT ?p2 ?o2 ?o2Type WHERE {
|
|
406
|
+
<%s> ?p2 ?o2 .
|
|
407
|
+
OPTIONAL { ?o2 a ?o2Type } .
|
|
408
|
+
FILTER(isURI(?o2))
|
|
409
|
+
}""" % str(o)
|
|
410
|
+
|
|
411
|
+
for row2 in self.data_graph.query(l2_query):
|
|
412
|
+
o2 = row2.o2
|
|
413
|
+
if str(row2.p2) == str(RDF.type): continue
|
|
414
|
+
|
|
415
|
+
o2_label = str(o2).split('#')[-1]
|
|
416
|
+
add_node(o2, o2_label, "Detail", 6) # Group 6 for leaf nodes
|
|
417
|
+
|
|
418
|
+
links.append({
|
|
419
|
+
'source': str(o),
|
|
420
|
+
'target': str(o2),
|
|
421
|
+
'value': 1,
|
|
422
|
+
'type': 'secondary'
|
|
423
|
+
})
|
|
424
|
+
|
|
425
|
+
except Exception as e:
|
|
426
|
+
print(f"Graph query error: {e}")
|
|
427
|
+
|
|
428
|
+
return {'nodes': nodes, 'links': links}
|
|
429
|
+
|
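
    # Illustrative output shape consumed by the D3.js front-end
    # (URIs shortened, values hypothetical):
    #
    #   {'nodes': [{'id': 'cred:Report_...', 'name': 'Latest Report', 'group': 1,
    #               'type': 'RapportEvaluation'}, ...],
    #    'links': [{'source': 'cred:Report_...', 'target': 'cred:SourceAnalysis_...',
    #               'value': 2, 'type': 'primary'}, ...]}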

    def export_to_ttl(self, output_path: str, include_base: bool = False) -> bool:
        """
        Export the ontology to a TTL file.

        Args:
            output_path: Path to write the TTL file
            include_base: If True, include base ontology in export

        Returns:
            True if successful
        """
        try:
            if include_base:
                combined = self.base_graph + self.data_graph
                combined.serialize(destination=output_path, format='turtle')
            else:
                self.data_graph.serialize(destination=output_path, format='turtle')

            print(f"[OntologyManager] Exported to: {output_path}")
            return True
        except Exception as e:
            print(f"[OntologyManager] Export error: {e}")
            return False

    def save_data(self) -> bool:
        """Save the data graph to its configured path."""
        if self.data_path:
            return self.export_to_ttl(self.data_path, include_base=False)
        return False


# --- Testing ---
if __name__ == "__main__":
    print("=== Testing OntologyManager ===\n")

    # Test with base ontology
    base_path = "/Users/bk280625/documents041025/MonCode/sysCRED_onto26avrtil.ttl"
    data_path = "/Users/bk280625/documents041025/MonCode/ontology/sysCRED_data.ttl"

    manager = OntologyManager(base_ontology_path=base_path, data_path=None)

    # Test adding evaluation
    sample_report = {
        'scoreCredibilite': 0.72,
        'informationEntree': 'https://www.lemonde.fr/article/test',
        'resumeAnalyse': "L'analyse suggère une crédibilité MOYENNE à ÉLEVÉE.",
        'analyseNLP': {
            'sentiment': {'label': 'POSITIVE', 'score': 0.85},
            'coherence_score': 0.78
        },
        'reglesAppliquees': {
            'source_analysis': {
                'reputation': 'High',
                'domain_age_days': 9000
            },
            'fact_checking': [
                {'claim': 'Article verified by fact-checkers', 'rating': 'True'}
            ]
        }
    }

    print("Test 1: Adding evaluation triplets...")
    report_uri = manager.add_evaluation_triplets(sample_report)
    print(f"  Created: {report_uri}")
    print()

    # Test statistics
    print("Test 2: Getting statistics...")
    stats = manager.get_statistics()
    for key, value in stats.items():
        print(f"  {key}: {value}")
    print()

    # Export test
    print("Test 3: Exporting data graph...")
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    manager.export_to_ttl(data_path)
    print(f"  Exported to: {data_path}")

    print("\n=== Tests Complete ===")
syscred/run_benchmark.py
ADDED
@@ -0,0 +1,135 @@

import json
import time
import os
import sys
from pathlib import Path
from typing import Dict, List
import pandas as pd
from datetime import datetime

# Add project root to path (one level up from this script)
sys.path.append(str(Path(__file__).parent.parent))

from syscred.verification_system import CredibilityVerificationSystem
from syscred.config import config

def run_benchmark():
    print("="*60)
    print(" SysCRED v2.1 - Scientific Evaluation Benchmark ")
    print("="*60)

    # Load Benchmark Data
    data_path = Path(__file__).parent / "benchmark_data.json"
    if not data_path.exists():
        print(f"❌ Error: {data_path} not found.")
        return

    with open(data_path, 'r') as f:
        dataset = json.load(f)

    print(f"Loaded {len(dataset)} test cases.\n")
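
    # benchmark_data.json is a JSON list; each entry needs at least the two keys
    # read below. A minimal illustrative file (URLs and labels are hypothetical):
    #
    #   [
    #     {"url": "https://www.lemonde.fr/some-article", "label": "High"},
    #     {"url": "https://example-fake-news.xyz/post", "label": "Low"}
    #   ]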

    # Initialize System with Full Capabilities
    print("Initializing SysCRED (ML Models + Google API)...")
    system = CredibilityVerificationSystem(
        ontology_base_path=str(config.ONTOLOGY_BASE_PATH),
        ontology_data_path=str(config.ONTOLOGY_DATA_PATH),
        load_ml_models=True,  # Use full ML for benchmark
        google_api_key=config.GOOGLE_FACT_CHECK_API_KEY
    )
    print("System ready.\n")

    results = []

    # Run Evaluation
    for i, item in enumerate(dataset):
        url = item['url']
        label = item['label']
        print(f"[{i+1}/{len(dataset)}] Analyzing: {url} (Expected: {label})...")

        start_time = time.time()
        try:
            # Run analysis; an empty-text fallback inside verify_information
            # is treated as a valid analysis path
            report = system.verify_information(url)
            score = report.get('score_credibilite', 0.5)

            # Determine the system verdict. The ontology distinguishes three levels
            # (High >= 0.7, Medium 0.4-0.7, Low < 0.4); for this binary benchmark a
            # single 0.55 threshold separates High from Low, with "High Credibility"
            # as the positive class for precision/recall.
            sys_verdict = "High" if score >= 0.55 else "Low"

            # Compare against the expected label
            match = sys_verdict == label

            results.append({
                "url": url,
                "expected": label,
                "score": score,
                "system_verdict": sys_verdict,
                "match": match,
                "time": time.time() - start_time,
                "error": None
            })
            print(f"  -> Score: {score:.2f} | Verdict: {sys_verdict} | match: {'✅' if match else '❌'}")

        except Exception as e:
            print(f"  -> ❌ Error: {e}")
            results.append({
                "url": url,
                "expected": label,
                "score": 0,
                "system_verdict": "Error",
                "match": False,
                "time": time.time() - start_time,
                "error": str(e)
            })

    # Calculate Metrics
    print("\n" + "="*60)
    print("RESULTS SUMMARY")
    print("="*60)

    df = pd.DataFrame(results)

    # Metric definitions:
    # TP: System=High, Expected=High
    # FP: System=High, Expected=Low
    # TN: System=Low, Expected=Low
    # FN: System=Low, Expected=High
    tp = len(df[(df['system_verdict'] == 'High') & (df['expected'] == 'High')])
    fp = len(df[(df['system_verdict'] == 'High') & (df['expected'] == 'Low')])
    tn = len(df[(df['system_verdict'] == 'Low') & (df['expected'] == 'Low')])
    fn = len(df[(df['system_verdict'] == 'Low') & (df['expected'] == 'High')])

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    accuracy = (tp + tn) / len(df) if len(df) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"Total Cases: {len(df)}")
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Precision: {precision:.2%}")
    print(f"Recall: {recall:.2%}")
    print(f"F1-Score: {f1:.2f}")

    print("\nConfusion Matrix:")
    print("          | Pred High | Pred Low")
    print(f"True High | {tp} | {fn}")
    print(f"True Low  | {fp} | {tn}")

    # Save detailed report
    report_path = Path(__file__).parent / "benchmark_results.csv"
    df.to_csv(report_path, index=False)
    print(f"\nDetailed CSV Saved to: {report_path}")

if __name__ == "__main__":
    run_benchmark()
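
# Typical invocation sketch, assuming config.ONTOLOGY_BASE_PATH, config.ONTOLOGY_DATA_PATH
# and config.GOOGLE_FACT_CHECK_API_KEY are set in syscred/config.py and
# benchmark_data.json sits next to this script:
#
#   python syscred/run_benchmark.py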