syscred 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,509 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Ontology Manager Module - SysCRED
4
+ ==================================
5
+ Manages the RDF ontology for the credibility verification system.
6
+ Handles reading, writing, and querying of semantic triplets.
7
+
8
+ (c) Dominique S. Loyer - PhD Thesis Prototype
9
+ Citation Key: loyerModelingHybridSystem2025
10
+ """
11
+
12
+ from typing import Optional, List, Dict, Any
13
+ from datetime import datetime
14
+ from dataclasses import dataclass
15
+ import os
16
+
17
+ # RDFLib imports with fallback
18
+ try:
19
+ from rdflib import Graph, Namespace, Literal, URIRef, BNode
20
+ from rdflib.namespace import RDF, RDFS, OWL, XSD
21
+ HAS_RDFLIB = True
22
+ except ImportError:
23
+ HAS_RDFLIB = False
24
+ print("Warning: rdflib not installed. Run: pip install rdflib")
25
+
26
+
27
@dataclass
class EvaluationRecord:
    """Represents a stored evaluation retrieved from the ontology.

    Produced by ``OntologyManager.query_source_history``; fields mirror the
    triples written by ``add_evaluation_triplets``.
    """
    # URI of the RapportEvaluation individual (unique per evaluation)
    evaluation_id: str
    # The informationContent literal if present, else the queried URL
    url_or_text: str
    # Credibility score in [0, 1] as stored via credibilityScoreValue
    score: float
    # Local name of the credibility level individual (e.g. "Niveau_Haut")
    level: str
    # ISO-format completion timestamp string
    timestamp: str
    # NOTE(review): currently always populated as [] by query_source_history
    fact_checks: List[str]
36
+
37
+
38
+ class OntologyManager:
39
+ """
40
+ Manages the credibility ontology using RDFLib.
41
+
42
+ Handles:
43
+ - Loading base ontology
44
+ - Adding evaluation triplets
45
+ - Querying historical data
46
+ - Exporting enriched ontology
47
+ """
48
+
49
+ # Namespace for the credibility ontology
50
+ CRED_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"
51
+
52
    def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
        """
        Initialize the ontology manager.

        Args:
            base_ontology_path: Path to the base ontology TTL file. Loaded
                only if the file exists; a missing file is silently skipped.
            data_path: Path to store/load accumulated data triplets. Also
                used later by ``save_data``.

        Raises:
            ImportError: If rdflib is not installed (checked via the
                module-level HAS_RDFLIB flag set at import time).
        """
        if not HAS_RDFLIB:
            raise ImportError("rdflib is required. Install with: pip install rdflib")

        self.base_path = base_ontology_path
        self.data_path = data_path

        # Create namespace object used to mint URIs for all individuals
        self.cred = Namespace(self.CRED_NS)

        # Two graphs: the (read-mostly) base ontology and the accumulated
        # evaluation data; queries may combine them (base_graph + data_graph).
        self.base_graph = Graph()
        self.data_graph = Graph()

        # Bind prefixes for nicer serialization
        self._bind_prefixes(self.base_graph)
        self._bind_prefixes(self.data_graph)

        # Load ontology files if they exist (load errors are logged, not raised)
        if base_ontology_path and os.path.exists(base_ontology_path):
            self.load_base_ontology(base_ontology_path)

        if data_path and os.path.exists(data_path):
            self.load_data_graph(data_path)

        # Counter for generating unique IDs.
        # NOTE(review): resets to 0 each run; uniqueness across runs relies on
        # the timestamp component of the minted URIs — confirm this suffices.
        self._evaluation_counter = 0
86
+
87
+ def _bind_prefixes(self, graph: Graph):
88
+ """Bind common prefixes to a graph."""
89
+ graph.bind("cred", self.cred)
90
+ graph.bind("owl", OWL)
91
+ graph.bind("rdf", RDF)
92
+ graph.bind("rdfs", RDFS)
93
+ graph.bind("xsd", XSD)
94
+
95
+ def load_base_ontology(self, path: str) -> bool:
96
+ """Load the base ontology from a TTL file."""
97
+ try:
98
+ self.base_graph.parse(path, format='turtle')
99
+ print(f"[OntologyManager] Loaded base ontology: {len(self.base_graph)} triples")
100
+ return True
101
+ except Exception as e:
102
+ print(f"[OntologyManager] Error loading base ontology: {e}")
103
+ return False
104
+
105
+ def load_data_graph(self, path: str) -> bool:
106
+ """Load accumulated data triplets."""
107
+ try:
108
+ self.data_graph.parse(path, format='turtle')
109
+ print(f"[OntologyManager] Loaded data graph: {len(self.data_graph)} triples")
110
+ return True
111
+ except Exception as e:
112
+ print(f"[OntologyManager] Error loading data graph: {e}")
113
+ return False
114
+
115
    def add_evaluation_triplets(self, report: Dict[str, Any]) -> str:
        """
        Add triplets for a new credibility evaluation.

        Creates Info/Request/Report individuals linked together, plus optional
        NLP-result, source-analysis, evidence, and similar-claim triples,
        depending on which keys are present in *report*.

        Args:
            report: The evaluation report dictionary from
                CredibilityVerificationSystem. Keys read here:
                'scoreCredibilite', 'informationEntree', 'resumeAnalyse',
                'analyseNLP', 'reglesAppliquees', 'similar_claims_uris'.

        Returns:
            The URI (as str) of the created RapportEvaluation individual.
        """
        timestamp = datetime.now()
        timestamp_str = timestamp.strftime("%Y%m%d_%H%M%S")
        self._evaluation_counter += 1

        # Create URIs for new individuals; uniqueness comes from the
        # timestamp (second resolution) plus the per-run counter.
        report_uri = self.cred[f"Report_{timestamp_str}_{self._evaluation_counter}"]
        request_uri = self.cred[f"Request_{timestamp_str}_{self._evaluation_counter}"]
        info_uri = self.cred[f"Info_{timestamp_str}_{self._evaluation_counter}"]

        # Get data from report (missing keys fall back to neutral defaults)
        score = report.get('scoreCredibilite', 0.5)
        input_data = report.get('informationEntree', '')
        summary = report.get('resumeAnalyse', '')

        # Determine credibility level based on score
        # thresholds: >= 0.7 High, >= 0.4 Medium, else Low
        if score >= 0.7:
            level_uri = self.cred.Niveau_Haut
            info_class = self.cred.InformationHauteCredibilite
        elif score >= 0.4:
            level_uri = self.cred.Niveau_Moyen
            info_class = self.cred.InformationMoyenneCredibilite
        else:
            level_uri = self.cred.Niveau_Bas
            info_class = self.cred.InformationFaibleCredibilite

        # Add Information triplets (content is truncated to 500 chars)
        self.data_graph.add((info_uri, RDF.type, self.cred.InformationSoumise))
        self.data_graph.add((info_uri, RDF.type, info_class))
        self.data_graph.add((info_uri, self.cred.informationContent,
                             Literal(input_data[:500], datatype=XSD.string)))

        # Check if it's a URL (matches both http:// and https://)
        if input_data.startswith('http'):
            self.data_graph.add((info_uri, self.cred.informationURL,
                                 Literal(input_data, datatype=XSD.anyURI)))

        # Add Request triplets
        self.data_graph.add((request_uri, RDF.type, self.cred.RequeteEvaluation))
        self.data_graph.add((request_uri, self.cred.concernsInformation, info_uri))
        self.data_graph.add((request_uri, self.cred.submissionTimestamp,
                             Literal(timestamp.isoformat(), datatype=XSD.dateTime)))
        self.data_graph.add((request_uri, self.cred.requestStatus,
                             Literal("Completed", datatype=XSD.string)))

        # Add Report triplets
        self.data_graph.add((report_uri, RDF.type, self.cred.RapportEvaluation))
        self.data_graph.add((report_uri, self.cred.isReportOf, request_uri))
        self.data_graph.add((report_uri, self.cred.credibilityScoreValue,
                             Literal(float(score), datatype=XSD.float)))
        self.data_graph.add((report_uri, self.cred.assignsCredibilityLevel, level_uri))
        self.data_graph.add((report_uri, self.cred.completionTimestamp,
                             Literal(timestamp.isoformat(), datatype=XSD.dateTime)))
        self.data_graph.add((report_uri, self.cred.reportSummary,
                             Literal(summary, datatype=XSD.string)))

        # Add NLP results if available
        nlp_results = report.get('analyseNLP', {})
        if nlp_results:
            nlp_result_uri = self.cred[f"NLPResult_{timestamp_str}_{self._evaluation_counter}"]
            self.data_graph.add((nlp_result_uri, RDF.type, self.cred.ResultatNLP))
            self.data_graph.add((report_uri, self.cred.includesNLPResult, nlp_result_uri))

            sentiment = nlp_results.get('sentiment', {})
            if sentiment:
                self.data_graph.add((nlp_result_uri, self.cred.sentimentScore,
                                     Literal(float(sentiment.get('score', 0.5)), datatype=XSD.float)))

            coherence = nlp_results.get('coherence_score')
            if coherence is not None:
                self.data_graph.add((nlp_result_uri, self.cred.coherenceScore,
                                     Literal(float(coherence), datatype=XSD.float)))

        # Add source analysis if available
        rules = report.get('reglesAppliquees', {})
        source_analysis = rules.get('source_analysis', {})
        if source_analysis:
            source_uri = self.cred[f"SourceAnalysis_{timestamp_str}_{self._evaluation_counter}"]
            self.data_graph.add((source_uri, RDF.type, self.cred.InfoSourceAnalyse))
            self.data_graph.add((report_uri, self.cred.includesSourceAnalysis, source_uri))

            reputation = source_analysis.get('reputation', 'Unknown')
            self.data_graph.add((source_uri, self.cred.sourceAnalyzedReputation,
                                 Literal(reputation, datatype=XSD.string)))

            domain_age = source_analysis.get('domain_age_days')
            if domain_age is not None:
                # NOTE(review): domain age (days) is stored under the
                # 'sourceMentionsCount' property — the property name looks
                # wrong for this value; confirm against the ontology schema.
                self.data_graph.add((source_uri, self.cred.sourceMentionsCount,
                                     Literal(int(domain_age), datatype=XSD.integer)))

        # Add fact check results (one PreuveFactuelle per fact-check entry)
        fact_checks = rules.get('fact_checking', [])
        for i, fc in enumerate(fact_checks):
            evidence_uri = self.cred[f"Evidence_{timestamp_str}_{self._evaluation_counter}_{i}"]
            self.data_graph.add((evidence_uri, RDF.type, self.cred.PreuveFactuelle))
            self.data_graph.add((report_uri, self.cred.basedOnEvidence, evidence_uri))

            self.data_graph.add((evidence_uri, self.cred.evidenceClaim,
                                 Literal(fc.get('claim', ''), datatype=XSD.string)))
            self.data_graph.add((evidence_uri, self.cred.evidenceVerdict,
                                 Literal(fc.get('rating', ''), datatype=XSD.string)))
            self.data_graph.add((evidence_uri, self.cred.evidenceSource,
                                 Literal(fc.get('publisher', ''), datatype=XSD.string)))
            if fc.get('url'):
                self.data_graph.add((evidence_uri, self.cred.evidenceURL,
                                     Literal(fc.get('url', ''), datatype=XSD.anyURI)))

        # [NEW] Link similar claims found by GraphRAG via rdfs:seeAlso
        similar_uris = report.get('similar_claims_uris', [])
        for sim_uri_str in similar_uris:
            try:
                sim_uri = URIRef(sim_uri_str)
                self.data_graph.add((report_uri, RDFS.seeAlso, sim_uri))
            except Exception as e:
                print(f"[Ontology] Error linking similar URI {sim_uri_str}: {e}")

        print(f"[OntologyManager] Added evaluation triplets. Report: {report_uri}")
        return str(report_uri)
242
+
243
+ def query_source_history(self, url: str) -> List[EvaluationRecord]:
244
+ """
245
+ Query all previous evaluations for a URL/domain.
246
+
247
+ Args:
248
+ url: URL to search for
249
+
250
+ Returns:
251
+ List of EvaluationRecord for this source
252
+ """
253
+ results = []
254
+
255
+ # SPARQL query to find all evaluations for this URL
256
+ query = """
257
+ PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
258
+ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
+
260
+ SELECT ?report ?score ?level ?timestamp ?content
261
+ WHERE {
262
+ ?info cred:informationURL ?url .
263
+ ?request cred:concernsInformation ?info .
264
+ ?report cred:isReportOf ?request .
265
+ ?report cred:credibilityScoreValue ?score .
266
+ ?report cred:assignsCredibilityLevel ?level .
267
+ ?report cred:completionTimestamp ?timestamp .
268
+ ?info cred:informationContent ?content .
269
+ FILTER(CONTAINS(STR(?url), "%s"))
270
+ }
271
+ ORDER BY DESC(?timestamp)
272
+ """ % url
273
+
274
+ try:
275
+ # Query combined graph (base + data)
276
+ combined = self.base_graph + self.data_graph
277
+ for row in combined.query(query):
278
+ results.append(EvaluationRecord(
279
+ evaluation_id=str(row.report),
280
+ url_or_text=str(row.content) if row.content else url,
281
+ score=float(row.score),
282
+ level=str(row.level).split('#')[-1],
283
+ timestamp=str(row.timestamp),
284
+ fact_checks=[]
285
+ ))
286
+ except Exception as e:
287
+ print(f"[OntologyManager] Query error: {e}")
288
+
289
+ return results
290
+
291
+ def get_statistics(self) -> Dict[str, Any]:
292
+ """Get statistics about the ontology data."""
293
+ stats = {
294
+ 'base_triples': len(self.base_graph),
295
+ 'data_triples': len(self.data_graph),
296
+ 'total_triples': len(self.base_graph) + len(self.data_graph),
297
+ }
298
+
299
+ # Count evaluations
300
+ query = """
301
+ PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
302
+ SELECT (COUNT(?report) as ?count) WHERE {
303
+ ?report a cred:RapportEvaluation .
304
+ }
305
+ """
306
+ try:
307
+ for row in self.data_graph.query(query):
308
+ stats['total_evaluations'] = int(row.count)
309
+ except:
310
+ stats['total_evaluations'] = 0
311
+
312
+ return stats
313
+
314
    def get_graph_json(self) -> Dict[str, List]:
        """
        Convert ontology data into D3.js JSON format (Nodes & Links).

        Builds a two-level neighborhood around the most recent
        RapportEvaluation: level 1 is the report's direct (URI) objects,
        level 2 the URI objects of those components.

        Returns:
            Dict with 'nodes' (id/name/group/type dicts) and 'links'
            (source/target/value/type dicts); both empty if no report exists.
        """
        nodes = []
        links = []
        added_nodes = set()

        # Get the latest report ID (ordered by completion timestamp)
        latest_query = """
        PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
        SELECT ?report ?timestamp WHERE {
            ?report a cred:RapportEvaluation .
            ?report cred:completionTimestamp ?timestamp .
        }
        ORDER BY DESC(?timestamp)
        LIMIT 1
        """
        latest_report = None
        try:
            for row in self.data_graph.query(latest_query):
                latest_report = row.report
        # NOTE(review): bare except silently hides query errors — consider
        # narrowing to Exception and logging.
        except:
            pass

        if not latest_report:
            return {'nodes': [], 'links': []}

        # Helper to add node if unique (dedup by stringified URI)
        def add_node(uri, label, type_class, group):
            if str(uri) not in added_nodes:
                nodes.append({
                    'id': str(uri),
                    'name': str(label),
                    'group': group,
                    'type': str(type_class).split('#')[-1]
                })
                added_nodes.add(str(uri))

        # Add Central Node (Report)
        add_node(latest_report, "Latest Report", "cred:RapportEvaluation", 1)

        # Query triples related to this report (Level 1); the OPTIONALs pull a
        # human-readable label from either evidence snippet or reputation.
        related_query = """
        PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
        SELECT ?p ?o ?oType ?oLabel WHERE {
            <%s> ?p ?o .
            OPTIONAL { ?o a ?oType } .
            OPTIONAL { ?o cred:evidenceSnippet ?oLabel } .
            OPTIONAL { ?o cred:sourceAnalyzedReputation ?oLabel } .
        }
        """ % str(latest_report)

        try:
            # Level 1: Report -> Components
            for row in self.data_graph.query(related_query):
                p = row.p
                o = row.o

                # Skip generic system triples like rdf:type, but allow rdfs:seeAlso
                if str(p) == str(RDF.type): continue
                if 'Literal' in str(type(o)): continue  # Skip basic literals

                # Determine Group/Color (later checks intentionally override earlier)
                o_type = str(row.oType) if row.oType else "Unknown"
                group = 2  # Default gray
                if 'High' in o_type or 'Supporting' in o_type: group = 3  # Green (Positive)
                if 'Low' in o_type or 'Refuting' in o_type: group = 4  # Red (Negative)
                if 'Rapport' in o_type: group = 1  # Purple (Hub)
                if 'SourceAnalysis' in o_type: group = 5  # Blue (Source)
                if str(p) == str(RDFS.seeAlso): group = 7  # Orange for similar claims

                # Add Target Node (Level 1); fall back to the URI local name
                o_label = row.oLabel if row.oLabel else str(o).split('#')[-1]
                add_node(o, o_label, o_type, group)

                # Add Link L1
                link_type = 'primary'
                if str(p) == str(RDFS.seeAlso):
                    link_type = 'similar'  # Special dash style for similar claims?

                links.append({
                    'source': str(latest_report),
                    'target': str(o),
                    'value': 2,
                    'type': link_type
                })

                # Level 2: Component -> Details (Recursive enrich)
                # Specifically for SourceAnalysis and Evidence; the FILTER
                # keeps only URI objects (literals are skipped).
                l2_query = """
                SELECT ?p2 ?o2 ?o2Type WHERE {
                    <%s> ?p2 ?o2 .
                    OPTIONAL { ?o2 a ?o2Type } .
                    FILTER(isURI(?o2))
                }""" % str(o)

                for row2 in self.data_graph.query(l2_query):
                    o2 = row2.o2
                    if str(row2.p2) == str(RDF.type): continue

                    o2_label = str(o2).split('#')[-1]
                    add_node(o2, o2_label, "Detail", 6)  # Group 6 for leaf nodes

                    links.append({
                        'source': str(o),
                        'target': str(o2),
                        'value': 1,
                        'type': 'secondary'
                    })

        except Exception as e:
            print(f"Graph query error: {e}")

        return {'nodes': nodes, 'links': links}
429
+
430
+ def export_to_ttl(self, output_path: str, include_base: bool = False) -> bool:
431
+ """
432
+ Export the ontology to a TTL file.
433
+
434
+ Args:
435
+ output_path: Path to write the TTL file
436
+ include_base: If True, include base ontology in export
437
+
438
+ Returns:
439
+ True if successful
440
+ """
441
+ try:
442
+ if include_base:
443
+ combined = self.base_graph + self.data_graph
444
+ combined.serialize(destination=output_path, format='turtle')
445
+ else:
446
+ self.data_graph.serialize(destination=output_path, format='turtle')
447
+
448
+ print(f"[OntologyManager] Exported to: {output_path}")
449
+ return True
450
+ except Exception as e:
451
+ print(f"[OntologyManager] Export error: {e}")
452
+ return False
453
+
454
+ def save_data(self) -> bool:
455
+ """Save the data graph to its configured path."""
456
+ if self.data_path:
457
+ return self.export_to_ttl(self.data_path, include_base=False)
458
+ return False
459
+
460
+
461
# --- Testing ---
# Manual smoke test: exercises triple creation, statistics, and TTL export.
if __name__ == "__main__":
    print("=== Testing OntologyManager ===\n")

    # Test with base ontology
    # NOTE(review): hard-coded absolute user paths — this smoke test only
    # runs on the author's machine; consider reading paths from env/config.
    base_path = "/Users/bk280625/documents041025/MonCode/sysCRED_onto26avrtil.ttl"
    data_path = "/Users/bk280625/documents041025/MonCode/ontology/sysCRED_data.ttl"

    # data_path=None: start with an empty data graph for the test run
    manager = OntologyManager(base_ontology_path=base_path, data_path=None)

    # Test adding evaluation (keys mirror what add_evaluation_triplets reads)
    sample_report = {
        'scoreCredibilite': 0.72,
        'informationEntree': 'https://www.lemonde.fr/article/test',
        'resumeAnalyse': "L'analyse suggère une crédibilité MOYENNE à ÉLEVÉE.",
        'analyseNLP': {
            'sentiment': {'label': 'POSITIVE', 'score': 0.85},
            'coherence_score': 0.78
        },
        'reglesAppliquees': {
            'source_analysis': {
                'reputation': 'High',
                'domain_age_days': 9000
            },
            'fact_checking': [
                {'claim': 'Article verified by fact-checkers', 'rating': 'True'}
            ]
        }
    }

    print("Test 1: Adding evaluation triplets...")
    report_uri = manager.add_evaluation_triplets(sample_report)
    print(f" Created: {report_uri}")
    print()

    # Test statistics
    print("Test 2: Getting statistics...")
    stats = manager.get_statistics()
    for key, value in stats.items():
        print(f" {key}: {value}")
    print()

    # Export test (create the parent directory if needed)
    print("Test 3: Exporting data graph...")
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    manager.export_to_ttl(data_path)
    print(f" Exported to: {data_path}")

    print("\n=== Tests Complete ===")
@@ -0,0 +1,135 @@
1
+
2
+ import json
3
+ import time
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Dict, List
8
+ import pandas as pd
9
+ from datetime import datetime
10
+
11
+ # Add project root to path (one level up from this script)
12
+ sys.path.append(str(Path(__file__).parent.parent))
13
+
14
+ from syscred.verification_system import CredibilityVerificationSystem
15
+ from syscred.config import config
16
+
17
def run_benchmark():
    """
    Run the SysCRED benchmark over benchmark_data.json.

    Each test case's credibility score is binarized (High if score >= 0.55,
    else Low) and compared to the expected label. Prints accuracy, precision,
    recall, F1 and a confusion matrix (positive class = "High"), and writes
    per-case results to benchmark_results.csv next to this script.
    """
    print("=" * 60)
    print(" SysCRED v2.1 - Scientific Evaluation Benchmark ")
    print("=" * 60)

    # Load benchmark data
    data_path = Path(__file__).parent / "benchmark_data.json"
    if not data_path.exists():
        print(f"❌ Error: {data_path} not found.")
        return

    with open(data_path, 'r') as f:
        dataset = json.load(f)

    print(f"Loaded {len(dataset)} test cases.\n")

    # Initialize system with full capabilities
    print("Initializing SysCRED (ML Models + Google API)...")
    system = CredibilityVerificationSystem(
        ontology_base_path=str(config.ONTOLOGY_BASE_PATH),
        ontology_data_path=str(config.ONTOLOGY_DATA_PATH),
        load_ml_models=True,  # Use full ML for benchmark
        google_api_key=config.GOOGLE_FACT_CHECK_API_KEY
    )
    print("System ready.\n")

    results = []

    # Run evaluation over every case, recording per-case outcome and timing
    for i, item in enumerate(dataset):
        url = item['url']
        label = item['label']
        print(f"[{i+1}/{len(dataset)}] Analyzing: {url} (Expected: {label})...")

        start_time = time.time()
        try:
            report = system.verify_information(url)
            # NOTE(review): key here is 'score_credibilite' while the
            # ontology module reads 'scoreCredibilite' — confirm which key
            # verify_information actually returns; a mismatch would make
            # every score fall back to 0.5.
            score = report.get('score_credibilite', 0.5)

            # Binary verdict; positive class is "High" credibility.
            sys_verdict = "High" if score >= 0.55 else "Low"

            # BUG FIX (cleanup): the original OR-chain
            # (a == b) or (b == "High" and a == "High") or (b == "Low" and a == "Low")
            # is logically equivalent to plain equality.
            match = sys_verdict == label

            results.append({
                "url": url,
                "expected": label,
                "score": score,
                "system_verdict": sys_verdict,
                "match": match,
                "time": time.time() - start_time,
                "error": None
            })
            print(f" -> Score: {score:.2f} | Verdict: {sys_verdict} | match: {'✅' if match else '❌'}")

        except Exception as e:
            # Record failures so the CSV accounts for every case.
            print(f" -> ❌ Error: {e}")
            results.append({
                "url": url,
                "expected": label,
                "score": 0,
                "system_verdict": "Error",
                "match": False,
                "time": time.time() - start_time,
                "error": str(e)
            })

    # Calculate metrics
    print("\n" + "=" * 60)
    print("RESULTS SUMMARY")
    print("=" * 60)

    df = pd.DataFrame(results)

    # Confusion-matrix cells (positive class = expected "High"):
    # TP: System=High, Expected=High | FP: System=High, Expected=Low
    # TN: System=Low,  Expected=Low  | FN: System=Low,  Expected=High
    # Cases with verdict "Error" count toward none of the four cells but
    # still lower accuracy, since it divides by the total case count.
    tp = len(df[(df['system_verdict'] == 'High') & (df['expected'] == 'High')])
    fp = len(df[(df['system_verdict'] == 'High') & (df['expected'] == 'Low')])
    tn = len(df[(df['system_verdict'] == 'Low') & (df['expected'] == 'Low')])
    fn = len(df[(df['system_verdict'] == 'Low') & (df['expected'] == 'High')])

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    accuracy = (tp + tn) / len(df) if len(df) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print(f"Total Cases: {len(df)}")
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Precision: {precision:.2%}")
    print(f"Recall: {recall:.2%}")
    print(f"F1-Score: {f1:.2f}")

    print("\nConfusion Matrix:")
    print(f" | Pred High | Pred Low")
    print(f"True High | {tp} | {fn}")
    print(f"True Low | {fp} | {tn}")

    # Save detailed report
    report_path = Path(__file__).parent / "benchmark_results.csv"
    df.to_csv(report_path, index=False)
    print(f"\nDetailed CSV Saved to: {report_path}")

if __name__ == "__main__":
    run_benchmark()