syscred 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
"""Manual smoke test for the Google Fact Check Tools ``claims:search`` API.

Reads ``SYSCRED_GOOGLE_API_KEY`` from the environment (optionally populated
from a ``.env`` file), sends one fixed query and prints up to three of the
returned claims.
"""
import os
import sys

import requests
from dotenv import load_dotenv

# Seconds to wait for the API before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

# Load environment variables.
# No explicit path: python-dotenv looks for the nearest ``.env`` on its own,
# so the script does not depend on one developer's absolute home directory.
load_dotenv()

API_KEY = os.getenv('SYSCRED_GOOGLE_API_KEY')

# Check for the key *before* slicing it: slicing ``None`` raises TypeError and
# would hide the intended error message below.
if not API_KEY:
    print("❌ Error: API Key not found in .env")
    sys.exit(1)

# Only the first and last five characters are echoed, never the whole key.
print(f"Loaded API Key: {API_KEY[:5]}...{API_KEY[-5:]}")

query = "La terre est plate"
url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
params = {
    'key': API_KEY,
    'query': query,
}

print(f"\nSending request for query: '{query}'...")
try:
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    print(f"Status Code: {response.status_code}")

    if response.status_code == 200:
        data = response.json()
        claims = data.get('claims', [])
        print(f"✅ Success! Found {len(claims)} claims.")
        # Show at most the first three claims to keep the output short.
        for position, claim in enumerate(claims[:3], start=1):
            print(f"\n--- Result {position} ---")
            print(f"Claim: {claim.get('text')}")
            print(f"Claimant: {claim.get('claimant')}")
            reviews = claim.get('claimReview', [])
            if reviews:
                print(f"Rating: {reviews[0].get('textualRating')}")
                print(f"URL: {reviews[0].get('url')}")
    else:
        print(f"❌ API Error: {response.text}")

except (requests.RequestException, ValueError) as e:
    # RequestException: network/timeout failures.
    # ValueError: a 200 response whose body is not valid JSON.
    print(f"❌ Connection Error: {e}")
@@ -0,0 +1,58 @@
1
+ import sys
2
+ from pathlib import Path
3
+ import json
4
+
5
+ # Add project root to path (one level up from this script)
6
+ sys.path.append(str(Path(__file__).parent.parent))
7
+
8
+ from syscred.ontology_manager import OntologyManager
9
+ from syscred.config import config
10
+
11
def debug_graph():
    """Print a diagnostic report about the ontology graph export.

    Builds an ``OntologyManager`` from the configured ontology paths, prints
    its statistics and the node/link counts of ``get_graph_json()``. When the
    export contains nodes, the first three are dumped as JSON; otherwise a
    SPARQL query listing the five most recent evaluation reports is run
    against ``manager.data_graph`` and its rows are printed.
    """
    print("=== Debugging Ontology Graph Extraction ===")

    # Build the manager from the configured ontology locations.
    ontology_path = str(config.ONTOLOGY_BASE_PATH)
    data_path = str(config.ONTOLOGY_DATA_PATH)
    print(f"Loading data from: {data_path}")
    manager = OntologyManager(
        base_ontology_path=ontology_path,
        data_path=data_path,
    )

    # Headline statistics.
    statistics = manager.get_statistics()
    print(f"Total Triples: {statistics['total_triples']}")
    print(f"Evaluations: {statistics.get('total_evaluations', 'N/A')}")

    # Graph export.
    print("\nExtracting Graph JSON...")
    exported = manager.get_graph_json()
    nodes = exported.get('nodes', [])
    links = exported.get('links', [])
    print(f"Nodes found: {len(nodes)}")
    print(f"Links found: {len(links)}")

    if len(nodes) > 0:
        print("\n--- Sample Nodes ---")
        for sample in nodes[:3]:
            print(json.dumps(sample, indent=2))
        return

    # Empty export: query the data graph directly to see which reports exist.
    print("\n❌ No nodes found! Checking latest report query...")
    query = """
    PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
    SELECT ?report ?timestamp WHERE {
    ?report a cred:RapportEvaluation .
    ?report cred:completionTimestamp ?timestamp .
    }
    ORDER BY DESC(?timestamp)
    LIMIT 5
    """
    print(f"Running SPARQL:\n{query}")
    for row in manager.data_graph.query(query):
        print(f"Found Report: {row.report} at {row.timestamp}")


if __name__ == "__main__":
    debug_graph()
syscred/debug_init.py ADDED
@@ -0,0 +1,33 @@
1
+
2
"""Diagnostic script: check that the core SysCRED components can be built.

Prints the configured ontology paths, instantiates ``SEOAnalyzer`` and then
``CredibilityVerificationSystem`` with ML models disabled. Any exception
raised during these steps is reported together with its traceback.
"""
import sys
import os
import traceback

# Setup path: make the parent directory importable so ``syscred`` resolves
# when this file is run directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from syscred.verification_system import CredibilityVerificationSystem
from syscred.config import config
from syscred.seo_analyzer import SEOAnalyzer

print("=== DEBUG INITIALIZATION ===")
try:
    print("[1] Config check:")
    print(f" Base Ontology: {config.ONTOLOGY_BASE_PATH}")
    print(f" Data Path: {config.ONTOLOGY_DATA_PATH}")

    print("\n[2] Initializing SEO Analyzer...")
    seo_analyzer = SEOAnalyzer()
    print(" OK")

    print("\n[3] Initializing Verification System...")
    # Bound to ``verification_system`` rather than ``sys`` so the imported
    # ``sys`` module is not overwritten.
    verification_system = CredibilityVerificationSystem(
        ontology_base_path=config.ONTOLOGY_BASE_PATH,
        ontology_data_path=config.ONTOLOGY_DATA_PATH,
        load_ml_models=False  # Disable ML for basic init test
    )
    print(" OK - System initialized successfully.")

except Exception as e:
    # Top-level diagnostic boundary: report whatever went wrong.
    print(f"\n❌ FATAL ERROR: {e}")
    traceback.print_exc()
@@ -0,0 +1,25 @@
1
"""Manual smoke test: POST one claim to a locally running ``/api/verify``.

Prints the HTTP status and, on success, the ``fact_checking`` entries found
under ``reglesAppliquees`` in the JSON response.
"""
import requests
import json

# Seconds to wait for the local server; without a timeout ``requests`` can
# block forever if the server accepts the connection but never answers.
REQUEST_TIMEOUT_SECONDS = 30

url = "http://localhost:5001/api/verify"
payload = {
    "input_data": "la terre est plate",
    "include_seo": True
}
headers = {'Content-Type': 'application/json'}

try:
    print(f"Sending POST to {url} with payload: {payload}")
    response = requests.post(
        url,
        json=payload,
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    print(f"Status: {response.status_code}")

    if response.status_code == 200:
        data = response.json()
        print("\n--- JSON RESPONSE PARTIAL ---")
        facts = data.get('reglesAppliquees', {}).get('fact_checking', [])
        print(f"Fact Checks Count: {len(facts)}")
        # ensure_ascii=False keeps accented characters readable in the dump.
        print("Fact Checks Items:", json.dumps(facts, indent=2, ensure_ascii=False))
    else:
        print("Error:", response.text)
except (requests.RequestException, ValueError) as e:
    # RequestException: connection/timeout failures.
    # ValueError: a 200 response whose body is not valid JSON.
    print(f"Connection failed: {e}")
@@ -0,0 +1,37 @@
1
+
2
"""Diagnostic script: import the core SysCRED modules one at a time.

Each import is attempted independently so that a failure in one module is
reported with its traceback without preventing the remaining checks.
"""
import importlib
import os
import sys
import traceback

# Make the parent directory importable so ``syscred`` resolves when this file
# is run directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# (module to import, attribute that must exist on it), in report order.
IMPORT_CHECKS = (
    ("syscred.config", "config"),
    ("syscred.database", "init_db"),
    ("syscred.ontology_manager", "OntologyManager"),
    ("syscred.verification_system", "CredibilityVerificationSystem"),
)


def check_import(step, module_name, attribute):
    """Try to import *module_name* and look up *attribute* on it.

    Args:
        step: Number shown in the ``[n]`` prefix of the progress line.
        module_name: Dotted module path to import.
        attribute: Name expected to be defined by that module.

    Returns:
        True when both the import and the attribute lookup succeed; False
        otherwise (the traceback is printed, nothing is raised).
    """
    # Progress line shows only the last path component, e.g. "config".
    print(f"[{step}] Importing {module_name.rpartition('.')[2]}...")
    try:
        module = importlib.import_module(module_name)
        getattr(module, attribute)
    except Exception:
        # Diagnostic boundary: report and carry on with the next check.
        traceback.print_exc()
        return False
    print(" OK")
    return True


print("--- DIAGNOSTIC START ---")
for step_number, (module_path, attribute_name) in enumerate(IMPORT_CHECKS, start=1):
    check_import(step_number, module_path, attribute_name)
print("--- DIAGNOSTIC END ---")
@@ -0,0 +1,349 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Evaluation Metrics Module - SysCRED
4
+ ====================================
5
+ Information Retrieval evaluation metrics for TREC-style experiments.
6
+
7
+ Metrics:
8
+ - MAP (Mean Average Precision)
9
+ - NDCG (Normalized Discounted Cumulative Gain)
10
+ - P@K (Precision at K)
11
+ - Recall@K
12
+ - MRR (Mean Reciprocal Rank)
13
+
14
+ Based on pytrec_eval for official TREC evaluation.
15
+
16
+ (c) Dominique S. Loyer - PhD Thesis Prototype
17
+ Citation Key: loyerEvaluationModelesRecherche2025
18
+ """
19
+
20
+ import math
21
+ from typing import Dict, List, Tuple, Any
22
+ from collections import defaultdict
23
+
24
+ # Check for pytrec_eval
25
+ try:
26
+ import pytrec_eval
27
+ HAS_PYTREC_EVAL = True
28
+ except ImportError:
29
+ HAS_PYTREC_EVAL = False
30
+ print("[EvalMetrics] pytrec_eval not installed. Using built-in metrics.")
31
+
32
+
33
class EvaluationMetrics:
    """
    IR evaluation metrics using pytrec_eval or built-in implementations.

    Supports TREC-style evaluation with:
    - pytrec_eval (when the module-level import succeeded)
    - Fallback pure-Python implementations

    The metric helpers are static methods and can be used without an
    instance. In all of them ``retrieved`` is a ranked list of document ids
    (best first) and ``relevant`` is the set of ids judged relevant.
    """

    def __init__(self):
        """Initialize the metrics calculator."""
        # Callers may set this to False to force the built-in metrics.
        self.use_pytrec = HAS_PYTREC_EVAL

    # --- Built-in Metric Implementations ---

    @staticmethod
    def precision_at_k(retrieved: List[str], relevant: set, k: int) -> float:
        """
        Calculate Precision@K.

        P@K = |relevant ∩ retrieved[:k]| / k

        The denominator is always ``k``, even when fewer than ``k``
        documents were retrieved. Returns 0.0 for ``k <= 0``.
        """
        if k <= 0:
            return 0.0
        hits = sum(1 for doc in retrieved[:k] if doc in relevant)
        return hits / k

    @staticmethod
    def recall_at_k(retrieved: List[str], relevant: set, k: int) -> float:
        """
        Calculate Recall@K.

        R@K = |relevant ∩ retrieved[:k]| / |relevant|

        Returns 0.0 when ``relevant`` is empty or ``k <= 0``.
        """
        # A negative k would make the slice drop items from the *end* of the
        # ranking instead of selecting nothing, so reject it explicitly.
        if not relevant or k <= 0:
            return 0.0
        hits = sum(1 for doc in retrieved[:k] if doc in relevant)
        return hits / len(relevant)

    @staticmethod
    def average_precision(retrieved: List[str], relevant: set) -> float:
        """
        Calculate Average Precision for a single query.

        AP = (1/|relevant|) × Σ (P@k × rel(k))

        Returns 0.0 when ``relevant`` is empty.
        """
        if not relevant:
            return 0.0

        hits = 0
        sum_precision = 0.0
        for rank, doc in enumerate(retrieved, start=1):
            if doc in relevant:
                hits += 1
                # Precision at this rank, counted only at relevant positions.
                sum_precision += hits / rank

        return sum_precision / len(relevant)

    @staticmethod
    def dcg_at_k(retrieved: List[str], relevance: Dict[str, int], k: int) -> float:
        """
        Calculate DCG@K (Discounted Cumulative Gain).

        DCG@K = Σ (2^rel(i) - 1) / log2(i + 2)    for 0-based position i

        Documents missing from ``relevance`` count as relevance 0.
        Returns 0.0 for ``k <= 0``.
        """
        if k <= 0:
            return 0.0
        dcg = 0.0
        for position, doc in enumerate(retrieved[:k]):
            rel = relevance.get(doc, 0)
            dcg += (2 ** rel - 1) / math.log2(position + 2)
        return dcg

    @staticmethod
    def ndcg_at_k(retrieved: List[str], relevance: Dict[str, int], k: int) -> float:
        """
        Calculate NDCG@K (Normalized DCG).

        NDCG@K = DCG@K / IDCG@K

        IDCG is the DCG of the judged documents sorted by decreasing
        relevance. Returns 0.0 when IDCG is not positive or ``k <= 0``.
        """
        if k <= 0:
            return 0.0

        dcg = EvaluationMetrics.dcg_at_k(retrieved, relevance, k)

        # Calculate IDCG (ideal DCG)
        ideal_rels = sorted(relevance.values(), reverse=True)[:k]
        idcg = 0.0
        for position, rel in enumerate(ideal_rels):
            idcg += (2 ** rel - 1) / math.log2(position + 2)

        return dcg / idcg if idcg > 0 else 0.0

    @staticmethod
    def reciprocal_rank(retrieved: List[str], relevant: set) -> float:
        """
        Calculate Reciprocal Rank.

        RR = 1 / rank of first relevant document (0.0 if none is retrieved)
        """
        for rank, doc in enumerate(retrieved, start=1):
            if doc in relevant:
                return 1.0 / rank
        return 0.0

    # --- TREC-Style Evaluation ---

    def evaluate_run(
        self,
        run: Dict[str, List[Tuple[str, float]]],
        qrels: Dict[str, Dict[str, int]],
        metrics: List[str] = None
    ) -> Dict[str, Dict[str, float]]:
        """
        Evaluate a run against qrels (relevance judgments).

        Args:
            run: {query_id: [(doc_id, score), ...]}
            qrels: {query_id: {doc_id: relevance}}
            metrics: List of metrics to compute, e.g.
                ['map', 'ndcg', 'P_5', 'P_10', 'recall_100'].
                Defaults to map, ndcg, P_5, P_10, P_20, recall_100 and
                recip_rank when None.

        Returns:
            {query_id: {metric: value}}
        """
        if metrics is None:
            metrics = ['map', 'ndcg', 'P_5', 'P_10', 'P_20', 'recall_100', 'recip_rank']

        if self.use_pytrec and HAS_PYTREC_EVAL:
            return self._evaluate_pytrec(run, qrels, metrics)
        return self._evaluate_builtin(run, qrels, metrics)

    def _evaluate_pytrec(
        self,
        run: Dict[str, List[Tuple[str, float]]],
        qrels: Dict[str, Dict[str, int]],
        metrics: List[str]
    ) -> Dict[str, Dict[str, float]]:
        """Evaluate using pytrec_eval."""
        # Convert run format for pytrec_eval: {query_id: {doc_id: score}}.
        # Scores are coerced to float because callers (including this
        # module's own self-test) may pass ints.
        # NOTE(review): pytrec_eval is understood to reject non-float
        # scores - confirm against the installed version.
        pytrec_run = {
            qid: {doc_id: float(score) for doc_id, score in docs}
            for qid, docs in run.items()
        }

        # Create evaluator
        evaluator = pytrec_eval.RelevanceEvaluator(qrels, set(metrics))

        # Evaluate
        return evaluator.evaluate(pytrec_run)

    def _evaluate_builtin(
        self,
        run: Dict[str, List[Tuple[str, float]]],
        qrels: Dict[str, Dict[str, int]],
        metrics: List[str]
    ) -> Dict[str, Dict[str, float]]:
        """
        Evaluate using built-in implementations.

        Queries absent from ``qrels`` are skipped. Metric names that match
        none of the supported patterns ('map', 'ndcg', 'ndcg_cut_K', 'P_K',
        'recall_K', 'recip_rank') are ignored.
        """
        results = {}

        for qid, docs_scores in run.items():
            if qid not in qrels:
                continue

            # Rank by descending score so this path agrees with the
            # pytrec_eval path, which ranks by score and not by list order.
            # The sort is stable, so ties keep their input order.
            ranked = sorted(docs_scores, key=lambda pair: pair[1], reverse=True)
            retrieved = [doc_id for doc_id, _ in ranked]
            relevance = qrels[qid]
            # Only strictly positive judgments count as relevant.
            relevant = {doc_id for doc_id, rel in relevance.items() if rel > 0}

            q_results = {}
            for metric in metrics:
                if metric == 'map':
                    q_results['map'] = self.average_precision(retrieved, relevant)
                elif metric == 'ndcg':
                    # Uncut NDCG is approximated with a cutoff of 1000.
                    q_results['ndcg'] = self.ndcg_at_k(retrieved, relevance, 1000)
                elif metric.startswith('ndcg_cut_'):
                    k = int(metric.split('_')[-1])
                    q_results[metric] = self.ndcg_at_k(retrieved, relevance, k)
                elif metric.startswith('P_'):
                    k = int(metric.split('_')[-1])
                    q_results[metric] = self.precision_at_k(retrieved, relevant, k)
                elif metric.startswith('recall_'):
                    k = int(metric.split('_')[-1])
                    q_results[metric] = self.recall_at_k(retrieved, relevant, k)
                elif metric == 'recip_rank':
                    q_results['recip_rank'] = self.reciprocal_rank(retrieved, relevant)

            results[qid] = q_results

        return results

    def compute_aggregate(
        self,
        results: Dict[str, Dict[str, float]]
    ) -> Dict[str, float]:
        """
        Compute aggregate metrics across all queries.

        Returns the mean of each metric over the queries that report it, or
        an empty dict when ``results`` is empty.
        """
        if not results:
            return {}

        collected = defaultdict(list)
        for per_query in results.values():
            for metric, value in per_query.items():
                collected[metric].append(value)

        return {metric: sum(values) / len(values)
                for metric, values in collected.items()}

    def format_results(
        self,
        results: Dict[str, Dict[str, float]],
        include_per_query: bool = False
    ) -> str:
        """
        Format results as a readable string.

        Args:
            results: {query_id: {metric: value}} as returned by evaluate_run.
            include_per_query: Also list each query's metrics after the
                aggregate section.
        """
        rule = "=" * 50
        lines = []

        # Aggregate
        agg = self.compute_aggregate(results)
        lines.extend([rule, "AGGREGATE METRICS", rule])
        for metric, value in sorted(agg.items()):
            lines.append(f" {metric:20s}: {value:.4f}")

        # Per-query (optional)
        if include_per_query:
            lines.extend(["", rule, "PER-QUERY METRICS", rule])
            for qid in sorted(results.keys()):
                lines.append(f"\nQuery {qid}:")
                for metric, value in sorted(results[qid].items()):
                    lines.append(f" {metric:20s}: {value:.4f}")

        return '\n'.join(lines)
271
+
272
+
273
def parse_qrels_file(filepath: str) -> Dict[str, Dict[str, int]]:
    """
    Parse a TREC qrels file.

    Format: query_id 0 doc_id relevance

    Fields are whitespace-separated. Lines with fewer than four fields are
    skipped; fields beyond the fourth are ignored. The file is read as UTF-8
    so the result does not depend on the platform's default encoding.

    Returns:
        {query_id: {doc_id: relevance}}

    Raises:
        ValueError: if a relevance field is not an integer.
    """
    qrels = defaultdict(dict)
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            if len(parts) < 4:
                continue
            # Second column (the iteration field) is not used.
            qid, _, docid, rel = parts[:4]
            qrels[qid][docid] = int(rel)
    return dict(qrels)
287
+
288
+
289
def parse_run_file(filepath: str) -> Dict[str, List[Tuple[str, float]]]:
    """
    Parse a TREC run file.

    Format: query_id Q0 doc_id rank score run_tag

    Fields are whitespace-separated. Lines with fewer than five fields are
    skipped. The rank column is ignored: each query's documents are ordered
    by descending score instead (ties keep file order). The file is read as
    UTF-8 so the result does not depend on the platform's default encoding.

    Returns:
        {query_id: [(doc_id, score), ...]} sorted by descending score.

    Raises:
        ValueError: if a score field is not a valid float.
    """
    run = defaultdict(list)
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            if len(parts) < 5:
                continue
            # Columns 2 (Q0) and 4 (rank) are not used.
            qid, _, docid, _, score = parts[:5]
            run[qid].append((docid, float(score)))

    # Sort by score descending
    for ranking in run.values():
        ranking.sort(key=lambda pair: pair[1], reverse=True)

    return dict(run)
308
+
309
+
310
+ # --- Testing ---
311
if __name__ == "__main__":
    # Ad-hoc self-test: prints each built-in metric for a small fixed ranking,
    # then evaluates a two-query run and prints the formatted report.
    banner = "=" * 60
    print(banner)
    print("SysCRED Evaluation Metrics - Tests")
    print(banner)

    evaluator = EvaluationMetrics()
    print(f"\nUsing pytrec_eval: {evaluator.use_pytrec}")

    # Test data: doc1..doc10 in rank order, four of them judged relevant.
    ranking = [f"doc{number}" for number in range(1, 11)]
    relevant_docs = {'doc1', 'doc3', 'doc5', 'doc8'}
    graded = {'doc1': 2, 'doc3': 1, 'doc5': 2, 'doc8': 1}

    print("\n--- Built-in Metrics Tests ---")
    metric_checks = [
        ("P@5", evaluator.precision_at_k(ranking, relevant_docs, 5)),
        ("P@10", evaluator.precision_at_k(ranking, relevant_docs, 10)),
        ("R@5", evaluator.recall_at_k(ranking, relevant_docs, 5)),
        ("R@10", evaluator.recall_at_k(ranking, relevant_docs, 10)),
        ("AP", evaluator.average_precision(ranking, relevant_docs)),
        ("NDCG@10", evaluator.ndcg_at_k(ranking, graded, 10)),
        ("RR", evaluator.reciprocal_rank(ranking, relevant_docs)),
    ]
    for label, value in metric_checks:
        print(f"{label}: {value:.4f}")

    # Test run evaluation
    print("\n--- Run Evaluation Test ---")
    run = {
        # Scores decrease with rank: 10 for the first document down to 1.
        'Q1': [(doc, 10 - position) for position, doc in enumerate(ranking)],
        'Q2': [('doc2', 10), ('doc1', 9), ('doc4', 8), ('doc3', 7)],
    }
    qrels = {
        'Q1': graded,
        'Q2': {'doc1': 1, 'doc3': 2},
    }

    report = evaluator.evaluate_run(run, qrels)
    print(evaluator.format_results(report))

    print("\n" + banner)
    print("Tests complete!")
    print(banner)