syscred 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syscred/__init__.py +41 -0
- syscred/api_clients.py +560 -0
- syscred/backend_app.py +363 -0
- syscred/config.py +275 -0
- syscred/database.py +54 -0
- syscred/debug_factcheck.py +43 -0
- syscred/debug_graph_json.py +58 -0
- syscred/debug_init.py +33 -0
- syscred/debug_local_server.py +25 -0
- syscred/diagnose_imports.py +37 -0
- syscred/eval_metrics.py +349 -0
- syscred/graph_rag.py +171 -0
- syscred/ir_engine.py +410 -0
- syscred/ontology_manager.py +509 -0
- syscred/run_benchmark.py +135 -0
- syscred/seo_analyzer.py +610 -0
- syscred/setup.py +65 -0
- syscred/test_graphrag.py +87 -0
- syscred/test_phase1.py +28 -0
- syscred/test_phase2.py +55 -0
- syscred/test_suite.py +64 -0
- syscred/verification_system.py +765 -0
- syscred-2.2.0.dist-info/METADATA +259 -0
- syscred-2.2.0.dist-info/RECORD +28 -0
- syscred-2.2.0.dist-info/WHEEL +5 -0
- syscred-2.2.0.dist-info/entry_points.txt +3 -0
- syscred-2.2.0.dist-info/licenses/LICENSE +21 -0
- syscred-2.2.0.dist-info/top_level.txt +1 -0
syscred/backend_app.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
SysCRED Backend API - Flask Server
|
|
4
|
+
===================================
|
|
5
|
+
REST API for the credibility verification system.
|
|
6
|
+
|
|
7
|
+
Endpoints:
|
|
8
|
+
- POST /api/verify - Verify URL or text credibility
|
|
9
|
+
- POST /api/seo - Get SEO analysis only
|
|
10
|
+
- GET /api/ontology/stats - Get ontology statistics
|
|
11
|
+
- GET /api/health - Health check
|
|
12
|
+
- GET /api/ontology/graph - Get ontology graph data (nodes/links) for D3.js
|
|
13
|
+
|
|
14
|
+
(c) Dominique S. Loyer - PhD Thesis Prototype
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import sys
|
|
18
|
+
import os
|
|
19
|
+
import traceback
|
|
20
|
+
from flask import Flask, request, jsonify, send_from_directory
|
|
21
|
+
from flask_cors import CORS
|
|
22
|
+
|
|
23
|
+
# Add syscred package to path
|
|
24
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
25
|
+
|
|
26
|
+
# Import SysCRED modules.
# When any of them cannot be imported the server still starts, with
# SYSCRED_AVAILABLE = False and the stand-in definitions below.
try:
    from syscred.verification_system import CredibilityVerificationSystem
    from syscred.seo_analyzer import SEOAnalyzer
    from syscred.ontology_manager import OntologyManager
    from syscred.config import config, Config
    from syscred.database import init_db, db, AnalysisResult
    SYSCRED_AVAILABLE = True
    print("[SysCRED Backend] Modules imported successfully")
except ImportError as e:
    SYSCRED_AVAILABLE = False
    print(f"[SysCRED Backend] Warning: Could not import modules: {e}")

    # Define dummy init_db to prevent crash
    def init_db(app):
        """No-op stand-in used when syscred.database could not be imported."""
        pass

    # Fallback config
    class Config:
        """Minimal stand-in exposing the settings this module reads from config."""
        HOST = "0.0.0.0"
        PORT = 5000
        DEBUG = True
        ONTOLOGY_BASE_PATH = None
        ONTOLOGY_DATA_PATH = None
        LOAD_ML_MODELS = True
        GOOGLE_FACT_CHECK_API_KEY = None

    config = Config()
|
|
52
|
+
|
|
53
|
+
# --- Initialize Flask App ---
app = Flask(__name__)
# NOTE(review): CORS is enabled with no options; confirm which origins
# should be allowed before deploying beyond local development.
CORS(app)  # Enable CORS for frontend

# Initialize Database
# A failure here is only logged, so the server still starts without a
# working database.
try:
    init_db(app)  # [NEW] Setup DB connection
except Exception as e:
    print(f"[SysCRED Backend] Warning: DB init failed: {e}")

# --- Initialize SysCRED System ---
# Both objects stay None until initialize_system() (or, for the analyzer,
# the /api/seo handler) creates them.
credibility_system = None
seo_analyzer = None
|
|
66
|
+
|
|
67
|
+
def initialize_system():
    """Create the module-level SEO analyzer and credibility system.

    Returns:
        True when both objects were created; False when the SysCRED modules
        are unavailable or when construction raised an exception (the
        traceback is printed in that case).
    """
    global credibility_system, seo_analyzer

    if not SYSCRED_AVAILABLE:
        print("[SysCRED Backend] Cannot initialize - modules not available")
        return False

    try:
        # The SEO analyzer is built first, before the full system.
        seo_analyzer = SEOAnalyzer()
        print("[SysCRED Backend] SEO Analyzer initialized")

        print("[SysCRED Backend] Initializing credibility system (loading ML models)...")
        base_setting = config.ONTOLOGY_BASE_PATH
        data_setting = config.ONTOLOGY_DATA_PATH
        base_path = str(base_setting) if base_setting else None
        data_path = str(data_setting) if data_setting else None

        # Only the base ontology path is checked against the filesystem;
        # the data path is forwarded as configured.
        if not (base_path and os.path.exists(base_path)):
            base_path = None

        credibility_system = CredibilityVerificationSystem(
            ontology_base_path=base_path,
            ontology_data_path=data_path,
            load_ml_models=config.LOAD_ML_MODELS,
            google_api_key=config.GOOGLE_FACT_CHECK_API_KEY
        )
    except Exception as e:
        print(f"[SysCRED Backend] Error initializing system: {e}")
        traceback.print_exc()
        return False

    print("[SysCRED Backend] System initialized successfully!")
    return True
|
|
97
|
+
|
|
98
|
+
# --- API Routes ---
|
|
99
|
+
|
|
100
|
+
@app.route('/')
def index():
    """Return index.html from the 'static' directory as the frontend page."""
    frontend_dir, entry_page = 'static', 'index.html'
    return send_from_directory(frontend_dir, entry_page)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report whether the SysCRED modules imported and which components exist."""
    system_ready = credibility_system is not None
    analyzer_ready = seo_analyzer is not None
    payload = {
        'status': 'healthy',
        'syscred_available': SYSCRED_AVAILABLE,
        'system_initialized': system_ready,
        'seo_analyzer_ready': analyzer_ready
    }
    return jsonify(payload)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _seo_summary(system, analyzer, url):
    """Return the SEO summary dict for url, or None when the page fetch failed."""
    web_content = system.api_clients.fetch_web_content(url)
    if not web_content.success:
        return None
    seo_result = analyzer.analyze_seo(
        url=url,
        title=web_content.title,
        meta_description=web_content.meta_description,
        text_content=web_content.text_content
    )
    return {
        'titleLength': seo_result.title_length,
        'titleHasKeywords': seo_result.title_has_keywords,
        'metaDescriptionLength': seo_result.meta_description_length,
        'wordCount': seo_result.word_count,
        'readabilityScore': round(seo_result.readability_score, 2),
        'seoScore': round(seo_result.seo_score, 2),
        'topKeywords': list(seo_result.keyword_density.keys())
    }


def _pagerank_summary(system, analyzer, url):
    """Return the PageRank estimation dict for url."""
    external_data = system.api_clients.fetch_external_data(url)
    pr_result = analyzer.estimate_pagerank(
        url=url,
        domain_age_days=external_data.domain_age_days,
        source_reputation=external_data.source_reputation
    )
    return {
        'estimatedPR': round(pr_result.estimated_pr, 3),
        'confidence': round(pr_result.confidence, 2),
        'factors': pr_result.factors,
        'explanation': pr_result.explanation_text
    }


def _persist_analysis(input_data, result):
    """Store the analysis result; failures are logged and never propagate."""
    try:
        # A missing, null or empty 'factors' list must not raise IndexError.
        factors = (result.get('detailsScore') or {}).get('factors') or [{}]
        new_analysis = AnalysisResult(
            url=input_data[:500],  # the url column is limited to 500 characters
            credibility_score=result.get('scoreCredibilite', 0.5),
            summary=result.get('resumeAnalyse', ''),
            source_reputation=factors[0].get('value')
        )
        db.session.add(new_analysis)
        db.session.commit()
        print(f"[SysCRED-DB] Result saved. ID: {new_analysis.id}")
    except Exception as e:
        print(f"[SysCRED-DB] Save failed: {e}")
        # Reset the session so a failed flush/commit does not leave it in an
        # unusable state for later work.
        try:
            db.session.rollback()
        except Exception as rollback_error:
            print(f"[SysCRED-DB] Rollback failed: {rollback_error}")


@app.route('/api/verify', methods=['POST'])
def verify_endpoint():
    """
    Main verification endpoint.

    Request JSON:
    {
        "input_data": "URL or text to verify",
        "include_seo": true/false (optional, default true),
        "include_pagerank": true/false (optional, default true)
    }

    Responses:
        200 - verification result, with 'seoAnalysis' / 'pageRankEstimation'
              added for URL inputs when requested
        400 - request is not a JSON object, 'input_data' is missing or not a
              non-empty string, or the verification result contains 'error'
        500 - unexpected exception during verification
        503 - the credibility system could not be initialized
    """
    # Lazy initialization
    if credibility_system is None and not initialize_system():
        return jsonify({
            'error': 'System initialization failed. Check server logs.'
        }), 503

    # Validate request
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    # silent=True returns None for a malformed body so it is reported in the
    # same JSON error shape as every other validation failure.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    raw_input = data.get('input_data', '')
    if not isinstance(raw_input, str) or not raw_input.strip():
        return jsonify({'error': "'input_data' is required"}), 400
    input_data = raw_input.strip()

    include_seo = data.get('include_seo', True)
    include_pagerank = data.get('include_pagerank', True)

    print(f"[SysCRED Backend] Verifying: {input_data[:100]}...")

    try:
        # Run main verification
        result = credibility_system.verify_information(input_data)

        if 'error' in result:
            return jsonify(result), 400

        # Add SEO analysis if requested and it's a URL. A failure here is
        # reported inside the result instead of failing the whole request.
        if include_seo and credibility_system.is_url(input_data):
            try:
                seo_summary = _seo_summary(credibility_system, seo_analyzer, input_data)
                if seo_summary is not None:
                    result['seoAnalysis'] = seo_summary
            except Exception as e:
                print(f"[SysCRED Backend] SEO analysis error: {e}")
                result['seoAnalysis'] = {'error': str(e)}

        # Add PageRank estimation if requested (same best-effort handling)
        if include_pagerank and credibility_system.is_url(input_data):
            try:
                result['pageRankEstimation'] = _pagerank_summary(
                    credibility_system, seo_analyzer, input_data
                )
            except Exception as e:
                print(f"[SysCRED Backend] PageRank estimation error: {e}")
                result['pageRankEstimation'] = {'error': str(e)}

        print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")

        # [NEW] Persist to Database
        _persist_analysis(input_data, result)

        return jsonify(result), 200

    except Exception as e:
        print(f"[SysCRED Backend] Error: {e}")
        traceback.print_exc()
        return jsonify({'error': f'Internal error: {str(e)}'}), 500
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@app.route('/api/seo', methods=['POST'])
def seo_endpoint():
    """
    SEO-only analysis endpoint (faster, no ML models needed).

    Request JSON:
    {
        "url": "URL to analyze"
    }

    Responses:
        200 - SEO, IR-metric, PageRank and domain data for the URL
        400 - request is not a JSON object, the URL is missing or does not
              start with http:// or https://, or the page could not be fetched
        500 - unexpected exception during analysis
        503 - the SysCRED modules could not be imported
    """
    global seo_analyzer

    # SEOAnalyzer is undefined when the module imports failed; answer with a
    # clear 503 instead of an unhandled NameError.
    if not SYSCRED_AVAILABLE:
        return jsonify({
            'error': 'SysCRED modules are not available. Check server logs.'
        }), 503

    if seo_analyzer is None:
        seo_analyzer = SEOAnalyzer()

    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    # silent=True returns None for a malformed body instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    raw_url = data.get('url', '')
    url = raw_url.strip() if isinstance(raw_url, str) else ''

    # Require a real HTTP(S) scheme; a bare 'http' prefix would also accept
    # strings such as 'httpx://...'.
    # NOTE(review): the URL is fetched server-side; consider restricting
    # internal/private hosts if this endpoint is exposed publicly.
    if not url.startswith(('http://', 'https://')):
        return jsonify({'error': 'Valid URL is required'}), 400

    try:
        # Fetch content
        from syscred.api_clients import ExternalAPIClients
        api_client = ExternalAPIClients()

        web_content = api_client.fetch_web_content(url)
        if not web_content.success:
            return jsonify({'error': f'Failed to fetch URL: {web_content.error}'}), 400

        # SEO analysis
        seo_result = seo_analyzer.analyze_seo(
            url=url,
            title=web_content.title,
            meta_description=web_content.meta_description,
            text_content=web_content.text_content
        )

        # IR metrics
        ir_metrics = seo_analyzer.get_ir_metrics(web_content.text_content)

        # PageRank estimation
        external_data = api_client.fetch_external_data(url)
        pr_result = seo_analyzer.estimate_pagerank(
            url=url,
            domain_age_days=external_data.domain_age_days,
            source_reputation=external_data.source_reputation
        )

        return jsonify({
            'url': url,
            'title': web_content.title,
            'seo': {
                'titleLength': seo_result.title_length,
                'metaDescriptionLength': seo_result.meta_description_length,
                'wordCount': seo_result.word_count,
                'readabilityScore': round(seo_result.readability_score, 2),
                'seoScore': round(seo_result.seo_score, 2),
                'keywordDensity': seo_result.keyword_density
            },
            'irMetrics': {
                'documentLength': ir_metrics.document_length,
                'topTerms': ir_metrics.top_terms[:5],
                'avgTermFrequency': round(ir_metrics.avg_term_frequency, 4)
            },
            'pageRank': {
                'estimated': round(pr_result.estimated_pr, 3),
                'confidence': round(pr_result.confidence, 2),
                'factors': pr_result.factors
            },
            'domain': {
                'reputation': external_data.source_reputation,
                'ageDays': external_data.domain_age_days
            }
        }), 200

    except Exception as e:
        print(f"[SysCRED Backend] SEO endpoint error: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
@app.route('/api/ontology/graph', methods=['GET'])
def ontology_graph():
    """Return the ontology graph as JSON for the D3.js frontend view."""
    system = credibility_system
    if not system or not system.ontology_manager:
        # Return empty graph rather than 400 to avoid breaking frontend
        return jsonify({'nodes': [], 'links': []}), 200

    return jsonify(system.ontology_manager.get_graph_json()), 200
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
@app.route('/api/ontology/stats', methods=['GET'])
def ontology_stats():
    """Return ontology statistics, or zeroed counts when no ontology is loaded."""
    system = credibility_system
    if not system or not system.ontology_manager:
        # The placeholder is still sent with status 200.
        placeholder = {
            'error': 'Ontology not loaded',
            'base_triples': 0,
            'data_triples': 0
        }
        return jsonify(placeholder), 200

    return jsonify(system.ontology_manager.get_statistics()), 200
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
# --- Main ---
|
|
343
|
+
if __name__ == '__main__':
    print("=" * 60)
    print("SysCRED Backend API Server")
    print("(c) Dominique S. Loyer - PhD Thesis Prototype")
    print("=" * 60)
    print()

    # Initialize system at startup
    print("[SysCRED Backend] Pre-initializing system...")
    initialize_system()

    print()
    print("[SysCRED Backend] Starting Flask server...")
    print("[SysCRED Backend] Endpoints:")
    print(" - POST /api/verify - Full credibility verification")
    print(" - POST /api/seo - SEO analysis only (faster)")
    print(" - GET /api/ontology/stats - Ontology statistics")
    print(" - GET /api/ontology/graph - Ontology graph data")
    print(" - GET /api/health - Health check")
    print()

    # Honor the documented SYSCRED_PORT variable; 5001 remains the default
    # so existing setups keep working.
    port = int(os.environ.get("SYSCRED_PORT", "5001"))

    # Debug mode enables the interactive Werkzeug debugger, which can execute
    # arbitrary code, so it follows the configuration instead of being
    # hard-coded on a server bound to all interfaces.
    # NOTE(review): config.DEBUG still defaults to true; set SYSCRED_DEBUG=false
    # for any non-local deployment.
    app.run(host=config.HOST, port=port, debug=config.DEBUG)
|
syscred/config.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
SysCRED Configuration
|
|
4
|
+
=====================
|
|
5
|
+
Configuration centralisée pour le système de vérification de crédibilité.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from syscred.config import Config
|
|
9
|
+
|
|
10
|
+
# Accéder aux paramètres
|
|
11
|
+
config = Config()
|
|
12
|
+
port = config.PORT
|
|
13
|
+
|
|
14
|
+
# Ou avec variables d'environnement
|
|
15
|
+
# export SYSCRED_GOOGLE_API_KEY=your_key
|
|
16
|
+
# export SYSCRED_PORT=8080
|
|
17
|
+
|
|
18
|
+
(c) Dominique S. Loyer - PhD Thesis Prototype
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Dict, Optional
|
|
24
|
+
from dotenv import load_dotenv
|
|
25
|
+
|
|
26
|
+
# Load variables from the project-root .env file.
# Path: .../systemFactChecking/02_Code/syscred/config.py
# Root .env is at .../systemFactChecking/.env (3 levels up)
current_path = Path(__file__).resolve()
env_path = current_path.parent.parent.parent / '.env'

if not env_path.exists():
    print(f"[Config] WARNING: .env not found at {env_path}")
    # Try alternate location: a .env in the parent of the current working directory
    env_path = Path.cwd().parent / '.env'

# NOTE(review): the alternate path is not checked for existence, so the
# "Loading .env from" message is printed even when no file is there.
load_dotenv(dotenv_path=env_path)
print(f"[Config] Loading .env from {env_path}")
# Only reports whether the key is set; the key itself is never printed.
print(f"[Config] SYSCRED_GOOGLE_API_KEY loaded: {'Yes' if os.environ.get('SYSCRED_GOOGLE_API_KEY') else 'No'}")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Config:
    """
    Central configuration for SysCRED.

    Settings read through os.getenv can be overridden with environment
    variables prefixed with SYSCRED_ (DATABASE_URL is the one unprefixed
    name). They are read once, when this class body executes.
    """

    # === Paths ===
    BASE_DIR = Path(__file__).parent.parent
    ONTOLOGY_BASE_PATH = BASE_DIR / "sysCRED_onto26avrtil.ttl"
    ONTOLOGY_DATA_PATH = BASE_DIR / "ontology" / "sysCRED_data.ttl"

    # === Flask server ===
    HOST = os.getenv("SYSCRED_HOST", "0.0.0.0")
    PORT = int(os.getenv("SYSCRED_PORT", "5000"))
    DEBUG = os.getenv("SYSCRED_DEBUG", "true").lower() == "true"

    # === API Keys ===
    GOOGLE_FACT_CHECK_API_KEY = os.getenv("SYSCRED_GOOGLE_API_KEY")
    DATABASE_URL = os.getenv("DATABASE_URL")  # [NEW] Read DB URL from env

    # === ML models ===
    LOAD_ML_MODELS = os.getenv("SYSCRED_LOAD_ML", "true").lower() == "true"
    SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
    NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"

    # === Timeouts ===
    WEB_FETCH_TIMEOUT = int(os.getenv("SYSCRED_TIMEOUT", "10"))

    # === Score weights ===
    SCORE_WEIGHTS = {
        'source_reputation': 0.25,
        'domain_age': 0.10,
        'sentiment_neutrality': 0.15,
        'entity_presence': 0.15,
        'coherence': 0.15,
        'fact_check': 0.20
    }

    # === Credibility thresholds ===
    CREDIBILITY_THRESHOLDS = {
        'HIGH': 0.7,
        'MEDIUM': 0.4,
        'LOW': 0.0
    }

    # === Source reputation table ===
    # Can be extended at runtime with load_external_reputations().
    SOURCE_REPUTATIONS: Dict[str, str] = {
        # === HIGH CREDIBILITY ===
        # International media
        'lemonde.fr': 'High',
        'nytimes.com': 'High',
        'reuters.com': 'High',
        'bbc.com': 'High',
        'bbc.co.uk': 'High',
        'theguardian.com': 'High',
        'apnews.com': 'High',
        'afp.com': 'High',
        'france24.com': 'High',

        # Canadian media
        'cbc.ca': 'High',
        'radio-canada.ca': 'High',
        'lapresse.ca': 'High',
        'ledevoir.com': 'High',
        'theglobeandmail.com': 'High',

        # Academic sources
        'nature.com': 'High',
        'sciencedirect.com': 'High',
        'scholar.google.com': 'High',
        'pubmed.ncbi.nlm.nih.gov': 'High',
        'jstor.org': 'High',
        'springer.com': 'High',
        'ieee.org': 'High',
        'acm.org': 'High',
        'arxiv.org': 'High',

        # Fact-checkers
        'factcheck.org': 'High',
        'snopes.com': 'High',
        'politifact.com': 'High',
        'fullfact.org': 'High',
        'checknews.fr': 'High',

        # Institutions
        'who.int': 'High',
        'un.org': 'High',
        'europa.eu': 'High',
        'canada.ca': 'High',
        'gouv.fr': 'High',
        'gouv.qc.ca': 'High',

        # === MEDIUM CREDIBILITY ===
        'wikipedia.org': 'Medium',
        'medium.com': 'Medium',
        'huffpost.com': 'Medium',
        'buzzfeed.com': 'Medium',
        'vice.com': 'Medium',
        'slate.com': 'Medium',
        'theconversation.com': 'Medium',

        # === LOW CREDIBILITY ===
        'infowars.com': 'Low',
        'naturalnews.com': 'Low',
        'breitbart.com': 'Low',
        'dailystormer.su': 'Low',
        'beforeitsnews.com': 'Low',
        'worldtruth.tv': 'Low',
        'yournewswire.com': 'Low',
    }

    # === Misinformation patterns ===
    MISINFORMATION_KEYWORDS = [
        'conspiracy', 'hoax', 'fake news', 'miracle cure',
        "they don't want you to know", 'mainstream media lies',
        'deep state', 'plandemic', 'wake up sheeple',
        'big pharma cover-up', 'government conspiracy',
        'censored truth', 'what they hide'
    ]

    @classmethod
    def load_external_reputations(cls, filepath: str) -> None:
        """
        Merge additional source reputations from a JSON file.

        Load failures (missing file, invalid JSON, wrong structure) are
        printed and otherwise ignored.

        Args:
            filepath: Path to a JSON file of the form
                {"domain.com": "High", "other.com": "Low"}
        """
        import json
        try:
            # JSON files are read as UTF-8 regardless of the platform's
            # default encoding.
            with open(filepath, 'r', encoding='utf-8') as f:
                external_reps = json.load(f)
            cls.SOURCE_REPUTATIONS.update(external_reps)
            print(f"[Config] Loaded {len(external_reps)} external reputations")
        except (OSError, ValueError, TypeError) as e:
            print(f"[Config] Could not load external reputations: {e}")

    @classmethod
    def update_weights(cls, new_weights: Dict[str, float]) -> None:
        """
        Update the score weights and renormalise them so they sum to 1.

        The merged weights are built in a new dict, so the dict shared with
        parent/sibling configuration classes is never mutated and a rejected
        update leaves the current weights untouched.

        Args:
            new_weights: Weights to add or override.

        Raises:
            ValueError: If the merged weights do not sum to a positive value.
        """
        merged = {**cls.SCORE_WEIGHTS, **new_weights}
        total = sum(merged.values())
        if total <= 0:
            raise ValueError(
                f"Score weights must sum to a positive value, got {total}"
            )
        cls.SCORE_WEIGHTS = {k: v / total for k, v in merged.items()}
        print(f"[Config] Updated weights: {cls.SCORE_WEIGHTS}")

    @classmethod
    def to_dict(cls) -> Dict:
        """Export the current configuration as a dictionary (no secrets included)."""
        return {
            'host': cls.HOST,
            'port': cls.PORT,
            'debug': cls.DEBUG,
            # Truthiness, so an empty SYSCRED_GOOGLE_API_KEY counts as not configured.
            'google_api_configured': bool(cls.GOOGLE_FACT_CHECK_API_KEY),
            'ml_models_enabled': cls.LOAD_ML_MODELS,
            # Copy so callers cannot modify the live weights through the export.
            'score_weights': dict(cls.SCORE_WEIGHTS),
            'known_sources_count': len(cls.SOURCE_REPUTATIONS),
            'ontology_base': str(cls.ONTOLOGY_BASE_PATH),
            'ontology_data': str(cls.ONTOLOGY_DATA_PATH),
        }

    @classmethod
    def print_config(cls) -> None:
        """Print the current configuration."""
        print("=" * 50)
        print("SysCRED Configuration")
        print("=" * 50)
        for key, value in cls.to_dict().items():
            print(f" {key}: {value}")
        print("=" * 50)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# === Configuration par environnement ===
|
|
226
|
+
|
|
227
|
+
class DevelopmentConfig(Config):
    """Configuration for local development: debug on, ML models loaded."""
    DEBUG = True
    LOAD_ML_MODELS = True
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class ProductionConfig(Config):
    """Configuration for production: debug off, ML models loaded."""
    DEBUG = False
    LOAD_ML_MODELS = True
    HOST = "0.0.0.0"  # fixed value; overrides the SYSCRED_HOST setting read by Config
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class TestingConfig(Config):
    """Configuration for tests."""
    DEBUG = True
    LOAD_ML_MODELS = False  # Faster for tests
    WEB_FETCH_TIMEOUT = 5
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# Sélection automatique de la configuration
|
|
248
|
+
def get_config() -> Config:
    """
    Return the configuration class matching the current environment.

    The environment name is read from SYSCRED_ENV (development, production
    or testing; case-insensitive). A missing or unrecognised value selects
    DevelopmentConfig. The returned object is the class itself, not an
    instance of it.
    """
    selected = os.getenv("SYSCRED_ENV", "development").lower()

    if selected == 'production':
        return ProductionConfig
    if selected == 'testing':
        return TestingConfig
    # 'development' and any unknown name
    return DevelopmentConfig
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# Default configuration: the config class selected by get_config() (a class, not an instance)
|
|
266
|
+
config = get_config()
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
if __name__ == "__main__":
    # Manual check: print the active configuration
    config.print_config()

    print("\n=== Source Reputations Sample ===")
    # Only the first ten entries are shown
    for domain, rep in list(config.SOURCE_REPUTATIONS.items())[:10]:
        print(f" {domain}: {rep}")
|
syscred/database.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Database Manager for SysCRED
|
|
4
|
+
===========================
|
|
5
|
+
Handles connection to Supabase (PostgreSQL) and defines models.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from flask_sqlalchemy import SQLAlchemy
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
|
|
12
|
+
# Initialize SQLAlchemy
|
|
13
|
+
db = SQLAlchemy()
|
|
14
|
+
|
|
15
|
+
class AnalysisResult(db.Model):
    """Stores the result of a credibility analysis."""
    __tablename__ = 'analysis_results'

    id = db.Column(db.Integer, primary_key=True)
    url = db.Column(db.String(500), nullable=False)
    credibility_score = db.Column(db.Float, nullable=False)
    summary = db.Column(db.Text)
    # NOTE(review): datetime.utcnow yields naive timestamps and is deprecated
    # since Python 3.12; switching to an aware default changes stored values,
    # so it is left as is here.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Metadata stored as JSON if supported, or simplified columns
    source_reputation = db.Column(db.String(50))
    fact_check_count = db.Column(db.Integer, default=0)

    def to_dict(self):
        """Return a JSON-serialisable dict of the main fields.

        'created_at' is an ISO-8601 string, or None when the column default
        has not been applied yet (instance not flushed to the database).
        """
        created = self.created_at
        return {
            'id': self.id,
            'url': self.url,
            'score': self.credibility_score,
            'summary': self.summary,
            'created_at': created.isoformat() if created is not None else None,
            'source_reputation': self.source_reputation
        }
|
|
38
|
+
|
|
39
|
+
def init_db(app):
    """Bind the shared SQLAlchemy object to app and create missing tables.

    The connection string is read from the DATABASE_URL environment variable;
    a leading "postgres://" is rewritten to "postgresql://". Without
    DATABASE_URL a local SQLite file (syscred.db) is used.
    """
    legacy_scheme = "postgres://"
    database_uri = os.environ.get('DATABASE_URL')
    if database_uri and database_uri.startswith(legacy_scheme):
        database_uri = "postgresql://" + database_uri[len(legacy_scheme):]
    if not database_uri:
        # Fallback to sqlite for local dev
        database_uri = 'sqlite:///syscred.db'

    app.config.update(
        SQLALCHEMY_DATABASE_URI=database_uri,
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
    )

    db.init_app(app)

    # Create tables if they don't exist (basic migration)
    with app.app_context():
        db.create_all()
        print("[SysCRED-DB] Database tables initialized.")
|