vnai 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py CHANGED
@@ -1,767 +1,223 @@
1
- # vnai/scope/profile.py
2
- # System environment detection
3
-
4
- import os
5
- import sys
6
- import platform
7
- import uuid
8
- import hashlib
9
- import psutil
10
- import threading
11
- import time
12
- import importlib.metadata
1
+ _W='type_confidence'
2
+ _V='detected_type'
3
+ _U='commercial_app'
4
+ _T='version'
5
+ _S='django'
6
+ _R='fastapi'
7
+ _Q='streamlit'
8
+ _P='indicators'
9
+ _O='commercial_indicators'
10
+ _N='likely_commercial'
11
+ _M='KAGGLE_KERNEL_RUN_TYPE'
12
+ _L='machine_id'
13
+ _K='domain'
14
+ _J='.git'
15
+ _I='backtesting'
16
+ _H='commercial_probability'
17
+ _G='timestamp'
18
+ _F='business_hours_usage'
19
+ _E='google.colab'
20
+ _D='unknown'
21
+ _C=False
22
+ _B=None
23
+ _A=True
24
+ import os,sys,platform,uuid,hashlib,psutil,threading,time,importlib.metadata
13
25
  from datetime import datetime
14
26
  import subprocess
15
27
  from pathlib import Path
16
-
17
28
  class Inspector:
18
- """Inspects execution environment"""
19
-
20
- _instance = None
21
- _lock = None
22
-
23
- def __new__(cls):
24
- import threading
25
- if cls._lock is None:
26
- cls._lock = threading.Lock()
27
-
28
- with cls._lock:
29
- if cls._instance is None:
30
- cls._instance = super(Inspector, cls).__new__(cls)
31
- cls._instance._initialize()
32
- return cls._instance
33
-
34
- def _initialize(self):
35
- """Initialize inspector"""
36
- self.cache = {}
37
- self.cache_ttl = 3600 # 1 hour cache validity
38
- self.last_examination = 0
39
- self.machine_id = None
40
- self._colab_auth_triggered = False
41
-
42
- # Paths
43
- self.home_dir = Path.home()
44
- self.project_dir = self.home_dir / ".vnstock"
45
- self.project_dir.mkdir(exist_ok=True)
46
- self.id_dir = self.project_dir / 'id'
47
- self.id_dir.mkdir(exist_ok=True)
48
- self.machine_id_path = self.id_dir / "machine_id.txt"
49
-
50
- # Perform initial examination
51
- self.examine()
52
-
53
- def examine(self, force_refresh=False):
54
- """Examine current execution context"""
55
- current_time = time.time()
56
-
57
- # Return cached data if it's fresh enough and we're not forcing a refresh
58
- if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
59
- return self.cache
60
-
61
- # Start with basic information
62
- info = {
63
- "timestamp": datetime.now().isoformat(),
64
- "python_version": platform.python_version(),
65
- "os_name": platform.system(),
66
- "platform": platform.platform()
67
- }
68
-
69
- # Machine identifier
70
- info["machine_id"] = self.fingerprint()
71
-
72
- # Environment detection
73
- try:
74
- # Check for Jupyter/IPython
75
- import importlib.util
76
- ipython_spec = importlib.util.find_spec("IPython")
77
-
78
- if ipython_spec:
79
- from IPython import get_ipython
80
- ipython = get_ipython()
81
- if ipython is not None:
82
- info["environment"] = "jupyter"
83
- # Check for hosted notebooks
84
- if 'google.colab' in sys.modules:
85
- info["hosting_service"] = "colab"
86
- elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
87
- info["hosting_service"] = "kaggle"
88
- else:
89
- info["hosting_service"] = "local_jupyter"
90
- elif sys.stdout.isatty():
91
- info["environment"] = "terminal"
92
- else:
93
- info["environment"] = "script"
94
- elif sys.stdout.isatty():
95
- info["environment"] = "terminal"
96
- else:
97
- info["environment"] = "script"
98
- except:
99
- info["environment"] = "unknown"
100
-
101
- # System resources
102
- try:
103
- info["cpu_count"] = os.cpu_count()
104
- info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
105
- except:
106
- pass
107
-
108
- # Check if in Google Colab
109
- is_colab = 'google.colab' in sys.modules
110
- if is_colab:
111
- info["is_colab"] = True
112
- # Setup delayed authentication if not already triggered
113
- self.detect_colab_with_delayed_auth()
114
-
115
- # Enhanced context information
116
- try:
117
- # Commercial usage detection
118
- info["commercial_usage"] = self.enhanced_commercial_detection()
119
-
120
- # Project context
121
- info["project_context"] = self.analyze_project_structure()
122
-
123
- # Git info
124
- info["git_info"] = self.analyze_git_info()
125
-
126
- # Working hours pattern
127
- info["usage_pattern"] = self.detect_usage_pattern()
128
-
129
- # Dependency analysis
130
- info["dependencies"] = self.analyze_dependencies()
131
- except Exception as e:
132
- # Don't let enhanced detection failure stop basic functionality
133
- info["detection_error"] = str(e)
134
-
135
- # Update cache
136
- self.cache = info
137
- self.last_examination = current_time
138
-
139
- return info
140
-
141
- def fingerprint(self):
142
- """Generate unique environment fingerprint"""
143
- # Always return cached machine_id if it exists
144
- if self.machine_id:
145
- return self.machine_id
146
-
147
- # Try to load from file first
148
- if self.machine_id_path.exists():
149
- try:
150
- with open(self.machine_id_path, "r") as f:
151
- self.machine_id = f.read().strip()
152
- return self.machine_id
153
- except:
154
- pass
155
-
156
- # Check for Colab and setup delayed authentication
157
- is_colab = self.detect_colab_with_delayed_auth()
158
-
159
- # Generate a new machine ID only if necessary
160
- try:
161
- # Use consistent system information
162
- system_info = platform.node() + platform.platform() + platform.machine()
163
- self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
164
- except:
165
- # Fallback to UUID but only as last resort
166
- self.machine_id = str(uuid.uuid4())
167
-
168
- # Save to ensure consistency across calls
169
- try:
170
- with open(self.machine_id_path, "w") as f:
171
- f.write(self.machine_id)
172
- except:
173
- pass
174
-
175
- return self.machine_id
176
-
177
- def detect_hosting(self):
178
- """Detect if running in a hosted environment"""
179
- # Check common environment variables for hosted environments
180
- hosting_markers = {
181
- "COLAB_GPU": "Google Colab",
182
- "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
183
- "BINDER_SERVICE_HOST": "Binder",
184
- "CODESPACE_NAME": "GitHub Codespaces",
185
- "STREAMLIT_SERVER_HEADLESS": "Streamlit Cloud",
186
- "CLOUD_SHELL": "Cloud Shell"
187
- }
188
-
189
- for env_var, host_name in hosting_markers.items():
190
- if env_var in os.environ:
191
- return host_name
192
-
193
- # Check for Google Colab module
194
- if 'google.colab' in sys.modules:
195
- return "Google Colab"
196
-
197
- return "local"
198
-
199
- def detect_commercial_usage(self):
200
- """Detect if running in commercial environment"""
201
- commercial_indicators = {
202
- "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
203
- "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
204
- "env_vars": ["COMPANY", "BUSINESS", "ENTERPRISE", "CORPORATE"],
205
- "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
206
- }
207
-
208
- # Check environment variables for commercial domains
209
- env_values = " ".join(os.environ.values()).lower()
210
- domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
211
-
212
- # Check if commercial-related environment variables exist
213
- env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
214
-
215
- # Check current directory for commercial indicators
216
- current_dir = os.getcwd().lower()
217
- dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
218
-
219
- # Check files in current directory for commercial patterns
220
- try:
221
- files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
222
- file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
223
- except:
224
- file_match = False
225
-
226
- # Calculate probability
227
- indicators = [domain_match, env_var_match, dir_match, file_match]
228
- commercial_probability = sum(indicators) / len(indicators)
229
-
230
- return {
231
- "likely_commercial": commercial_probability > 0.3,
232
- "commercial_probability": commercial_probability,
233
- "commercial_indicators": {
234
- "domain_match": domain_match,
235
- "env_var_match": env_var_match,
236
- "dir_match": dir_match,
237
- "file_match": file_match
238
- }
239
- }
240
-
241
- def scan_packages(self):
242
- """Scan for installed packages by category"""
243
- package_groups = {
244
- "vnstock_family": [
245
- "vnstock",
246
- "vnstock3",
247
- "vnstock_ezchart",
248
- "vnstock_data_pro", # Fixed missing comma here
249
- "vnstock_market_data_pipeline",
250
- "vnstock_ta",
251
- "vnii",
252
- "vnai"
253
- ],
254
- "analytics": [
255
- "openbb",
256
- "pandas_ta"
257
- ],
258
- "static_charts": [
259
- "matplotlib",
260
- "seaborn",
261
- "altair"
262
- ],
263
- "dashboard": [
264
- "streamlit",
265
- "voila",
266
- "panel",
267
- "shiny",
268
- "dash"
269
- ],
270
- "interactive_charts": [
271
- "mplfinance",
272
- "plotly",
273
- "plotline",
274
- "bokeh",
275
- "pyecharts",
276
- "highcharts-core",
277
- "highcharts-stock",
278
- "mplchart"
279
- ],
280
- "datafeed": [
281
- "yfinance",
282
- "alpha_vantage",
283
- "pandas-datareader",
284
- "investpy"
285
- ],
286
- "official_api": [
287
- "ssi-fc-data",
288
- "ssi-fctrading"
289
- ],
290
- "risk_return": [
291
- "pyfolio",
292
- "empyrical",
293
- "quantstats",
294
- "financetoolkit"
295
- ],
296
- "machine_learning": [
297
- "scipy",
298
- "sklearn",
299
- "statsmodels",
300
- "pytorch",
301
- "tensorflow",
302
- "keras",
303
- "xgboost"
304
- ],
305
- "indicators": [
306
- "stochastic",
307
- "talib",
308
- "tqdm",
309
- "finta",
310
- "financetoolkit",
311
- "tulipindicators"
312
- ],
313
- "backtesting": [
314
- "vectorbt",
315
- "backtesting",
316
- "bt",
317
- "zipline",
318
- "pyalgotrade",
319
- "backtrader",
320
- "pybacktest",
321
- "fastquant",
322
- "lean",
323
- "ta",
324
- "finmarketpy",
325
- "qstrader"
326
- ],
327
- "server": [
328
- "fastapi",
329
- "flask",
330
- "uvicorn",
331
- "gunicorn"
332
- ],
333
- "framework": [
334
- "lightgbm",
335
- "catboost",
336
- "django"
337
- ]
338
- }
339
-
340
- installed = {}
341
-
342
- for category, packages in package_groups.items():
343
- installed[category] = []
344
- for pkg in packages:
345
- try:
346
- version = importlib.metadata.version(pkg)
347
- installed[category].append({"name": pkg, "version": version})
348
- except:
349
- pass
350
-
351
- return installed
352
-
353
- def setup_vnstock_environment(self):
354
- """Set up environment for vnstock library"""
355
- # Create environment.json file
356
- env_file = self.id_dir / "environment.json"
357
- env_data = {
358
- "accepted_agreement": True,
359
- "timestamp": datetime.now().isoformat(),
360
- "machine_id": self.fingerprint()
361
- }
362
-
363
- try:
364
- with open(env_file, "w") as f:
365
- import json
366
- json.dump(env_data, f)
367
- return True
368
- except Exception as e:
369
- print(f"Failed to set up vnstock environment: {e}")
370
- return False
371
-
372
- # Update detect_colab_with_delayed_auth method in Inspector class
373
- def detect_colab_with_delayed_auth(self, immediate=False):
374
- """Detect if running in Google Colab and setup authentication"""
375
- # Check if we're in Colab without mounting drive yet
376
- is_colab = 'google.colab' in sys.modules
377
-
378
- if is_colab and not self._colab_auth_triggered:
379
- if immediate:
380
- # Immediate authentication
381
- self._colab_auth_triggered = True
382
- user_id = self.get_or_create_user_id()
383
- if user_id and user_id != self.machine_id:
384
- self.machine_id = user_id
385
- try:
386
- with open(self.machine_id_path, "w") as f:
387
- f.write(user_id)
388
- except:
389
- pass
390
- else:
391
- # Start a delayed thread to trigger authentication after user is already using the package
392
- def delayed_auth():
393
- # Wait for some time (e.g., 5 minutes) before attempting auth
394
- time.sleep(300)
395
- # Try to get authenticated user ID
396
- user_id = self.get_or_create_user_id()
397
- # Update machine ID with the authenticated one
398
- if user_id and user_id != self.machine_id:
399
- self.machine_id = user_id
400
- # Save to the machine_id_path
401
- try:
402
- with open(self.machine_id_path, "w") as f:
403
- f.write(user_id)
404
- except:
405
- pass
406
-
407
- # Start the delayed authentication thread
408
- thread = threading.Thread(target=delayed_auth, daemon=True)
409
- thread.start()
410
-
411
- return is_colab
412
-
413
- def get_or_create_user_id(self):
414
- """Get existing user ID from Google Drive or create new one"""
415
- if self._colab_auth_triggered:
416
- return self.machine_id # Avoid triggering multiple times
417
-
418
- try:
419
- from google.colab import drive
420
- print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
421
- print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
422
-
423
- # Mark that we've triggered the auth
424
- self._colab_auth_triggered = True
425
-
426
- # Mount Google Drive
427
- drive.mount('/content/drive')
428
- id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
429
-
430
- if os.path.exists(id_path):
431
- with open(id_path, 'r') as f:
432
- return f.read().strip()
433
- else:
434
- user_id = str(uuid.uuid4())
435
- os.makedirs(os.path.dirname(id_path), exist_ok=True)
436
- with open(id_path, 'w') as f:
437
- f.write(user_id)
438
- return user_id
439
- except Exception as e:
440
- # Silently fail and return the existing machine ID
441
- return self.machine_id
442
-
443
- # Enhanced methods for project context collection
444
-
445
- def analyze_project_structure(self):
446
- """Analyze project directory structure for context"""
447
- current_dir = os.getcwd()
448
- project_indicators = {
449
- "commercial_app": ["app", "services", "products", "customers", "billing"],
450
- "financial_tool": ["portfolio", "backtesting", "trading", "strategy"],
451
- "data_science": ["models", "notebooks", "datasets", "visualization"],
452
- "educational": ["examples", "lectures", "assignments", "slides"]
453
- }
454
-
455
- # Look for key directories up to 2 levels deep (limited for privacy)
456
- project_type = {}
457
- for category, markers in project_indicators.items():
458
- match_count = 0
459
- for marker in markers:
460
- if os.path.exists(os.path.join(current_dir, marker)):
461
- match_count += 1
462
- if len(markers) > 0:
463
- project_type[category] = match_count / len(markers)
464
-
465
- # Scan for direct child files and directories (limited depth for privacy)
466
- try:
467
- root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
468
- root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
469
-
470
- # Detect project type
471
- file_markers = {
472
- "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
473
- "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
474
- "web_app": ["app.py", "wsgi.py", "manage.py", "server.py"],
475
- "finance_app": ["portfolio.py", "trading.py", "backtest.py"],
476
- }
477
-
478
- file_project_type = "unknown"
479
- for ptype, markers in file_markers.items():
480
- if any(marker in root_files for marker in markers):
481
- file_project_type = ptype
482
- break
483
-
484
- # Scan for specific frameworks
485
- frameworks = []
486
- framework_markers = {
487
- "django": ["manage.py", "settings.py"],
488
- "flask": ["app.py", "wsgi.py"],
489
- "streamlit": ["streamlit_app.py", "app.py"],
490
- "fastapi": ["main.py", "app.py"],
491
- }
492
-
493
- for framework, markers in framework_markers.items():
494
- if any(marker in root_files for marker in markers):
495
- frameworks.append(framework)
496
-
497
- except Exception as e:
498
- root_files = []
499
- root_dirs = []
500
- file_project_type = "unknown"
501
- frameworks = []
502
-
503
- return {
504
- "project_dir": current_dir,
505
- "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else "unknown",
506
- "file_type": file_project_type,
507
- "is_git_repo": ".git" in (root_dirs if 'root_dirs' in locals() else []),
508
- "frameworks": frameworks,
509
- "file_count": len(root_files) if 'root_files' in locals() else 0,
510
- "directory_count": len(root_dirs) if 'root_dirs' in locals() else 0,
511
- "type_confidence": project_type
512
- }
513
-
514
- def analyze_git_info(self):
515
- """Extract non-sensitive git repository information"""
516
- try:
517
- # Check if it's a git repository
518
- result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
519
- capture_output=True, text=True)
520
-
521
- if result.returncode != 0:
522
- return {"has_git": False}
523
-
524
- # Get repository root path - ADD THIS CODE
525
- repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
526
- capture_output=True, text=True)
527
- repo_path = repo_root.stdout.strip() if repo_root.stdout else None
528
-
529
- # Extract repository name from path - ADD THIS CODE
530
- repo_name = os.path.basename(repo_path) if repo_path else None
531
-
532
- # Check for license file - ADD THIS CODE
533
- has_license = False
534
- license_type = "unknown"
535
- if repo_path:
536
- license_files = [
537
- os.path.join(repo_path, "LICENSE"),
538
- os.path.join(repo_path, "LICENSE.txt"),
539
- os.path.join(repo_path, "LICENSE.md")
540
- ]
541
- for license_file in license_files:
542
- if os.path.exists(license_file):
543
- has_license = True
544
- # Try to determine license type by scanning content
545
- try:
546
- with open(license_file, 'r') as f:
547
- content = f.read().lower()
548
- if "mit license" in content:
549
- license_type = "MIT"
550
- elif "apache license" in content:
551
- license_type = "Apache"
552
- elif "gnu general public" in content:
553
- license_type = "GPL"
554
- elif "bsd " in content:
555
- license_type = "BSD"
556
- # Add more license type detection as needed
557
- except:
558
- pass
559
- break
560
-
561
- # Get remote URL (only domain, not full URL)
562
- remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
563
- capture_output=True, text=True)
564
-
565
- remote_url = remote.stdout.strip() if remote.stdout else None
566
-
567
- if remote_url:
568
- # Clean the remote URL string
569
- remote_url = remote_url.strip()
570
-
571
- # Properly extract domain without authentication information
572
- domain = None
573
- if remote_url:
574
- # For SSH URLs (git@github.com:user/repo.git)
575
- if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
576
- domain = remote_url.split('@')[1].split(':')[0]
577
- # For HTTPS URLs with or without authentication
578
- elif remote_url.startswith('http'):
579
- # Remove authentication part if present
580
- url_parts = remote_url.split('//')
581
- if len(url_parts) > 1:
582
- auth_and_domain = url_parts[1].split('/', 1)[0]
583
- # If auth info exists (contains @), take only domain part
584
- if '@' in auth_and_domain:
585
- domain = auth_and_domain.split('@')[-1]
586
- else:
587
- domain = auth_and_domain
588
- # Handle other URL formats
589
- else:
590
- # Try a general regex as fallback for unusual formats
591
- import re
592
- domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
593
- if domain_match:
594
- domain = domain_match.group(1) or domain_match.group(2)
595
-
596
- # Extract owner and repo info securely
597
- owner = None
598
- repo_name = None
599
-
600
- if domain:
601
- # For GitHub repositories
602
- if "github" in domain:
603
- # SSH format: git@github.com:username/repo.git
604
- if ':' in remote_url and '@' in remote_url:
605
- parts = remote_url.split(':')[-1].split('/')
606
- if len(parts) >= 2:
607
- owner = parts[0]
608
- repo_name = parts[1].replace('.git', '')
609
- # HTTPS format
610
- else:
611
- url_parts = remote_url.split('//')
612
- if len(url_parts) > 1:
613
- path_parts = url_parts[1].split('/')
614
- if len(path_parts) >= 3:
615
- # Skip domain and authentication part
616
- domain_part = path_parts[0]
617
- if '@' in domain_part:
618
- # Path starts after domain
619
- owner_index = 1
620
- else:
621
- owner_index = 1
622
-
623
- if len(path_parts) > owner_index:
624
- owner = path_parts[owner_index]
625
- if len(path_parts) > owner_index + 1:
626
- repo_name = path_parts[owner_index + 1].replace('.git', '')
627
-
628
- # Get commit count
629
- commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
630
- capture_output=True, text=True)
631
-
632
- # Get branch count
633
- branch_count = subprocess.run(["git", "branch", "--list"],
634
- capture_output=True, text=True)
635
- branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
636
-
637
- return {
638
- "domain": domain, # Only domain, not full URL
639
- "owner": owner, # Repository owner (for GitHub)
640
- "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
641
- "branch_count": branch_count,
642
- "has_git": True,
643
- "repo_path": repo_path if 'repo_path' in locals() else None,
644
- "repo_name": repo_name,
645
- "has_license": has_license if 'has_license' in locals() else False,
646
- "license_type": license_type if 'license_type' in locals() else "unknown"
647
- }
648
-
649
- except Exception as e:
650
- # Optionally log the exception for debugging
651
- pass
652
- return {"has_git": False}
653
-
654
-
655
- def detect_usage_pattern(self):
656
- """Detect usage patterns that indicate commercial use"""
657
- current_time = datetime.now()
658
-
659
- # Check if using during business hours
660
- is_weekday = current_time.weekday() < 5 # 0-4 are Monday to Friday
661
- hour = current_time.hour
662
- is_business_hours = 9 <= hour <= 18
663
-
664
- return {
665
- "business_hours_usage": is_weekday and is_business_hours,
666
- "weekday": is_weekday,
667
- "hour": hour,
668
- "timestamp": current_time.isoformat()
669
- }
670
-
671
- def enhanced_commercial_detection(self):
672
- """More thorough commercial usage detection"""
673
- basic = self.detect_commercial_usage()
674
-
675
- # Additional commercial indicators
676
- try:
677
- project_files = os.listdir(os.getcwd())
678
-
679
- # Look for commercial frameworks
680
- commercial_frameworks = ["django-oscar", "opencart", "magento",
681
- "saleor", "odoo", "shopify", "woocommerce"]
682
-
683
- framework_match = False
684
- for framework in commercial_frameworks:
685
- if any(framework in f for f in project_files):
686
- framework_match = True
687
- break
688
-
689
- # Check for database connections
690
- db_files = [f for f in project_files if "database" in f.lower()
691
- or "db_config" in f.lower() or f.endswith(".db")]
692
- has_database = len(db_files) > 0
693
- except:
694
- framework_match = False
695
- has_database = False
696
-
697
- # Domain name registration check
698
- domain_check = self.analyze_git_info()
699
- domain_is_commercial = False
700
- if domain_check and domain_check.get("domain"):
701
- commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
702
- domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
703
-
704
- # Check project structure
705
- project_structure = self.analyze_project_structure()
706
-
707
- # Calculate enhanced commercial score
708
- indicators = [
709
- basic["commercial_probability"],
710
- framework_match,
711
- has_database,
712
- domain_is_commercial,
713
- project_structure.get("type_confidence", {}).get("commercial_app", 0),
714
- self.detect_usage_pattern()["business_hours_usage"]
715
- ]
716
-
717
- # Filter out None values
718
- indicators = [i for i in indicators if i is not None]
719
-
720
- # Calculate score - convert booleans to 1.0 and average
721
- if indicators:
722
- score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
723
- for i in indicators) / len(indicators)
724
- else:
725
- score = 0
726
-
727
- return {
728
- "commercial_probability": score,
729
- "likely_commercial": score > 0.4,
730
- "indicators": {
731
- "basic_indicators": basic["commercial_indicators"],
732
- "framework_match": framework_match,
733
- "has_database": has_database,
734
- "domain_is_commercial": domain_is_commercial,
735
- "project_structure": project_structure.get("detected_type"),
736
- "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
737
- }
738
- }
739
-
740
- def analyze_dependencies(self):
741
- """Analyze package dependencies for commercial patterns"""
742
- try:
743
- import pkg_resources
744
-
745
- # Commercial/enterprise package indicators
746
- enterprise_packages = [
747
- "snowflake-connector-python", "databricks", "azure",
748
- "aws", "google-cloud", "stripe", "atlassian",
749
- "salesforce", "bigquery", "tableau", "sap"
750
- ]
751
-
752
- # Find installed packages that match enterprise indicators
753
- commercial_deps = []
754
- for pkg in pkg_resources.working_set:
755
- if any(ent in pkg.key for ent in enterprise_packages):
756
- commercial_deps.append({"name": pkg.key, "version": pkg.version})
757
-
758
- return {
759
- "has_commercial_deps": len(commercial_deps) > 0,
760
- "commercial_deps_count": len(commercial_deps),
761
- "commercial_deps": commercial_deps
762
- }
763
- except:
764
- return {"has_commercial_deps": False}
765
-
766
- # Create singleton instance
767
- inspector = Inspector()
29
+ _instance=_B;_lock=_B
30
+ def __new__(cls):
31
+ import threading
32
+ if cls._lock is _B:cls._lock=threading.Lock()
33
+ with cls._lock:
34
+ if cls._instance is _B:cls._instance=super(Inspector,cls).__new__(cls);cls._instance._initialize()
35
+ return cls._instance
36
+ def _initialize(self):self.cache={};self.cache_ttl=3600;self.last_examination=0;self.machine_id=_B;self._colab_auth_triggered=_C;self.home_dir=Path.home();self.project_dir=self.home_dir/'.vnstock';self.project_dir.mkdir(exist_ok=_A);self.id_dir=self.project_dir/'id';self.id_dir.mkdir(exist_ok=_A);self.machine_id_path=self.id_dir/'machine_id.txt';self.examine()
37
+ def examine(self,force_refresh=_C):
38
+ D='script';C='terminal';B='hosting_service';A='environment';current_time=time.time()
39
+ if not force_refresh and current_time-self.last_examination<self.cache_ttl:return self.cache
40
+ info={_G:datetime.now().isoformat(),'python_version':platform.python_version(),'os_name':platform.system(),'platform':platform.platform()};info[_L]=self.fingerprint()
41
+ try:
42
+ import importlib.util;ipython_spec=importlib.util.find_spec('IPython')
43
+ if ipython_spec:
44
+ from IPython import get_ipython;ipython=get_ipython()
45
+ if ipython is not _B:
46
+ info[A]='jupyter'
47
+ if _E in sys.modules:info[B]='colab'
48
+ elif _M in os.environ:info[B]='kaggle'
49
+ else:info[B]='local_jupyter'
50
+ elif sys.stdout.isatty():info[A]=C
51
+ else:info[A]=D
52
+ elif sys.stdout.isatty():info[A]=C
53
+ else:info[A]=D
54
+ except:info[A]=_D
55
+ try:info['cpu_count']=os.cpu_count();info['memory_gb']=round(psutil.virtual_memory().total/1024**3,1)
56
+ except:pass
57
+ is_colab=_E in sys.modules
58
+ if is_colab:info['is_colab']=_A;self.detect_colab_with_delayed_auth()
59
+ try:info['commercial_usage']=self.enhanced_commercial_detection();info['project_context']=self.analyze_project_structure();info['git_info']=self.analyze_git_info();info['usage_pattern']=self.detect_usage_pattern();info['dependencies']=self.analyze_dependencies()
60
+ except Exception as e:info['detection_error']=str(e)
61
+ self.cache=info;self.last_examination=current_time;return info
62
+ def fingerprint(self):
63
+ if self.machine_id:return self.machine_id
64
+ if self.machine_id_path.exists():
65
+ try:
66
+ with open(self.machine_id_path,'r')as f:self.machine_id=f.read().strip();return self.machine_id
67
+ except:pass
68
+ is_colab=self.detect_colab_with_delayed_auth()
69
+ try:system_info=platform.node()+platform.platform()+platform.machine();self.machine_id=hashlib.md5(system_info.encode()).hexdigest()
70
+ except:self.machine_id=str(uuid.uuid4())
71
+ try:
72
+ with open(self.machine_id_path,'w')as f:f.write(self.machine_id)
73
+ except:pass
74
+ return self.machine_id
75
+ def detect_hosting(self):
76
+ A='Google Colab';hosting_markers={'COLAB_GPU':A,_M:'Kaggle','BINDER_SERVICE_HOST':'Binder','CODESPACE_NAME':'GitHub Codespaces','STREAMLIT_SERVER_HEADLESS':'Streamlit Cloud','CLOUD_SHELL':'Cloud Shell'}
77
+ for(env_var,host_name)in hosting_markers.items():
78
+ if env_var in os.environ:return host_name
79
+ if _E in sys.modules:return A
80
+ return'local'
81
+ def detect_commercial_usage(self):
82
+ F='client';E='enterprise';D='dir_patterns';C='env_vars';B='file_patterns';A='env_domains';commercial_indicators={A:['.com','.io','.co',E,'corp','inc'],B:['invoice','payment','customer',F,'product','sale'],C:['COMPANY','BUSINESS','ENTERPRISE','CORPORATE'],D:['company','business',E,'corporate',F]};env_values=' '.join(os.environ.values()).lower();domain_match=any(domain in env_values for domain in commercial_indicators[A]);env_var_match=any(var in os.environ for var in commercial_indicators[C]);current_dir=os.getcwd().lower();dir_match=any(pattern in current_dir for pattern in commercial_indicators[D])
83
+ try:files=[f.lower()for f in os.listdir()if os.path.isfile(f)];file_match=any(any(pattern in f for pattern in commercial_indicators[B])for f in files)
84
+ except:file_match=_C
85
+ indicators=[domain_match,env_var_match,dir_match,file_match];commercial_probability=sum(indicators)/len(indicators);return{_N:commercial_probability>.3,_H:commercial_probability,_O:{'domain_match':domain_match,'env_var_match':env_var_match,'dir_match':dir_match,'file_match':file_match}}
86
+ def scan_packages(self):
87
+ A='financetoolkit';package_groups={'vnstock_family':['vnstock','vnstock3','vnstock_ezchart','vnstock_data_pro','vnstock_market_data_pipeline','vnstock_ta','vnii','vnai'],'analytics':['openbb','pandas_ta'],'static_charts':['matplotlib','seaborn','altair'],'dashboard':[_Q,'voila','panel','shiny','dash'],'interactive_charts':['mplfinance','plotly','plotline','bokeh','pyecharts','highcharts-core','highcharts-stock','mplchart'],'datafeed':['yfinance','alpha_vantage','pandas-datareader','investpy'],'official_api':['ssi-fc-data','ssi-fctrading'],'risk_return':['pyfolio','empyrical','quantstats',A],'machine_learning':['scipy','sklearn','statsmodels','pytorch','tensorflow','keras','xgboost'],_P:['stochastic','talib','tqdm','finta',A,'tulipindicators'],_I:['vectorbt',_I,'bt','zipline','pyalgotrade','backtrader','pybacktest','fastquant','lean','ta','finmarketpy','qstrader'],'server':[_R,'flask','uvicorn','gunicorn'],'framework':['lightgbm','catboost',_S]};installed={}
88
+ for(category,packages)in package_groups.items():
89
+ installed[category]=[]
90
+ for pkg in packages:
91
+ try:version=importlib.metadata.version(pkg);installed[category].append({'name':pkg,_T:version})
92
+ except:pass
93
+ return installed
94
+ def setup_vnstock_environment(self):
95
+ env_file=self.id_dir/'environment.json';env_data={'accepted_agreement':_A,_G:datetime.now().isoformat(),_L:self.fingerprint()}
96
+ try:
97
+ with open(env_file,'w')as f:import json;json.dump(env_data,f)
98
+ return _A
99
+ except Exception as e:print(f"Failed to set up vnstock environment: {e}");return _C
100
def detect_colab_with_delayed_auth(self, immediate=False):
    """Detect Google Colab and (optionally) defer Drive-based authentication.

    Args:
        immediate: when True, resolve the persistent user id right away;
            otherwise a daemon thread performs the same step after five
            minutes so the user is not interrupted at import time.

    Returns:
        bool: True when running inside Google Colab.
    """
    is_colab = 'google.colab' in sys.modules

    def _sync_user_id():
        # Shared by both branches (was duplicated verbatim): fetch the
        # Drive-backed user id and persist it locally when it differs
        # from the current machine id.
        user_id = self.get_or_create_user_id()
        if user_id and user_id != self.machine_id:
            self.machine_id = user_id
            try:
                with open(self.machine_id_path, 'w') as f:
                    f.write(user_id)
            except Exception:
                # Persisting the id is best-effort only.
                pass

    if is_colab and not self._colab_auth_triggered:
        if immediate:
            self._colab_auth_triggered = True
            _sync_user_id()
        else:
            def delayed_auth():
                time.sleep(300)  # wait 5 minutes before prompting for Drive
                _sync_user_id()
            thread = threading.Thread(target=delayed_auth, daemon=True)
            thread.start()
    return is_colab
def get_or_create_user_id(self):
    """Return a persistent user id, creating one on Google Drive if needed.

    When authentication has already run, the cached machine id is returned
    directly. Otherwise the user's Drive is mounted and the id is read
    from (or written to) ``.vnstock/user_id.txt``; any failure falls back
    to the local machine id.
    """
    if self._colab_auth_triggered:
        # Authentication already happened once; reuse the cached id.
        return self.machine_id
    try:
        from google.colab import drive
        print('\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.')
        print('Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n')
        self._colab_auth_triggered = True
        drive.mount('/content/drive')
        id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
        if os.path.exists(id_path):
            with open(id_path, 'r') as f:
                return f.read().strip()
        # First run: mint a fresh id and persist it on Drive.
        user_id = str(uuid.uuid4())
        os.makedirs(os.path.dirname(id_path), exist_ok=True)
        with open(id_path, 'w') as f:
            f.write(user_id)
        return user_id
    except Exception:
        # No Colab, mount refused, or an I/O error: degrade gracefully.
        return self.machine_id
def analyze_project_structure(self):
    """Heuristically classify the project in the current working directory.

    Scores directory-name markers per project category, inspects root
    files for project/framework markers, and reports aggregate metrics.

    Returns:
        dict with the detected type, per-category confidence scores,
        detected frameworks, a git-repo flag and file/directory counts.
    """
    current_dir = os.getcwd()
    # Directory names whose presence suggests a given project category.
    project_indicators = {
        'commercial_app': ['app', 'services', 'products', 'customers', 'billing'],
        'financial_tool': ['portfolio', 'backtesting', 'trading', 'strategy'],
        'data_science': ['models', 'notebooks', 'datasets', 'visualization'],
        'educational': ['examples', 'lectures', 'assignments', 'slides'],
    }
    project_type = {}
    for category, markers in project_indicators.items():
        match_count = sum(
            1 for marker in markers
            if os.path.exists(os.path.join(current_dir, marker))
        )
        if markers:
            # Confidence = fraction of markers found for this category.
            project_type[category] = match_count / len(markers)

    # Defaults initialised up front: the previous version re-created them in
    # the except branch and then re-tested with dead ``locals()`` guards.
    root_files = []
    root_dirs = []
    file_project_type = 'unknown'
    frameworks = []
    try:
        entries = os.listdir(current_dir)  # single listing, split once
        root_files = [f for f in entries if os.path.isfile(os.path.join(current_dir, f))]
        root_dirs = [d for d in entries if os.path.isdir(os.path.join(current_dir, d))]
        # First matching marker set wins for the file-based classification.
        file_markers = {
            'python_project': ['setup.py', 'pyproject.toml', 'requirements.txt'],
            'data_science': ['notebook.ipynb', '.ipynb_checkpoints'],
            'web_app': ['app.py', 'wsgi.py', 'manage.py', 'server.py'],
            'finance_app': ['portfolio.py', 'trading.py', 'backtest.py'],
        }
        for ptype, markers in file_markers.items():
            if any(marker in root_files for marker in markers):
                file_project_type = ptype
                break
        framework_markers = {
            'django': ['manage.py', 'settings.py'],
            'flask': ['app.py', 'wsgi.py'],
            'streamlit': ['streamlit_app.py', 'app.py'],
            'fastapi': ['main.py', 'app.py'],
        }
        frameworks = [
            framework
            for framework, markers in framework_markers.items()
            if any(marker in root_files for marker in markers)
        ]
    except Exception:
        # Unreadable directory: keep the empty defaults initialised above.
        pass

    return {
        'project_dir': current_dir,
        'detected_type': max(project_type.items(), key=lambda x: x[1])[0] if project_type else 'unknown',
        'file_type': file_project_type,
        'is_git_repo': '.git' in root_dirs,
        'frameworks': frameworks,
        'file_count': len(root_files),
        'directory_count': len(root_dirs),
        'type_confidence': project_type,
    }
def analyze_git_info(self):
    # Gather git metadata about the current working tree: remote domain and
    # owner, repository name, licence presence/type and commit/branch counts.
    # Returns {'has_git': False} when not inside a repo or when git fails.
    # Single-letter aliases for literals reused many times below.
    I='license_type';H='has_license';G='repo_path';F='rev-parse';E='/';D='has_git';C=':';B='git';A='@'
    try:
        # Probe for a work tree first; a non-zero exit means "not a repo".
        result=subprocess.run([B,F,'--is-inside-work-tree'],capture_output=_A,text=_A)
        if result.returncode!=0:return{D:_C}
        # Repository root plus defaults for the licence detection below.
        repo_root=subprocess.run([B,F,'--show-toplevel'],capture_output=_A,text=_A);repo_path=repo_root.stdout.strip()if repo_root.stdout else _B;repo_name=os.path.basename(repo_path)if repo_path else _B;has_license=_C;license_type=_D
        if repo_path:
            # Look for a licence file at the repo root under common names.
            license_files=[os.path.join(repo_path,'LICENSE'),os.path.join(repo_path,'LICENSE.txt'),os.path.join(repo_path,'LICENSE.md')]
            for license_file in license_files:
                if os.path.exists(license_file):
                    has_license=_A
                    try:
                        with open(license_file,'r')as f:
                            content=f.read().lower()
                            # Coarse licence classification by marker phrases.
                            if'mit license'in content:license_type='MIT'
                            elif'apache license'in content:license_type='Apache'
                            elif'gnu general public'in content:license_type='GPL'
                            elif'bsd 'in content:license_type='BSD'
                    except:pass
                    break
        # Remote URL; may be empty for local-only repositories.
        remote=subprocess.run([B,'config','--get','remote.origin.url'],capture_output=_A,text=_A);remote_url=remote.stdout.strip()if remote.stdout else _B
        if remote_url:
            remote_url=remote_url.strip();domain=_B
            if remote_url:
                # SSH-style URL (git@host:owner/repo): host sits after '@'.
                if remote_url.startswith('git@')or A in remote_url and C in remote_url.split(A)[1]:domain=remote_url.split(A)[1].split(C)[0]
                elif remote_url.startswith('http'):
                    # HTTP(S) URL: strip the scheme, then any user@ prefix.
                    url_parts=remote_url.split('//')
                    if len(url_parts)>1:
                        auth_and_domain=url_parts[1].split(E,1)[0]
                        if A in auth_and_domain:domain=auth_and_domain.split(A)[-1]
                        else:domain=auth_and_domain
                else:
                    # Fallback: regex the host out of any other URL shape.
                    import re;domain_match=re.search('@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)',remote_url)
                    if domain_match:domain=domain_match.group(1)or domain_match.group(2)
            owner=_B;repo_name=_B
            if domain:
                if'github'in domain:
                    if C in remote_url and A in remote_url:
                        # SSH form: "owner/repo" follows the ':'.
                        parts=remote_url.split(C)[-1].split(E)
                        if len(parts)>=2:owner=parts[0];repo_name=parts[1].replace(_J,'')
                    else:
                        # HTTPS form: owner/repo are path segments after host.
                        url_parts=remote_url.split('//')
                        if len(url_parts)>1:
                            path_parts=url_parts[1].split(E)
                            if len(path_parts)>=3:
                                domain_part=path_parts[0]
                                # NOTE(review): both branches assign 1, so the
                                # credential-in-host check is currently a no-op.
                                if A in domain_part:owner_index=1
                                else:owner_index=1
                                if len(path_parts)>owner_index:owner=path_parts[owner_index]
                                if len(path_parts)>owner_index+1:repo_name=path_parts[owner_index+1].replace(_J,'')
        # Activity metrics. ``branch_count`` is rebound: first the completed
        # process, then the parsed number of branch lines.
        # NOTE(review): with no remote, 'domain'/'owner' appear unbound here;
        # the NameError is swallowed by the except below, so local-only repos
        # seem to report {'has_git': False} -- confirm against upstream.
        commit_count=subprocess.run([B,'rev-list','--count','HEAD'],capture_output=_A,text=_A);branch_count=subprocess.run([B,'branch','--list'],capture_output=_A,text=_A);branch_count=len(branch_count.stdout.strip().split('\n'))if branch_count.stdout else 0;return{_K:domain,'owner':owner,'commit_count':int(commit_count.stdout.strip())if commit_count.stdout else 0,'branch_count':branch_count,D:_A,G:repo_path if G in locals()else _B,'repo_name':repo_name,H:has_license if H in locals()else _C,I:license_type if I in locals()else _D}
    except Exception as e:pass
    # Any failure above (git missing, parse error) lands here.
    return{D:_C}
def detect_usage_pattern(self):
    """Snapshot when the library is being used.

    Returns a dict flagging whether the call happens on a weekday during
    business hours (09:00-18:00), plus the raw hour and an ISO timestamp.
    """
    now = datetime.now()
    weekday = now.weekday() < 5  # Monday..Friday
    in_office_hours = 9 <= now.hour <= 18
    return {
        'business_hours_usage': weekday and in_office_hours,
        'weekday': weekday,
        'hour': now.hour,
        'timestamp': now.isoformat(),
    }
def enhanced_commercial_detection(self):
    """Estimate the probability that vnstock is used commercially.

    Combines the basic detector with filesystem, git-domain and
    usage-time signals into a single averaged score.

    Returns:
        dict: 'commercial_probability' (0..1 score), 'likely_commercial'
        (score > 0.4) and the individual 'indicators' behind the score.
    """
    basic = self.detect_commercial_usage()
    framework_match = False
    has_database = False
    try:
        project_files = os.listdir(os.getcwd())
        # E-commerce/ERP framework names appearing anywhere in a filename.
        commercial_frameworks = ['django-oscar', 'opencart', 'magento',
                                 'saleor', 'odoo', 'shopify', 'woocommerce']
        for framework in commercial_frameworks:
            if any(framework in f for f in project_files):
                framework_match = True
                break
        db_files = [f for f in project_files
                    if 'database' in f.lower() or 'db_config' in f.lower()
                    or f.endswith('.db')]
        has_database = len(db_files) > 0
    except Exception:  # narrowed from a bare except
        # Unreadable CWD: keep the pessimistic defaults.
        framework_match = False
        has_database = False

    domain_check = self.analyze_git_info()
    domain_is_commercial = False
    if domain_check and domain_check.get('domain'):
        commercial_tlds = ['.com', '.io', '.co', '.org', '.net']
        domain_is_commercial = any(tld in domain_check['domain'] for tld in commercial_tlds)

    project_structure = self.analyze_project_structure()
    usage = self.detect_usage_pattern()  # hoisted: previously called twice
    indicators = [
        basic['commercial_probability'],
        framework_match,
        has_database,
        domain_is_commercial,
        project_structure.get('type_confidence', {}).get('commercial_app', 0),
        usage['business_hours_usage'],
    ]
    indicators = [i for i in indicators if i is not None]
    if indicators:
        # Booleans contribute 1/0; numeric confidences keep their value.
        score = sum(
            1. if isinstance(i, bool) and i else i if isinstance(i, (int, float)) else 0
            for i in indicators
        ) / len(indicators)
    else:
        score = 0
    return {
        'commercial_probability': score,
        'likely_commercial': score > .4,
        'indicators': {
            'basic_indicators': basic['commercial_indicators'],
            'framework_match': framework_match,
            'has_database': has_database,
            'domain_is_commercial': domain_is_commercial,
            'project_structure': project_structure.get('detected_type'),
            'business_hours_usage': usage['business_hours_usage'],
        },
    }
def analyze_dependencies(self):
    """Scan installed distributions for enterprise/commercial integrations.

    Returns:
        dict: 'has_commercial_deps' flag plus the count and list of
        matching ``{'name', 'version'}`` entries. On any scan failure
        only the flag (False) is returned.
    """
    flag_key = 'has_commercial_deps'
    try:
        # Substrings of distribution names indicating paid/enterprise services.
        enterprise_packages = ['snowflake-connector-python', 'databricks', 'azure',
                               'aws', 'google-cloud', 'stripe', 'atlassian',
                               'salesforce', 'bigquery', 'tableau', 'sap']
        commercial_deps = []
        # importlib.metadata (already imported at module top) replaces the
        # deprecated pkg_resources API, whose removal in modern setuptools
        # made this scan silently return False.
        for dist in importlib.metadata.distributions():
            name = (dist.metadata['Name'] or '').lower()
            if any(ent in name for ent in enterprise_packages):
                commercial_deps.append({'name': name, 'version': dist.version})
        return {
            flag_key: len(commercial_deps) > 0,
            'commercial_deps_count': len(commercial_deps),
            'commercial_deps': commercial_deps,
        }
    except Exception:
        return {flag_key: False}
# Module-level singleton; Inspector.__new__ guards against re-creation, so
# every import of this module shares this one instance.
inspector=Inspector()