vnai-2.1.7-py3-none-any.whl → vnai-2.1.8-py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
vnai/scope/profile.py CHANGED
@@ -1,579 +1,765 @@
-import os
-import sys
-import platform
-import uuid
-import hashlib
-import psutil
-import threading
-import time
-import importlib.metadata
-from datetime import datetime
-import subprocess
-from pathlib import Path
-
-class Inspector:
-    _instance = None
-    _lock = None
-
-    def __new__(cls):
-        import threading
-        if cls._lock is None:
-            cls._lock = threading.Lock()
-        with cls._lock:
-            if cls._instance is None:
-                cls._instance = super(Inspector, cls).__new__(cls)
-                cls._instance._initialize()
-            return cls._instance
-
-    def _initialize(self):
-        self.cache = {}
-        self.cache_ttl = 3600
-        self.last_examination = 0
-        self.machine_id = None
-        self._colab_auth_triggered = False
-        self.home_dir = Path.home()
-        self.project_dir = self.home_dir /".vnstock"
-        self.project_dir.mkdir(exist_ok=True)
-        self.id_dir = self.project_dir /'id'
-        self.id_dir.mkdir(exist_ok=True)
-        self.machine_id_path = self.id_dir /"machine_id.txt"
-        self.examine()
-
-    def examine(self, force_refresh=False):
-        current_time = time.time()
-        if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
-            return self.cache
-        info = {
-            "timestamp": datetime.now().isoformat(),
-            "python_version": platform.python_version(),
-            "os_name": platform.system(),
-            "platform": platform.platform()
-        }
-        info["machine_id"] = self.fingerprint()
-        try:
-            import importlib.util
-            ipython_spec = importlib.util.find_spec("IPython")
-            if ipython_spec:
-                from IPython import get_ipython
-                ipython = get_ipython()
-                if ipython is not None:
-                    info["environment"] ="jupyter"
-                    if'google.colab' in sys.modules:
-                        info["hosting_service"] ="colab"
-                    elif'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
-                        info["hosting_service"] ="kaggle"
-                    else:
-                        info["hosting_service"] ="local_jupyter"
-                elif sys.stdout.isatty():
-                    info["environment"] ="terminal"
-                else:
-                    info["environment"] ="script"
-            elif sys.stdout.isatty():
-                info["environment"] ="terminal"
-            else:
-                info["environment"] ="script"
-        except:
-            info["environment"] ="unknown"
-        try:
-            info["cpu_count"] = os.cpu_count()
-            info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
-        except:
-            pass
-        is_colab ='google.colab' in sys.modules
-        if is_colab:
-            info["is_colab"] = True
-            self.detect_colab_with_delayed_auth()
-        try:
-            info["commercial_usage"] = self.enhanced_commercial_detection()
-            info["project_context"] = self.analyze_project_structure()
-            info["git_info"] = self.analyze_git_info()
-            info["usage_pattern"] = self.detect_usage_pattern()
-            info["dependencies"] = self.analyze_dependencies()
-        except Exception as e:
-            info["detection_error"] = str(e)
-        self.cache = info
-        self.last_examination = current_time
-        return info
-
-    def fingerprint(self):
-        if self.machine_id:
-            return self.machine_id
-        if self.machine_id_path.exists():
-            try:
-                with open(self.machine_id_path,"r") as f:
-                    self.machine_id = f.read().strip()
-                    return self.machine_id
-            except:
-                pass
-        is_colab = self.detect_colab_with_delayed_auth()
-        try:
-            system_info = platform.node() + platform.platform() + platform.machine()
-            self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
-        except:
-            self.machine_id = str(uuid.uuid4())
-        try:
-            with open(self.machine_id_path,"w") as f:
-                f.write(self.machine_id)
-        except:
-            pass
-        return self.machine_id
-
-    def detect_hosting(self):
-        hosting_markers = {
-            "COLAB_GPU":"Google Colab",
-            "KAGGLE_KERNEL_RUN_TYPE":"Kaggle",
-            "BINDER_SERVICE_HOST":"Binder",
-            "CODESPACE_NAME":"GitHub Codespaces",
-            "STREAMLIT_SERVER_HEADLESS":"Streamlit Cloud",
-            "CLOUD_SHELL":"Cloud Shell"
-        }
-        for env_var, host_name in hosting_markers.items():
-            if env_var in os.environ:
-                return host_name
-        if'google.colab' in sys.modules:
-            return"Google Colab"
-        return"local"
-
-    def detect_commercial_usage(self):
-        commercial_indicators = {
-            "env_domains": [".com",".io",".co","enterprise","corp","inc"],
-            "file_patterns": ["invoice","payment","customer","client","product","sale"],
-            "env_vars": ["COMPANY","BUSINESS","ENTERPRISE","CORPORATE"],
-            "dir_patterns": ["company","business","enterprise","corporate","client"]
-        }
-        env_values =" ".join(os.environ.values()).lower()
-        domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
-        env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
-        current_dir = os.getcwd().lower()
-        dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
-        try:
-            files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
-            file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
-        except:
-            file_match = False
-        indicators = [domain_match, env_var_match, dir_match, file_match]
-        commercial_probability = sum(indicators) / len(indicators)
-        return {
-            "likely_commercial": commercial_probability > 0.3,
-            "commercial_probability": commercial_probability,
-            "commercial_indicators": {
-                "domain_match": domain_match,
-                "env_var_match": env_var_match,
-                "dir_match": dir_match,
-                "file_match": file_match
-            }
-        }
-
-    def scan_packages(self):
-        package_groups = {
-            "vnstock_family": [
-                "vnstock",
-                "vnstock3",
-                "vnstock_ezchart",
-                "vnstock_data_pro",
-                "vnstock_market_data_pipeline",
-                "vnstock_ta",
-                "vnii",
-                "vnai"
-            ],
-            "analytics": [
-                "openbb",
-                "pandas_ta"
-            ],
-            "static_charts": [
-                "matplotlib",
-                "seaborn",
-                "altair"
-            ],
-            "dashboard": [
-                "streamlit",
-                "voila",
-                "panel",
-                "shiny",
-                "dash"
-            ],
-            "interactive_charts": [
-                "mplfinance",
-                "plotly",
-                "plotline",
-                "bokeh",
-                "pyecharts",
-                "highcharts-core",
-                "highcharts-stock",
-                "mplchart"
-            ],
-            "datafeed": [
-                "yfinance",
-                "alpha_vantage",
-                "pandas-datareader",
-                "investpy"
-            ],
-            "official_api": [
-                "ssi-fc-data",
-                "ssi-fctrading"
-            ],
-            "risk_return": [
-                "pyfolio",
-                "empyrical",
-                "quantstats",
-                "financetoolkit"
-            ],
-            "machine_learning": [
-                "scipy",
-                "sklearn",
-                "statsmodels",
-                "pytorch",
-                "tensorflow",
-                "keras",
-                "xgboost"
-            ],
-            "indicators": [
-                "stochastic",
-                "talib",
-                "tqdm",
-                "finta",
-                "financetoolkit",
-                "tulipindicators"
-            ],
-            "backtesting": [
-                "vectorbt",
-                "backtesting",
-                "bt",
-                "zipline",
-                "pyalgotrade",
-                "backtrader",
-                "pybacktest",
-                "fastquant",
-                "lean",
-                "ta",
-                "finmarketpy",
-                "qstrader"
-            ],
-            "server": [
-                "fastapi",
-                "flask",
-                "uvicorn",
-                "gunicorn"
-            ],
-            "framework": [
-                "lightgbm",
-                "catboost",
-                "django"
-            ]
-        }
-        installed = {}
-        for category, packages in package_groups.items():
-            installed[category] = []
-            for pkg in packages:
-                try:
-                    version = importlib.metadata.version(pkg)
-                    installed[category].append({"name": pkg,"version": version})
-                except:
-                    pass
-        return installed
-
-    def setup_vnstock_environment(self):
-        env_file = self.id_dir /"environment.json"
-        env_data = {
-            "accepted_agreement": True,
-            "timestamp": datetime.now().isoformat(),
-            "machine_id": self.fingerprint()
-        }
-        try:
-            with open(env_file,"w") as f:
-                import json
-                json.dump(env_data, f)
-            return True
-        except Exception as e:
-            print(f"Failed to set up vnstock environment: {e}")
-            return False
-
-    def detect_colab_with_delayed_auth(self, immediate=False):
-        is_colab ='google.colab' in sys.modules
-        if is_colab and not self._colab_auth_triggered:
-            if immediate:
-                self._colab_auth_triggered = True
-                user_id = self.get_or_create_user_id()
-                if user_id and user_id != self.machine_id:
-                    self.machine_id = user_id
-                    try:
-                        with open(self.machine_id_path,"w") as f:
-                            f.write(user_id)
-                    except:
-                        pass
-            else:
-
-                def delayed_auth():
-                    time.sleep(300)
-                    user_id = self.get_or_create_user_id()
-                    if user_id and user_id != self.machine_id:
-                        self.machine_id = user_id
-                        try:
-                            with open(self.machine_id_path,"w") as f:
-                                f.write(user_id)
-                        except:
-                            pass
-                thread = threading.Thread(target=delayed_auth, daemon=True)
-                thread.start()
-        return is_colab
-
-    def get_or_create_user_id(self):
-        if self._colab_auth_triggered:
-            return self.machine_id
-        try:
-            from google.colab import drive
-            print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
-            print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
-            self._colab_auth_triggered = True
-            drive.mount('/content/drive')
-            id_path ='/content/drive/MyDrive/.vnstock/user_id.txt'
-            if os.path.exists(id_path):
-                with open(id_path,'r') as f:
-                    return f.read().strip()
-            else:
-                user_id = str(uuid.uuid4())
-                os.makedirs(os.path.dirname(id_path), exist_ok=True)
-                with open(id_path,'w') as f:
-                    f.write(user_id)
-                return user_id
-        except Exception as e:
-            return self.machine_id
-
-    def analyze_project_structure(self):
-        current_dir = os.getcwd()
-        project_indicators = {
-            "commercial_app": ["app","services","products","customers","billing"],
-            "financial_tool": ["portfolio","backtesting","trading","strategy"],
-            "data_science": ["models","notebooks","datasets","visualization"],
-            "educational": ["examples","lectures","assignments","slides"]
-        }
-        project_type = {}
-        for category, markers in project_indicators.items():
-            match_count = 0
-            for marker in markers:
-                if os.path.exists(os.path.join(current_dir, marker)):
-                    match_count += 1
-            if len(markers) > 0:
-                project_type[category] = match_count / len(markers)
-        try:
-            root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
-            root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
-            file_markers = {
-                "python_project": ["setup.py","pyproject.toml","requirements.txt"],
-                "data_science": ["notebook.ipynb",".ipynb_checkpoints"],
-                "web_app": ["app.py","wsgi.py","manage.py","server.py"],
-                "finance_app": ["portfolio.py","trading.py","backtest.py"],
-            }
-            file_project_type ="unknown"
-            for ptype, markers in file_markers.items():
-                if any(marker in root_files for marker in markers):
-                    file_project_type = ptype
-                    break
-            frameworks = []
-            framework_markers = {
-                "django": ["manage.py","settings.py"],
-                "flask": ["app.py","wsgi.py"],
-                "streamlit": ["streamlit_app.py","app.py"],
-                "fastapi": ["main.py","app.py"],
-            }
-            for framework, markers in framework_markers.items():
-                if any(marker in root_files for marker in markers):
-                    frameworks.append(framework)
-        except Exception as e:
-            root_files = []
-            root_dirs = []
-            file_project_type ="unknown"
-            frameworks = []
-        return {
-            "project_dir": current_dir,
-            "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else"unknown",
-            "file_type": file_project_type,
-            "is_git_repo":".git" in (root_dirs if'root_dirs' in locals() else []),
-            "frameworks": frameworks,
-            "file_count": len(root_files) if'root_files' in locals() else 0,
-            "directory_count": len(root_dirs) if'root_dirs' in locals() else 0,
-            "type_confidence": project_type
-        }
-
-    def analyze_git_info(self):
-        try:
-            result = subprocess.run(["git","rev-parse","--is-inside-work-tree"],
-                                    capture_output=True, text=True)
-            if result.returncode != 0:
-                return {"has_git": False}
-            repo_root = subprocess.run(["git","rev-parse","--show-toplevel"],
-                                       capture_output=True, text=True)
-            repo_path = repo_root.stdout.strip() if repo_root.stdout else None
-            repo_name = os.path.basename(repo_path) if repo_path else None
-            has_license = False
-            license_type ="unknown"
-            if repo_path:
-                license_files = [
-                    os.path.join(repo_path,"LICENSE"),
-                    os.path.join(repo_path,"LICENSE.txt"),
-                    os.path.join(repo_path,"LICENSE.md")
-                ]
-                for license_file in license_files:
-                    if os.path.exists(license_file):
-                        has_license = True
-                        try:
-                            with open(license_file,'r') as f:
-                                content = f.read().lower()
-                                if"mit license" in content:
-                                    license_type ="MIT"
-                                elif"apache license" in content:
-                                    license_type ="Apache"
-                                elif"gnu general public" in content:
-                                    license_type ="GPL"
-                                elif"bsd " in content:
-                                    license_type ="BSD"
-                        except:
-                            pass
-                        break
-            remote = subprocess.run(["git","config","--get","remote.origin.url"],
-                                    capture_output=True, text=True)
-            remote_url = remote.stdout.strip() if remote.stdout else None
-            if remote_url:
-                remote_url = remote_url.strip()
-            domain = None
-            if remote_url:
-                if remote_url.startswith('git@') or'@' in remote_url and':' in remote_url.split('@')[1]:
-                    domain = remote_url.split('@')[1].split(':')[0]
-                elif remote_url.startswith('http'):
-                    url_parts = remote_url.split('//')
-                    if len(url_parts) > 1:
-                        auth_and_domain = url_parts[1].split('/', 1)[0]
-                        if'@' in auth_and_domain:
-                            domain = auth_and_domain.split('@')[-1]
-                        else:
-                            domain = auth_and_domain
-                else:
-                    import re
-                    domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
-                    if domain_match:
-                        domain = domain_match.group(1) or domain_match.group(2)
-            owner = None
-            repo_name = None
-            if domain:
-                if"github" in domain:
-                    if':' in remote_url and'@' in remote_url:
-                        parts = remote_url.split(':')[-1].split('/')
-                        if len(parts) >= 2:
-                            owner = parts[0]
-                            repo_name = parts[1].replace('.git','')
-                    else:
-                        url_parts = remote_url.split('//')
-                        if len(url_parts) > 1:
-                            path_parts = url_parts[1].split('/')
-                            if len(path_parts) >= 3:
-                                domain_part = path_parts[0]
-                                if'@' in domain_part:
-                                    owner_index = 1
-                                else:
-                                    owner_index = 1
-                                if len(path_parts) > owner_index:
-                                    owner = path_parts[owner_index]
-                                if len(path_parts) > owner_index + 1:
-                                    repo_name = path_parts[owner_index + 1].replace('.git','')
-            commit_count = subprocess.run(["git","rev-list","--count","HEAD"],
-                                          capture_output=True, text=True)
-            branch_count = subprocess.run(["git","branch","--list"],
-                                          capture_output=True, text=True)
-            branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
-            return {
-                "domain": domain,
-                "owner": owner,
-                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
-                "branch_count": branch_count,
-                "has_git": True,
-                "repo_path": repo_path if'repo_path' in locals() else None,
-                "repo_name": repo_name,
-                "has_license": has_license if'has_license' in locals() else False,
-                "license_type": license_type if'license_type' in locals() else"unknown"
-            }
-        except Exception as e:
-            pass
-        return {"has_git": False}
-
-    def detect_usage_pattern(self):
-        current_time = datetime.now()
-        is_weekday = current_time.weekday() < 5
-        hour = current_time.hour
-        is_business_hours = 9 <= hour <= 18
-        return {
-            "business_hours_usage": is_weekday and is_business_hours,
-            "weekday": is_weekday,
-            "hour": hour,
-            "timestamp": current_time.isoformat()
-        }
-
-    def enhanced_commercial_detection(self):
-        basic = self.detect_commercial_usage()
-        try:
-            project_files = os.listdir(os.getcwd())
-            commercial_frameworks = ["django-oscar","opencart","magento",
-                                     "saleor","odoo","shopify","woocommerce"]
-            framework_match = False
-            for framework in commercial_frameworks:
-                if any(framework in f for f in project_files):
-                    framework_match = True
-                    break
-            db_files = [f for f in project_files if"database" in f.lower()
-                        or"db_config" in f.lower() or f.endswith(".db")]
-            has_database = len(db_files) > 0
-        except:
-            framework_match = False
-            has_database = False
-        domain_check = self.analyze_git_info()
-        domain_is_commercial = False
-        if domain_check and domain_check.get("domain"):
-            commercial_tlds = [".com",".io",".co",".org",".net"]
-            domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
-        project_structure = self.analyze_project_structure()
-        indicators = [
-            basic["commercial_probability"],
-            framework_match,
-            has_database,
-            domain_is_commercial,
-            project_structure.get("type_confidence", {}).get("commercial_app", 0),
-            self.detect_usage_pattern()["business_hours_usage"]
-        ]
-        indicators = [i for i in indicators if i is not None]
-        if indicators:
-            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
-                        for i in indicators) / len(indicators)
-        else:
-            score = 0
-        return {
-            "commercial_probability": score,
-            "likely_commercial": score > 0.4,
-            "indicators": {
-                "basic_indicators": basic["commercial_indicators"],
-                "framework_match": framework_match,
-                "has_database": has_database,
-                "domain_is_commercial": domain_is_commercial,
-                "project_structure": project_structure.get("detected_type"),
-                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
-            }
-        }
-
-    def analyze_dependencies(self):
-        try:
-            import pkg_resources
-            enterprise_packages = [
-                "snowflake-connector-python","databricks","azure",
-                "aws","google-cloud","stripe","atlassian",
-                "salesforce","bigquery","tableau","sap"
-            ]
-            commercial_deps = []
-            for pkg in pkg_resources.working_set:
-                if any(ent in pkg.key for ent in enterprise_packages):
-                    commercial_deps.append({"name": pkg.key,"version": pkg.version})
-            return {
-                "has_commercial_deps": len(commercial_deps) > 0,
-                "commercial_deps_count": len(commercial_deps),
-                "commercial_deps": commercial_deps
-            }
-        except:
-            return {"has_commercial_deps": False}
-inspector = Inspector()
+# vnai/scope/profile.py
+
+import os
+import sys
+import platform
+import uuid
+import hashlib
+import psutil
+import threading
+import time
+import importlib.metadata
+from datetime import datetime
+import subprocess
+from pathlib import Path
+
+class Inspector:
+    """Inspects execution environment"""
+
+    _instance = None
+    _lock = None
+
+    def __new__(cls):
+        import threading
+        if cls._lock is None:
+            cls._lock = threading.Lock()
+
+        with cls._lock:
+            if cls._instance is None:
+                cls._instance = super(Inspector, cls).__new__(cls)
+                cls._instance._initialize()
+            return cls._instance
+
+    def _initialize(self):
+        """Initialize inspector"""
+        self.cache = {}
+        self.cache_ttl = 3600  # 1 hour cache validity
+        self.last_examination = 0
+        self.machine_id = None
+        self._colab_auth_triggered = False
+
+        # Paths
+        self.home_dir = Path.home()
+        self.project_dir = self.home_dir / ".vnstock"
+        self.project_dir.mkdir(exist_ok=True)
+        self.id_dir = self.project_dir / 'id'
+        self.id_dir.mkdir(exist_ok=True)
+        self.machine_id_path = self.id_dir / "machine_id.txt"
+
+        # Perform initial examination
+        self.examine()
+
+    def examine(self, force_refresh=False):
+        """Examine current execution context"""
+        current_time = time.time()
+
+        # Return cached data if it's fresh enough and we're not forcing a refresh
+        if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
+            return self.cache
+
+        # Start with basic information
+        info = {
+            "timestamp": datetime.now().isoformat(),
+            "python_version": platform.python_version(),
+            "os_name": platform.system(),
+            "platform": platform.platform()
+        }
+
+        # Machine identifier
+        info["machine_id"] = self.fingerprint()
+
+        # Environment detection
+        try:
+            # Check for Jupyter/IPython
+            import importlib.util
+            ipython_spec = importlib.util.find_spec("IPython")
+
+            if ipython_spec:
+                from IPython import get_ipython
+                ipython = get_ipython()
+                if ipython is not None:
+                    info["environment"] = "jupyter"
+                    # Check for hosted notebooks
+                    if 'google.colab' in sys.modules:
+                        info["hosting_service"] = "colab"
+                    elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
+                        info["hosting_service"] = "kaggle"
+                    else:
+                        info["hosting_service"] = "local_jupyter"
+                elif sys.stdout.isatty():
+                    info["environment"] = "terminal"
+                else:
+                    info["environment"] = "script"
+            elif sys.stdout.isatty():
+                info["environment"] = "terminal"
+            else:
+                info["environment"] = "script"
+        except:
+            info["environment"] = "unknown"
+
+        # System resources
+        try:
+            info["cpu_count"] = os.cpu_count()
+            info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
+        except:
+            pass
+
+        # Check if in Google Colab
+        is_colab = 'google.colab' in sys.modules
+        if is_colab:
+            info["is_colab"] = True
+            # Setup delayed authentication if not already triggered
+            self.detect_colab_with_delayed_auth()
+
+        # Enhanced context information
+        try:
+            # Commercial usage detection
+            info["commercial_usage"] = self.enhanced_commercial_detection()
+
+            # Project context
+            info["project_context"] = self.analyze_project_structure()
+
+            # Git info
+            info["git_info"] = self.analyze_git_info()
+
+            # Working hours pattern
+            info["usage_pattern"] = self.detect_usage_pattern()
+
+            # Dependency analysis
+            info["dependencies"] = self.analyze_dependencies()
+        except Exception as e:
+            # Don't let enhanced detection failure stop basic functionality
+            info["detection_error"] = str(e)
+
+        # Update cache
+        self.cache = info
+        self.last_examination = current_time
+
+        return info
+
+    def fingerprint(self):
+        """Generate unique environment fingerprint"""
+        # Always return cached machine_id if it exists
+        if self.machine_id:
+            return self.machine_id
+
+        # Try to load from file first
+        if self.machine_id_path.exists():
+            try:
+                with open(self.machine_id_path, "r") as f:
+                    self.machine_id = f.read().strip()
+                    return self.machine_id
+            except:
+                pass
+
+        # Check for Colab and setup delayed authentication
+        is_colab = self.detect_colab_with_delayed_auth()
+
+        # Generate a new machine ID only if necessary
+        try:
+            # Use consistent system information
+            system_info = platform.node() + platform.platform() + platform.machine()
+            self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
+        except:
+            # Fallback to UUID but only as last resort
+            self.machine_id = str(uuid.uuid4())
+
+        # Save to ensure consistency across calls
+        try:
+            with open(self.machine_id_path, "w") as f:
+                f.write(self.machine_id)
+        except:
+            pass
+
+        return self.machine_id
+
+    def detect_hosting(self):
+        """Detect if running in a hosted environment"""
+        # Check common environment variables for hosted environments
+        hosting_markers = {
+            "COLAB_GPU": "Google Colab",
+            "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
+            "BINDER_SERVICE_HOST": "Binder",
+            "CODESPACE_NAME": "GitHub Codespaces",
+            "STREAMLIT_SERVER_HEADLESS": "Streamlit Cloud",
+            "CLOUD_SHELL": "Cloud Shell"
+        }
+
+        for env_var, host_name in hosting_markers.items():
+            if env_var in os.environ:
+                return host_name
+
+        # Check for Google Colab module
+        if 'google.colab' in sys.modules:
+            return "Google Colab"
+
+        return "local"
+
+    def detect_commercial_usage(self):
+        """Detect if running in commercial environment"""
+        commercial_indicators = {
+            "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
+            "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
+            "env_vars": ["COMPANY", "BUSINESS", "ENTERPRISE", "CORPORATE"],
+            "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
+        }
+
+        # Check environment variables for commercial domains
+        env_values = " ".join(os.environ.values()).lower()
+        domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
+
+        # Check if commercial-related environment variables exist
+        env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
+
+        # Check current directory for commercial indicators
+        current_dir = os.getcwd().lower()
+        dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
+
+        # Check files in current directory for commercial patterns
+        try:
+            files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
+            file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
+        except:
+            file_match = False
+
+        # Calculate probability
+        indicators = [domain_match, env_var_match, dir_match, file_match]
+        commercial_probability = sum(indicators) / len(indicators)
+
+        return {
+            "likely_commercial": commercial_probability > 0.3,
+            "commercial_probability": commercial_probability,
+            "commercial_indicators": {
+                "domain_match": domain_match,
+                "env_var_match": env_var_match,
+                "dir_match": dir_match,
+                "file_match": file_match
+            }
+        }
+
+    def scan_packages(self):
+        """Scan for installed packages by category"""
+        package_groups = {
+            "vnstock_family": [
+                "vnstock",
+                "vnstock3",
+                "vnstock_ezchart",
+                "vnstock_data_pro",  # Fixed missing comma here
+                "vnstock_market_data_pipeline",
+                "vnstock_ta",
+                "vnii",
+                "vnai"
+            ],
+            "analytics": [
+                "openbb",
+                "pandas_ta"
+            ],
+            "static_charts": [
+                "matplotlib",
+                "seaborn",
+                "altair"
+            ],
+            "dashboard": [
+                "streamlit",
+                "voila",
+                "panel",
+                "shiny",
+                "dash"
+            ],
+            "interactive_charts": [
+                "mplfinance",
+                "plotly",
+                "plotline",
+                "bokeh",
+                "pyecharts",
+                "highcharts-core",
+                "highcharts-stock",
+                "mplchart"
+            ],
+            "datafeed": [
+                "yfinance",
+                "alpha_vantage",
+                "pandas-datareader",
+                "investpy"
+            ],
+            "official_api": [
+                "ssi-fc-data",
+                "ssi-fctrading"
+            ],
+            "risk_return": [
+                "pyfolio",
+                "empyrical",
+                "quantstats",
+                "financetoolkit"
+            ],
+            "machine_learning": [
+                "scipy",
+                "sklearn",
+                "statsmodels",
+                "pytorch",
+                "tensorflow",
+                "keras",
+                "xgboost"
+            ],
+            "indicators": [
+                "stochastic",
+                "talib",
+                "tqdm",
+                "finta",
+                "financetoolkit",
+                "tulipindicators"
+            ],
+            "backtesting": [
+                "vectorbt",
+                "backtesting",
+                "bt",
+                "zipline",
+                "pyalgotrade",
+                "backtrader",
+                "pybacktest",
+                "fastquant",
+                "lean",
+                "ta",
+                "finmarketpy",
+                "qstrader"
+            ],
+            "server": [
+                "fastapi",
+                "flask",
+                "uvicorn",
+                "gunicorn"
+            ],
+            "framework": [
+                "lightgbm",
+                "catboost",
+                "django"
+            ]
+        }
+
+        installed = {}
+
+        for category, packages in package_groups.items():
+            installed[category] = []
+            for pkg in packages:
+                try:
+                    version = importlib.metadata.version(pkg)
+                    installed[category].append({"name": pkg, "version": version})
+                except:
+                    pass
+
+        return installed
+
+    def setup_vnstock_environment(self):
+        """Set up environment for vnstock library"""
+        # Create environment.json file
+        env_file = self.id_dir / "environment.json"
+        env_data = {
+            "accepted_agreement": True,
+            "timestamp": datetime.now().isoformat(),
+            "machine_id": self.fingerprint()
+        }
+
+        try:
+            with open(env_file, "w") as f:
+                import json
+                json.dump(env_data, f)
+            return True
+        except Exception as e:
+            print(f"Failed to set up vnstock environment: {e}")
+            return False
+
+    # Update detect_colab_with_delayed_auth method in Inspector class
+    def detect_colab_with_delayed_auth(self, immediate=False):
+        """Detect if running in Google Colab and setup authentication"""
+        # Check if we're in Colab without mounting drive yet
+        is_colab = 'google.colab' in sys.modules
+
+        if is_colab and not self._colab_auth_triggered:
+            if immediate:
+                # Immediate authentication
+                self._colab_auth_triggered = True
+                user_id = self.get_or_create_user_id()
+                if user_id and user_id != self.machine_id:
+                    self.machine_id = user_id
+                    try:
+                        with open(self.machine_id_path, "w") as f:
+                            f.write(user_id)
+                    except:
+                        pass
+            else:
+                # Start a delayed thread to trigger authentication after user is already using the package
+                def delayed_auth():
+                    # Wait for some time (e.g., 5 minutes) before attempting auth
+                    time.sleep(300)
+                    # Try to get authenticated user ID
+                    user_id = self.get_or_create_user_id()
+                    # Update machine ID with the authenticated one
+                    if user_id and user_id != self.machine_id:
+                        self.machine_id = user_id
+                        # Save to the machine_id_path
+                        try:
+                            with open(self.machine_id_path, "w") as f:
+                                f.write(user_id)
+                        except:
+                            pass
+
+                # Start the delayed authentication thread
+                thread = threading.Thread(target=delayed_auth, daemon=True)
+                thread.start()
+
+        return is_colab
+
+    def get_or_create_user_id(self):
+        """Get existing user ID from Google Drive or create new one"""
+        if self._colab_auth_triggered:
+            return self.machine_id  # Avoid triggering multiple times
+
+        try:
+            from google.colab import drive
+            print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
+            print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
+
+            # Mark that we've triggered the auth
+            self._colab_auth_triggered = True
+
+            # Mount Google Drive
+            drive.mount('/content/drive')
+            id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
+
+            if os.path.exists(id_path):
+                with open(id_path, 'r') as f:
+                    return f.read().strip()
+            else:
+                user_id = str(uuid.uuid4())
+                os.makedirs(os.path.dirname(id_path), exist_ok=True)
+                with open(id_path, 'w') as f:
+                    f.write(user_id)
+                return user_id
+        except Exception as e:
+            # Silently fail and return the existing machine ID
+            return self.machine_id
+
+    # Enhanced methods for project context collection
+
+    def analyze_project_structure(self):
+        """Analyze project directory structure for context"""
+        current_dir = os.getcwd()
+        project_indicators = {
+            "commercial_app": ["app", "services", "products", "customers", "billing"],
+            "financial_tool": ["portfolio", "backtesting", "trading", "strategy"],
+            "data_science": ["models", "notebooks", "datasets", "visualization"],
+            "educational": ["examples", "lectures", "assignments", "slides"]
+        }
+
+        # Look for key directories up to 2 levels deep (limited for privacy)
+        project_type = {}
+        for category, markers in project_indicators.items():
+            match_count = 0
+            for marker in markers:
+                if os.path.exists(os.path.join(current_dir, marker)):
+                    match_count += 1
+            if len(markers) > 0:
+                project_type[category] = match_count / len(markers)
+
+        # Scan for direct child files and directories (limited depth for privacy)
+        try:
+            root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
+            root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
+
+            # Detect project type
+            file_markers = {
+                "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
+                "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
+                "web_app": ["app.py", "wsgi.py", "manage.py", "server.py"],
+                "finance_app": ["portfolio.py", "trading.py", "backtest.py"],
+            }
+
+            file_project_type = "unknown"
+            for ptype, markers in file_markers.items():
+                if any(marker in root_files for marker in markers):
+                    file_project_type = ptype
+                    break
+
+            # Scan for specific frameworks
+            frameworks = []
+            framework_markers = {
+                "django": ["manage.py", "settings.py"],
+                "flask": ["app.py", "wsgi.py"],
+                "streamlit": ["streamlit_app.py", "app.py"],
+                "fastapi": ["main.py", "app.py"],
+            }
+
+            for framework, markers in framework_markers.items():
+                if any(marker in root_files for marker in markers):
+                    frameworks.append(framework)
+
+        except Exception as e:
+            root_files = []
+            root_dirs = []
+            file_project_type = "unknown"
+            frameworks = []
+
+        return {
+            "project_dir": current_dir,
+            "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else "unknown",
+            "file_type": file_project_type,
+            "is_git_repo": ".git" in (root_dirs if 'root_dirs' in locals() else []),
+            "frameworks": frameworks,
+            "file_count": len(root_files) if 'root_files' in locals() else 0,
+            "directory_count": len(root_dirs) if 'root_dirs' in locals() else 0,
+            "type_confidence": project_type
+        }
+
+    def analyze_git_info(self):
+        """Extract non-sensitive git repository information"""
+        try:
+            # Check if it's a git repository
+            result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
+                                    capture_output=True, text=True)
+
+            if result.returncode != 0:
+                return {"has_git": False}
+
+            # Get repository root path - ADD THIS CODE
+            repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
+                                       capture_output=True, text=True)
+            repo_path = repo_root.stdout.strip() if repo_root.stdout else None
+
+            # Extract repository name from path - ADD THIS CODE
+            repo_name = os.path.basename(repo_path) if repo_path else None
+
+            # Check for license file - ADD THIS CODE
+            has_license = False
+            license_type = "unknown"
+            if repo_path:
+                license_files = [
+                    os.path.join(repo_path, "LICENSE"),
+                    os.path.join(repo_path, "LICENSE.txt"),
+                    os.path.join(repo_path, "LICENSE.md")
+                ]
+                for license_file in license_files:
+                    if os.path.exists(license_file):
+                        has_license = True
+                        # Try to determine license type by scanning content
+                        try:
+                            with open(license_file, 'r') as f:
+                                content = f.read().lower()
+                                if "mit license" in content:
+                                    license_type = "MIT"
+                                elif "apache license" in content:
+                                    license_type = "Apache"
+                                elif "gnu general public" in content:
+                                    license_type = "GPL"
+                                elif "bsd " in content:
+                                    license_type = "BSD"
+                                # Add more license type detection as needed
+                        except:
+                            pass
+                        break
+
+            # Get remote URL (only domain, not full URL)
+            remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
+                                    capture_output=True, text=True)
+
+            remote_url = remote.stdout.strip() if remote.stdout else None
+
+            if remote_url:
+                # Clean the remote URL string
+                remote_url = remote_url.strip()
+
+            # Properly extract domain without authentication information
+            domain = None
+            if remote_url:
+                # For SSH URLs (git@github.com:user/repo.git)
+                if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
+                    domain = remote_url.split('@')[1].split(':')[0]
+                # For HTTPS URLs with or without authentication
+                elif remote_url.startswith('http'):
+                    # Remove authentication part if present
+                    url_parts = remote_url.split('//')
+                    if len(url_parts) > 1:
+                        auth_and_domain = url_parts[1].split('/', 1)[0]
+                        # If auth info exists (contains @), take only domain part
+                        if '@' in auth_and_domain:
+                            domain = auth_and_domain.split('@')[-1]
+                        else:
+                            domain = auth_and_domain
+                # Handle other URL formats
+                else:
+                    # Try a general regex as fallback for unusual formats
+                    import re
+                    domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
+                    if domain_match:
+                        domain = domain_match.group(1) or domain_match.group(2)
+
+            # Extract owner and repo info securely
+            owner = None
+            repo_name = None
+
+            if domain:
+                # For GitHub repositories
+                if "github" in domain:
+                    # SSH format: git@github.com:username/repo.git
+                    if ':' in remote_url and '@' in remote_url:
+                        parts = remote_url.split(':')[-1].split('/')
+                        if len(parts) >= 2:
+                            owner = parts[0]
+                            repo_name = parts[1].replace('.git', '')
+                    # HTTPS format
+                    else:
+                        url_parts = remote_url.split('//')
+                        if len(url_parts) > 1:
+                            path_parts = url_parts[1].split('/')
+                            if len(path_parts) >= 3:
+                                # Skip domain and authentication part
+                                domain_part = path_parts[0]
+                                if '@' in domain_part:
+                                    # Path starts after domain
+                                    owner_index = 1
+                                else:
+                                    owner_index = 1
+
+                                if len(path_parts) > owner_index:
+                                    owner = path_parts[owner_index]
+                                if len(path_parts) > owner_index + 1:
+                                    repo_name = path_parts[owner_index + 1].replace('.git', '')
+
+            # Get commit count
+            commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
+                                          capture_output=True, text=True)
+
+            # Get branch count
+            branch_count = subprocess.run(["git", "branch", "--list"],
+                                          capture_output=True, text=True)
+            branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
+
+            return {
+                "domain": domain,  # Only domain, not full URL
+                "owner": owner,  # Repository owner (for GitHub)
+                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
+                "branch_count": branch_count,
+                "has_git": True,
+                "repo_path": repo_path if 'repo_path' in locals() else None,
+                "repo_name": repo_name,
+                "has_license": has_license if 'has_license' in locals() else False,
+                "license_type": license_type if 'license_type' in locals() else "unknown"
+            }
+
+        except Exception as e:
+            # Optionally log the exception for debugging
+            pass
+        return {"has_git": False}
+
+    def detect_usage_pattern(self):
+        """Detect usage patterns that indicate commercial use"""
+        current_time = datetime.now()
+
+        # Check if using during business hours
+        is_weekday = current_time.weekday() < 5  # 0-4 are Monday to Friday
+        hour = current_time.hour
+        is_business_hours = 9 <= hour <= 18
+
+        return {
+            "business_hours_usage": is_weekday and is_business_hours,
+            "weekday": is_weekday,
+            "hour": hour,
+            "timestamp": current_time.isoformat()
+        }
+
+    def enhanced_commercial_detection(self):
+        """More thorough commercial usage detection"""
+        basic = self.detect_commercial_usage()
+
+        # Additional commercial indicators
+        try:
+            project_files = os.listdir(os.getcwd())
+
+            # Look for commercial frameworks
+            commercial_frameworks = ["django-oscar", "opencart", "magento",
+                                     "saleor", "odoo", "shopify", "woocommerce"]
+
+            framework_match = False
+            for framework in commercial_frameworks:
+                if any(framework in f for f in project_files):
+                    framework_match = True
+                    break
+
+            # Check for database connections
+            db_files = [f for f in project_files if "database" in f.lower()
+                        or "db_config" in f.lower() or f.endswith(".db")]
+            has_database = len(db_files) > 0
+        except:
+            framework_match = False
+            has_database = False
+
+        # Domain name registration check
+        domain_check = self.analyze_git_info()
+        domain_is_commercial = False
+        if domain_check and domain_check.get("domain"):
+            commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
+            domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
+
+        # Check project structure
+        project_structure = self.analyze_project_structure()
+
+        # Calculate enhanced commercial score
+        indicators = [
+            basic["commercial_probability"],
+            framework_match,
+            has_database,
+            domain_is_commercial,
+            project_structure.get("type_confidence", {}).get("commercial_app", 0),
+            self.detect_usage_pattern()["business_hours_usage"]
+        ]
+
+        # Filter out None values
+        indicators = [i for i in indicators if i is not None]
+
+        # Calculate score - convert booleans to 1.0 and average
+        if indicators:
+            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
+                        for i in indicators) / len(indicators)
+        else:
+            score = 0
+
+        return {
+            "commercial_probability": score,
+            "likely_commercial": score > 0.4,
+            "indicators": {
+                "basic_indicators": basic["commercial_indicators"],
+                "framework_match": framework_match,
+                "has_database": has_database,
+                "domain_is_commercial": domain_is_commercial,
+                "project_structure": project_structure.get("detected_type"),
+                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
+            }
+        }
+
+    def analyze_dependencies(self):
+        """Analyze package dependencies for commercial patterns"""
+        try:
+            import pkg_resources
+
+            # Commercial/enterprise package indicators
+            enterprise_packages = [
+                "snowflake-connector-python", "databricks", "azure",
+                "aws", "google-cloud", "stripe", "atlassian",
+                "salesforce", "bigquery", "tableau", "sap"
+            ]
+
+            # Find installed packages that match enterprise indicators
+            commercial_deps = []
+            for pkg in pkg_resources.working_set:
+                if any(ent in pkg.key for ent in enterprise_packages):
+                    commercial_deps.append({"name": pkg.key, "version": pkg.version})
+
+            return {
+                "has_commercial_deps": len(commercial_deps) > 0,
+                "commercial_deps_count": len(commercial_deps),
+                "commercial_deps": commercial_deps
+            }
+        except:
+            return {"has_commercial_deps": False}
+
+# Create singleton instance
+inspector = Inspector()
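
Usage note: in both versions the module builds its singleton at import time (inspector = Inspector() above), so merely importing vnai.scope.profile runs an initial examine() and persists a fingerprint under ~/.vnstock/id/machine_id.txt. A minimal sketch of how a consumer might query it, assuming the wheel is installed under the name vnai; the keys printed here are the ones the code above actually sets:

    from vnai.scope.profile import inspector

    # examine() returns a cached snapshot and re-runs at most once per
    # cache_ttl (3600 s) unless force_refresh=True is passed.
    info = inspector.examine()
    print(info["environment"])   # "jupyter", "terminal", "script", or "unknown"
    print(info["machine_id"])    # MD5-based fingerprint, persisted across runs

    fresh = inspector.examine(force_refresh=True)  # bypass the one-hour cache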