vnai 0.1.3__py3-none-any.whl → 2.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py ADDED
@@ -0,0 +1,599 @@
1
+ import os
2
+ import sys
3
+ import platform
4
+ import uuid
5
+ import hashlib
6
+ import threading
7
+ import time
8
+ import importlib.metadata
9
+ from datetime import datetime
10
+ import subprocess
11
+ from pathlib import Path
12
+
13
class Inspector:
    """Singleton that profiles the host environment.

    Collects machine identity, runtime environment (Jupyter/terminal/script),
    installed packages, project layout, git metadata, and commercial-usage
    heuristics, caching the result. Use the module-level ``inspector``
    instance or call ``Inspector()`` — both return the same object.
    """

    # Lazily-created singleton instance and the lock guarding its creation.
    _instance = None
    _lock = None
16
+
17
+ def __new__(cls):
18
+ import threading
19
+ if cls._lock is None:
20
+ cls._lock = threading.Lock()
21
+ with cls._lock:
22
+ if cls._instance is None:
23
+ cls._instance = super(Inspector, cls).__new__(cls)
24
+ cls._instance._initialize()
25
+ return cls._instance
26
+
27
    def _initialize(self):
        """Set up caches and identity paths, then run the first examination."""
        self.cache = {}                      # last examine() result
        self.cache_ttl = 3600                # seconds before examine() re-probes
        self.last_examination = 0            # epoch time of the last refresh
        self.machine_id = None               # resolved lazily by fingerprint()
        self._colab_auth_triggered = False   # ensures Colab auth runs at most once
        self.home_dir = Path.home()
        self.project_dir = self._get_project_dir()
        self.project_dir.mkdir(parents=True, exist_ok=True)
        self.id_dir = self.project_dir /'id'
        self.id_dir.mkdir(parents=True, exist_ok=True)
        self.machine_id_path = self.id_dir /"machine_id.txt"
        # Populate self.cache immediately so early callers get data.
        self.examine()
40
+
41
+ def _get_home_dir(self) -> Path:
42
+ return Path.home()
43
+
44
    def _get_project_dir(self) -> Path:
        """Return the directory used for vnstock data and identity files.

        Prefers the location reported by the optional ``vnstock`` package;
        falls back to ``~/.vnstock`` when that package is not installed.
        """
        try:
            from vnstock.core.config.ggcolab import get_vnstock_directory
            return get_vnstock_directory()
        except ImportError:
            # May run before _initialize has set home_dir, so guard it.
            if not hasattr(self,'home_dir'):
                self.home_dir = Path.home()
            return self.home_dir /".vnstock"
52
+
53
    def examine(self, force_refresh=False):
        """Collect an environment profile, cached for ``cache_ttl`` seconds.

        Args:
            force_refresh: when True, ignore the cache and re-probe everything.

        Returns:
            dict with platform facts, machine id, runtime environment,
            optional hardware info, and the deeper usage analyses.
        """
        current_time = time.time()
        # Serve the cached profile while it is still fresh.
        if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
            return self.cache
        info = {
            "timestamp": datetime.now().isoformat(),
            "python_version": platform.python_version(),
            "os_name": platform.system(),
            "platform": platform.platform()
        }
        info["machine_id"] = self.fingerprint()
        # Classify the runtime: Jupyter (and which host), terminal, or script.
        try:
            import importlib.util
            ipython_spec = importlib.util.find_spec("IPython")
            if ipython_spec:
                from IPython import get_ipython
                ipython = get_ipython()
                if ipython is not None:
                    info["environment"] ="jupyter"
                    if'google.colab' in sys.modules:
                        info["hosting_service"] ="colab"
                    elif'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
                        info["hosting_service"] ="kaggle"
                    else:
                        info["hosting_service"] ="local_jupyter"
                elif sys.stdout.isatty():
                    info["environment"] ="terminal"
                else:
                    info["environment"] ="script"
            elif sys.stdout.isatty():
                info["environment"] ="terminal"
            else:
                info["environment"] ="script"
        except:
            # Environment detection is best-effort only.
            info["environment"] ="unknown"
        # Hardware facts; psutil is optional, so failures are ignored.
        try:
            import psutil
            info["cpu_count"] = os.cpu_count()
            memory_total = psutil.virtual_memory().total
            info["memory_gb"] = round(memory_total / (1024**3), 1)
        except Exception:
            pass
        is_colab ='google.colab' in sys.modules
        if is_colab:
            info["is_colab"] = True
            # Side effect: may schedule delayed Google Drive auth (see below).
            self.detect_colab_with_delayed_auth()
        # Deeper, heavier probes; any failure is recorded rather than raised.
        try:
            info["commercial_usage"] = self.enhanced_commercial_detection()
            info["project_context"] = self.analyze_project_structure()
            info["git_info"] = self.analyze_git_info()
            info["usage_pattern"] = self.detect_usage_pattern()
            info["dependencies"] = self.analyze_dependencies()
        except Exception as e:
            info["detection_error"] = str(e)
        self.cache = info
        self.last_examination = current_time
        return info
110
+
111
    def fingerprint(self):
        """Return a stable machine identifier, computing and caching it once.

        Resolution order: in-memory value → vnai device registry →
        persisted ``machine_id.txt`` → MD5 of platform facts → random UUID.
        """
        if self.machine_id:
            return self.machine_id
        # Prefer an id already registered by the vnai device registry.
        try:
            from vnai.scope.device import device_registry
            registry = device_registry.get_registry()
            if registry and registry.get('device_id'):
                self.machine_id = registry['device_id']
                return self.machine_id
        except Exception:
            pass
        # Fall back to the id persisted by a previous run.
        if self.machine_id_path.exists():
            try:
                with open(self.machine_id_path,"r") as f:
                    self.machine_id = f.read().strip()
                return self.machine_id
            except Exception:
                pass
        # Side effect: on Colab this may kick off delayed Drive auth that
        # later overwrites machine_id with a per-user id.
        is_colab = self.detect_colab_with_delayed_auth()
        try:
            # MD5 is acceptable here: this is a fingerprint, not a security hash.
            system_info = platform.node() + platform.platform() + platform.machine()
            self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
        except Exception:
            self.machine_id = str(uuid.uuid4())
        return self.machine_id
136
+
137
+ def detect_hosting(self):
138
+ hosting_markers = {
139
+ "COLAB_GPU":"Google Colab",
140
+ "KAGGLE_KERNEL_RUN_TYPE":"Kaggle",
141
+ "BINDER_SERVICE_HOST":"Binder",
142
+ "CODESPACE_NAME":"GitHub Codespaces",
143
+ "STREAMLIT_SERVER_HEADLESS":"Streamlit Cloud",
144
+ "CLOUD_SHELL":"Cloud Shell"
145
+ }
146
+ for env_var, host_name in hosting_markers.items():
147
+ if env_var in os.environ:
148
+ return host_name
149
+ if'google.colab' in sys.modules:
150
+ return"Google Colab"
151
+ return"local"
152
+
153
+ def detect_commercial_usage(self):
154
+ commercial_indicators = {
155
+ "env_domains": [".com",".io",".co","enterprise","corp","inc"],
156
+ "file_patterns": ["invoice","payment","customer","client","product","sale"],
157
+ "env_vars": ["COMPANY","BUSINESS","ENTERPRISE","CORPORATE"],
158
+ "dir_patterns": ["company","business","enterprise","corporate","client"]
159
+ }
160
+ env_values =" ".join(os.environ.values()).lower()
161
+ domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
162
+ env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
163
+ current_dir = os.getcwd().lower()
164
+ dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
165
+ try:
166
+ files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
167
+ file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
168
+ except:
169
+ file_match = False
170
+ indicators = [domain_match, env_var_match, dir_match, file_match]
171
+ commercial_probability = sum(indicators) / len(indicators)
172
+ return {
173
+ "likely_commercial": commercial_probability > 0.3,
174
+ "commercial_probability": commercial_probability,
175
+ "commercial_indicators": {
176
+ "domain_match": domain_match,
177
+ "env_var_match": env_var_match,
178
+ "dir_match": dir_match,
179
+ "file_match": file_match
180
+ }
181
+ }
182
+
183
+ def scan_packages(self):
184
+ package_groups = {
185
+ "vnstock_family": [
186
+ "vnstock",
187
+ "vnstock3",
188
+ "vnstock_ezchart",
189
+ "vnstock_data_pro",
190
+ "vnstock_market_data_pipeline",
191
+ "vnstock_ta",
192
+ "vnii",
193
+ "vnai"
194
+ ],
195
+ "analytics": [
196
+ "openbb",
197
+ "pandas_ta"
198
+ ],
199
+ "static_charts": [
200
+ "matplotlib",
201
+ "seaborn",
202
+ "altair"
203
+ ],
204
+ "dashboard": [
205
+ "streamlit",
206
+ "voila",
207
+ "panel",
208
+ "shiny",
209
+ "dash"
210
+ ],
211
+ "interactive_charts": [
212
+ "mplfinance",
213
+ "plotly",
214
+ "plotline",
215
+ "bokeh",
216
+ "pyecharts",
217
+ "highcharts-core",
218
+ "highcharts-stock",
219
+ "mplchart"
220
+ ],
221
+ "datafeed": [
222
+ "yfinance",
223
+ "alpha_vantage",
224
+ "pandas-datareader",
225
+ "investpy"
226
+ ],
227
+ "official_api": [
228
+ "ssi-fc-data",
229
+ "ssi-fctrading"
230
+ ],
231
+ "risk_return": [
232
+ "pyfolio",
233
+ "empyrical",
234
+ "quantstats",
235
+ "financetoolkit"
236
+ ],
237
+ "machine_learning": [
238
+ "scipy",
239
+ "sklearn",
240
+ "statsmodels",
241
+ "pytorch",
242
+ "tensorflow",
243
+ "keras",
244
+ "xgboost"
245
+ ],
246
+ "indicators": [
247
+ "stochastic",
248
+ "talib",
249
+ "tqdm",
250
+ "finta",
251
+ "financetoolkit",
252
+ "tulipindicators"
253
+ ],
254
+ "backtesting": [
255
+ "vectorbt",
256
+ "backtesting",
257
+ "bt",
258
+ "zipline",
259
+ "pyalgotrade",
260
+ "backtrader",
261
+ "pybacktest",
262
+ "fastquant",
263
+ "lean",
264
+ "ta",
265
+ "finmarketpy",
266
+ "qstrader"
267
+ ],
268
+ "server": [
269
+ "fastapi",
270
+ "flask",
271
+ "uvicorn",
272
+ "gunicorn"
273
+ ],
274
+ "framework": [
275
+ "lightgbm",
276
+ "catboost",
277
+ "django"
278
+ ]
279
+ }
280
+ installed = {}
281
+ for category, packages in package_groups.items():
282
+ installed[category] = []
283
+ for pkg in packages:
284
+ try:
285
+ version = importlib.metadata.version(pkg)
286
+ installed[category].append({"name": pkg,"version": version})
287
+ except:
288
+ pass
289
+ return installed
290
+
291
+ def setup_vnstock_environment(self):
292
+ env_file = self.id_dir /"environment.json"
293
+ env_data = {
294
+ "accepted_agreement": True,
295
+ "timestamp": datetime.now().isoformat(),
296
+ "machine_id": self.fingerprint()
297
+ }
298
+ try:
299
+ with open(env_file,"w") as f:
300
+ import json
301
+ json.dump(env_data, f)
302
+ return True
303
+ except Exception as e:
304
+ print(f"Failed to set up vnstock environment: {e}")
305
+ return False
306
+
307
    def detect_colab_with_delayed_auth(self, immediate=False):
        """Return True when running on Google Colab; arrange user-id auth.

        On Colab the first call runs ``get_or_create_user_id`` either now
        (``immediate=True``) or from a daemon thread after a 5-minute delay,
        persisting the resulting id to ``machine_id_path``.

        Args:
            immediate: run the auth flow synchronously instead of delayed.
        """
        is_colab ='google.colab' in sys.modules
        if is_colab and not self._colab_auth_triggered:
            if immediate:
                self._colab_auth_triggered = True
                user_id = self.get_or_create_user_id()
                if user_id and user_id != self.machine_id:
                    self.machine_id = user_id
                    try:
                        with open(self.machine_id_path,"w") as f:
                            f.write(user_id)
                    except:
                        pass
            else:

                def delayed_auth():
                    # Wait 5 minutes so the Drive-auth prompt does not
                    # interrupt the user's session startup.
                    time.sleep(300)
                    user_id = self.get_or_create_user_id()
                    if user_id and user_id != self.machine_id:
                        self.machine_id = user_id
                        try:
                            with open(self.machine_id_path,"w") as f:
                                f.write(user_id)
                        except:
                            pass
                thread = threading.Thread(target=delayed_auth, daemon=True)
                thread.start()
        return is_colab
335
+
336
    def get_or_create_user_id(self):
        """Return a persistent per-user id stored on Google Drive (Colab only).

        Mounts Drive (prompting the user), then reads — or creates — the file
        ``MyDrive/.vnstock/user_id.txt``. Falls back to the current
        ``machine_id`` when Drive is unavailable or the user declines.
        """
        if self._colab_auth_triggered:
            # Auth already ran (or is running): never prompt twice.
            return self.machine_id
        try:
            from google.colab import drive
            # User-facing prompts are intentionally in Vietnamese.
            print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
            print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
            self._colab_auth_triggered = True
            drive.mount('/content/drive')
            id_path ='/content/drive/MyDrive/.vnstock/user_id.txt'
            if os.path.exists(id_path):
                with open(id_path,'r') as f:
                    return f.read().strip()
            else:
                user_id = str(uuid.uuid4())
                os.makedirs(os.path.dirname(id_path), exist_ok=True)
                with open(id_path,'w') as f:
                    f.write(user_id)
                return user_id
        except Exception as e:
            return self.machine_id
357
+
358
+ def analyze_project_structure(self):
359
+ current_dir = os.getcwd()
360
+ project_indicators = {
361
+ "commercial_app": ["app","services","products","customers","billing"],
362
+ "financial_tool": ["portfolio","backtesting","trading","strategy"],
363
+ "data_science": ["models","notebooks","datasets","visualization"],
364
+ "educational": ["examples","lectures","assignments","slides"]
365
+ }
366
+ project_type = {}
367
+ for category, markers in project_indicators.items():
368
+ match_count = 0
369
+ for marker in markers:
370
+ if os.path.exists(os.path.join(current_dir, marker)):
371
+ match_count += 1
372
+ if len(markers) > 0:
373
+ project_type[category] = match_count / len(markers)
374
+ try:
375
+ root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
376
+ root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
377
+ file_markers = {
378
+ "python_project": ["setup.py","pyproject.toml","requirements.txt"],
379
+ "data_science": ["notebook.ipynb",".ipynb_checkpoints"],
380
+ "web_app": ["app.py","wsgi.py","manage.py","server.py"],
381
+ "finance_app": ["portfolio.py","trading.py","backtest.py"],
382
+ }
383
+ file_project_type ="unknown"
384
+ for ptype, markers in file_markers.items():
385
+ if any(marker in root_files for marker in markers):
386
+ file_project_type = ptype
387
+ break
388
+ frameworks = []
389
+ framework_markers = {
390
+ "django": ["manage.py","settings.py"],
391
+ "flask": ["app.py","wsgi.py"],
392
+ "streamlit": ["streamlit_app.py","app.py"],
393
+ "fastapi": ["main.py","app.py"],
394
+ }
395
+ for framework, markers in framework_markers.items():
396
+ if any(marker in root_files for marker in markers):
397
+ frameworks.append(framework)
398
+ except Exception as e:
399
+ root_files = []
400
+ root_dirs = []
401
+ file_project_type ="unknown"
402
+ frameworks = []
403
+ return {
404
+ "project_dir": current_dir,
405
+ "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else"unknown",
406
+ "file_type": file_project_type,
407
+ "is_git_repo":".git" in (root_dirs if'root_dirs' in locals() else []),
408
+ "frameworks": frameworks,
409
+ "file_count": len(root_files) if'root_files' in locals() else 0,
410
+ "directory_count": len(root_dirs) if'root_dirs' in locals() else 0,
411
+ "type_confidence": project_type
412
+ }
413
+
414
    def analyze_git_info(self):
        """Collect repository metadata (remote domain, owner, license, counts).

        Shells out to ``git``; any failure — git missing (FileNotFoundError),
        not a repo, parse errors — yields ``{"has_git": False}``.
        """
        try:
            result = subprocess.run(["git","rev-parse","--is-inside-work-tree"],
                                  capture_output=True, text=True)
            if result.returncode != 0:
                return {"has_git": False}
            repo_root = subprocess.run(["git","rev-parse","--show-toplevel"],
                                      capture_output=True, text=True)
            repo_path = repo_root.stdout.strip() if repo_root.stdout else None
            repo_name = os.path.basename(repo_path) if repo_path else None
            # Detect a license file at the repo root and roughly classify it.
            has_license = False
            license_type ="unknown"
            if repo_path:
                license_files = [
                    os.path.join(repo_path,"LICENSE"),
                    os.path.join(repo_path,"LICENSE.txt"),
                    os.path.join(repo_path,"LICENSE.md")
                ]
                for license_file in license_files:
                    if os.path.exists(license_file):
                        has_license = True
                        try:
                            with open(license_file,'r') as f:
                                content = f.read().lower()
                                if"mit license" in content:
                                    license_type ="MIT"
                                elif"apache license" in content:
                                    license_type ="Apache"
                                elif"gnu general public" in content:
                                    license_type ="GPL"
                                elif"bsd " in content:
                                    license_type ="BSD"
                        except:
                            pass
                        break
            # Extract the remote host from the origin URL (ssh or http forms).
            remote = subprocess.run(["git","config","--get","remote.origin.url"],
                                   capture_output=True, text=True)
            remote_url = remote.stdout.strip() if remote.stdout else None
            if remote_url:
                remote_url = remote_url.strip()
            domain = None
            if remote_url:
                if remote_url.startswith('git@') or'@' in remote_url and':' in remote_url.split('@')[1]:
                    # ssh form: git@host:owner/repo.git
                    domain = remote_url.split('@')[1].split(':')[0]
                elif remote_url.startswith('http'):
                    # http(s) form, possibly with user@ credentials in the URL.
                    url_parts = remote_url.split('//')
                    if len(url_parts) > 1:
                        auth_and_domain = url_parts[1].split('/', 1)[0]
                        if'@' in auth_and_domain:
                            domain = auth_and_domain.split('@')[-1]
                        else:
                            domain = auth_and_domain
                else:
                    # Anything else: best-effort regex over both URL shapes.
                    import re
                    domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
                    if domain_match:
                        domain = domain_match.group(1) or domain_match.group(2)
            # NOTE(review): repo_name (derived from the repo directory above)
            # is clobbered here and only recomputed for GitHub remotes, so
            # non-GitHub repos report repo_name=None. Confirm before changing.
            owner = None
            repo_name = None
            if domain:
                if"github" in domain:
                    if':' in remote_url and'@' in remote_url:
                        # ssh form: owner/repo after the colon.
                        parts = remote_url.split(':')[-1].split('/')
                        if len(parts) >= 2:
                            owner = parts[0]
                            repo_name = parts[1].replace('.git','')
                    else:
                        url_parts = remote_url.split('//')
                        if len(url_parts) > 1:
                            path_parts = url_parts[1].split('/')
                            if len(path_parts) >= 3:
                                domain_part = path_parts[0]
                                # NOTE(review): both branches assign 1 — the
                                # '@' check is currently a no-op.
                                if'@' in domain_part:
                                    owner_index = 1
                                else:
                                    owner_index = 1
                                if len(path_parts) > owner_index:
                                    owner = path_parts[owner_index]
                                if len(path_parts) > owner_index + 1:
                                    repo_name = path_parts[owner_index + 1].replace('.git','')
            commit_count = subprocess.run(["git","rev-list","--count","HEAD"],
                                         capture_output=True, text=True)
            branch_count = subprocess.run(["git","branch","--list"],
                                         capture_output=True, text=True)
            # One branch per output line; 0 when git produced no output.
            branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
            return {
                "domain": domain,
                "owner": owner,
                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
                "branch_count": branch_count,
                "has_git": True,
                "repo_path": repo_path if'repo_path' in locals() else None,
                "repo_name": repo_name,
                "has_license": has_license if'has_license' in locals() else False,
                "license_type": license_type if'license_type' in locals() else"unknown"
            }
        except Exception as e:
            pass
        return {"has_git": False}
513
+
514
+ def detect_usage_pattern(self):
515
+ current_time = datetime.now()
516
+ is_weekday = current_time.weekday() < 5
517
+ hour = current_time.hour
518
+ is_business_hours = 9 <= hour <= 18
519
+ return {
520
+ "business_hours_usage": is_weekday and is_business_hours,
521
+ "weekday": is_weekday,
522
+ "hour": hour,
523
+ "timestamp": current_time.isoformat()
524
+ }
525
+
526
    def enhanced_commercial_detection(self):
        """Combine several heuristics into a single commercial-usage score.

        Blends the basic environment scan with commercial-framework and
        database file checks, the git remote domain, project structure, and
        business-hours usage; returns the averaged score plus each indicator.
        """
        basic = self.detect_commercial_usage()
        try:
            project_files = os.listdir(os.getcwd())
            commercial_frameworks = ["django-oscar","opencart","magento",
                                   "saleor","odoo","shopify","woocommerce"]
            framework_match = False
            for framework in commercial_frameworks:
                if any(framework in f for f in project_files):
                    framework_match = True
                    break
            db_files = [f for f in project_files if"database" in f.lower()
                       or"db_config" in f.lower() or f.endswith(".db")]
            has_database = len(db_files) > 0
        except:
            # Unreadable directory: assume neither indicator.
            framework_match = False
            has_database = False
        domain_check = self.analyze_git_info()
        domain_is_commercial = False
        if domain_check and domain_check.get("domain"):
            commercial_tlds = [".com",".io",".co",".org",".net"]
            domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
        project_structure = self.analyze_project_structure()
        # Mixed bool/float indicators: True counts as 1.0, False as 0
        # (via the int/float branch, since bool is an int subclass),
        # floats count as themselves.
        indicators = [
            basic["commercial_probability"],
            framework_match,
            has_database,
            domain_is_commercial,
            project_structure.get("type_confidence", {}).get("commercial_app", 0),
            self.detect_usage_pattern()["business_hours_usage"]
        ]
        indicators = [i for i in indicators if i is not None]
        if indicators:
            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
                       for i in indicators) / len(indicators)
        else:
            score = 0
        return {
            "commercial_probability": score,
            "likely_commercial": score > 0.4,
            "indicators": {
                "basic_indicators": basic["commercial_indicators"],
                "framework_match": framework_match,
                "has_database": has_database,
                "domain_is_commercial": domain_is_commercial,
                "project_structure": project_structure.get("detected_type"),
                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
            }
        }
575
+
576
+ def analyze_dependencies(self):
577
+ try:
578
+ try:
579
+ from importlib.metadata import distributions
580
+ except ImportError:
581
+ from importlib_metadata import distributions
582
+ enterprise_packages = [
583
+ "snowflake-connector-python","databricks","azure",
584
+ "aws","google-cloud","stripe","atlassian",
585
+ "salesforce","bigquery","tableau","sap"
586
+ ]
587
+ commercial_deps = []
588
+ for dist in distributions():
589
+ pkg_name = dist.metadata['Name'].lower()
590
+ if any(ent in pkg_name for ent in enterprise_packages):
591
+ commercial_deps.append({"name": pkg_name,"version": dist.version})
592
+ return {
593
+ "has_commercial_deps": len(commercial_deps) > 0,
594
+ "commercial_deps_count": len(commercial_deps),
595
+ "commercial_deps": commercial_deps
596
+ }
597
+ except:
598
+ return {"has_commercial_deps": False}
599
# Shared module-level singleton; importing this module triggers the first
# environment examination via Inspector._initialize().
inspector = Inspector()