vnai 2.1.7__py3-none-any.whl → 2.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py CHANGED
@@ -1,579 +1,579 @@
1
- import os
2
- import sys
3
- import platform
4
- import uuid
5
- import hashlib
6
- import psutil
7
- import threading
8
- import time
9
- import importlib.metadata
10
- from datetime import datetime
11
- import subprocess
12
- from pathlib import Path
13
-
14
class Inspector:
    """Process-wide singleton that inspects the runtime environment
    (machine id, hosting service, usage heuristics)."""

    _instance = None
    # Created eagerly at class-definition time. The original built the lock
    # lazily inside __new__ ("if cls._lock is None: cls._lock = Lock()"),
    # which is itself a race: two threads could both observe None and install
    # different locks, defeating the critical section below.
    _lock = threading.Lock()

    def __new__(cls):
        """Return the shared Inspector instance, creating it on first use."""
        # Double-checked locking: skip the lock entirely once the singleton
        # exists; re-check inside the lock to avoid duplicate construction.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    # Initialize fully BEFORE publishing: the original
                    # assigned cls._instance first, so a failing
                    # _initialize() left a half-built singleton cached.
                    instance._initialize()
                    cls._instance = instance
        return cls._instance
27
-
28
- def _initialize(self):
29
- self.cache = {}
30
- self.cache_ttl = 3600
31
- self.last_examination = 0
32
- self.machine_id = None
33
- self._colab_auth_triggered = False
34
- self.home_dir = Path.home()
35
- self.project_dir = self.home_dir /".vnstock"
36
- self.project_dir.mkdir(exist_ok=True)
37
- self.id_dir = self.project_dir /'id'
38
- self.id_dir.mkdir(exist_ok=True)
39
- self.machine_id_path = self.id_dir /"machine_id.txt"
40
- self.examine()
41
-
42
- def examine(self, force_refresh=False):
43
- current_time = time.time()
44
- if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
45
- return self.cache
46
- info = {
47
- "timestamp": datetime.now().isoformat(),
48
- "python_version": platform.python_version(),
49
- "os_name": platform.system(),
50
- "platform": platform.platform()
51
- }
52
- info["machine_id"] = self.fingerprint()
53
- try:
54
- import importlib.util
55
- ipython_spec = importlib.util.find_spec("IPython")
56
- if ipython_spec:
57
- from IPython import get_ipython
58
- ipython = get_ipython()
59
- if ipython is not None:
60
- info["environment"] ="jupyter"
61
- if'google.colab' in sys.modules:
62
- info["hosting_service"] ="colab"
63
- elif'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
64
- info["hosting_service"] ="kaggle"
65
- else:
66
- info["hosting_service"] ="local_jupyter"
67
- elif sys.stdout.isatty():
68
- info["environment"] ="terminal"
69
- else:
70
- info["environment"] ="script"
71
- elif sys.stdout.isatty():
72
- info["environment"] ="terminal"
73
- else:
74
- info["environment"] ="script"
75
- except:
76
- info["environment"] ="unknown"
77
- try:
78
- info["cpu_count"] = os.cpu_count()
79
- info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
80
- except:
81
- pass
82
- is_colab ='google.colab' in sys.modules
83
- if is_colab:
84
- info["is_colab"] = True
85
- self.detect_colab_with_delayed_auth()
86
- try:
87
- info["commercial_usage"] = self.enhanced_commercial_detection()
88
- info["project_context"] = self.analyze_project_structure()
89
- info["git_info"] = self.analyze_git_info()
90
- info["usage_pattern"] = self.detect_usage_pattern()
91
- info["dependencies"] = self.analyze_dependencies()
92
- except Exception as e:
93
- info["detection_error"] = str(e)
94
- self.cache = info
95
- self.last_examination = current_time
96
- return info
97
-
98
- def fingerprint(self):
99
- if self.machine_id:
100
- return self.machine_id
101
- if self.machine_id_path.exists():
102
- try:
103
- with open(self.machine_id_path,"r") as f:
104
- self.machine_id = f.read().strip()
105
- return self.machine_id
106
- except:
107
- pass
108
- is_colab = self.detect_colab_with_delayed_auth()
109
- try:
110
- system_info = platform.node() + platform.platform() + platform.machine()
111
- self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
112
- except:
113
- self.machine_id = str(uuid.uuid4())
114
- try:
115
- with open(self.machine_id_path,"w") as f:
116
- f.write(self.machine_id)
117
- except:
118
- pass
119
- return self.machine_id
120
-
121
- def detect_hosting(self):
122
- hosting_markers = {
123
- "COLAB_GPU":"Google Colab",
124
- "KAGGLE_KERNEL_RUN_TYPE":"Kaggle",
125
- "BINDER_SERVICE_HOST":"Binder",
126
- "CODESPACE_NAME":"GitHub Codespaces",
127
- "STREAMLIT_SERVER_HEADLESS":"Streamlit Cloud",
128
- "CLOUD_SHELL":"Cloud Shell"
129
- }
130
- for env_var, host_name in hosting_markers.items():
131
- if env_var in os.environ:
132
- return host_name
133
- if'google.colab' in sys.modules:
134
- return"Google Colab"
135
- return"local"
136
-
137
- def detect_commercial_usage(self):
138
- commercial_indicators = {
139
- "env_domains": [".com",".io",".co","enterprise","corp","inc"],
140
- "file_patterns": ["invoice","payment","customer","client","product","sale"],
141
- "env_vars": ["COMPANY","BUSINESS","ENTERPRISE","CORPORATE"],
142
- "dir_patterns": ["company","business","enterprise","corporate","client"]
143
- }
144
- env_values =" ".join(os.environ.values()).lower()
145
- domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
146
- env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
147
- current_dir = os.getcwd().lower()
148
- dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
149
- try:
150
- files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
151
- file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
152
- except:
153
- file_match = False
154
- indicators = [domain_match, env_var_match, dir_match, file_match]
155
- commercial_probability = sum(indicators) / len(indicators)
156
- return {
157
- "likely_commercial": commercial_probability > 0.3,
158
- "commercial_probability": commercial_probability,
159
- "commercial_indicators": {
160
- "domain_match": domain_match,
161
- "env_var_match": env_var_match,
162
- "dir_match": dir_match,
163
- "file_match": file_match
164
- }
165
- }
166
-
167
- def scan_packages(self):
168
- package_groups = {
169
- "vnstock_family": [
170
- "vnstock",
171
- "vnstock3",
172
- "vnstock_ezchart",
173
- "vnstock_data_pro",
174
- "vnstock_market_data_pipeline",
175
- "vnstock_ta",
176
- "vnii",
177
- "vnai"
178
- ],
179
- "analytics": [
180
- "openbb",
181
- "pandas_ta"
182
- ],
183
- "static_charts": [
184
- "matplotlib",
185
- "seaborn",
186
- "altair"
187
- ],
188
- "dashboard": [
189
- "streamlit",
190
- "voila",
191
- "panel",
192
- "shiny",
193
- "dash"
194
- ],
195
- "interactive_charts": [
196
- "mplfinance",
197
- "plotly",
198
- "plotline",
199
- "bokeh",
200
- "pyecharts",
201
- "highcharts-core",
202
- "highcharts-stock",
203
- "mplchart"
204
- ],
205
- "datafeed": [
206
- "yfinance",
207
- "alpha_vantage",
208
- "pandas-datareader",
209
- "investpy"
210
- ],
211
- "official_api": [
212
- "ssi-fc-data",
213
- "ssi-fctrading"
214
- ],
215
- "risk_return": [
216
- "pyfolio",
217
- "empyrical",
218
- "quantstats",
219
- "financetoolkit"
220
- ],
221
- "machine_learning": [
222
- "scipy",
223
- "sklearn",
224
- "statsmodels",
225
- "pytorch",
226
- "tensorflow",
227
- "keras",
228
- "xgboost"
229
- ],
230
- "indicators": [
231
- "stochastic",
232
- "talib",
233
- "tqdm",
234
- "finta",
235
- "financetoolkit",
236
- "tulipindicators"
237
- ],
238
- "backtesting": [
239
- "vectorbt",
240
- "backtesting",
241
- "bt",
242
- "zipline",
243
- "pyalgotrade",
244
- "backtrader",
245
- "pybacktest",
246
- "fastquant",
247
- "lean",
248
- "ta",
249
- "finmarketpy",
250
- "qstrader"
251
- ],
252
- "server": [
253
- "fastapi",
254
- "flask",
255
- "uvicorn",
256
- "gunicorn"
257
- ],
258
- "framework": [
259
- "lightgbm",
260
- "catboost",
261
- "django"
262
- ]
263
- }
264
- installed = {}
265
- for category, packages in package_groups.items():
266
- installed[category] = []
267
- for pkg in packages:
268
- try:
269
- version = importlib.metadata.version(pkg)
270
- installed[category].append({"name": pkg,"version": version})
271
- except:
272
- pass
273
- return installed
274
-
275
- def setup_vnstock_environment(self):
276
- env_file = self.id_dir /"environment.json"
277
- env_data = {
278
- "accepted_agreement": True,
279
- "timestamp": datetime.now().isoformat(),
280
- "machine_id": self.fingerprint()
281
- }
282
- try:
283
- with open(env_file,"w") as f:
284
- import json
285
- json.dump(env_data, f)
286
- return True
287
- except Exception as e:
288
- print(f"Failed to set up vnstock environment: {e}")
289
- return False
290
-
291
- def detect_colab_with_delayed_auth(self, immediate=False):
292
- is_colab ='google.colab' in sys.modules
293
- if is_colab and not self._colab_auth_triggered:
294
- if immediate:
295
- self._colab_auth_triggered = True
296
- user_id = self.get_or_create_user_id()
297
- if user_id and user_id != self.machine_id:
298
- self.machine_id = user_id
299
- try:
300
- with open(self.machine_id_path,"w") as f:
301
- f.write(user_id)
302
- except:
303
- pass
304
- else:
305
-
306
- def delayed_auth():
307
- time.sleep(300)
308
- user_id = self.get_or_create_user_id()
309
- if user_id and user_id != self.machine_id:
310
- self.machine_id = user_id
311
- try:
312
- with open(self.machine_id_path,"w") as f:
313
- f.write(user_id)
314
- except:
315
- pass
316
- thread = threading.Thread(target=delayed_auth, daemon=True)
317
- thread.start()
318
- return is_colab
319
-
320
- def get_or_create_user_id(self):
321
- if self._colab_auth_triggered:
322
- return self.machine_id
323
- try:
324
- from google.colab import drive
325
- print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
326
- print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
327
- self._colab_auth_triggered = True
328
- drive.mount('/content/drive')
329
- id_path ='/content/drive/MyDrive/.vnstock/user_id.txt'
330
- if os.path.exists(id_path):
331
- with open(id_path,'r') as f:
332
- return f.read().strip()
333
- else:
334
- user_id = str(uuid.uuid4())
335
- os.makedirs(os.path.dirname(id_path), exist_ok=True)
336
- with open(id_path,'w') as f:
337
- f.write(user_id)
338
- return user_id
339
- except Exception as e:
340
- return self.machine_id
341
-
342
- def analyze_project_structure(self):
343
- current_dir = os.getcwd()
344
- project_indicators = {
345
- "commercial_app": ["app","services","products","customers","billing"],
346
- "financial_tool": ["portfolio","backtesting","trading","strategy"],
347
- "data_science": ["models","notebooks","datasets","visualization"],
348
- "educational": ["examples","lectures","assignments","slides"]
349
- }
350
- project_type = {}
351
- for category, markers in project_indicators.items():
352
- match_count = 0
353
- for marker in markers:
354
- if os.path.exists(os.path.join(current_dir, marker)):
355
- match_count += 1
356
- if len(markers) > 0:
357
- project_type[category] = match_count / len(markers)
358
- try:
359
- root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
360
- root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
361
- file_markers = {
362
- "python_project": ["setup.py","pyproject.toml","requirements.txt"],
363
- "data_science": ["notebook.ipynb",".ipynb_checkpoints"],
364
- "web_app": ["app.py","wsgi.py","manage.py","server.py"],
365
- "finance_app": ["portfolio.py","trading.py","backtest.py"],
366
- }
367
- file_project_type ="unknown"
368
- for ptype, markers in file_markers.items():
369
- if any(marker in root_files for marker in markers):
370
- file_project_type = ptype
371
- break
372
- frameworks = []
373
- framework_markers = {
374
- "django": ["manage.py","settings.py"],
375
- "flask": ["app.py","wsgi.py"],
376
- "streamlit": ["streamlit_app.py","app.py"],
377
- "fastapi": ["main.py","app.py"],
378
- }
379
- for framework, markers in framework_markers.items():
380
- if any(marker in root_files for marker in markers):
381
- frameworks.append(framework)
382
- except Exception as e:
383
- root_files = []
384
- root_dirs = []
385
- file_project_type ="unknown"
386
- frameworks = []
387
- return {
388
- "project_dir": current_dir,
389
- "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else"unknown",
390
- "file_type": file_project_type,
391
- "is_git_repo":".git" in (root_dirs if'root_dirs' in locals() else []),
392
- "frameworks": frameworks,
393
- "file_count": len(root_files) if'root_files' in locals() else 0,
394
- "directory_count": len(root_dirs) if'root_dirs' in locals() else 0,
395
- "type_confidence": project_type
396
- }
397
-
398
    def analyze_git_info(self):
        """Best-effort summary of the enclosing git repository via `git` subprocesses.

        Returns ``{"has_git": False}`` when not inside a work tree or on any
        error; otherwise a dict with the remote's domain/owner, repo
        name/path, commit and branch counts, and LICENSE detection.
        """
        try:
            # Cheap membership probe; non-zero exit means "not a git repo".
            result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
                                    capture_output=True, text=True)
            if result.returncode != 0:
                return {"has_git": False}
            repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
                                       capture_output=True, text=True)
            repo_path = repo_root.stdout.strip() if repo_root.stdout else None
            repo_name = os.path.basename(repo_path) if repo_path else None
            has_license = False
            license_type = "unknown"
            if repo_path:
                # Look for a LICENSE file at the repo root and sniff its flavor.
                license_files = [
                    os.path.join(repo_path, "LICENSE"),
                    os.path.join(repo_path, "LICENSE.txt"),
                    os.path.join(repo_path, "LICENSE.md")
                ]
                for license_file in license_files:
                    if os.path.exists(license_file):
                        has_license = True
                        try:
                            with open(license_file, 'r') as f:
                                content = f.read().lower()
                                if "mit license" in content:
                                    license_type = "MIT"
                                elif "apache license" in content:
                                    license_type = "Apache"
                                elif "gnu general public" in content:
                                    license_type = "GPL"
                                elif "bsd " in content:
                                    license_type = "BSD"
                        except:
                            pass
                        break
            remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
                                    capture_output=True, text=True)
            remote_url = remote.stdout.strip() if remote.stdout else None
            if remote_url:
                remote_url = remote_url.strip()
            # Extract the host portion from ssh (git@host:...) or http(s) URLs.
            domain = None
            if remote_url:
                # NOTE(review): `or` binds looser than `and`, so the
                # split('@')[1] below only evaluates when '@' is present --
                # no IndexError risk despite appearances.
                if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
                    domain = remote_url.split('@')[1].split(':')[0]
                elif remote_url.startswith('http'):
                    url_parts = remote_url.split('//')
                    if len(url_parts) > 1:
                        auth_and_domain = url_parts[1].split('/', 1)[0]
                        # Strip embedded credentials ("user:token@host").
                        if '@' in auth_and_domain:
                            domain = auth_and_domain.split('@')[-1]
                        else:
                            domain = auth_and_domain
                else:
                    # Fallback: regex over any other remote URL shape.
                    import re
                    domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
                    if domain_match:
                        domain = domain_match.group(1) or domain_match.group(2)
            owner = None
            # NOTE(review): repo_name derived from the path above is discarded
            # here and only re-derived for GitHub remotes -- non-GitHub repos
            # report repo_name = None. Kept as written.
            repo_name = None
            if domain:
                if "github" in domain:
                    if ':' in remote_url and '@' in remote_url:
                        # ssh form: git@github.com:owner/repo.git
                        parts = remote_url.split(':')[-1].split('/')
                        if len(parts) >= 2:
                            owner = parts[0]
                            repo_name = parts[1].replace('.git', '')
                    else:
                        # http(s) form: https://github.com/owner/repo.git
                        url_parts = remote_url.split('//')
                        if len(url_parts) > 1:
                            path_parts = url_parts[1].split('/')
                            if len(path_parts) >= 3:
                                domain_part = path_parts[0]
                                # NOTE(review): both branches pick index 1, so
                                # this if/else is redundant -- kept as written.
                                if '@' in domain_part:
                                    owner_index = 1
                                else:
                                    owner_index = 1
                                if len(path_parts) > owner_index:
                                    owner = path_parts[owner_index]
                                if len(path_parts) > owner_index + 1:
                                    repo_name = path_parts[owner_index + 1].replace('.git', '')
            commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
                                          capture_output=True, text=True)
            branch_count = subprocess.run(["git", "branch", "--list"],
                                          capture_output=True, text=True)
            # One branch per output line of `git branch --list`.
            branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
            return {
                "domain": domain,
                "owner": owner,
                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
                "branch_count": branch_count,
                "has_git": True,
                # NOTE(review): these locals() guards are redundant (the names
                # are always bound on this path) -- kept as written.
                "repo_path": repo_path if 'repo_path' in locals() else None,
                "repo_name": repo_name,
                "has_license": has_license if 'has_license' in locals() else False,
                "license_type": license_type if 'license_type' in locals() else "unknown"
            }
        except Exception as e:
            # Any subprocess/parse failure degrades to "no git info".
            pass
        return {"has_git": False}
497
-
498
- def detect_usage_pattern(self):
499
- current_time = datetime.now()
500
- is_weekday = current_time.weekday() < 5
501
- hour = current_time.hour
502
- is_business_hours = 9 <= hour <= 18
503
- return {
504
- "business_hours_usage": is_weekday and is_business_hours,
505
- "weekday": is_weekday,
506
- "hour": hour,
507
- "timestamp": current_time.isoformat()
508
- }
509
-
510
    def enhanced_commercial_detection(self):
        """Combine several weak signals into one commercial-usage probability.

        Averages the basic heuristic score with framework/database/domain/
        project-structure/business-hours indicators; booleans contribute
        0 or 1 to the mean.
        """
        basic = self.detect_commercial_usage()
        try:
            project_files = os.listdir(os.getcwd())
            # E-commerce / ERP framework names appearing in file names.
            commercial_frameworks = ["django-oscar", "opencart", "magento",
                                     "saleor", "odoo", "shopify", "woocommerce"]
            framework_match = False
            for framework in commercial_frameworks:
                if any(framework in f for f in project_files):
                    framework_match = True
                    break
            # Database-looking files hint at an application, not a notebook.
            db_files = [f for f in project_files if "database" in f.lower()
                        or "db_config" in f.lower() or f.endswith(".db")]
            has_database = len(db_files) > 0
        except:
            framework_match = False
            has_database = False
        domain_check = self.analyze_git_info()
        domain_is_commercial = False
        if domain_check and domain_check.get("domain"):
            # NOTE(review): ".org"/".net" are counted as commercial here even
            # though they frequently are not -- kept as written.
            commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
            domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
        project_structure = self.analyze_project_structure()
        # Mixed list: floats (probabilities) and booleans (hard signals).
        indicators = [
            basic["commercial_probability"],
            framework_match,
            has_database,
            domain_is_commercial,
            project_structure.get("type_confidence", {}).get("commercial_app", 0),
            self.detect_usage_pattern()["business_hours_usage"]
        ]
        indicators = [i for i in indicators if i is not None]
        if indicators:
            # True -> 1.0, numbers pass through, anything else -> 0.
            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
                        for i in indicators) / len(indicators)
        else:
            score = 0
        return {
            "commercial_probability": score,
            "likely_commercial": score > 0.4,
            "indicators": {
                "basic_indicators": basic["commercial_indicators"],
                "framework_match": framework_match,
                "has_database": has_database,
                "domain_is_commercial": domain_is_commercial,
                "project_structure": project_structure.get("detected_type"),
                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
            }
        }
559
-
560
- def analyze_dependencies(self):
561
- try:
562
- import pkg_resources
563
- enterprise_packages = [
564
- "snowflake-connector-python","databricks","azure",
565
- "aws","google-cloud","stripe","atlassian",
566
- "salesforce","bigquery","tableau","sap"
567
- ]
568
- commercial_deps = []
569
- for pkg in pkg_resources.working_set:
570
- if any(ent in pkg.key for ent in enterprise_packages):
571
- commercial_deps.append({"name": pkg.key,"version": pkg.version})
572
- return {
573
- "has_commercial_deps": len(commercial_deps) > 0,
574
- "commercial_deps_count": len(commercial_deps),
575
- "commercial_deps": commercial_deps
576
- }
577
- except:
578
- return {"has_commercial_deps": False}
1
+ import os
2
+ import sys
3
+ import platform
4
+ import uuid
5
+ import hashlib
6
+ import psutil
7
+ import threading
8
+ import time
9
+ import importlib.metadata
10
+ from datetime import datetime
11
+ import subprocess
12
+ from pathlib import Path
13
+
14
+ class Inspector:
15
+ _instance = None
16
+ _lock = None
17
+
18
+ def __new__(cls):
19
+ import threading
20
+ if cls._lock is None:
21
+ cls._lock = threading.Lock()
22
+ with cls._lock:
23
+ if cls._instance is None:
24
+ cls._instance = super(Inspector, cls).__new__(cls)
25
+ cls._instance._initialize()
26
+ return cls._instance
27
+
28
+ def _initialize(self):
29
+ self.cache = {}
30
+ self.cache_ttl = 3600
31
+ self.last_examination = 0
32
+ self.machine_id = None
33
+ self._colab_auth_triggered = False
34
+ self.home_dir = Path.home()
35
+ self.project_dir = self.home_dir /".vnstock"
36
+ self.project_dir.mkdir(exist_ok=True)
37
+ self.id_dir = self.project_dir /'id'
38
+ self.id_dir.mkdir(exist_ok=True)
39
+ self.machine_id_path = self.id_dir /"machine_id.txt"
40
+ self.examine()
41
+
42
+ def examine(self, force_refresh=False):
43
+ current_time = time.time()
44
+ if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
45
+ return self.cache
46
+ info = {
47
+ "timestamp": datetime.now().isoformat(),
48
+ "python_version": platform.python_version(),
49
+ "os_name": platform.system(),
50
+ "platform": platform.platform()
51
+ }
52
+ info["machine_id"] = self.fingerprint()
53
+ try:
54
+ import importlib.util
55
+ ipython_spec = importlib.util.find_spec("IPython")
56
+ if ipython_spec:
57
+ from IPython import get_ipython
58
+ ipython = get_ipython()
59
+ if ipython is not None:
60
+ info["environment"] ="jupyter"
61
+ if'google.colab' in sys.modules:
62
+ info["hosting_service"] ="colab"
63
+ elif'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
64
+ info["hosting_service"] ="kaggle"
65
+ else:
66
+ info["hosting_service"] ="local_jupyter"
67
+ elif sys.stdout.isatty():
68
+ info["environment"] ="terminal"
69
+ else:
70
+ info["environment"] ="script"
71
+ elif sys.stdout.isatty():
72
+ info["environment"] ="terminal"
73
+ else:
74
+ info["environment"] ="script"
75
+ except:
76
+ info["environment"] ="unknown"
77
+ try:
78
+ info["cpu_count"] = os.cpu_count()
79
+ info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
80
+ except:
81
+ pass
82
+ is_colab ='google.colab' in sys.modules
83
+ if is_colab:
84
+ info["is_colab"] = True
85
+ self.detect_colab_with_delayed_auth()
86
+ try:
87
+ info["commercial_usage"] = self.enhanced_commercial_detection()
88
+ info["project_context"] = self.analyze_project_structure()
89
+ info["git_info"] = self.analyze_git_info()
90
+ info["usage_pattern"] = self.detect_usage_pattern()
91
+ info["dependencies"] = self.analyze_dependencies()
92
+ except Exception as e:
93
+ info["detection_error"] = str(e)
94
+ self.cache = info
95
+ self.last_examination = current_time
96
+ return info
97
+
98
+ def fingerprint(self):
99
+ if self.machine_id:
100
+ return self.machine_id
101
+ if self.machine_id_path.exists():
102
+ try:
103
+ with open(self.machine_id_path,"r") as f:
104
+ self.machine_id = f.read().strip()
105
+ return self.machine_id
106
+ except:
107
+ pass
108
+ is_colab = self.detect_colab_with_delayed_auth()
109
+ try:
110
+ system_info = platform.node() + platform.platform() + platform.machine()
111
+ self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
112
+ except:
113
+ self.machine_id = str(uuid.uuid4())
114
+ try:
115
+ with open(self.machine_id_path,"w") as f:
116
+ f.write(self.machine_id)
117
+ except:
118
+ pass
119
+ return self.machine_id
120
+
121
+ def detect_hosting(self):
122
+ hosting_markers = {
123
+ "COLAB_GPU":"Google Colab",
124
+ "KAGGLE_KERNEL_RUN_TYPE":"Kaggle",
125
+ "BINDER_SERVICE_HOST":"Binder",
126
+ "CODESPACE_NAME":"GitHub Codespaces",
127
+ "STREAMLIT_SERVER_HEADLESS":"Streamlit Cloud",
128
+ "CLOUD_SHELL":"Cloud Shell"
129
+ }
130
+ for env_var, host_name in hosting_markers.items():
131
+ if env_var in os.environ:
132
+ return host_name
133
+ if'google.colab' in sys.modules:
134
+ return"Google Colab"
135
+ return"local"
136
+
137
+ def detect_commercial_usage(self):
138
+ commercial_indicators = {
139
+ "env_domains": [".com",".io",".co","enterprise","corp","inc"],
140
+ "file_patterns": ["invoice","payment","customer","client","product","sale"],
141
+ "env_vars": ["COMPANY","BUSINESS","ENTERPRISE","CORPORATE"],
142
+ "dir_patterns": ["company","business","enterprise","corporate","client"]
143
+ }
144
+ env_values =" ".join(os.environ.values()).lower()
145
+ domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
146
+ env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
147
+ current_dir = os.getcwd().lower()
148
+ dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
149
+ try:
150
+ files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
151
+ file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
152
+ except:
153
+ file_match = False
154
+ indicators = [domain_match, env_var_match, dir_match, file_match]
155
+ commercial_probability = sum(indicators) / len(indicators)
156
+ return {
157
+ "likely_commercial": commercial_probability > 0.3,
158
+ "commercial_probability": commercial_probability,
159
+ "commercial_indicators": {
160
+ "domain_match": domain_match,
161
+ "env_var_match": env_var_match,
162
+ "dir_match": dir_match,
163
+ "file_match": file_match
164
+ }
165
+ }
166
+
167
+ def scan_packages(self):
168
+ package_groups = {
169
+ "vnstock_family": [
170
+ "vnstock",
171
+ "vnstock3",
172
+ "vnstock_ezchart",
173
+ "vnstock_data_pro",
174
+ "vnstock_market_data_pipeline",
175
+ "vnstock_ta",
176
+ "vnii",
177
+ "vnai"
178
+ ],
179
+ "analytics": [
180
+ "openbb",
181
+ "pandas_ta"
182
+ ],
183
+ "static_charts": [
184
+ "matplotlib",
185
+ "seaborn",
186
+ "altair"
187
+ ],
188
+ "dashboard": [
189
+ "streamlit",
190
+ "voila",
191
+ "panel",
192
+ "shiny",
193
+ "dash"
194
+ ],
195
+ "interactive_charts": [
196
+ "mplfinance",
197
+ "plotly",
198
+ "plotline",
199
+ "bokeh",
200
+ "pyecharts",
201
+ "highcharts-core",
202
+ "highcharts-stock",
203
+ "mplchart"
204
+ ],
205
+ "datafeed": [
206
+ "yfinance",
207
+ "alpha_vantage",
208
+ "pandas-datareader",
209
+ "investpy"
210
+ ],
211
+ "official_api": [
212
+ "ssi-fc-data",
213
+ "ssi-fctrading"
214
+ ],
215
+ "risk_return": [
216
+ "pyfolio",
217
+ "empyrical",
218
+ "quantstats",
219
+ "financetoolkit"
220
+ ],
221
+ "machine_learning": [
222
+ "scipy",
223
+ "sklearn",
224
+ "statsmodels",
225
+ "pytorch",
226
+ "tensorflow",
227
+ "keras",
228
+ "xgboost"
229
+ ],
230
+ "indicators": [
231
+ "stochastic",
232
+ "talib",
233
+ "tqdm",
234
+ "finta",
235
+ "financetoolkit",
236
+ "tulipindicators"
237
+ ],
238
+ "backtesting": [
239
+ "vectorbt",
240
+ "backtesting",
241
+ "bt",
242
+ "zipline",
243
+ "pyalgotrade",
244
+ "backtrader",
245
+ "pybacktest",
246
+ "fastquant",
247
+ "lean",
248
+ "ta",
249
+ "finmarketpy",
250
+ "qstrader"
251
+ ],
252
+ "server": [
253
+ "fastapi",
254
+ "flask",
255
+ "uvicorn",
256
+ "gunicorn"
257
+ ],
258
+ "framework": [
259
+ "lightgbm",
260
+ "catboost",
261
+ "django"
262
+ ]
263
+ }
264
+ installed = {}
265
+ for category, packages in package_groups.items():
266
+ installed[category] = []
267
+ for pkg in packages:
268
+ try:
269
+ version = importlib.metadata.version(pkg)
270
+ installed[category].append({"name": pkg,"version": version})
271
+ except:
272
+ pass
273
+ return installed
274
+
275
+ def setup_vnstock_environment(self):
276
+ env_file = self.id_dir /"environment.json"
277
+ env_data = {
278
+ "accepted_agreement": True,
279
+ "timestamp": datetime.now().isoformat(),
280
+ "machine_id": self.fingerprint()
281
+ }
282
+ try:
283
+ with open(env_file,"w") as f:
284
+ import json
285
+ json.dump(env_data, f)
286
+ return True
287
+ except Exception as e:
288
+ print(f"Failed to set up vnstock environment: {e}")
289
+ return False
290
+
291
+ def detect_colab_with_delayed_auth(self, immediate=False):
292
+ is_colab ='google.colab' in sys.modules
293
+ if is_colab and not self._colab_auth_triggered:
294
+ if immediate:
295
+ self._colab_auth_triggered = True
296
+ user_id = self.get_or_create_user_id()
297
+ if user_id and user_id != self.machine_id:
298
+ self.machine_id = user_id
299
+ try:
300
+ with open(self.machine_id_path,"w") as f:
301
+ f.write(user_id)
302
+ except:
303
+ pass
304
+ else:
305
+
306
+ def delayed_auth():
307
+ time.sleep(300)
308
+ user_id = self.get_or_create_user_id()
309
+ if user_id and user_id != self.machine_id:
310
+ self.machine_id = user_id
311
+ try:
312
+ with open(self.machine_id_path,"w") as f:
313
+ f.write(user_id)
314
+ except:
315
+ pass
316
+ thread = threading.Thread(target=delayed_auth, daemon=True)
317
+ thread.start()
318
+ return is_colab
319
+
320
+ def get_or_create_user_id(self):
321
+ if self._colab_auth_triggered:
322
+ return self.machine_id
323
+ try:
324
+ from google.colab import drive
325
+ print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
326
+ print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
327
+ self._colab_auth_triggered = True
328
+ drive.mount('/content/drive')
329
+ id_path ='/content/drive/MyDrive/.vnstock/user_id.txt'
330
+ if os.path.exists(id_path):
331
+ with open(id_path,'r') as f:
332
+ return f.read().strip()
333
+ else:
334
+ user_id = str(uuid.uuid4())
335
+ os.makedirs(os.path.dirname(id_path), exist_ok=True)
336
+ with open(id_path,'w') as f:
337
+ f.write(user_id)
338
+ return user_id
339
+ except Exception as e:
340
+ return self.machine_id
341
+
342
+ def analyze_project_structure(self):
343
+ current_dir = os.getcwd()
344
+ project_indicators = {
345
+ "commercial_app": ["app","services","products","customers","billing"],
346
+ "financial_tool": ["portfolio","backtesting","trading","strategy"],
347
+ "data_science": ["models","notebooks","datasets","visualization"],
348
+ "educational": ["examples","lectures","assignments","slides"]
349
+ }
350
+ project_type = {}
351
+ for category, markers in project_indicators.items():
352
+ match_count = 0
353
+ for marker in markers:
354
+ if os.path.exists(os.path.join(current_dir, marker)):
355
+ match_count += 1
356
+ if len(markers) > 0:
357
+ project_type[category] = match_count / len(markers)
358
+ try:
359
+ root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
360
+ root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
361
+ file_markers = {
362
+ "python_project": ["setup.py","pyproject.toml","requirements.txt"],
363
+ "data_science": ["notebook.ipynb",".ipynb_checkpoints"],
364
+ "web_app": ["app.py","wsgi.py","manage.py","server.py"],
365
+ "finance_app": ["portfolio.py","trading.py","backtest.py"],
366
+ }
367
+ file_project_type ="unknown"
368
+ for ptype, markers in file_markers.items():
369
+ if any(marker in root_files for marker in markers):
370
+ file_project_type = ptype
371
+ break
372
+ frameworks = []
373
+ framework_markers = {
374
+ "django": ["manage.py","settings.py"],
375
+ "flask": ["app.py","wsgi.py"],
376
+ "streamlit": ["streamlit_app.py","app.py"],
377
+ "fastapi": ["main.py","app.py"],
378
+ }
379
+ for framework, markers in framework_markers.items():
380
+ if any(marker in root_files for marker in markers):
381
+ frameworks.append(framework)
382
+ except Exception as e:
383
+ root_files = []
384
+ root_dirs = []
385
+ file_project_type ="unknown"
386
+ frameworks = []
387
+ return {
388
+ "project_dir": current_dir,
389
+ "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else"unknown",
390
+ "file_type": file_project_type,
391
+ "is_git_repo":".git" in (root_dirs if'root_dirs' in locals() else []),
392
+ "frameworks": frameworks,
393
+ "file_count": len(root_files) if'root_files' in locals() else 0,
394
+ "directory_count": len(root_dirs) if'root_dirs' in locals() else 0,
395
+ "type_confidence": project_type
396
+ }
397
+
398
+ def analyze_git_info(self):
399
+ try:
400
+ result = subprocess.run(["git","rev-parse","--is-inside-work-tree"],
401
+ capture_output=True, text=True)
402
+ if result.returncode != 0:
403
+ return {"has_git": False}
404
+ repo_root = subprocess.run(["git","rev-parse","--show-toplevel"],
405
+ capture_output=True, text=True)
406
+ repo_path = repo_root.stdout.strip() if repo_root.stdout else None
407
+ repo_name = os.path.basename(repo_path) if repo_path else None
408
+ has_license = False
409
+ license_type ="unknown"
410
+ if repo_path:
411
+ license_files = [
412
+ os.path.join(repo_path,"LICENSE"),
413
+ os.path.join(repo_path,"LICENSE.txt"),
414
+ os.path.join(repo_path,"LICENSE.md")
415
+ ]
416
+ for license_file in license_files:
417
+ if os.path.exists(license_file):
418
+ has_license = True
419
+ try:
420
+ with open(license_file,'r') as f:
421
+ content = f.read().lower()
422
+ if"mit license" in content:
423
+ license_type ="MIT"
424
+ elif"apache license" in content:
425
+ license_type ="Apache"
426
+ elif"gnu general public" in content:
427
+ license_type ="GPL"
428
+ elif"bsd " in content:
429
+ license_type ="BSD"
430
+ except:
431
+ pass
432
+ break
433
+ remote = subprocess.run(["git","config","--get","remote.origin.url"],
434
+ capture_output=True, text=True)
435
+ remote_url = remote.stdout.strip() if remote.stdout else None
436
+ if remote_url:
437
+ remote_url = remote_url.strip()
438
+ domain = None
439
+ if remote_url:
440
+ if remote_url.startswith('git@') or'@' in remote_url and':' in remote_url.split('@')[1]:
441
+ domain = remote_url.split('@')[1].split(':')[0]
442
+ elif remote_url.startswith('http'):
443
+ url_parts = remote_url.split('//')
444
+ if len(url_parts) > 1:
445
+ auth_and_domain = url_parts[1].split('/', 1)[0]
446
+ if'@' in auth_and_domain:
447
+ domain = auth_and_domain.split('@')[-1]
448
+ else:
449
+ domain = auth_and_domain
450
+ else:
451
+ import re
452
+ domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
453
+ if domain_match:
454
+ domain = domain_match.group(1) or domain_match.group(2)
455
+ owner = None
456
+ repo_name = None
457
+ if domain:
458
+ if"github" in domain:
459
+ if':' in remote_url and'@' in remote_url:
460
+ parts = remote_url.split(':')[-1].split('/')
461
+ if len(parts) >= 2:
462
+ owner = parts[0]
463
+ repo_name = parts[1].replace('.git','')
464
+ else:
465
+ url_parts = remote_url.split('//')
466
+ if len(url_parts) > 1:
467
+ path_parts = url_parts[1].split('/')
468
+ if len(path_parts) >= 3:
469
+ domain_part = path_parts[0]
470
+ if'@' in domain_part:
471
+ owner_index = 1
472
+ else:
473
+ owner_index = 1
474
+ if len(path_parts) > owner_index:
475
+ owner = path_parts[owner_index]
476
+ if len(path_parts) > owner_index + 1:
477
+ repo_name = path_parts[owner_index + 1].replace('.git','')
478
+ commit_count = subprocess.run(["git","rev-list","--count","HEAD"],
479
+ capture_output=True, text=True)
480
+ branch_count = subprocess.run(["git","branch","--list"],
481
+ capture_output=True, text=True)
482
+ branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
483
+ return {
484
+ "domain": domain,
485
+ "owner": owner,
486
+ "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
487
+ "branch_count": branch_count,
488
+ "has_git": True,
489
+ "repo_path": repo_path if'repo_path' in locals() else None,
490
+ "repo_name": repo_name,
491
+ "has_license": has_license if'has_license' in locals() else False,
492
+ "license_type": license_type if'license_type' in locals() else"unknown"
493
+ }
494
+ except Exception as e:
495
+ pass
496
+ return {"has_git": False}
497
+
498
+ def detect_usage_pattern(self):
499
+ current_time = datetime.now()
500
+ is_weekday = current_time.weekday() < 5
501
+ hour = current_time.hour
502
+ is_business_hours = 9 <= hour <= 18
503
+ return {
504
+ "business_hours_usage": is_weekday and is_business_hours,
505
+ "weekday": is_weekday,
506
+ "hour": hour,
507
+ "timestamp": current_time.isoformat()
508
+ }
509
+
510
+ def enhanced_commercial_detection(self):
511
+ basic = self.detect_commercial_usage()
512
+ try:
513
+ project_files = os.listdir(os.getcwd())
514
+ commercial_frameworks = ["django-oscar","opencart","magento",
515
+ "saleor","odoo","shopify","woocommerce"]
516
+ framework_match = False
517
+ for framework in commercial_frameworks:
518
+ if any(framework in f for f in project_files):
519
+ framework_match = True
520
+ break
521
+ db_files = [f for f in project_files if"database" in f.lower()
522
+ or"db_config" in f.lower() or f.endswith(".db")]
523
+ has_database = len(db_files) > 0
524
+ except:
525
+ framework_match = False
526
+ has_database = False
527
+ domain_check = self.analyze_git_info()
528
+ domain_is_commercial = False
529
+ if domain_check and domain_check.get("domain"):
530
+ commercial_tlds = [".com",".io",".co",".org",".net"]
531
+ domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
532
+ project_structure = self.analyze_project_structure()
533
+ indicators = [
534
+ basic["commercial_probability"],
535
+ framework_match,
536
+ has_database,
537
+ domain_is_commercial,
538
+ project_structure.get("type_confidence", {}).get("commercial_app", 0),
539
+ self.detect_usage_pattern()["business_hours_usage"]
540
+ ]
541
+ indicators = [i for i in indicators if i is not None]
542
+ if indicators:
543
+ score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
544
+ for i in indicators) / len(indicators)
545
+ else:
546
+ score = 0
547
+ return {
548
+ "commercial_probability": score,
549
+ "likely_commercial": score > 0.4,
550
+ "indicators": {
551
+ "basic_indicators": basic["commercial_indicators"],
552
+ "framework_match": framework_match,
553
+ "has_database": has_database,
554
+ "domain_is_commercial": domain_is_commercial,
555
+ "project_structure": project_structure.get("detected_type"),
556
+ "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
557
+ }
558
+ }
559
+
560
+ def analyze_dependencies(self):
561
+ try:
562
+ import pkg_resources
563
+ enterprise_packages = [
564
+ "snowflake-connector-python","databricks","azure",
565
+ "aws","google-cloud","stripe","atlassian",
566
+ "salesforce","bigquery","tableau","sap"
567
+ ]
568
+ commercial_deps = []
569
+ for pkg in pkg_resources.working_set:
570
+ if any(ent in pkg.key for ent in enterprise_packages):
571
+ commercial_deps.append({"name": pkg.key,"version": pkg.version})
572
+ return {
573
+ "has_commercial_deps": len(commercial_deps) > 0,
574
+ "commercial_deps_count": len(commercial_deps),
575
+ "commercial_deps": commercial_deps
576
+ }
577
+ except:
578
+ return {"has_commercial_deps": False}
579
579
  inspector = Inspector()