vnai 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py CHANGED
@@ -1,223 +1,862 @@
1
- _W='type_confidence'
2
- _V='detected_type'
3
- _U='commercial_app'
4
- _T='version'
5
- _S='django'
6
- _R='fastapi'
7
- _Q='streamlit'
8
- _P='indicators'
9
- _O='commercial_indicators'
10
- _N='likely_commercial'
11
- _M='KAGGLE_KERNEL_RUN_TYPE'
12
- _L='machine_id'
13
- _K='domain'
14
- _J='.git'
15
- _I='backtesting'
16
- _H='commercial_probability'
17
- _G='timestamp'
18
- _F='business_hours_usage'
19
- _E='google.colab'
20
- _D='unknown'
21
- _C=False
22
- _B=None
23
- _A=True
24
- import os,sys,platform,uuid,hashlib,psutil,threading,time,importlib.metadata
1
+ ##
2
+
3
+ ##
4
+
5
+
6
+ import os
7
+ import sys
8
+ import platform
9
+ import uuid
10
+ import hashlib
11
+ import psutil
12
+ import threading
13
+ import time
14
+ import importlib.metadata
25
15
  from datetime import datetime
26
16
  import subprocess
27
17
  from pathlib import Path
18
+
28
19
  class Inspector:
29
- _instance=_B;_lock=_B
30
- def __new__(cls):
31
- import threading
32
- if cls._lock is _B:cls._lock=threading.Lock()
33
- with cls._lock:
34
- if cls._instance is _B:cls._instance=super(Inspector,cls).__new__(cls);cls._instance._initialize()
35
- return cls._instance
36
- def _initialize(self):self.cache={};self.cache_ttl=3600;self.last_examination=0;self.machine_id=_B;self._colab_auth_triggered=_C;self.home_dir=Path.home();self.project_dir=self.home_dir/'.vnstock';self.project_dir.mkdir(exist_ok=_A);self.id_dir=self.project_dir/'id';self.id_dir.mkdir(exist_ok=_A);self.machine_id_path=self.id_dir/'machine_id.txt';self.examine()
37
- def examine(self,force_refresh=_C):
38
- D='script';C='terminal';B='hosting_service';A='environment';current_time=time.time()
39
- if not force_refresh and current_time-self.last_examination<self.cache_ttl:return self.cache
40
- info={_G:datetime.now().isoformat(),'python_version':platform.python_version(),'os_name':platform.system(),'platform':platform.platform()};info[_L]=self.fingerprint()
41
- try:
42
- import importlib.util;ipython_spec=importlib.util.find_spec('IPython')
43
- if ipython_spec:
44
- from IPython import get_ipython;ipython=get_ipython()
45
- if ipython is not _B:
46
- info[A]='jupyter'
47
- if _E in sys.modules:info[B]='colab'
48
- elif _M in os.environ:info[B]='kaggle'
49
- else:info[B]='local_jupyter'
50
- elif sys.stdout.isatty():info[A]=C
51
- else:info[A]=D
52
- elif sys.stdout.isatty():info[A]=C
53
- else:info[A]=D
54
- except:info[A]=_D
55
- try:info['cpu_count']=os.cpu_count();info['memory_gb']=round(psutil.virtual_memory().total/1024**3,1)
56
- except:pass
57
- is_colab=_E in sys.modules
58
- if is_colab:info['is_colab']=_A;self.detect_colab_with_delayed_auth()
59
- try:info['commercial_usage']=self.enhanced_commercial_detection();info['project_context']=self.analyze_project_structure();info['git_info']=self.analyze_git_info();info['usage_pattern']=self.detect_usage_pattern();info['dependencies']=self.analyze_dependencies()
60
- except Exception as e:info['detection_error']=str(e)
61
- self.cache=info;self.last_examination=current_time;return info
62
- def fingerprint(self):
63
- if self.machine_id:return self.machine_id
64
- if self.machine_id_path.exists():
65
- try:
66
- with open(self.machine_id_path,'r')as f:self.machine_id=f.read().strip();return self.machine_id
67
- except:pass
68
- is_colab=self.detect_colab_with_delayed_auth()
69
- try:system_info=platform.node()+platform.platform()+platform.machine();self.machine_id=hashlib.md5(system_info.encode()).hexdigest()
70
- except:self.machine_id=str(uuid.uuid4())
71
- try:
72
- with open(self.machine_id_path,'w')as f:f.write(self.machine_id)
73
- except:pass
74
- return self.machine_id
75
- def detect_hosting(self):
76
- A='Google Colab';hosting_markers={'COLAB_GPU':A,_M:'Kaggle','BINDER_SERVICE_HOST':'Binder','CODESPACE_NAME':'GitHub Codespaces','STREAMLIT_SERVER_HEADLESS':'Streamlit Cloud','CLOUD_SHELL':'Cloud Shell'}
77
- for(env_var,host_name)in hosting_markers.items():
78
- if env_var in os.environ:return host_name
79
- if _E in sys.modules:return A
80
- return'local'
81
- def detect_commercial_usage(self):
82
- F='client';E='enterprise';D='dir_patterns';C='env_vars';B='file_patterns';A='env_domains';commercial_indicators={A:['.com','.io','.co',E,'corp','inc'],B:['invoice','payment','customer',F,'product','sale'],C:['COMPANY','BUSINESS','ENTERPRISE','CORPORATE'],D:['company','business',E,'corporate',F]};env_values=' '.join(os.environ.values()).lower();domain_match=any(domain in env_values for domain in commercial_indicators[A]);env_var_match=any(var in os.environ for var in commercial_indicators[C]);current_dir=os.getcwd().lower();dir_match=any(pattern in current_dir for pattern in commercial_indicators[D])
83
- try:files=[f.lower()for f in os.listdir()if os.path.isfile(f)];file_match=any(any(pattern in f for pattern in commercial_indicators[B])for f in files)
84
- except:file_match=_C
85
- indicators=[domain_match,env_var_match,dir_match,file_match];commercial_probability=sum(indicators)/len(indicators);return{_N:commercial_probability>.3,_H:commercial_probability,_O:{'domain_match':domain_match,'env_var_match':env_var_match,'dir_match':dir_match,'file_match':file_match}}
86
- def scan_packages(self):
87
- A='financetoolkit';package_groups={'vnstock_family':['vnstock','vnstock3','vnstock_ezchart','vnstock_data_pro','vnstock_market_data_pipeline','vnstock_ta','vnii','vnai'],'analytics':['openbb','pandas_ta'],'static_charts':['matplotlib','seaborn','altair'],'dashboard':[_Q,'voila','panel','shiny','dash'],'interactive_charts':['mplfinance','plotly','plotline','bokeh','pyecharts','highcharts-core','highcharts-stock','mplchart'],'datafeed':['yfinance','alpha_vantage','pandas-datareader','investpy'],'official_api':['ssi-fc-data','ssi-fctrading'],'risk_return':['pyfolio','empyrical','quantstats',A],'machine_learning':['scipy','sklearn','statsmodels','pytorch','tensorflow','keras','xgboost'],_P:['stochastic','talib','tqdm','finta',A,'tulipindicators'],_I:['vectorbt',_I,'bt','zipline','pyalgotrade','backtrader','pybacktest','fastquant','lean','ta','finmarketpy','qstrader'],'server':[_R,'flask','uvicorn','gunicorn'],'framework':['lightgbm','catboost',_S]};installed={}
88
- for(category,packages)in package_groups.items():
89
- installed[category]=[]
90
- for pkg in packages:
91
- try:version=importlib.metadata.version(pkg);installed[category].append({'name':pkg,_T:version})
92
- except:pass
93
- return installed
94
- def setup_vnstock_environment(self):
95
- env_file=self.id_dir/'environment.json';env_data={'accepted_agreement':_A,_G:datetime.now().isoformat(),_L:self.fingerprint()}
96
- try:
97
- with open(env_file,'w')as f:import json;json.dump(env_data,f)
98
- return _A
99
- except Exception as e:print(f"Failed to set up vnstock environment: {e}");return _C
100
- def detect_colab_with_delayed_auth(self,immediate=_C):
101
- is_colab=_E in sys.modules
102
- if is_colab and not self._colab_auth_triggered:
103
- if immediate:
104
- self._colab_auth_triggered=_A;user_id=self.get_or_create_user_id()
105
- if user_id and user_id!=self.machine_id:
106
- self.machine_id=user_id
107
- try:
108
- with open(self.machine_id_path,'w')as f:f.write(user_id)
109
- except:pass
110
- else:
111
- def delayed_auth():
112
- time.sleep(300);user_id=self.get_or_create_user_id()
113
- if user_id and user_id!=self.machine_id:
114
- self.machine_id=user_id
115
- try:
116
- with open(self.machine_id_path,'w')as f:f.write(user_id)
117
- except:pass
118
- thread=threading.Thread(target=delayed_auth,daemon=_A);thread.start()
119
- return is_colab
120
- def get_or_create_user_id(self):
121
- if self._colab_auth_triggered:return self.machine_id
122
- try:
123
- from google.colab import drive;print('\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.');print('Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n');self._colab_auth_triggered=_A;drive.mount('/content/drive');id_path='/content/drive/MyDrive/.vnstock/user_id.txt'
124
- if os.path.exists(id_path):
125
- with open(id_path,'r')as f:return f.read().strip()
126
- else:
127
- user_id=str(uuid.uuid4());os.makedirs(os.path.dirname(id_path),exist_ok=_A)
128
- with open(id_path,'w')as f:f.write(user_id)
129
- return user_id
130
- except Exception as e:return self.machine_id
131
- def analyze_project_structure(self):
132
- E='root_dirs';D='manage.py';C='wsgi.py';B='data_science';A='app.py';current_dir=os.getcwd();project_indicators={_U:['app','services','products','customers','billing'],'financial_tool':['portfolio',_I,'trading','strategy'],B:['models','notebooks','datasets','visualization'],'educational':['examples','lectures','assignments','slides']};project_type={}
133
- for(category,markers)in project_indicators.items():
134
- match_count=0
135
- for marker in markers:
136
- if os.path.exists(os.path.join(current_dir,marker)):match_count+=1
137
- if len(markers)>0:project_type[category]=match_count/len(markers)
138
- try:
139
- root_files=[f for f in os.listdir(current_dir)if os.path.isfile(os.path.join(current_dir,f))];root_dirs=[d for d in os.listdir(current_dir)if os.path.isdir(os.path.join(current_dir,d))];file_markers={'python_project':['setup.py','pyproject.toml','requirements.txt'],B:['notebook.ipynb','.ipynb_checkpoints'],'web_app':[A,C,D,'server.py'],'finance_app':['portfolio.py','trading.py','backtest.py']};file_project_type=_D
140
- for(ptype,markers)in file_markers.items():
141
- if any(marker in root_files for marker in markers):file_project_type=ptype;break
142
- frameworks=[];framework_markers={_S:[D,'settings.py'],'flask':[A,C],_Q:['streamlit_app.py',A],_R:['main.py',A]}
143
- for(framework,markers)in framework_markers.items():
144
- if any(marker in root_files for marker in markers):frameworks.append(framework)
145
- except Exception as e:root_files=[];root_dirs=[];file_project_type=_D;frameworks=[]
146
- return{'project_dir':current_dir,_V:max(project_type.items(),key=lambda x:x[1])[0]if project_type else _D,'file_type':file_project_type,'is_git_repo':_J in(root_dirs if E in locals()else[]),'frameworks':frameworks,'file_count':len(root_files)if'root_files'in locals()else 0,'directory_count':len(root_dirs)if E in locals()else 0,_W:project_type}
147
- def analyze_git_info(self):
148
- I='license_type';H='has_license';G='repo_path';F='rev-parse';E='/';D='has_git';C=':';B='git';A='@'
149
- try:
150
- result=subprocess.run([B,F,'--is-inside-work-tree'],capture_output=_A,text=_A)
151
- if result.returncode!=0:return{D:_C}
152
- repo_root=subprocess.run([B,F,'--show-toplevel'],capture_output=_A,text=_A);repo_path=repo_root.stdout.strip()if repo_root.stdout else _B;repo_name=os.path.basename(repo_path)if repo_path else _B;has_license=_C;license_type=_D
153
- if repo_path:
154
- license_files=[os.path.join(repo_path,'LICENSE'),os.path.join(repo_path,'LICENSE.txt'),os.path.join(repo_path,'LICENSE.md')]
155
- for license_file in license_files:
156
- if os.path.exists(license_file):
157
- has_license=_A
158
- try:
159
- with open(license_file,'r')as f:
160
- content=f.read().lower()
161
- if'mit license'in content:license_type='MIT'
162
- elif'apache license'in content:license_type='Apache'
163
- elif'gnu general public'in content:license_type='GPL'
164
- elif'bsd 'in content:license_type='BSD'
165
- except:pass
166
- break
167
- remote=subprocess.run([B,'config','--get','remote.origin.url'],capture_output=_A,text=_A);remote_url=remote.stdout.strip()if remote.stdout else _B
168
- if remote_url:
169
- remote_url=remote_url.strip();domain=_B
170
- if remote_url:
171
- if remote_url.startswith('git@')or A in remote_url and C in remote_url.split(A)[1]:domain=remote_url.split(A)[1].split(C)[0]
172
- elif remote_url.startswith('http'):
173
- url_parts=remote_url.split('//')
174
- if len(url_parts)>1:
175
- auth_and_domain=url_parts[1].split(E,1)[0]
176
- if A in auth_and_domain:domain=auth_and_domain.split(A)[-1]
177
- else:domain=auth_and_domain
178
- else:
179
- import re;domain_match=re.search('@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)',remote_url)
180
- if domain_match:domain=domain_match.group(1)or domain_match.group(2)
181
- owner=_B;repo_name=_B
182
- if domain:
183
- if'github'in domain:
184
- if C in remote_url and A in remote_url:
185
- parts=remote_url.split(C)[-1].split(E)
186
- if len(parts)>=2:owner=parts[0];repo_name=parts[1].replace(_J,'')
187
- else:
188
- url_parts=remote_url.split('//')
189
- if len(url_parts)>1:
190
- path_parts=url_parts[1].split(E)
191
- if len(path_parts)>=3:
192
- domain_part=path_parts[0]
193
- if A in domain_part:owner_index=1
194
- else:owner_index=1
195
- if len(path_parts)>owner_index:owner=path_parts[owner_index]
196
- if len(path_parts)>owner_index+1:repo_name=path_parts[owner_index+1].replace(_J,'')
197
- commit_count=subprocess.run([B,'rev-list','--count','HEAD'],capture_output=_A,text=_A);branch_count=subprocess.run([B,'branch','--list'],capture_output=_A,text=_A);branch_count=len(branch_count.stdout.strip().split('\n'))if branch_count.stdout else 0;return{_K:domain,'owner':owner,'commit_count':int(commit_count.stdout.strip())if commit_count.stdout else 0,'branch_count':branch_count,D:_A,G:repo_path if G in locals()else _B,'repo_name':repo_name,H:has_license if H in locals()else _C,I:license_type if I in locals()else _D}
198
- except Exception as e:pass
199
- return{D:_C}
200
- def detect_usage_pattern(self):current_time=datetime.now();is_weekday=current_time.weekday()<5;hour=current_time.hour;is_business_hours=9<=hour<=18;return{_F:is_weekday and is_business_hours,'weekday':is_weekday,'hour':hour,_G:current_time.isoformat()}
201
- def enhanced_commercial_detection(self):
202
- basic=self.detect_commercial_usage()
203
- try:
204
- project_files=os.listdir(os.getcwd());commercial_frameworks=['django-oscar','opencart','magento','saleor','odoo','shopify','woocommerce'];framework_match=_C
205
- for framework in commercial_frameworks:
206
- if any(framework in f for f in project_files):framework_match=_A;break
207
- db_files=[f for f in project_files if'database'in f.lower()or'db_config'in f.lower()or f.endswith('.db')];has_database=len(db_files)>0
208
- except:framework_match=_C;has_database=_C
209
- domain_check=self.analyze_git_info();domain_is_commercial=_C
210
- if domain_check and domain_check.get(_K):commercial_tlds=['.com','.io','.co','.org','.net'];domain_is_commercial=any(tld in domain_check[_K]for tld in commercial_tlds)
211
- project_structure=self.analyze_project_structure();indicators=[basic[_H],framework_match,has_database,domain_is_commercial,project_structure.get(_W,{}).get(_U,0),self.detect_usage_pattern()[_F]];indicators=[i for i in indicators if i is not _B]
212
- if indicators:score=sum(1. if isinstance(i,bool)and i else i if isinstance(i,(int,float))else 0 for i in indicators)/len(indicators)
213
- else:score=0
214
- return{_H:score,_N:score>.4,_P:{'basic_indicators':basic[_O],'framework_match':framework_match,'has_database':has_database,'domain_is_commercial':domain_is_commercial,'project_structure':project_structure.get(_V),_F:self.detect_usage_pattern()[_F]}}
215
- def analyze_dependencies(self):
216
- A='has_commercial_deps'
217
- try:
218
- import pkg_resources;enterprise_packages=['snowflake-connector-python','databricks','azure','aws','google-cloud','stripe','atlassian','salesforce','bigquery','tableau','sap'];commercial_deps=[]
219
- for pkg in pkg_resources.working_set:
220
- if any(ent in pkg.key for ent in enterprise_packages):commercial_deps.append({'name':pkg.key,_T:pkg.version})
221
- return{A:len(commercial_deps)>0,'commercial_deps_count':len(commercial_deps),'commercial_deps':commercial_deps}
222
- except:return{A:_C}
223
- inspector=Inspector()
20
+ #--
21
+
22
+ _instance = None
23
+ _lock = None
24
+
25
+ def __new__(cls):
26
+ import threading
27
+ if cls._lock is None:
28
+ cls._lock = threading.Lock()
29
+
30
+ with cls._lock:
31
+ if cls._instance is None:
32
+ cls._instance = super(Inspector, cls).__new__(cls)
33
+ cls._instance._initialize()
34
+ return cls._instance
35
+
36
+ def _initialize(self):
37
+ #--
38
+ self.cache = {}
39
+ self.cache_ttl = 3600 ##
40
+
41
+ self.last_examination = 0
42
+ self.machine_id = None
43
+ self._colab_auth_triggered = False
44
+
45
+ ##
46
+
47
+ self.home_dir = Path.home()
48
+ self.project_dir = self.home_dir / ".vnstock"
49
+ self.project_dir.mkdir(exist_ok=True)
50
+ self.id_dir = self.project_dir / 'id'
51
+ self.id_dir.mkdir(exist_ok=True)
52
+ self.machine_id_path = self.id_dir / "machine_id.txt"
53
+
54
+ ##
55
+
56
+ self.examine()
57
+
58
+ def examine(self, force_refresh=False):
59
+ #--
60
+ current_time = time.time()
61
+
62
+ ##
63
+
64
+ if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
65
+ return self.cache
66
+
67
+ ##
68
+
69
+ info = {
70
+ "timestamp": datetime.now().isoformat(),
71
+ "python_version": platform.python_version(),
72
+ "os_name": platform.system(),
73
+ "platform": platform.platform()
74
+ }
75
+
76
+ ##
77
+
78
+ info["machine_id"] = self.fingerprint()
79
+
80
+ ##
81
+
82
+ try:
83
+ ##
84
+
85
+ import importlib.util
86
+ ipython_spec = importlib.util.find_spec("IPython")
87
+
88
+ if ipython_spec:
89
+ from IPython import get_ipython
90
+ ipython = get_ipython()
91
+ if ipython is not None:
92
+ info["environment"] = "jupyter"
93
+ ##
94
+
95
+ if 'google.colab' in sys.modules:
96
+ info["hosting_service"] = "colab"
97
+ elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
98
+ info["hosting_service"] = "kaggle"
99
+ else:
100
+ info["hosting_service"] = "local_jupyter"
101
+ elif sys.stdout.isatty():
102
+ info["environment"] = "terminal"
103
+ else:
104
+ info["environment"] = "script"
105
+ elif sys.stdout.isatty():
106
+ info["environment"] = "terminal"
107
+ else:
108
+ info["environment"] = "script"
109
+ except:
110
+ info["environment"] = "unknown"
111
+
112
+ ##
113
+
114
+ try:
115
+ info["cpu_count"] = os.cpu_count()
116
+ info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
117
+ except:
118
+ pass
119
+
120
+ ##
121
+
122
+ is_colab = 'google.colab' in sys.modules
123
+ if is_colab:
124
+ info["is_colab"] = True
125
+ ##
126
+
127
+ self.detect_colab_with_delayed_auth()
128
+
129
+ ##
130
+
131
+ try:
132
+ ##
133
+
134
+ info["commercial_usage"] = self.enhanced_commercial_detection()
135
+
136
+ ##
137
+
138
+ info["project_context"] = self.analyze_project_structure()
139
+
140
+ ##
141
+
142
+ info["git_info"] = self.analyze_git_info()
143
+
144
+ ##
145
+
146
+ info["usage_pattern"] = self.detect_usage_pattern()
147
+
148
+ ##
149
+
150
+ info["dependencies"] = self.analyze_dependencies()
151
+ except Exception as e:
152
+ ##
153
+
154
+ info["detection_error"] = str(e)
155
+
156
+ ##
157
+
158
+ self.cache = info
159
+ self.last_examination = current_time
160
+
161
+ return info
162
+
163
+ def fingerprint(self):
164
+ #--
165
+ ##
166
+
167
+ if self.machine_id:
168
+ return self.machine_id
169
+
170
+ ##
171
+
172
+ if self.machine_id_path.exists():
173
+ try:
174
+ with open(self.machine_id_path, "r") as f:
175
+ self.machine_id = f.read().strip()
176
+ return self.machine_id
177
+ except:
178
+ pass
179
+
180
+ ##
181
+
182
+ is_colab = self.detect_colab_with_delayed_auth()
183
+
184
+ ##
185
+
186
+ try:
187
+ ##
188
+
189
+ system_info = platform.node() + platform.platform() + platform.machine()
190
+ self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
191
+ except:
192
+ ##
193
+
194
+ self.machine_id = str(uuid.uuid4())
195
+
196
+ ##
197
+
198
+ try:
199
+ with open(self.machine_id_path, "w") as f:
200
+ f.write(self.machine_id)
201
+ except:
202
+ pass
203
+
204
+ return self.machine_id
205
+
206
+ def detect_hosting(self):
207
+ #--
208
+ ##
209
+
210
+ hosting_markers = {
211
+ "COLAB_GPU": "Google Colab",
212
+ "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
213
+ "BINDER_SERVICE_HOST": "Binder",
214
+ "CODESPACE_NAME": "GitHub Codespaces",
215
+ "STREAMLIT_SERVER_HEADLESS": "Streamlit Cloud",
216
+ "CLOUD_SHELL": "Cloud Shell"
217
+ }
218
+
219
+ for env_var, host_name in hosting_markers.items():
220
+ if env_var in os.environ:
221
+ return host_name
222
+
223
+ ##
224
+
225
+ if 'google.colab' in sys.modules:
226
+ return "Google Colab"
227
+
228
+ return "local"
229
+
230
+ def detect_commercial_usage(self):
231
+ #--
232
+ commercial_indicators = {
233
+ "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
234
+ "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
235
+ "env_vars": ["COMPANY", "BUSINESS", "ENTERPRISE", "CORPORATE"],
236
+ "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
237
+ }
238
+
239
+ ##
240
+
241
+ env_values = " ".join(os.environ.values()).lower()
242
+ domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
243
+
244
+ ##
245
+
246
+ env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
247
+
248
+ ##
249
+
250
+ current_dir = os.getcwd().lower()
251
+ dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
252
+
253
+ ##
254
+
255
+ try:
256
+ files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
257
+ file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
258
+ except:
259
+ file_match = False
260
+
261
+ ##
262
+
263
+ indicators = [domain_match, env_var_match, dir_match, file_match]
264
+ commercial_probability = sum(indicators) / len(indicators)
265
+
266
+ return {
267
+ "likely_commercial": commercial_probability > 0.3,
268
+ "commercial_probability": commercial_probability,
269
+ "commercial_indicators": {
270
+ "domain_match": domain_match,
271
+ "env_var_match": env_var_match,
272
+ "dir_match": dir_match,
273
+ "file_match": file_match
274
+ }
275
+ }
276
+
277
+ def scan_packages(self):
278
+ #--
279
+ package_groups = {
280
+ "vnstock_family": [
281
+ "vnstock",
282
+ "vnstock3",
283
+ "vnstock_ezchart",
284
+ "vnstock_data_pro", ##
285
+
286
+ "vnstock_market_data_pipeline",
287
+ "vnstock_ta",
288
+ "vnii",
289
+ "vnai"
290
+ ],
291
+ "analytics": [
292
+ "openbb",
293
+ "pandas_ta"
294
+ ],
295
+ "static_charts": [
296
+ "matplotlib",
297
+ "seaborn",
298
+ "altair"
299
+ ],
300
+ "dashboard": [
301
+ "streamlit",
302
+ "voila",
303
+ "panel",
304
+ "shiny",
305
+ "dash"
306
+ ],
307
+ "interactive_charts": [
308
+ "mplfinance",
309
+ "plotly",
310
+ "plotline",
311
+ "bokeh",
312
+ "pyecharts",
313
+ "highcharts-core",
314
+ "highcharts-stock",
315
+ "mplchart"
316
+ ],
317
+ "datafeed": [
318
+ "yfinance",
319
+ "alpha_vantage",
320
+ "pandas-datareader",
321
+ "investpy"
322
+ ],
323
+ "official_api": [
324
+ "ssi-fc-data",
325
+ "ssi-fctrading"
326
+ ],
327
+ "risk_return": [
328
+ "pyfolio",
329
+ "empyrical",
330
+ "quantstats",
331
+ "financetoolkit"
332
+ ],
333
+ "machine_learning": [
334
+ "scipy",
335
+ "sklearn",
336
+ "statsmodels",
337
+ "pytorch",
338
+ "tensorflow",
339
+ "keras",
340
+ "xgboost"
341
+ ],
342
+ "indicators": [
343
+ "stochastic",
344
+ "talib",
345
+ "tqdm",
346
+ "finta",
347
+ "financetoolkit",
348
+ "tulipindicators"
349
+ ],
350
+ "backtesting": [
351
+ "vectorbt",
352
+ "backtesting",
353
+ "bt",
354
+ "zipline",
355
+ "pyalgotrade",
356
+ "backtrader",
357
+ "pybacktest",
358
+ "fastquant",
359
+ "lean",
360
+ "ta",
361
+ "finmarketpy",
362
+ "qstrader"
363
+ ],
364
+ "server": [
365
+ "fastapi",
366
+ "flask",
367
+ "uvicorn",
368
+ "gunicorn"
369
+ ],
370
+ "framework": [
371
+ "lightgbm",
372
+ "catboost",
373
+ "django"
374
+ ]
375
+ }
376
+
377
+ installed = {}
378
+
379
+ for category, packages in package_groups.items():
380
+ installed[category] = []
381
+ for pkg in packages:
382
+ try:
383
+ version = importlib.metadata.version(pkg)
384
+ installed[category].append({"name": pkg, "version": version})
385
+ except:
386
+ pass
387
+
388
+ return installed
389
+
390
+ def setup_vnstock_environment(self):
391
+ #--
392
+ ##
393
+
394
+ env_file = self.id_dir / "environment.json"
395
+ env_data = {
396
+ "accepted_agreement": True,
397
+ "timestamp": datetime.now().isoformat(),
398
+ "machine_id": self.fingerprint()
399
+ }
400
+
401
+ try:
402
+ with open(env_file, "w") as f:
403
+ import json
404
+ json.dump(env_data, f)
405
+ return True
406
+ except Exception as e:
407
+ print(f"Failed to set up vnstock environment: {e}")
408
+ return False
409
+
410
+ ##
411
+
412
+ def detect_colab_with_delayed_auth(self, immediate=False):
413
+ #--
414
+ ##
415
+
416
+ is_colab = 'google.colab' in sys.modules
417
+
418
+ if is_colab and not self._colab_auth_triggered:
419
+ if immediate:
420
+ ##
421
+
422
+ self._colab_auth_triggered = True
423
+ user_id = self.get_or_create_user_id()
424
+ if user_id and user_id != self.machine_id:
425
+ self.machine_id = user_id
426
+ try:
427
+ with open(self.machine_id_path, "w") as f:
428
+ f.write(user_id)
429
+ except:
430
+ pass
431
+ else:
432
+ ##
433
+
434
+ def delayed_auth():
435
+ ##
436
+
437
+ time.sleep(300)
438
+ ##
439
+
440
+ user_id = self.get_or_create_user_id()
441
+ ##
442
+
443
+ if user_id and user_id != self.machine_id:
444
+ self.machine_id = user_id
445
+ ##
446
+
447
+ try:
448
+ with open(self.machine_id_path, "w") as f:
449
+ f.write(user_id)
450
+ except:
451
+ pass
452
+
453
+ ##
454
+
455
+ thread = threading.Thread(target=delayed_auth, daemon=True)
456
+ thread.start()
457
+
458
+ return is_colab
459
+
460
+ def get_or_create_user_id(self):
461
+ #--
462
+ if self._colab_auth_triggered:
463
+ return self.machine_id ##
464
+
465
+
466
+ try:
467
+ from google.colab import drive
468
+ print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
469
+ print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
470
+
471
+ ##
472
+
473
+ self._colab_auth_triggered = True
474
+
475
+ ##
476
+
477
+ drive.mount('/content/drive')
478
+ id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
479
+
480
+ if os.path.exists(id_path):
481
+ with open(id_path, 'r') as f:
482
+ return f.read().strip()
483
+ else:
484
+ user_id = str(uuid.uuid4())
485
+ os.makedirs(os.path.dirname(id_path), exist_ok=True)
486
+ with open(id_path, 'w') as f:
487
+ f.write(user_id)
488
+ return user_id
489
+ except Exception as e:
490
+ ##
491
+
492
+ return self.machine_id
493
+
494
+ ##
495
+
496
+
497
+ def analyze_project_structure(self):
498
+ #--
499
+ current_dir = os.getcwd()
500
+ project_indicators = {
501
+ "commercial_app": ["app", "services", "products", "customers", "billing"],
502
+ "financial_tool": ["portfolio", "backtesting", "trading", "strategy"],
503
+ "data_science": ["models", "notebooks", "datasets", "visualization"],
504
+ "educational": ["examples", "lectures", "assignments", "slides"]
505
+ }
506
+
507
+ ##
508
+
509
+ project_type = {}
510
+ for category, markers in project_indicators.items():
511
+ match_count = 0
512
+ for marker in markers:
513
+ if os.path.exists(os.path.join(current_dir, marker)):
514
+ match_count += 1
515
+ if len(markers) > 0:
516
+ project_type[category] = match_count / len(markers)
517
+
518
+ ##
519
+
520
+ try:
521
+ root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
522
+ root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
523
+
524
+ ##
525
+
526
+ file_markers = {
527
+ "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
528
+ "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
529
+ "web_app": ["app.py", "wsgi.py", "manage.py", "server.py"],
530
+ "finance_app": ["portfolio.py", "trading.py", "backtest.py"],
531
+ }
532
+
533
+ file_project_type = "unknown"
534
+ for ptype, markers in file_markers.items():
535
+ if any(marker in root_files for marker in markers):
536
+ file_project_type = ptype
537
+ break
538
+
539
+ ##
540
+
541
+ frameworks = []
542
+ framework_markers = {
543
+ "django": ["manage.py", "settings.py"],
544
+ "flask": ["app.py", "wsgi.py"],
545
+ "streamlit": ["streamlit_app.py", "app.py"],
546
+ "fastapi": ["main.py", "app.py"],
547
+ }
548
+
549
+ for framework, markers in framework_markers.items():
550
+ if any(marker in root_files for marker in markers):
551
+ frameworks.append(framework)
552
+
553
+ except Exception as e:
554
+ root_files = []
555
+ root_dirs = []
556
+ file_project_type = "unknown"
557
+ frameworks = []
558
+
559
+ return {
560
+ "project_dir": current_dir,
561
+ "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else "unknown",
562
+ "file_type": file_project_type,
563
+ "is_git_repo": ".git" in (root_dirs if 'root_dirs' in locals() else []),
564
+ "frameworks": frameworks,
565
+ "file_count": len(root_files) if 'root_files' in locals() else 0,
566
+ "directory_count": len(root_dirs) if 'root_dirs' in locals() else 0,
567
+ "type_confidence": project_type
568
+ }
569
+
570
+ def analyze_git_info(self):
571
+ #--
572
+ try:
573
+ ##
574
+
575
+ result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
576
+ capture_output=True, text=True)
577
+
578
+ if result.returncode != 0:
579
+ return {"has_git": False}
580
+
581
+ ##
582
+
583
+ repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
584
+ capture_output=True, text=True)
585
+ repo_path = repo_root.stdout.strip() if repo_root.stdout else None
586
+
587
+ ##
588
+
589
+ repo_name = os.path.basename(repo_path) if repo_path else None
590
+
591
+ ##
592
+
593
+ has_license = False
594
+ license_type = "unknown"
595
+ if repo_path:
596
+ license_files = [
597
+ os.path.join(repo_path, "LICENSE"),
598
+ os.path.join(repo_path, "LICENSE.txt"),
599
+ os.path.join(repo_path, "LICENSE.md")
600
+ ]
601
+ for license_file in license_files:
602
+ if os.path.exists(license_file):
603
+ has_license = True
604
+ ##
605
+
606
+ try:
607
+ with open(license_file, 'r') as f:
608
+ content = f.read().lower()
609
+ if "mit license" in content:
610
+ license_type = "MIT"
611
+ elif "apache license" in content:
612
+ license_type = "Apache"
613
+ elif "gnu general public" in content:
614
+ license_type = "GPL"
615
+ elif "bsd " in content:
616
+ license_type = "BSD"
617
+ ##
618
+
619
+ except:
620
+ pass
621
+ break
622
+
623
+ ##
624
+
625
+ remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
626
+ capture_output=True, text=True)
627
+
628
+ remote_url = remote.stdout.strip() if remote.stdout else None
629
+
630
+ if remote_url:
631
+ ##
632
+
633
+ remote_url = remote_url.strip()
634
+
635
+ ##
636
+
637
+ domain = None
638
+ if remote_url:
639
+ ##
640
+
641
+ if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
642
+ domain = remote_url.split('@')[1].split(':')[0]
643
+ ##
644
+
645
+ elif remote_url.startswith('http'):
646
+ ##
647
+
648
+ url_parts = remote_url.split('//')
649
+ if len(url_parts) > 1:
650
+ auth_and_domain = url_parts[1].split('/', 1)[0]
651
+ ##
652
+
653
+ if '@' in auth_and_domain:
654
+ domain = auth_and_domain.split('@')[-1]
655
+ else:
656
+ domain = auth_and_domain
657
+ ##
658
+
659
+ else:
660
+ ##
661
+
662
+ import re
663
+ domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
664
+ if domain_match:
665
+ domain = domain_match.group(1) or domain_match.group(2)
666
+
667
+ ##
668
+
669
+ owner = None
670
+ repo_name = None
671
+
672
+ if domain:
673
+ ##
674
+
675
+ if "github" in domain:
676
+ ##
677
+
678
+ if ':' in remote_url and '@' in remote_url:
679
+ parts = remote_url.split(':')[-1].split('/')
680
+ if len(parts) >= 2:
681
+ owner = parts[0]
682
+ repo_name = parts[1].replace('.git', '')
683
+ ##
684
+
685
+ else:
686
+ url_parts = remote_url.split('//')
687
+ if len(url_parts) > 1:
688
+ path_parts = url_parts[1].split('/')
689
+ if len(path_parts) >= 3:
690
+ ##
691
+
692
+ domain_part = path_parts[0]
693
+ if '@' in domain_part:
694
+ ##
695
+
696
+ owner_index = 1
697
+ else:
698
+ owner_index = 1
699
+
700
+ if len(path_parts) > owner_index:
701
+ owner = path_parts[owner_index]
702
+ if len(path_parts) > owner_index + 1:
703
+ repo_name = path_parts[owner_index + 1].replace('.git', '')
704
+
705
+ ##
706
+
707
+ commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
708
+ capture_output=True, text=True)
709
+
710
+ ##
711
+
712
+ branch_count = subprocess.run(["git", "branch", "--list"],
713
+ capture_output=True, text=True)
714
+ branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
715
+
716
+ return {
717
+ "domain": domain, ##
718
+
719
+ "owner": owner, ##
720
+
721
+ "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
722
+ "branch_count": branch_count,
723
+ "has_git": True,
724
+ "repo_path": repo_path if 'repo_path' in locals() else None,
725
+ "repo_name": repo_name,
726
+ "has_license": has_license if 'has_license' in locals() else False,
727
+ "license_type": license_type if 'license_type' in locals() else "unknown"
728
+ }
729
+
730
+ except Exception as e:
731
+ ##
732
+
733
+ pass
734
+ return {"has_git": False}
735
+
736
+
737
+ def detect_usage_pattern(self):
738
+ #--
739
+ current_time = datetime.now()
740
+
741
+ ##
742
+
743
+ is_weekday = current_time.weekday() < 5 ##
744
+
745
+ hour = current_time.hour
746
+ is_business_hours = 9 <= hour <= 18
747
+
748
+ return {
749
+ "business_hours_usage": is_weekday and is_business_hours,
750
+ "weekday": is_weekday,
751
+ "hour": hour,
752
+ "timestamp": current_time.isoformat()
753
+ }
754
+
755
+ def enhanced_commercial_detection(self):
756
+ #--
757
+ basic = self.detect_commercial_usage()
758
+
759
+ ##
760
+
761
+ try:
762
+ project_files = os.listdir(os.getcwd())
763
+
764
+ ##
765
+
766
+ commercial_frameworks = ["django-oscar", "opencart", "magento",
767
+ "saleor", "odoo", "shopify", "woocommerce"]
768
+
769
+ framework_match = False
770
+ for framework in commercial_frameworks:
771
+ if any(framework in f for f in project_files):
772
+ framework_match = True
773
+ break
774
+
775
+ ##
776
+
777
+ db_files = [f for f in project_files if "database" in f.lower()
778
+ or "db_config" in f.lower() or f.endswith(".db")]
779
+ has_database = len(db_files) > 0
780
+ except:
781
+ framework_match = False
782
+ has_database = False
783
+
784
+ ##
785
+
786
+ domain_check = self.analyze_git_info()
787
+ domain_is_commercial = False
788
+ if domain_check and domain_check.get("domain"):
789
+ commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
790
+ domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
791
+
792
+ ##
793
+
794
+ project_structure = self.analyze_project_structure()
795
+
796
+ ##
797
+
798
+ indicators = [
799
+ basic["commercial_probability"],
800
+ framework_match,
801
+ has_database,
802
+ domain_is_commercial,
803
+ project_structure.get("type_confidence", {}).get("commercial_app", 0),
804
+ self.detect_usage_pattern()["business_hours_usage"]
805
+ ]
806
+
807
+ ##
808
+
809
+ indicators = [i for i in indicators if i is not None]
810
+
811
+ ##
812
+
813
+ if indicators:
814
+ score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
815
+ for i in indicators) / len(indicators)
816
+ else:
817
+ score = 0
818
+
819
+ return {
820
+ "commercial_probability": score,
821
+ "likely_commercial": score > 0.4,
822
+ "indicators": {
823
+ "basic_indicators": basic["commercial_indicators"],
824
+ "framework_match": framework_match,
825
+ "has_database": has_database,
826
+ "domain_is_commercial": domain_is_commercial,
827
+ "project_structure": project_structure.get("detected_type"),
828
+ "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
829
+ }
830
+ }
831
+
832
+ def analyze_dependencies(self):
833
+ #--
834
+ try:
835
+ import pkg_resources
836
+
837
+ ##
838
+
839
+ enterprise_packages = [
840
+ "snowflake-connector-python", "databricks", "azure",
841
+ "aws", "google-cloud", "stripe", "atlassian",
842
+ "salesforce", "bigquery", "tableau", "sap"
843
+ ]
844
+
845
+ ##
846
+
847
+ commercial_deps = []
848
+ for pkg in pkg_resources.working_set:
849
+ if any(ent in pkg.key for ent in enterprise_packages):
850
+ commercial_deps.append({"name": pkg.key, "version": pkg.version})
851
+
852
+ return {
853
+ "has_commercial_deps": len(commercial_deps) > 0,
854
+ "commercial_deps_count": len(commercial_deps),
855
+ "commercial_deps": commercial_deps
856
+ }
857
+ except:
858
+ return {"has_commercial_deps": False}
859
+
860
# Module-level singleton shared by the rest of the package.  NOTE: per the
# Inspector header (singleton __new__ + _initialize), constructing it here
# triggers environment examination as a side effect of importing this module.
inspector = Inspector()