vnai-2.0.2-py3-none-any.whl → vnai-2.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vnai/__init__.py +291 -72
- vnai/beam/__init__.py +2 -2
- vnai/beam/metrics.py +207 -57
- vnai/beam/pulse.py +122 -29
- vnai/beam/quota.py +507 -102
- vnai/flow/__init__.py +7 -2
- vnai/flow/queue.py +142 -55
- vnai/flow/relay.py +476 -149
- vnai/scope/__init__.py +7 -2
- vnai/scope/profile.py +858 -219
- vnai/scope/promo.py +197 -55
- vnai/scope/state.py +246 -71
- {vnai-2.0.2.dist-info → vnai-2.0.3.dist-info}/METADATA +1 -1
- vnai-2.0.3.dist-info/RECORD +16 -0
- {vnai-2.0.2.dist-info → vnai-2.0.3.dist-info}/WHEEL +1 -1
- vnai-2.0.2.dist-info/RECORD +0 -16
- {vnai-2.0.2.dist-info → vnai-2.0.3.dist-info}/top_level.txt +0 -0
vnai/scope/profile.py
CHANGED
@@ -1,223 +1,862 @@
- [old lines 1-14: content not captured in this diff view]
-_I='backtesting'
-_H='commercial_probability'
-_G='timestamp'
-_F='business_hours_usage'
-_E='google.colab'
-_D='unknown'
-_C=False
-_B=None
-_A=True
-import os,sys,platform,uuid,hashlib,psutil,threading,time,importlib.metadata
+##
+
+##
+
+
+import os
+import sys
+import platform
+import uuid
+import hashlib
+import psutil
+import threading
+import time
+import importlib.metadata
 from datetime import datetime
 import subprocess
 from pathlib import Path
+
 class Inspector:
- [old lines 29-223: previous minified class body removed; content not captured in this diff view]
+    #--
+
+    _instance = None
+    _lock = None
+
+    def __new__(cls):
+        import threading
+        if cls._lock is None:
+            cls._lock = threading.Lock()
+
+        with cls._lock:
+            if cls._instance is None:
+                cls._instance = super(Inspector, cls).__new__(cls)
+                cls._instance._initialize()
+        return cls._instance
+
+    def _initialize(self):
+        #--
+        self.cache = {}
+        self.cache_ttl = 3600 ##
+
+        self.last_examination = 0
+        self.machine_id = None
+        self._colab_auth_triggered = False
+
+        ##
+
+        self.home_dir = Path.home()
+        self.project_dir = self.home_dir / ".vnstock"
+        self.project_dir.mkdir(exist_ok=True)
+        self.id_dir = self.project_dir / 'id'
+        self.id_dir.mkdir(exist_ok=True)
+        self.machine_id_path = self.id_dir / "machine_id.txt"
+
+        ##
+
+        self.examine()
+
+    def examine(self, force_refresh=False):
+        #--
+        current_time = time.time()
+
+        ##
+
+        if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
+            return self.cache
+
+        ##
+
+        info = {
+            "timestamp": datetime.now().isoformat(),
+            "python_version": platform.python_version(),
+            "os_name": platform.system(),
+            "platform": platform.platform()
+        }
+
+        ##
+
+        info["machine_id"] = self.fingerprint()
+
+        ##
+
+        try:
+            ##
+
+            import importlib.util
+            ipython_spec = importlib.util.find_spec("IPython")
+
+            if ipython_spec:
+                from IPython import get_ipython
+                ipython = get_ipython()
+                if ipython is not None:
+                    info["environment"] = "jupyter"
+                    ##
+
+                    if 'google.colab' in sys.modules:
+                        info["hosting_service"] = "colab"
+                    elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
+                        info["hosting_service"] = "kaggle"
+                    else:
+                        info["hosting_service"] = "local_jupyter"
+                elif sys.stdout.isatty():
+                    info["environment"] = "terminal"
+                else:
+                    info["environment"] = "script"
+            elif sys.stdout.isatty():
+                info["environment"] = "terminal"
+            else:
+                info["environment"] = "script"
+        except:
+            info["environment"] = "unknown"
+
+        ##
+
+        try:
+            info["cpu_count"] = os.cpu_count()
+            info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
+        except:
+            pass
+
+        ##
+
+        is_colab = 'google.colab' in sys.modules
+        if is_colab:
+            info["is_colab"] = True
+            ##
+
+            self.detect_colab_with_delayed_auth()
+
+        ##
+
+        try:
+            ##
+
+            info["commercial_usage"] = self.enhanced_commercial_detection()
+
+            ##
+
+            info["project_context"] = self.analyze_project_structure()
+
+            ##
+
+            info["git_info"] = self.analyze_git_info()
+
+            ##
+
+            info["usage_pattern"] = self.detect_usage_pattern()
+
+            ##
+
+            info["dependencies"] = self.analyze_dependencies()
+        except Exception as e:
+            ##
+
+            info["detection_error"] = str(e)
+
+        ##
+
+        self.cache = info
+        self.last_examination = current_time
+
+        return info
+
+    def fingerprint(self):
+        #--
+        ##
+
+        if self.machine_id:
+            return self.machine_id
+
+        ##
+
+        if self.machine_id_path.exists():
+            try:
+                with open(self.machine_id_path, "r") as f:
+                    self.machine_id = f.read().strip()
+                    return self.machine_id
+            except:
+                pass
+
+        ##
+
+        is_colab = self.detect_colab_with_delayed_auth()
+
+        ##
+
+        try:
+            ##
+
+            system_info = platform.node() + platform.platform() + platform.machine()
+            self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
+        except:
+            ##
+
+            self.machine_id = str(uuid.uuid4())
+
+        ##
+
+        try:
+            with open(self.machine_id_path, "w") as f:
+                f.write(self.machine_id)
+        except:
+            pass
+
+        return self.machine_id
+
+    def detect_hosting(self):
+        #--
+        ##
+
+        hosting_markers = {
+            "COLAB_GPU": "Google Colab",
+            "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
+            "BINDER_SERVICE_HOST": "Binder",
+            "CODESPACE_NAME": "GitHub Codespaces",
+            "STREAMLIT_SERVER_HEADLESS": "Streamlit Cloud",
+            "CLOUD_SHELL": "Cloud Shell"
+        }
+
+        for env_var, host_name in hosting_markers.items():
+            if env_var in os.environ:
+                return host_name
+
+        ##
+
+        if 'google.colab' in sys.modules:
+            return "Google Colab"
+
+        return "local"
+
+    def detect_commercial_usage(self):
+        #--
+        commercial_indicators = {
+            "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
+            "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
+            "env_vars": ["COMPANY", "BUSINESS", "ENTERPRISE", "CORPORATE"],
+            "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
+        }
+
+        ##
+
+        env_values = " ".join(os.environ.values()).lower()
+        domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
+
+        ##
+
+        env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
+
+        ##
+
+        current_dir = os.getcwd().lower()
+        dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
+
+        ##
+
+        try:
+            files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
+            file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
+        except:
+            file_match = False
+
+        ##
+
+        indicators = [domain_match, env_var_match, dir_match, file_match]
+        commercial_probability = sum(indicators) / len(indicators)
+
+        return {
+            "likely_commercial": commercial_probability > 0.3,
+            "commercial_probability": commercial_probability,
+            "commercial_indicators": {
+                "domain_match": domain_match,
+                "env_var_match": env_var_match,
+                "dir_match": dir_match,
+                "file_match": file_match
+            }
+        }
+
+    def scan_packages(self):
+        #--
+        package_groups = {
+            "vnstock_family": [
+                "vnstock",
+                "vnstock3",
+                "vnstock_ezchart",
+                "vnstock_data_pro", ##
+
+                "vnstock_market_data_pipeline",
+                "vnstock_ta",
+                "vnii",
+                "vnai"
+            ],
+            "analytics": [
+                "openbb",
+                "pandas_ta"
+            ],
+            "static_charts": [
+                "matplotlib",
+                "seaborn",
+                "altair"
+            ],
+            "dashboard": [
+                "streamlit",
+                "voila",
+                "panel",
+                "shiny",
+                "dash"
+            ],
+            "interactive_charts": [
+                "mplfinance",
+                "plotly",
+                "plotline",
+                "bokeh",
+                "pyecharts",
+                "highcharts-core",
+                "highcharts-stock",
+                "mplchart"
+            ],
+            "datafeed": [
+                "yfinance",
+                "alpha_vantage",
+                "pandas-datareader",
+                "investpy"
+            ],
+            "official_api": [
+                "ssi-fc-data",
+                "ssi-fctrading"
+            ],
+            "risk_return": [
+                "pyfolio",
+                "empyrical",
+                "quantstats",
+                "financetoolkit"
+            ],
+            "machine_learning": [
+                "scipy",
+                "sklearn",
+                "statsmodels",
+                "pytorch",
+                "tensorflow",
+                "keras",
+                "xgboost"
+            ],
+            "indicators": [
+                "stochastic",
+                "talib",
+                "tqdm",
+                "finta",
+                "financetoolkit",
+                "tulipindicators"
+            ],
+            "backtesting": [
+                "vectorbt",
+                "backtesting",
+                "bt",
+                "zipline",
+                "pyalgotrade",
+                "backtrader",
+                "pybacktest",
+                "fastquant",
+                "lean",
+                "ta",
+                "finmarketpy",
+                "qstrader"
+            ],
+            "server": [
+                "fastapi",
+                "flask",
+                "uvicorn",
+                "gunicorn"
+            ],
+            "framework": [
+                "lightgbm",
+                "catboost",
+                "django"
+            ]
+        }
+
+        installed = {}
+
+        for category, packages in package_groups.items():
+            installed[category] = []
+            for pkg in packages:
+                try:
+                    version = importlib.metadata.version(pkg)
+                    installed[category].append({"name": pkg, "version": version})
+                except:
+                    pass
+
+        return installed
+
+    def setup_vnstock_environment(self):
+        #--
+        ##
+
+        env_file = self.id_dir / "environment.json"
+        env_data = {
+            "accepted_agreement": True,
+            "timestamp": datetime.now().isoformat(),
+            "machine_id": self.fingerprint()
+        }
+
+        try:
+            with open(env_file, "w") as f:
+                import json
+                json.dump(env_data, f)
+            return True
+        except Exception as e:
+            print(f"Failed to set up vnstock environment: {e}")
+            return False
+
+    ##
+
+    def detect_colab_with_delayed_auth(self, immediate=False):
+        #--
+        ##
+
+        is_colab = 'google.colab' in sys.modules
+
+        if is_colab and not self._colab_auth_triggered:
+            if immediate:
+                ##
+
+                self._colab_auth_triggered = True
+                user_id = self.get_or_create_user_id()
+                if user_id and user_id != self.machine_id:
+                    self.machine_id = user_id
+                    try:
+                        with open(self.machine_id_path, "w") as f:
+                            f.write(user_id)
+                    except:
+                        pass
+            else:
+                ##
+
+                def delayed_auth():
+                    ##
+
+                    time.sleep(300)
+                    ##
+
+                    user_id = self.get_or_create_user_id()
+                    ##
+
+                    if user_id and user_id != self.machine_id:
+                        self.machine_id = user_id
+                        ##
+
+                        try:
+                            with open(self.machine_id_path, "w") as f:
+                                f.write(user_id)
+                        except:
+                            pass
+
+                ##
+
+                thread = threading.Thread(target=delayed_auth, daemon=True)
+                thread.start()
+
+        return is_colab
+
+    def get_or_create_user_id(self):
+        #--
+        if self._colab_auth_triggered:
+            return self.machine_id ##
+
+
+        try:
+            from google.colab import drive
+            print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
+            print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
+
+            ##
+
+            self._colab_auth_triggered = True
+
+            ##
+
+            drive.mount('/content/drive')
+            id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
+
+            if os.path.exists(id_path):
+                with open(id_path, 'r') as f:
+                    return f.read().strip()
+            else:
+                user_id = str(uuid.uuid4())
+                os.makedirs(os.path.dirname(id_path), exist_ok=True)
+                with open(id_path, 'w') as f:
+                    f.write(user_id)
+                return user_id
+        except Exception as e:
+            ##
+
+            return self.machine_id
+
+    ##
+
+
+    def analyze_project_structure(self):
+        #--
+        current_dir = os.getcwd()
+        project_indicators = {
+            "commercial_app": ["app", "services", "products", "customers", "billing"],
+            "financial_tool": ["portfolio", "backtesting", "trading", "strategy"],
+            "data_science": ["models", "notebooks", "datasets", "visualization"],
+            "educational": ["examples", "lectures", "assignments", "slides"]
+        }
+
+        ##
+
+        project_type = {}
+        for category, markers in project_indicators.items():
+            match_count = 0
+            for marker in markers:
+                if os.path.exists(os.path.join(current_dir, marker)):
+                    match_count += 1
+            if len(markers) > 0:
+                project_type[category] = match_count / len(markers)
+
+        ##
+
+        try:
+            root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
+            root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
+
+            ##
+
+            file_markers = {
+                "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
+                "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
+                "web_app": ["app.py", "wsgi.py", "manage.py", "server.py"],
+                "finance_app": ["portfolio.py", "trading.py", "backtest.py"],
+            }
+
+            file_project_type = "unknown"
+            for ptype, markers in file_markers.items():
+                if any(marker in root_files for marker in markers):
+                    file_project_type = ptype
+                    break
+
+            ##
+
+            frameworks = []
+            framework_markers = {
+                "django": ["manage.py", "settings.py"],
+                "flask": ["app.py", "wsgi.py"],
+                "streamlit": ["streamlit_app.py", "app.py"],
+                "fastapi": ["main.py", "app.py"],
+            }
+
+            for framework, markers in framework_markers.items():
+                if any(marker in root_files for marker in markers):
+                    frameworks.append(framework)
+
+        except Exception as e:
+            root_files = []
+            root_dirs = []
+            file_project_type = "unknown"
+            frameworks = []
+
+        return {
+            "project_dir": current_dir,
+            "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else "unknown",
+            "file_type": file_project_type,
+            "is_git_repo": ".git" in (root_dirs if 'root_dirs' in locals() else []),
+            "frameworks": frameworks,
+            "file_count": len(root_files) if 'root_files' in locals() else 0,
+            "directory_count": len(root_dirs) if 'root_dirs' in locals() else 0,
+            "type_confidence": project_type
+        }
+
+    def analyze_git_info(self):
+        #--
+        try:
+            ##
+
+            result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
+                                    capture_output=True, text=True)
+
+            if result.returncode != 0:
+                return {"has_git": False}
+
+            ##
+
+            repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
+                                       capture_output=True, text=True)
+            repo_path = repo_root.stdout.strip() if repo_root.stdout else None
+
+            ##
+
+            repo_name = os.path.basename(repo_path) if repo_path else None
+
+            ##
+
+            has_license = False
+            license_type = "unknown"
+            if repo_path:
+                license_files = [
+                    os.path.join(repo_path, "LICENSE"),
+                    os.path.join(repo_path, "LICENSE.txt"),
+                    os.path.join(repo_path, "LICENSE.md")
+                ]
+                for license_file in license_files:
+                    if os.path.exists(license_file):
+                        has_license = True
+                        ##
+
+                        try:
+                            with open(license_file, 'r') as f:
+                                content = f.read().lower()
+                                if "mit license" in content:
+                                    license_type = "MIT"
+                                elif "apache license" in content:
+                                    license_type = "Apache"
+                                elif "gnu general public" in content:
+                                    license_type = "GPL"
+                                elif "bsd " in content:
+                                    license_type = "BSD"
+                            ##
+
+                        except:
+                            pass
+                        break
+
+            ##
+
+            remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
+                                    capture_output=True, text=True)
+
+            remote_url = remote.stdout.strip() if remote.stdout else None
+
+            if remote_url:
+                ##
+
+                remote_url = remote_url.strip()
+
+            ##
+
+            domain = None
+            if remote_url:
+                ##
+
+                if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
+                    domain = remote_url.split('@')[1].split(':')[0]
+                ##
+
+                elif remote_url.startswith('http'):
+                    ##
+
+                    url_parts = remote_url.split('//')
+                    if len(url_parts) > 1:
+                        auth_and_domain = url_parts[1].split('/', 1)[0]
+                        ##
+
+                        if '@' in auth_and_domain:
+                            domain = auth_and_domain.split('@')[-1]
+                        else:
+                            domain = auth_and_domain
+                ##
+
+                else:
+                    ##
+
+                    import re
+                    domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
+                    if domain_match:
+                        domain = domain_match.group(1) or domain_match.group(2)
+
+            ##
+
+            owner = None
+            repo_name = None
+
+            if domain:
+                ##
+
+                if "github" in domain:
+                    ##
+
+                    if ':' in remote_url and '@' in remote_url:
+                        parts = remote_url.split(':')[-1].split('/')
+                        if len(parts) >= 2:
+                            owner = parts[0]
+                            repo_name = parts[1].replace('.git', '')
+                    ##
+
+                    else:
+                        url_parts = remote_url.split('//')
+                        if len(url_parts) > 1:
+                            path_parts = url_parts[1].split('/')
+                            if len(path_parts) >= 3:
+                                ##
+
+                                domain_part = path_parts[0]
+                                if '@' in domain_part:
+                                    ##
+
+                                    owner_index = 1
+                                else:
+                                    owner_index = 1
+
+                                if len(path_parts) > owner_index:
+                                    owner = path_parts[owner_index]
+                                if len(path_parts) > owner_index + 1:
+                                    repo_name = path_parts[owner_index + 1].replace('.git', '')
+
+            ##
+
+            commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
+                                          capture_output=True, text=True)
+
+            ##
+
+            branch_count = subprocess.run(["git", "branch", "--list"],
+                                          capture_output=True, text=True)
+            branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
+
+            return {
+                "domain": domain, ##
+
+                "owner": owner, ##
+
+                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
+                "branch_count": branch_count,
+                "has_git": True,
+                "repo_path": repo_path if 'repo_path' in locals() else None,
+                "repo_name": repo_name,
+                "has_license": has_license if 'has_license' in locals() else False,
+                "license_type": license_type if 'license_type' in locals() else "unknown"
+            }
+
+        except Exception as e:
+            ##
+
+            pass
+        return {"has_git": False}
+
+
+    def detect_usage_pattern(self):
+        #--
+        current_time = datetime.now()
+
+        ##
+
+        is_weekday = current_time.weekday() < 5 ##
+
+        hour = current_time.hour
+        is_business_hours = 9 <= hour <= 18
+
+        return {
+            "business_hours_usage": is_weekday and is_business_hours,
+            "weekday": is_weekday,
+            "hour": hour,
+            "timestamp": current_time.isoformat()
+        }
+
+    def enhanced_commercial_detection(self):
+        #--
+        basic = self.detect_commercial_usage()
+
+        ##
+
+        try:
+            project_files = os.listdir(os.getcwd())
+
+            ##
+
+            commercial_frameworks = ["django-oscar", "opencart", "magento",
+                                     "saleor", "odoo", "shopify", "woocommerce"]
+
+            framework_match = False
+            for framework in commercial_frameworks:
+                if any(framework in f for f in project_files):
+                    framework_match = True
+                    break
+
+            ##
+
+            db_files = [f for f in project_files if "database" in f.lower()
+                        or "db_config" in f.lower() or f.endswith(".db")]
+            has_database = len(db_files) > 0
+        except:
+            framework_match = False
+            has_database = False
+
+        ##
+
+        domain_check = self.analyze_git_info()
+        domain_is_commercial = False
+        if domain_check and domain_check.get("domain"):
+            commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
+            domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
+
+        ##
+
+        project_structure = self.analyze_project_structure()
+
+        ##
+
+        indicators = [
+            basic["commercial_probability"],
+            framework_match,
+            has_database,
+            domain_is_commercial,
+            project_structure.get("type_confidence", {}).get("commercial_app", 0),
+            self.detect_usage_pattern()["business_hours_usage"]
+        ]
+
+        ##
+
+        indicators = [i for i in indicators if i is not None]
+
+        ##
+
+        if indicators:
+            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
+                        for i in indicators) / len(indicators)
+        else:
+            score = 0
+
+        return {
+            "commercial_probability": score,
+            "likely_commercial": score > 0.4,
+            "indicators": {
+                "basic_indicators": basic["commercial_indicators"],
+                "framework_match": framework_match,
+                "has_database": has_database,
+                "domain_is_commercial": domain_is_commercial,
+                "project_structure": project_structure.get("detected_type"),
+                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
+            }
+        }
+
+    def analyze_dependencies(self):
+        #--
+        try:
+            import pkg_resources
+
+            ##
+
+            enterprise_packages = [
+                "snowflake-connector-python", "databricks", "azure",
+                "aws", "google-cloud", "stripe", "atlassian",
+                "salesforce", "bigquery", "tableau", "sap"
+            ]
+
+            ##
+
+            commercial_deps = []
+            for pkg in pkg_resources.working_set:
+                if any(ent in pkg.key for ent in enterprise_packages):
+                    commercial_deps.append({"name": pkg.key, "version": pkg.version})
+
+            return {
+                "has_commercial_deps": len(commercial_deps) > 0,
+                "commercial_deps_count": len(commercial_deps),
+                "commercial_deps": commercial_deps
+            }
+        except:
+            return {"has_commercial_deps": False}
+
+##
+
+inspector = Inspector()
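
For reviewers who want to see what the rewritten module does at runtime, here is a minimal usage sketch, not part of the diff itself. It assumes vnai 2.0.3 is installed and only calls names that appear in the added code above (the module-level `inspector` singleton and its `examine()`, `detect_hosting()`, and `scan_packages()` methods); the dictionary keys shown are the ones built inside `examine()`.

# Minimal sketch, assuming vnai 2.0.3 is installed; uses only names visible in the diff above.
from vnai.scope.profile import inspector  # singleton created at import time by `inspector = Inspector()`

# examine() returns the cached environment profile; it is rebuilt after cache_ttl
# seconds or when force_refresh=True.
info = inspector.examine(force_refresh=True)

print(info["machine_id"])          # fingerprint persisted under ~/.vnstock/id/machine_id.txt
print(info["environment"])         # "jupyter", "terminal", "script", or "unknown"
print(inspector.detect_hosting())  # e.g. "Google Colab", "Kaggle", or "local"

# scan_packages() reports installed packages from the predefined package groups.
for category, packages in inspector.scan_packages().items():
    if packages:
        print(category, [p["name"] for p in packages])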