vnai 2.1.8__py3-none-any.whl → 2.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vnai/__init__.py +37 -110
- vnai/beam/__init__.py +0 -2
- vnai/beam/metrics.py +48 -99
- vnai/beam/pulse.py +24 -53
- vnai/beam/quota.py +94 -247
- vnai/flow/__init__.py +1 -4
- vnai/flow/queue.py +17 -50
- vnai/flow/relay.py +98 -204
- vnai/scope/__init__.py +1 -4
- vnai/scope/profile.py +231 -417
- vnai/scope/promo.py +41 -123
- vnai/scope/state.py +52 -119
- {vnai-2.1.8.dist-info → vnai-2.1.9.dist-info}/METADATA +1 -1
- vnai-2.1.9.dist-info/RECORD +16 -0
- vnai-2.1.8.dist-info/RECORD +0 -16
- {vnai-2.1.8.dist-info → vnai-2.1.9.dist-info}/WHEEL +0 -0
- {vnai-2.1.8.dist-info → vnai-2.1.9.dist-info}/top_level.txt +0 -0
vnai/scope/profile.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# vnai/scope/profile.py
|
2
|
-
|
3
1
|
import os
|
4
2
|
import sys
|
5
3
|
import platform
|
@@ -14,353 +12,275 @@ import subprocess
|
|
14
12
|
from pathlib import Path
|
15
13
|
|
16
14
|
class Inspector:
|
17
|
-
"""Inspects execution environment"""
|
18
|
-
|
19
15
|
_instance = None
|
20
16
|
_lock = None
|
21
|
-
|
17
|
+
|
22
18
|
def __new__(cls):
|
23
19
|
import threading
|
24
20
|
if cls._lock is None:
|
25
21
|
cls._lock = threading.Lock()
|
26
|
-
|
27
22
|
with cls._lock:
|
28
23
|
if cls._instance is None:
|
29
24
|
cls._instance = super(Inspector, cls).__new__(cls)
|
30
25
|
cls._instance._initialize()
|
31
26
|
return cls._instance
|
32
|
-
|
27
|
+
|
33
28
|
def _initialize(self):
|
34
|
-
"""Initialize inspector"""
|
35
29
|
self.cache = {}
|
36
|
-
self.cache_ttl = 3600
|
30
|
+
self.cache_ttl = 3600
|
37
31
|
self.last_examination = 0
|
38
32
|
self.machine_id = None
|
39
33
|
self._colab_auth_triggered = False
|
40
|
-
|
41
|
-
# Paths
|
42
34
|
self.home_dir = Path.home()
|
43
|
-
self.project_dir = self.home_dir /
|
35
|
+
self.project_dir = self.home_dir /".vnstock"
|
44
36
|
self.project_dir.mkdir(exist_ok=True)
|
45
|
-
self.id_dir = self.project_dir /
|
37
|
+
self.id_dir = self.project_dir /'id'
|
46
38
|
self.id_dir.mkdir(exist_ok=True)
|
47
|
-
self.machine_id_path = self.id_dir /
|
48
|
-
|
49
|
-
# Perform initial examination
|
39
|
+
self.machine_id_path = self.id_dir /"machine_id.txt"
|
50
40
|
self.examine()
|
51
|
-
|
41
|
+
|
52
42
|
def examine(self, force_refresh=False):
|
53
|
-
"""Examine current execution context"""
|
54
43
|
current_time = time.time()
|
55
|
-
|
56
|
-
# Return cached data if it's fresh enough and we're not forcing a refresh
|
57
44
|
if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
|
58
45
|
return self.cache
|
59
|
-
|
60
|
-
# Start with basic information
|
61
46
|
info = {
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
47
|
+
"timestamp": datetime.now().isoformat(),
|
48
|
+
"python_version": platform.python_version(),
|
49
|
+
"os_name": platform.system(),
|
50
|
+
"platform": platform.platform()
|
66
51
|
}
|
67
|
-
|
68
|
-
# Machine identifier
|
69
52
|
info["machine_id"] = self.fingerprint()
|
70
|
-
|
71
|
-
# Environment detection
|
72
53
|
try:
|
73
|
-
# Check for Jupyter/IPython
|
74
54
|
import importlib.util
|
75
55
|
ipython_spec = importlib.util.find_spec("IPython")
|
76
|
-
|
77
56
|
if ipython_spec:
|
78
57
|
from IPython import get_ipython
|
79
58
|
ipython = get_ipython()
|
80
59
|
if ipython is not None:
|
81
|
-
info["environment"] =
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
info["hosting_service"] = "kaggle"
|
60
|
+
info["environment"] ="jupyter"
|
61
|
+
if'google.colab' in sys.modules:
|
62
|
+
info["hosting_service"] ="colab"
|
63
|
+
elif'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
|
64
|
+
info["hosting_service"] ="kaggle"
|
87
65
|
else:
|
88
|
-
info["hosting_service"] =
|
66
|
+
info["hosting_service"] ="local_jupyter"
|
89
67
|
elif sys.stdout.isatty():
|
90
|
-
info["environment"] =
|
68
|
+
info["environment"] ="terminal"
|
91
69
|
else:
|
92
|
-
info["environment"] =
|
70
|
+
info["environment"] ="script"
|
93
71
|
elif sys.stdout.isatty():
|
94
|
-
info["environment"] =
|
72
|
+
info["environment"] ="terminal"
|
95
73
|
else:
|
96
|
-
info["environment"] =
|
74
|
+
info["environment"] ="script"
|
97
75
|
except:
|
98
|
-
info["environment"] =
|
99
|
-
|
100
|
-
# System resources
|
76
|
+
info["environment"] ="unknown"
|
101
77
|
try:
|
102
78
|
info["cpu_count"] = os.cpu_count()
|
103
79
|
info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
|
104
80
|
except:
|
105
81
|
pass
|
106
|
-
|
107
|
-
# Check if in Google Colab
|
108
|
-
is_colab = 'google.colab' in sys.modules
|
82
|
+
is_colab ='google.colab' in sys.modules
|
109
83
|
if is_colab:
|
110
84
|
info["is_colab"] = True
|
111
|
-
# Setup delayed authentication if not already triggered
|
112
85
|
self.detect_colab_with_delayed_auth()
|
113
|
-
|
114
|
-
# Enhanced context information
|
115
86
|
try:
|
116
|
-
# Commercial usage detection
|
117
87
|
info["commercial_usage"] = self.enhanced_commercial_detection()
|
118
|
-
|
119
|
-
# Project context
|
120
88
|
info["project_context"] = self.analyze_project_structure()
|
121
|
-
|
122
|
-
# Git info
|
123
89
|
info["git_info"] = self.analyze_git_info()
|
124
|
-
|
125
|
-
# Working hours pattern
|
126
90
|
info["usage_pattern"] = self.detect_usage_pattern()
|
127
|
-
|
128
|
-
# Dependency analysis
|
129
91
|
info["dependencies"] = self.analyze_dependencies()
|
130
92
|
except Exception as e:
|
131
|
-
# Don't let enhanced detection failure stop basic functionality
|
132
93
|
info["detection_error"] = str(e)
|
133
|
-
|
134
|
-
# Update cache
|
135
94
|
self.cache = info
|
136
95
|
self.last_examination = current_time
|
137
|
-
|
138
96
|
return info
|
139
|
-
|
97
|
+
|
140
98
|
def fingerprint(self):
|
141
|
-
"""Generate unique environment fingerprint"""
|
142
|
-
# Always return cached machine_id if it exists
|
143
99
|
if self.machine_id:
|
144
100
|
return self.machine_id
|
145
|
-
|
146
|
-
# Try to load from file first
|
147
101
|
if self.machine_id_path.exists():
|
148
102
|
try:
|
149
|
-
with open(self.machine_id_path,
|
103
|
+
with open(self.machine_id_path,"r") as f:
|
150
104
|
self.machine_id = f.read().strip()
|
151
105
|
return self.machine_id
|
152
106
|
except:
|
153
107
|
pass
|
154
|
-
|
155
|
-
# Check for Colab and setup delayed authentication
|
156
108
|
is_colab = self.detect_colab_with_delayed_auth()
|
157
|
-
|
158
|
-
# Generate a new machine ID only if necessary
|
159
109
|
try:
|
160
|
-
# Use consistent system information
|
161
110
|
system_info = platform.node() + platform.platform() + platform.machine()
|
162
111
|
self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
|
163
112
|
except:
|
164
|
-
# Fallback to UUID but only as last resort
|
165
113
|
self.machine_id = str(uuid.uuid4())
|
166
|
-
|
167
|
-
# Save to ensure consistency across calls
|
168
114
|
try:
|
169
|
-
with open(self.machine_id_path,
|
115
|
+
with open(self.machine_id_path,"w") as f:
|
170
116
|
f.write(self.machine_id)
|
171
117
|
except:
|
172
118
|
pass
|
173
|
-
|
174
119
|
return self.machine_id
|
175
|
-
|
120
|
+
|
176
121
|
def detect_hosting(self):
|
177
|
-
"""Detect if running in a hosted environment"""
|
178
|
-
# Check common environment variables for hosted environments
|
179
122
|
hosting_markers = {
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
123
|
+
"COLAB_GPU":"Google Colab",
|
124
|
+
"KAGGLE_KERNEL_RUN_TYPE":"Kaggle",
|
125
|
+
"BINDER_SERVICE_HOST":"Binder",
|
126
|
+
"CODESPACE_NAME":"GitHub Codespaces",
|
127
|
+
"STREAMLIT_SERVER_HEADLESS":"Streamlit Cloud",
|
128
|
+
"CLOUD_SHELL":"Cloud Shell"
|
186
129
|
}
|
187
|
-
|
188
130
|
for env_var, host_name in hosting_markers.items():
|
189
131
|
if env_var in os.environ:
|
190
132
|
return host_name
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
return "local"
|
197
|
-
|
133
|
+
if'google.colab' in sys.modules:
|
134
|
+
return"Google Colab"
|
135
|
+
return"local"
|
136
|
+
|
198
137
|
def detect_commercial_usage(self):
|
199
|
-
"""Detect if running in commercial environment"""
|
200
138
|
commercial_indicators = {
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
139
|
+
"env_domains": [".com",".io",".co","enterprise","corp","inc"],
|
140
|
+
"file_patterns": ["invoice","payment","customer","client","product","sale"],
|
141
|
+
"env_vars": ["COMPANY","BUSINESS","ENTERPRISE","CORPORATE"],
|
142
|
+
"dir_patterns": ["company","business","enterprise","corporate","client"]
|
205
143
|
}
|
206
|
-
|
207
|
-
# Check environment variables for commercial domains
|
208
|
-
env_values = " ".join(os.environ.values()).lower()
|
144
|
+
env_values =" ".join(os.environ.values()).lower()
|
209
145
|
domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
|
210
|
-
|
211
|
-
# Check if commercial-related environment variables exist
|
212
146
|
env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
|
213
|
-
|
214
|
-
# Check current directory for commercial indicators
|
215
147
|
current_dir = os.getcwd().lower()
|
216
148
|
dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
|
217
|
-
|
218
|
-
# Check files in current directory for commercial patterns
|
219
149
|
try:
|
220
150
|
files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
|
221
151
|
file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
|
222
152
|
except:
|
223
153
|
file_match = False
|
224
|
-
|
225
|
-
# Calculate probability
|
226
154
|
indicators = [domain_match, env_var_match, dir_match, file_match]
|
227
155
|
commercial_probability = sum(indicators) / len(indicators)
|
228
|
-
|
229
156
|
return {
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
157
|
+
"likely_commercial": commercial_probability > 0.3,
|
158
|
+
"commercial_probability": commercial_probability,
|
159
|
+
"commercial_indicators": {
|
160
|
+
"domain_match": domain_match,
|
161
|
+
"env_var_match": env_var_match,
|
162
|
+
"dir_match": dir_match,
|
163
|
+
"file_match": file_match
|
237
164
|
}
|
238
165
|
}
|
239
|
-
|
166
|
+
|
240
167
|
def scan_packages(self):
|
241
|
-
"""Scan for installed packages by category"""
|
242
168
|
package_groups = {
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
169
|
+
"vnstock_family": [
|
170
|
+
"vnstock",
|
171
|
+
"vnstock3",
|
172
|
+
"vnstock_ezchart",
|
173
|
+
"vnstock_data_pro",
|
174
|
+
"vnstock_market_data_pipeline",
|
175
|
+
"vnstock_ta",
|
176
|
+
"vnii",
|
177
|
+
"vnai"
|
252
178
|
],
|
253
|
-
|
254
|
-
|
255
|
-
|
179
|
+
"analytics": [
|
180
|
+
"openbb",
|
181
|
+
"pandas_ta"
|
256
182
|
],
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
183
|
+
"static_charts": [
|
184
|
+
"matplotlib",
|
185
|
+
"seaborn",
|
186
|
+
"altair"
|
261
187
|
],
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
188
|
+
"dashboard": [
|
189
|
+
"streamlit",
|
190
|
+
"voila",
|
191
|
+
"panel",
|
192
|
+
"shiny",
|
193
|
+
"dash"
|
268
194
|
],
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
195
|
+
"interactive_charts": [
|
196
|
+
"mplfinance",
|
197
|
+
"plotly",
|
198
|
+
"plotline",
|
199
|
+
"bokeh",
|
200
|
+
"pyecharts",
|
201
|
+
"highcharts-core",
|
202
|
+
"highcharts-stock",
|
203
|
+
"mplchart"
|
278
204
|
],
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
205
|
+
"datafeed": [
|
206
|
+
"yfinance",
|
207
|
+
"alpha_vantage",
|
208
|
+
"pandas-datareader",
|
209
|
+
"investpy"
|
284
210
|
],
|
285
|
-
|
286
|
-
|
287
|
-
|
211
|
+
"official_api": [
|
212
|
+
"ssi-fc-data",
|
213
|
+
"ssi-fctrading"
|
288
214
|
],
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
215
|
+
"risk_return": [
|
216
|
+
"pyfolio",
|
217
|
+
"empyrical",
|
218
|
+
"quantstats",
|
219
|
+
"financetoolkit"
|
294
220
|
],
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
221
|
+
"machine_learning": [
|
222
|
+
"scipy",
|
223
|
+
"sklearn",
|
224
|
+
"statsmodels",
|
225
|
+
"pytorch",
|
226
|
+
"tensorflow",
|
227
|
+
"keras",
|
228
|
+
"xgboost"
|
303
229
|
],
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
230
|
+
"indicators": [
|
231
|
+
"stochastic",
|
232
|
+
"talib",
|
233
|
+
"tqdm",
|
234
|
+
"finta",
|
235
|
+
"financetoolkit",
|
236
|
+
"tulipindicators"
|
311
237
|
],
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
238
|
+
"backtesting": [
|
239
|
+
"vectorbt",
|
240
|
+
"backtesting",
|
241
|
+
"bt",
|
242
|
+
"zipline",
|
243
|
+
"pyalgotrade",
|
244
|
+
"backtrader",
|
245
|
+
"pybacktest",
|
246
|
+
"fastquant",
|
247
|
+
"lean",
|
248
|
+
"ta",
|
249
|
+
"finmarketpy",
|
250
|
+
"qstrader"
|
325
251
|
],
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
252
|
+
"server": [
|
253
|
+
"fastapi",
|
254
|
+
"flask",
|
255
|
+
"uvicorn",
|
256
|
+
"gunicorn"
|
331
257
|
],
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
258
|
+
"framework": [
|
259
|
+
"lightgbm",
|
260
|
+
"catboost",
|
261
|
+
"django"
|
336
262
|
]
|
337
263
|
}
|
338
|
-
|
339
264
|
installed = {}
|
340
|
-
|
341
265
|
for category, packages in package_groups.items():
|
342
266
|
installed[category] = []
|
343
267
|
for pkg in packages:
|
344
268
|
try:
|
345
269
|
version = importlib.metadata.version(pkg)
|
346
|
-
installed[category].append({"name": pkg,
|
270
|
+
installed[category].append({"name": pkg,"version": version})
|
347
271
|
except:
|
348
272
|
pass
|
349
|
-
|
350
273
|
return installed
|
351
|
-
|
274
|
+
|
352
275
|
def setup_vnstock_environment(self):
|
353
|
-
|
354
|
-
# Create environment.json file
|
355
|
-
env_file = self.id_dir / "environment.json"
|
276
|
+
env_file = self.id_dir /"environment.json"
|
356
277
|
env_data = {
|
357
|
-
|
358
|
-
|
359
|
-
|
278
|
+
"accepted_agreement": True,
|
279
|
+
"timestamp": datetime.now().isoformat(),
|
280
|
+
"machine_id": self.fingerprint()
|
360
281
|
}
|
361
|
-
|
362
282
|
try:
|
363
|
-
with open(env_file,
|
283
|
+
with open(env_file,"w") as f:
|
364
284
|
import json
|
365
285
|
json.dump(env_data, f)
|
366
286
|
return True
|
@@ -368,90 +288,65 @@ class Inspector:
|
|
368
288
|
print(f"Failed to set up vnstock environment: {e}")
|
369
289
|
return False
|
370
290
|
|
371
|
-
# Update detect_colab_with_delayed_auth method in Inspector class
|
372
291
|
def detect_colab_with_delayed_auth(self, immediate=False):
|
373
|
-
|
374
|
-
# Check if we're in Colab without mounting drive yet
|
375
|
-
is_colab = 'google.colab' in sys.modules
|
376
|
-
|
292
|
+
is_colab ='google.colab' in sys.modules
|
377
293
|
if is_colab and not self._colab_auth_triggered:
|
378
294
|
if immediate:
|
379
|
-
# Immediate authentication
|
380
295
|
self._colab_auth_triggered = True
|
381
296
|
user_id = self.get_or_create_user_id()
|
382
297
|
if user_id and user_id != self.machine_id:
|
383
298
|
self.machine_id = user_id
|
384
299
|
try:
|
385
|
-
with open(self.machine_id_path,
|
300
|
+
with open(self.machine_id_path,"w") as f:
|
386
301
|
f.write(user_id)
|
387
302
|
except:
|
388
303
|
pass
|
389
304
|
else:
|
390
|
-
|
305
|
+
|
391
306
|
def delayed_auth():
|
392
|
-
# Wait for some time (e.g., 5 minutes) before attempting auth
|
393
307
|
time.sleep(300)
|
394
|
-
# Try to get authenticated user ID
|
395
308
|
user_id = self.get_or_create_user_id()
|
396
|
-
# Update machine ID with the authenticated one
|
397
309
|
if user_id and user_id != self.machine_id:
|
398
310
|
self.machine_id = user_id
|
399
|
-
# Save to the machine_id_path
|
400
311
|
try:
|
401
|
-
with open(self.machine_id_path,
|
312
|
+
with open(self.machine_id_path,"w") as f:
|
402
313
|
f.write(user_id)
|
403
314
|
except:
|
404
315
|
pass
|
405
|
-
|
406
|
-
# Start the delayed authentication thread
|
407
316
|
thread = threading.Thread(target=delayed_auth, daemon=True)
|
408
317
|
thread.start()
|
409
|
-
|
410
318
|
return is_colab
|
411
319
|
|
412
320
|
def get_or_create_user_id(self):
|
413
|
-
"""Get existing user ID from Google Drive or create new one"""
|
414
321
|
if self._colab_auth_triggered:
|
415
|
-
return self.machine_id
|
416
|
-
|
322
|
+
return self.machine_id
|
417
323
|
try:
|
418
324
|
from google.colab import drive
|
419
325
|
print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
|
420
326
|
print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
|
421
|
-
|
422
|
-
# Mark that we've triggered the auth
|
423
327
|
self._colab_auth_triggered = True
|
424
|
-
|
425
|
-
# Mount Google Drive
|
426
328
|
drive.mount('/content/drive')
|
427
|
-
id_path =
|
428
|
-
|
329
|
+
id_path ='/content/drive/MyDrive/.vnstock/user_id.txt'
|
429
330
|
if os.path.exists(id_path):
|
430
|
-
with open(id_path,
|
331
|
+
with open(id_path,'r') as f:
|
431
332
|
return f.read().strip()
|
432
333
|
else:
|
433
334
|
user_id = str(uuid.uuid4())
|
434
335
|
os.makedirs(os.path.dirname(id_path), exist_ok=True)
|
435
|
-
with open(id_path,
|
336
|
+
with open(id_path,'w') as f:
|
436
337
|
f.write(user_id)
|
437
338
|
return user_id
|
438
339
|
except Exception as e:
|
439
|
-
# Silently fail and return the existing machine ID
|
440
340
|
return self.machine_id
|
441
|
-
|
442
|
-
# Enhanced methods for project context collection
|
443
|
-
|
341
|
+
|
444
342
|
def analyze_project_structure(self):
|
445
|
-
"""Analyze project directory structure for context"""
|
446
343
|
current_dir = os.getcwd()
|
447
344
|
project_indicators = {
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
345
|
+
"commercial_app": ["app","services","products","customers","billing"],
|
346
|
+
"financial_tool": ["portfolio","backtesting","trading","strategy"],
|
347
|
+
"data_science": ["models","notebooks","datasets","visualization"],
|
348
|
+
"educational": ["examples","lectures","assignments","slides"]
|
452
349
|
}
|
453
|
-
|
454
|
-
# Look for key directories up to 2 levels deep (limited for privacy)
|
455
350
|
project_type = {}
|
456
351
|
for category, markers in project_indicators.items():
|
457
352
|
match_count = 0
|
@@ -460,249 +355,181 @@ class Inspector:
|
|
460
355
|
match_count += 1
|
461
356
|
if len(markers) > 0:
|
462
357
|
project_type[category] = match_count / len(markers)
|
463
|
-
|
464
|
-
# Scan for direct child files and directories (limited depth for privacy)
|
465
358
|
try:
|
466
359
|
root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
|
467
360
|
root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
|
468
|
-
|
469
|
-
# Detect project type
|
470
361
|
file_markers = {
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
362
|
+
"python_project": ["setup.py","pyproject.toml","requirements.txt"],
|
363
|
+
"data_science": ["notebook.ipynb",".ipynb_checkpoints"],
|
364
|
+
"web_app": ["app.py","wsgi.py","manage.py","server.py"],
|
365
|
+
"finance_app": ["portfolio.py","trading.py","backtest.py"],
|
475
366
|
}
|
476
|
-
|
477
|
-
file_project_type = "unknown"
|
367
|
+
file_project_type ="unknown"
|
478
368
|
for ptype, markers in file_markers.items():
|
479
369
|
if any(marker in root_files for marker in markers):
|
480
370
|
file_project_type = ptype
|
481
371
|
break
|
482
|
-
|
483
|
-
# Scan for specific frameworks
|
484
372
|
frameworks = []
|
485
373
|
framework_markers = {
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
374
|
+
"django": ["manage.py","settings.py"],
|
375
|
+
"flask": ["app.py","wsgi.py"],
|
376
|
+
"streamlit": ["streamlit_app.py","app.py"],
|
377
|
+
"fastapi": ["main.py","app.py"],
|
490
378
|
}
|
491
|
-
|
492
379
|
for framework, markers in framework_markers.items():
|
493
380
|
if any(marker in root_files for marker in markers):
|
494
381
|
frameworks.append(framework)
|
495
|
-
|
496
382
|
except Exception as e:
|
497
383
|
root_files = []
|
498
384
|
root_dirs = []
|
499
|
-
file_project_type =
|
385
|
+
file_project_type ="unknown"
|
500
386
|
frameworks = []
|
501
|
-
|
502
387
|
return {
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
388
|
+
"project_dir": current_dir,
|
389
|
+
"detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else"unknown",
|
390
|
+
"file_type": file_project_type,
|
391
|
+
"is_git_repo":".git" in (root_dirs if'root_dirs' in locals() else []),
|
392
|
+
"frameworks": frameworks,
|
393
|
+
"file_count": len(root_files) if'root_files' in locals() else 0,
|
394
|
+
"directory_count": len(root_dirs) if'root_dirs' in locals() else 0,
|
395
|
+
"type_confidence": project_type
|
511
396
|
}
|
512
397
|
|
513
398
|
def analyze_git_info(self):
|
514
|
-
"""Extract non-sensitive git repository information"""
|
515
399
|
try:
|
516
|
-
|
517
|
-
result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
|
400
|
+
result = subprocess.run(["git","rev-parse","--is-inside-work-tree"],
|
518
401
|
capture_output=True, text=True)
|
519
|
-
|
520
402
|
if result.returncode != 0:
|
521
403
|
return {"has_git": False}
|
522
|
-
|
523
|
-
# Get repository root path - ADD THIS CODE
|
524
|
-
repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
|
404
|
+
repo_root = subprocess.run(["git","rev-parse","--show-toplevel"],
|
525
405
|
capture_output=True, text=True)
|
526
406
|
repo_path = repo_root.stdout.strip() if repo_root.stdout else None
|
527
|
-
|
528
|
-
# Extract repository name from path - ADD THIS CODE
|
529
407
|
repo_name = os.path.basename(repo_path) if repo_path else None
|
530
|
-
|
531
|
-
# Check for license file - ADD THIS CODE
|
532
408
|
has_license = False
|
533
|
-
license_type =
|
409
|
+
license_type ="unknown"
|
534
410
|
if repo_path:
|
535
411
|
license_files = [
|
536
|
-
os.path.join(repo_path,
|
537
|
-
os.path.join(repo_path,
|
538
|
-
os.path.join(repo_path,
|
412
|
+
os.path.join(repo_path,"LICENSE"),
|
413
|
+
os.path.join(repo_path,"LICENSE.txt"),
|
414
|
+
os.path.join(repo_path,"LICENSE.md")
|
539
415
|
]
|
540
416
|
for license_file in license_files:
|
541
417
|
if os.path.exists(license_file):
|
542
418
|
has_license = True
|
543
|
-
# Try to determine license type by scanning content
|
544
419
|
try:
|
545
|
-
with open(license_file,
|
420
|
+
with open(license_file,'r') as f:
|
546
421
|
content = f.read().lower()
|
547
|
-
if
|
548
|
-
license_type =
|
549
|
-
elif
|
550
|
-
license_type =
|
551
|
-
elif
|
552
|
-
license_type =
|
553
|
-
elif
|
554
|
-
license_type =
|
555
|
-
# Add more license type detection as needed
|
422
|
+
if"mit license" in content:
|
423
|
+
license_type ="MIT"
|
424
|
+
elif"apache license" in content:
|
425
|
+
license_type ="Apache"
|
426
|
+
elif"gnu general public" in content:
|
427
|
+
license_type ="GPL"
|
428
|
+
elif"bsd " in content:
|
429
|
+
license_type ="BSD"
|
556
430
|
except:
|
557
431
|
pass
|
558
432
|
break
|
559
|
-
|
560
|
-
# Get remote URL (only domain, not full URL)
|
561
|
-
remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
|
433
|
+
remote = subprocess.run(["git","config","--get","remote.origin.url"],
|
562
434
|
capture_output=True, text=True)
|
563
|
-
|
564
435
|
remote_url = remote.stdout.strip() if remote.stdout else None
|
565
|
-
|
566
436
|
if remote_url:
|
567
|
-
# Clean the remote URL string
|
568
437
|
remote_url = remote_url.strip()
|
569
|
-
|
570
|
-
# Properly extract domain without authentication information
|
571
438
|
domain = None
|
572
439
|
if remote_url:
|
573
|
-
|
574
|
-
if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
|
440
|
+
if remote_url.startswith('git@') or'@' in remote_url and':' in remote_url.split('@')[1]:
|
575
441
|
domain = remote_url.split('@')[1].split(':')[0]
|
576
|
-
# For HTTPS URLs with or without authentication
|
577
442
|
elif remote_url.startswith('http'):
|
578
|
-
# Remove authentication part if present
|
579
443
|
url_parts = remote_url.split('//')
|
580
444
|
if len(url_parts) > 1:
|
581
445
|
auth_and_domain = url_parts[1].split('/', 1)[0]
|
582
|
-
|
583
|
-
if '@' in auth_and_domain:
|
446
|
+
if'@' in auth_and_domain:
|
584
447
|
domain = auth_and_domain.split('@')[-1]
|
585
448
|
else:
|
586
449
|
domain = auth_and_domain
|
587
|
-
# Handle other URL formats
|
588
450
|
else:
|
589
|
-
# Try a general regex as fallback for unusual formats
|
590
451
|
import re
|
591
452
|
domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
|
592
453
|
if domain_match:
|
593
454
|
domain = domain_match.group(1) or domain_match.group(2)
|
594
|
-
|
595
|
-
# Extract owner and repo info securely
|
596
455
|
owner = None
|
597
456
|
repo_name = None
|
598
|
-
|
599
457
|
if domain:
|
600
|
-
|
601
|
-
|
602
|
-
# SSH format: git@github.com:username/repo.git
|
603
|
-
if ':' in remote_url and '@' in remote_url:
|
458
|
+
if"github" in domain:
|
459
|
+
if':' in remote_url and'@' in remote_url:
|
604
460
|
parts = remote_url.split(':')[-1].split('/')
|
605
461
|
if len(parts) >= 2:
|
606
462
|
owner = parts[0]
|
607
|
-
repo_name = parts[1].replace('.git',
|
608
|
-
# HTTPS format
|
463
|
+
repo_name = parts[1].replace('.git','')
|
609
464
|
else:
|
610
465
|
url_parts = remote_url.split('//')
|
611
466
|
if len(url_parts) > 1:
|
612
467
|
path_parts = url_parts[1].split('/')
|
613
468
|
if len(path_parts) >= 3:
|
614
|
-
# Skip domain and authentication part
|
615
469
|
domain_part = path_parts[0]
|
616
|
-
if
|
617
|
-
# Path starts after domain
|
470
|
+
if'@' in domain_part:
|
618
471
|
owner_index = 1
|
619
472
|
else:
|
620
473
|
owner_index = 1
|
621
|
-
|
622
474
|
if len(path_parts) > owner_index:
|
623
475
|
owner = path_parts[owner_index]
|
624
476
|
if len(path_parts) > owner_index + 1:
|
625
|
-
repo_name = path_parts[owner_index + 1].replace('.git',
|
626
|
-
|
627
|
-
# Get commit count
|
628
|
-
commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
|
477
|
+
repo_name = path_parts[owner_index + 1].replace('.git','')
|
478
|
+
commit_count = subprocess.run(["git","rev-list","--count","HEAD"],
|
629
479
|
capture_output=True, text=True)
|
630
|
-
|
631
|
-
# Get branch count
|
632
|
-
branch_count = subprocess.run(["git", "branch", "--list"],
|
480
|
+
branch_count = subprocess.run(["git","branch","--list"],
|
633
481
|
capture_output=True, text=True)
|
634
482
|
branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
|
635
|
-
|
636
483
|
return {
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
484
|
+
"domain": domain,
|
485
|
+
"owner": owner,
|
486
|
+
"commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
|
487
|
+
"branch_count": branch_count,
|
488
|
+
"has_git": True,
|
489
|
+
"repo_path": repo_path if'repo_path' in locals() else None,
|
490
|
+
"repo_name": repo_name,
|
491
|
+
"has_license": has_license if'has_license' in locals() else False,
|
492
|
+
"license_type": license_type if'license_type' in locals() else"unknown"
|
646
493
|
}
|
647
|
-
|
648
494
|
except Exception as e:
|
649
|
-
# Optionally log the exception for debugging
|
650
495
|
pass
|
651
496
|
return {"has_git": False}
|
652
497
|
|
653
498
|
def detect_usage_pattern(self):
|
654
|
-
"""Detect usage patterns that indicate commercial use"""
|
655
499
|
current_time = datetime.now()
|
656
|
-
|
657
|
-
# Check if using during business hours
|
658
|
-
is_weekday = current_time.weekday() < 5 # 0-4 are Monday to Friday
|
500
|
+
is_weekday = current_time.weekday() < 5
|
659
501
|
hour = current_time.hour
|
660
502
|
is_business_hours = 9 <= hour <= 18
|
661
|
-
|
662
503
|
return {
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
504
|
+
"business_hours_usage": is_weekday and is_business_hours,
|
505
|
+
"weekday": is_weekday,
|
506
|
+
"hour": hour,
|
507
|
+
"timestamp": current_time.isoformat()
|
667
508
|
}
|
668
509
|
|
669
510
|
def enhanced_commercial_detection(self):
|
670
|
-
"""More thorough commercial usage detection"""
|
671
511
|
basic = self.detect_commercial_usage()
|
672
|
-
|
673
|
-
# Additional commercial indicators
|
674
512
|
try:
|
675
513
|
project_files = os.listdir(os.getcwd())
|
676
|
-
|
677
|
-
|
678
|
-
commercial_frameworks = ["django-oscar", "opencart", "magento",
|
679
|
-
"saleor", "odoo", "shopify", "woocommerce"]
|
680
|
-
|
514
|
+
commercial_frameworks = ["django-oscar","opencart","magento",
|
515
|
+
"saleor","odoo","shopify","woocommerce"]
|
681
516
|
framework_match = False
|
682
517
|
for framework in commercial_frameworks:
|
683
518
|
if any(framework in f for f in project_files):
|
684
519
|
framework_match = True
|
685
520
|
break
|
686
|
-
|
687
|
-
|
688
|
-
db_files = [f for f in project_files if "database" in f.lower()
|
689
|
-
or "db_config" in f.lower() or f.endswith(".db")]
|
521
|
+
db_files = [f for f in project_files if"database" in f.lower()
|
522
|
+
or"db_config" in f.lower() or f.endswith(".db")]
|
690
523
|
has_database = len(db_files) > 0
|
691
524
|
except:
|
692
525
|
framework_match = False
|
693
526
|
has_database = False
|
694
|
-
|
695
|
-
# Domain name registration check
|
696
527
|
domain_check = self.analyze_git_info()
|
697
528
|
domain_is_commercial = False
|
698
529
|
if domain_check and domain_check.get("domain"):
|
699
|
-
commercial_tlds = [".com",
|
530
|
+
commercial_tlds = [".com",".io",".co",".org",".net"]
|
700
531
|
domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
|
701
|
-
|
702
|
-
# Check project structure
|
703
532
|
project_structure = self.analyze_project_structure()
|
704
|
-
|
705
|
-
# Calculate enhanced commercial score
|
706
533
|
indicators = [
|
707
534
|
basic["commercial_probability"],
|
708
535
|
framework_match,
|
@@ -711,55 +538,42 @@ class Inspector:
|
|
711
538
|
project_structure.get("type_confidence", {}).get("commercial_app", 0),
|
712
539
|
self.detect_usage_pattern()["business_hours_usage"]
|
713
540
|
]
|
714
|
-
|
715
|
-
# Filter out None values
|
716
541
|
indicators = [i for i in indicators if i is not None]
|
717
|
-
|
718
|
-
# Calculate score - convert booleans to 1.0 and average
|
719
542
|
if indicators:
|
720
|
-
score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
|
543
|
+
score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
|
721
544
|
for i in indicators) / len(indicators)
|
722
545
|
else:
|
723
546
|
score = 0
|
724
|
-
|
725
547
|
return {
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
548
|
+
"commercial_probability": score,
|
549
|
+
"likely_commercial": score > 0.4,
|
550
|
+
"indicators": {
|
551
|
+
"basic_indicators": basic["commercial_indicators"],
|
552
|
+
"framework_match": framework_match,
|
553
|
+
"has_database": has_database,
|
554
|
+
"domain_is_commercial": domain_is_commercial,
|
555
|
+
"project_structure": project_structure.get("detected_type"),
|
556
|
+
"business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
|
735
557
|
}
|
736
558
|
}
|
737
559
|
|
738
560
|
def analyze_dependencies(self):
|
739
|
-
"""Analyze package dependencies for commercial patterns"""
|
740
561
|
try:
|
741
562
|
import pkg_resources
|
742
|
-
|
743
|
-
# Commercial/enterprise package indicators
|
744
563
|
enterprise_packages = [
|
745
|
-
|
746
|
-
|
747
|
-
|
564
|
+
"snowflake-connector-python","databricks","azure",
|
565
|
+
"aws","google-cloud","stripe","atlassian",
|
566
|
+
"salesforce","bigquery","tableau","sap"
|
748
567
|
]
|
749
|
-
|
750
|
-
# Find installed packages that match enterprise indicators
|
751
568
|
commercial_deps = []
|
752
569
|
for pkg in pkg_resources.working_set:
|
753
570
|
if any(ent in pkg.key for ent in enterprise_packages):
|
754
|
-
commercial_deps.append({"name": pkg.key,
|
755
|
-
|
571
|
+
commercial_deps.append({"name": pkg.key,"version": pkg.version})
|
756
572
|
return {
|
757
|
-
|
758
|
-
|
759
|
-
|
573
|
+
"has_commercial_deps": len(commercial_deps) > 0,
|
574
|
+
"commercial_deps_count": len(commercial_deps),
|
575
|
+
"commercial_deps": commercial_deps
|
760
576
|
}
|
761
577
|
except:
|
762
578
|
return {"has_commercial_deps": False}
|
763
|
-
|
764
|
-
# Create singleton instance
|
765
|
-
inspector = Inspector()
|
579
|
+
inspector = Inspector()
|