vnai 2.0.2__py3-none-any.whl → 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vnai/__init__.py +265 -72
- vnai/beam/__init__.py +5 -2
- vnai/beam/metrics.py +182 -57
- vnai/beam/pulse.py +107 -29
- vnai/beam/quota.py +479 -102
- vnai/flow/__init__.py +5 -2
- vnai/flow/queue.py +131 -55
- vnai/flow/relay.py +439 -149
- vnai/scope/__init__.py +5 -2
- vnai/scope/profile.py +762 -219
- vnai/scope/promo.py +249 -55
- vnai/scope/state.py +220 -71
- {vnai-2.0.2.dist-info → vnai-2.0.4.dist-info}/METADATA +4 -5
- vnai-2.0.4.dist-info/RECORD +16 -0
- {vnai-2.0.2.dist-info → vnai-2.0.4.dist-info}/WHEEL +1 -1
- vnai-2.0.2.dist-info/RECORD +0 -16
- {vnai-2.0.2.dist-info → vnai-2.0.4.dist-info}/top_level.txt +0 -0
vnai/scope/profile.py
CHANGED
@@ -1,223 +1,766 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
_K='domain'
|
14
|
-
_J='.git'
|
15
|
-
_I='backtesting'
|
16
|
-
_H='commercial_probability'
|
17
|
-
_G='timestamp'
|
18
|
-
_F='business_hours_usage'
|
19
|
-
_E='google.colab'
|
20
|
-
_D='unknown'
|
21
|
-
_C=False
|
22
|
-
_B=None
|
23
|
-
_A=True
|
24
|
-
import os,sys,platform,uuid,hashlib,psutil,threading,time,importlib.metadata
|
1
|
+
# vnai/scope/profile.py
|
2
|
+
# System environment detection
|
3
|
+
|
4
|
+
import os
|
5
|
+
import sys
|
6
|
+
import platform
|
7
|
+
import uuid
|
8
|
+
import hashlib
|
9
|
+
import psutil
|
10
|
+
import threading
|
11
|
+
import time
|
12
|
+
import importlib.metadata
|
25
13
|
from datetime import datetime
|
26
14
|
import subprocess
|
27
15
|
from pathlib import Path
|
16
|
+
|
28
17
|
class Inspector:
    """Inspects execution environment"""

    # Singleton storage: the one shared Inspector instance (see __new__).
    _instance = None
    # Lock guarding singleton creation; created lazily in __new__.
    _lock = None
|
22
|
+
|
23
|
+
def __new__(cls):
    """Create or return the singleton Inspector instance (thread-safe).

    The first caller constructs the instance and runs ``_initialize``;
    every later call returns the same object.
    """
    # The module already does `import threading` at top level, so the
    # previous redundant function-local re-import has been removed.
    if cls._lock is None:
        # Benign race: two threads may both assign a fresh Lock here, but
        # the locked check below still guarantees a single instance.
        cls._lock = threading.Lock()

    with cls._lock:
        if cls._instance is None:
            cls._instance = super(Inspector, cls).__new__(cls)
            cls._instance._initialize()
    return cls._instance
|
33
|
+
|
34
|
+
def _initialize(self):
    """Initialize inspector state: caches, identifiers and data paths."""
    # Examination-results cache and its validity window.
    self.cache = {}
    self.cache_ttl = 3600  # 1 hour cache validity
    self.last_examination = 0
    # Stable machine identifier, resolved lazily by fingerprint().
    self.machine_id = None
    # Ensures Colab Drive authentication is only triggered once.
    self._colab_auth_triggered = False

    # Paths: ~/.vnstock/id/machine_id.txt stores the persisted fingerprint.
    self.home_dir = Path.home()
    self.project_dir = self.home_dir / ".vnstock"
    self.project_dir.mkdir(exist_ok=True)
    self.id_dir = self.project_dir / 'id'
    self.id_dir.mkdir(exist_ok=True)
    self.machine_id_path = self.id_dir / "machine_id.txt"

    # Perform initial examination (populates self.cache).
    self.examine()
|
52
|
+
|
53
|
+
def examine(self, force_refresh=False):
    """Examine current execution context.

    Collects platform info, execution environment (jupyter/terminal/script),
    system resources and heuristic commercial-usage context. Results are
    cached for ``cache_ttl`` seconds unless ``force_refresh`` is True.
    """
    current_time = time.time()

    # Return cached data if it's fresh enough and we're not forcing a refresh
    if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
        return self.cache

    # Start with basic information
    info = {
        "timestamp": datetime.now().isoformat(),
        "python_version": platform.python_version(),
        "os_name": platform.system(),
        "platform": platform.platform()
    }

    # Machine identifier
    info["machine_id"] = self.fingerprint()

    # Environment detection
    try:
        # Check for Jupyter/IPython without importing it unconditionally
        import importlib.util
        ipython_spec = importlib.util.find_spec("IPython")

        if ipython_spec:
            from IPython import get_ipython
            ipython = get_ipython()
            if ipython is not None:
                info["environment"] = "jupyter"
                # Check for hosted notebooks
                if 'google.colab' in sys.modules:
                    info["hosting_service"] = "colab"
                elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
                    info["hosting_service"] = "kaggle"
                else:
                    info["hosting_service"] = "local_jupyter"
            elif sys.stdout.isatty():
                info["environment"] = "terminal"
            else:
                info["environment"] = "script"
        elif sys.stdout.isatty():
            info["environment"] = "terminal"
        else:
            info["environment"] = "script"
    except:
        # Any detection failure falls back to "unknown".
        info["environment"] = "unknown"

    # System resources (best effort; failures are ignored)
    try:
        info["cpu_count"] = os.cpu_count()
        info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
    except:
        pass

    # Check if in Google Colab
    is_colab = 'google.colab' in sys.modules
    if is_colab:
        info["is_colab"] = True
        # Setup delayed authentication if not already triggered
        self.detect_colab_with_delayed_auth()

    # Enhanced context information
    try:
        # Commercial usage detection
        info["commercial_usage"] = self.enhanced_commercial_detection()

        # Project context
        info["project_context"] = self.analyze_project_structure()

        # Git info
        info["git_info"] = self.analyze_git_info()

        # Working hours pattern
        info["usage_pattern"] = self.detect_usage_pattern()

        # Dependency analysis
        info["dependencies"] = self.analyze_dependencies()
    except Exception as e:
        # Don't let enhanced detection failure stop basic functionality
        info["detection_error"] = str(e)

    # Update cache
    self.cache = info
    self.last_examination = current_time

    return info
|
140
|
+
|
141
|
+
def fingerprint(self):
    """Generate unique environment fingerprint.

    Resolution order: in-memory value, persisted file, MD5 of stable
    platform info, random UUID as a last resort. The chosen value is
    persisted so subsequent calls (and processes) agree.
    """
    # Always return cached machine_id if it exists
    if self.machine_id:
        return self.machine_id

    # Try to load from file first
    if self.machine_id_path.exists():
        try:
            with open(self.machine_id_path, "r") as f:
                self.machine_id = f.read().strip()
                return self.machine_id
        except:
            pass

    # Check for Colab and setup delayed authentication
    is_colab = self.detect_colab_with_delayed_auth()

    # Generate a new machine ID only if necessary
    try:
        # Use consistent system information. MD5 is acceptable here: it is
        # a stable non-security identifier, not a cryptographic use.
        system_info = platform.node() + platform.platform() + platform.machine()
        self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
    except:
        # Fallback to UUID but only as last resort
        self.machine_id = str(uuid.uuid4())

    # Save to ensure consistency across calls
    try:
        with open(self.machine_id_path, "w") as f:
            f.write(self.machine_id)
    except:
        pass

    return self.machine_id
|
176
|
+
|
177
|
+
def detect_hosting(self):
    """Detect if running in a hosted environment.

    Checks well-known environment variables first, then the loaded-modules
    table for Colab; returns "local" when nothing matches.
    """
    env_to_host = {
        "COLAB_GPU": "Google Colab",
        "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
        "BINDER_SERVICE_HOST": "Binder",
        "CODESPACE_NAME": "GitHub Codespaces",
        "STREAMLIT_SERVER_HEADLESS": "Streamlit Cloud",
        "CLOUD_SHELL": "Cloud Shell",
    }

    # First environment-variable marker wins (dict order is preserved).
    detected = next(
        (host for var, host in env_to_host.items() if var in os.environ),
        None,
    )
    if detected is not None:
        return detected

    # Colab sometimes lacks the env var but always has the module loaded.
    if 'google.colab' in sys.modules:
        return "Google Colab"

    return "local"
|
198
|
+
|
199
|
+
def detect_commercial_usage(self):
    """Detect if running in commercial environment.

    Scores four boolean signals (env values, env var names, cwd name,
    file names) and reports the averaged probability.
    """
    markers = {
        "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
        "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
        "env_vars": ["COMPANY", "BUSINESS", "ENTERPRISE", "CORPORATE"],
        "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
    }

    # Environment variable *values* scanned for commercial-looking domains.
    joined_env = " ".join(os.environ.values()).lower()
    domain_match = any(d in joined_env for d in markers["env_domains"])

    # Presence of commercial-sounding environment variable *names*.
    env_var_match = any(v in os.environ for v in markers["env_vars"])

    # Current working directory name.
    cwd_lower = os.getcwd().lower()
    dir_match = any(p in cwd_lower for p in markers["dir_patterns"])

    # File names in the current directory (best effort).
    try:
        names = [f.lower() for f in os.listdir() if os.path.isfile(f)]
        file_match = any(p in name for name in names for p in markers["file_patterns"])
    except:
        file_match = False

    # Probability = fraction of signals that fired.
    signals = [domain_match, env_var_match, dir_match, file_match]
    probability = sum(signals) / len(signals)

    return {
        "likely_commercial": probability > 0.3,
        "commercial_probability": probability,
        "commercial_indicators": {
            "domain_match": domain_match,
            "env_var_match": env_var_match,
            "dir_match": dir_match,
            "file_match": file_match
        }
    }
|
240
|
+
|
241
|
+
def scan_packages(self):
    """Scan for installed packages by category.

    Returns a dict mapping each category name to a list of
    ``{"name": ..., "version": ...}`` entries for the packages that are
    actually installed; missing packages are silently skipped.
    """
    package_groups = {
        "vnstock_family": [
            "vnstock", "vnstock3", "vnstock_ezchart", "vnstock_data_pro",
            "vnstock_market_data_pipeline", "vnstock_ta", "vnii", "vnai",
        ],
        "analytics": ["openbb", "pandas_ta"],
        "static_charts": ["matplotlib", "seaborn", "altair"],
        "dashboard": ["streamlit", "voila", "panel", "shiny", "dash"],
        "interactive_charts": [
            "mplfinance", "plotly", "plotline", "bokeh", "pyecharts",
            "highcharts-core", "highcharts-stock", "mplchart",
        ],
        "datafeed": ["yfinance", "alpha_vantage", "pandas-datareader", "investpy"],
        "official_api": ["ssi-fc-data", "ssi-fctrading"],
        "risk_return": ["pyfolio", "empyrical", "quantstats", "financetoolkit"],
        "machine_learning": [
            "scipy", "sklearn", "statsmodels", "pytorch", "tensorflow",
            "keras", "xgboost",
        ],
        "indicators": [
            "stochastic", "talib", "tqdm", "finta", "financetoolkit",
            "tulipindicators",
        ],
        "backtesting": [
            "vectorbt", "backtesting", "bt", "zipline", "pyalgotrade",
            "backtrader", "pybacktest", "fastquant", "lean", "ta",
            "finmarketpy", "qstrader",
        ],
        "server": ["fastapi", "flask", "uvicorn", "gunicorn"],
        "framework": ["lightgbm", "catboost", "django"],
    }

    found = {}
    for category, names in package_groups.items():
        entries = []
        for name in names:
            # importlib.metadata raises if the package is not installed;
            # treat that as "not present" and move on.
            try:
                entries.append({"name": name, "version": importlib.metadata.version(name)})
            except:
                pass
        found[category] = entries

    return found
|
352
|
+
|
353
|
+
def setup_vnstock_environment(self):
    """Set up environment for vnstock library.

    Writes ``<id_dir>/environment.json`` recording agreement acceptance,
    a timestamp and the machine fingerprint.

    Returns:
        bool: True when the file was written successfully, False otherwise.
    """
    # Hoisted out of the `with` block: importing on every write attempt
    # inside the context manager was needless work.
    import json

    env_file = self.id_dir / "environment.json"
    env_data = {
        "accepted_agreement": True,
        "timestamp": datetime.now().isoformat(),
        "machine_id": self.fingerprint()
    }

    try:
        with open(env_file, "w") as f:
            json.dump(env_data, f)
        return True
    except Exception as e:
        print(f"Failed to set up vnstock environment: {e}")
        return False
|
371
|
+
|
372
|
+
# Update detect_colab_with_delayed_auth method in Inspector class
|
373
|
+
# Update detect_colab_with_delayed_auth method in Inspector class
def detect_colab_with_delayed_auth(self, immediate=False):
    """Detect if running in Google Colab and setup authentication.

    When ``immediate`` is True the Drive-backed user id is resolved now;
    otherwise a daemon thread resolves it after a 5-minute delay so the
    auth prompt does not interrupt initial package usage.

    Returns:
        bool: True when running inside Google Colab.
    """
    # Check if we're in Colab without mounting drive yet
    is_colab = 'google.colab' in sys.modules

    if is_colab and not self._colab_auth_triggered:
        if immediate:
            # Immediate authentication
            self._colab_auth_triggered = True
            user_id = self.get_or_create_user_id()
            if user_id and user_id != self.machine_id:
                self.machine_id = user_id
                try:
                    with open(self.machine_id_path, "w") as f:
                        f.write(user_id)
                except:
                    pass
        else:
            # Start a delayed thread to trigger authentication after user is already using the package
            def delayed_auth():
                # Wait for some time (e.g., 5 minutes) before attempting auth
                time.sleep(300)
                # Try to get authenticated user ID
                user_id = self.get_or_create_user_id()
                # Update machine ID with the authenticated one
                if user_id and user_id != self.machine_id:
                    self.machine_id = user_id
                    # Save to the machine_id_path
                    try:
                        with open(self.machine_id_path, "w") as f:
                            f.write(user_id)
                    except:
                        pass

            # Start the delayed authentication thread (daemon: won't block exit)
            thread = threading.Thread(target=delayed_auth, daemon=True)
            thread.start()

    return is_colab
|
412
|
+
|
413
|
+
def get_or_create_user_id(self):
    """Get existing user ID from Google Drive or create new one.

    Mounts the user's Drive, reads ``MyDrive/.vnstock/user_id.txt`` when
    present, otherwise writes a fresh UUID there. Falls back to the
    current machine_id on any failure.
    """
    if self._colab_auth_triggered:
        return self.machine_id  # Avoid triggering multiple times

    try:
        from google.colab import drive
        # User-facing prompts (Vietnamese): explain why Drive is mounted.
        print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
        print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")

        # Mark that we've triggered the auth
        self._colab_auth_triggered = True

        # Mount Google Drive
        drive.mount('/content/drive')
        id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'

        if os.path.exists(id_path):
            with open(id_path, 'r') as f:
                return f.read().strip()
        else:
            user_id = str(uuid.uuid4())
            os.makedirs(os.path.dirname(id_path), exist_ok=True)
            with open(id_path, 'w') as f:
                f.write(user_id)
            return user_id
    except Exception as e:
        # Silently fail and return the existing machine ID
        return self.machine_id
|
442
|
+
|
443
|
+
# Enhanced methods for project context collection
|
444
|
+
|
445
|
+
def analyze_project_structure(self):
    """Analyze project directory structure for context.

    Inspects only the top level of the current working directory
    (limited depth for privacy) and returns a dict with a best-guess
    project category, file-marker type, detected frameworks and counts.
    """
    current_dir = os.getcwd()
    project_indicators = {
        "commercial_app": ["app", "services", "products", "customers", "billing"],
        "financial_tool": ["portfolio", "backtesting", "trading", "strategy"],
        "data_science": ["models", "notebooks", "datasets", "visualization"],
        "educational": ["examples", "lectures", "assignments", "slides"]
    }

    # Fraction of marker directories present, per category.
    project_type = {}
    for category, markers in project_indicators.items():
        match_count = sum(
            1 for marker in markers
            if os.path.exists(os.path.join(current_dir, marker))
        )
        if len(markers) > 0:
            project_type[category] = match_count / len(markers)

    try:
        root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
        root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]

        # Detect project type from characteristic top-level files.
        file_markers = {
            "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
            "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
            "web_app": ["app.py", "wsgi.py", "manage.py", "server.py"],
            "finance_app": ["portfolio.py", "trading.py", "backtest.py"],
        }

        file_project_type = "unknown"
        for ptype, markers in file_markers.items():
            if any(marker in root_files for marker in markers):
                file_project_type = ptype
                break

        # Scan for specific frameworks.
        frameworks = []
        framework_markers = {
            "django": ["manage.py", "settings.py"],
            "flask": ["app.py", "wsgi.py"],
            "streamlit": ["streamlit_app.py", "app.py"],
            "fastapi": ["main.py", "app.py"],
        }
        for framework, markers in framework_markers.items():
            if any(marker in root_files for marker in markers):
                frameworks.append(framework)
    except Exception:
        # Directory unreadable: fall back to empty results.
        root_files = []
        root_dirs = []
        file_project_type = "unknown"
        frameworks = []

    # root_files/root_dirs are always bound here (set in either the try
    # body or the except handler), so the previous `'x' in locals()`
    # guards were dead code and have been removed.
    return {
        "project_dir": current_dir,
        "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else "unknown",
        "file_type": file_project_type,
        "is_git_repo": ".git" in root_dirs,
        "frameworks": frameworks,
        "file_count": len(root_files),
        "directory_count": len(root_dirs),
        "type_confidence": project_type
    }
|
513
|
+
|
514
|
+
def analyze_git_info(self):
    """Extract non-sensitive git repository information.

    Uses `git` subprocesses to gather repository metadata; only the
    remote's domain (never the full URL or credentials) is reported.
    Returns ``{"has_git": False}`` when not in a repository or on error.
    """
    try:
        # Check if it's a git repository
        result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
                                capture_output=True, text=True)

        if result.returncode != 0:
            return {"has_git": False}

        # Get repository root path
        repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
                                   capture_output=True, text=True)
        repo_path = repo_root.stdout.strip() if repo_root.stdout else None

        # Extract repository name from path.
        # NOTE(review): this value is unconditionally overwritten by the
        # `repo_name = None` reset before the owner/repo parsing below, so
        # the basename is only the final answer when that parsing fails to
        # assign — confirm whether that is intended.
        repo_name = os.path.basename(repo_path) if repo_path else None

        # Check for license file
        has_license = False
        license_type = "unknown"
        if repo_path:
            license_files = [
                os.path.join(repo_path, "LICENSE"),
                os.path.join(repo_path, "LICENSE.txt"),
                os.path.join(repo_path, "LICENSE.md")
            ]
            for license_file in license_files:
                if os.path.exists(license_file):
                    has_license = True
                    # Try to determine license type by scanning content
                    try:
                        with open(license_file, 'r') as f:
                            content = f.read().lower()
                            if "mit license" in content:
                                license_type = "MIT"
                            elif "apache license" in content:
                                license_type = "Apache"
                            elif "gnu general public" in content:
                                license_type = "GPL"
                            elif "bsd " in content:
                                license_type = "BSD"
                            # Add more license type detection as needed
                    except:
                        pass
                    break

        # Get remote URL (only domain, not full URL)
        remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
                                capture_output=True, text=True)

        remote_url = remote.stdout.strip() if remote.stdout else None

        if remote_url:
            # Clean the remote URL string
            remote_url = remote_url.strip()

        # Properly extract domain without authentication information
        domain = None
        if remote_url:
            # For SSH URLs (git@github.com:user/repo.git)
            if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
                domain = remote_url.split('@')[1].split(':')[0]
            # For HTTPS URLs with or without authentication
            elif remote_url.startswith('http'):
                # Remove authentication part if present
                url_parts = remote_url.split('//')
                if len(url_parts) > 1:
                    auth_and_domain = url_parts[1].split('/', 1)[0]
                    # If auth info exists (contains @), take only domain part
                    if '@' in auth_and_domain:
                        domain = auth_and_domain.split('@')[-1]
                    else:
                        domain = auth_and_domain
            # Handle other URL formats
            else:
                # Try a general regex as fallback for unusual formats
                import re
                domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
                if domain_match:
                    domain = domain_match.group(1) or domain_match.group(2)

        # Extract owner and repo info securely
        owner = None
        repo_name = None

        if domain:
            # For GitHub repositories
            if "github" in domain:
                # SSH format: git@github.com:username/repo.git
                if ':' in remote_url and '@' in remote_url:
                    parts = remote_url.split(':')[-1].split('/')
                    if len(parts) >= 2:
                        owner = parts[0]
                        repo_name = parts[1].replace('.git', '')
                # HTTPS format
                else:
                    url_parts = remote_url.split('//')
                    if len(url_parts) > 1:
                        path_parts = url_parts[1].split('/')
                        if len(path_parts) >= 3:
                            # Skip domain and authentication part
                            domain_part = path_parts[0]
                            if '@' in domain_part:
                                # Path starts after domain
                                owner_index = 1
                            else:
                                owner_index = 1

                            if len(path_parts) > owner_index:
                                owner = path_parts[owner_index]
                            if len(path_parts) > owner_index + 1:
                                repo_name = path_parts[owner_index + 1].replace('.git', '')

        # Get commit count
        commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
                                      capture_output=True, text=True)

        # Get branch count
        branch_count = subprocess.run(["git", "branch", "--list"],
                                      capture_output=True, text=True)
        branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0

        return {
            "domain": domain,  # Only domain, not full URL
            "owner": owner,  # Repository owner (for GitHub)
            "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
            "branch_count": branch_count,
            "has_git": True,
            "repo_path": repo_path if 'repo_path' in locals() else None,
            "repo_name": repo_name,
            "has_license": has_license if 'has_license' in locals() else False,
            "license_type": license_type if 'license_type' in locals() else "unknown"
        }

    except Exception as e:
        # Optionally log the exception for debugging
        pass
    return {"has_git": False}
|
653
|
+
|
654
|
+
def detect_usage_pattern(self):
    """Detect usage patterns that indicate commercial use.

    Reports whether the call happened on a weekday during office hours
    (9:00-18:59 local time), plus the raw components.
    """
    now = datetime.now()

    # weekday() yields 0-4 for Monday..Friday.
    weekday = now.weekday() < 5
    hour_of_day = now.hour
    during_office_hours = 9 <= hour_of_day <= 18

    return {
        "business_hours_usage": weekday and during_office_hours,
        "weekday": weekday,
        "hour": hour_of_day,
        "timestamp": now.isoformat()
    }
|
669
|
+
|
670
|
+
def enhanced_commercial_detection(self):
    """More thorough commercial usage detection.

    Combines the basic heuristics with framework, database, git-domain
    and working-hours signals, averaged into a single probability score.
    """
    basic = self.detect_commercial_usage()

    # Additional commercial indicators
    try:
        project_files = os.listdir(os.getcwd())

        # Look for commercial frameworks
        commercial_frameworks = ["django-oscar", "opencart", "magento",
                                 "saleor", "odoo", "shopify", "woocommerce"]

        framework_match = False
        for framework in commercial_frameworks:
            if any(framework in f for f in project_files):
                framework_match = True
                break

        # Check for database connections
        db_files = [f for f in project_files if "database" in f.lower()
                    or "db_config" in f.lower() or f.endswith(".db")]
        has_database = len(db_files) > 0
    except:
        framework_match = False
        has_database = False

    # Domain name registration check
    domain_check = self.analyze_git_info()
    domain_is_commercial = False
    if domain_check and domain_check.get("domain"):
        commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
        domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)

    # Check project structure
    project_structure = self.analyze_project_structure()

    # Calculate enhanced commercial score
    indicators = [
        basic["commercial_probability"],
        framework_match,
        has_database,
        domain_is_commercial,
        project_structure.get("type_confidence", {}).get("commercial_app", 0),
        self.detect_usage_pattern()["business_hours_usage"]
    ]

    # Filter out None values
    indicators = [i for i in indicators if i is not None]

    # Calculate score - convert booleans to 1.0 and average.
    # (A False bool falls through to the numeric branch and counts as 0,
    # since bool is a subclass of int.)
    if indicators:
        score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
                    for i in indicators) / len(indicators)
    else:
        score = 0

    return {
        "commercial_probability": score,
        "likely_commercial": score > 0.4,
        "indicators": {
            "basic_indicators": basic["commercial_indicators"],
            "framework_match": framework_match,
            "has_database": has_database,
            "domain_is_commercial": domain_is_commercial,
            "project_structure": project_structure.get("detected_type"),
            "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
        }
    }
|
738
|
+
|
739
|
+
def analyze_dependencies(self):
    """Analyze package dependencies for commercial patterns.

    Scans installed distributions for enterprise-oriented packages
    (cloud SDKs, data warehouses, payment processors).

    Returns:
        dict: ``has_commercial_deps`` flag plus count and matching list;
        just ``{"has_commercial_deps": False}`` when scanning fails.
    """
    try:
        # pkg_resources is deprecated; importlib.metadata is the
        # supported stdlib replacement and needs no setuptools.
        import importlib.metadata as _metadata

        # Commercial/enterprise package indicators
        enterprise_packages = [
            "snowflake-connector-python", "databricks", "azure",
            "aws", "google-cloud", "stripe", "atlassian",
            "salesforce", "bigquery", "tableau", "sap"
        ]

        # Find installed packages that match enterprise indicators.
        commercial_deps = []
        for dist in _metadata.distributions():
            # Normalize like pkg_resources keys: lowercase, dashes.
            raw_name = dist.metadata["Name"] or ""
            name = raw_name.lower().replace("_", "-")
            if name and any(ent in name for ent in enterprise_packages):
                commercial_deps.append({"name": name, "version": dist.version})

        return {
            "has_commercial_deps": len(commercial_deps) > 0,
            "commercial_deps_count": len(commercial_deps),
            "commercial_deps": commercial_deps
        }
    except Exception:
        return {"has_commercial_deps": False}
|
764
|
+
|
765
|
+
# Create singleton instance.
# Instantiating at import time guarantees the environment is examined once
# and every importer of this module shares the same Inspector.
inspector = Inspector()
|