vnai 2.0.3__py3-none-any.whl → 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vnai/__init__.py +55 -81
- vnai/beam/__init__.py +3 -0
- vnai/beam/metrics.py +32 -57
- vnai/beam/pulse.py +21 -36
- vnai/beam/quota.py +109 -137
- vnai/flow/__init__.py +2 -4
- vnai/flow/queue.py +20 -31
- vnai/flow/relay.py +64 -101
- vnai/scope/__init__.py +2 -4
- vnai/scope/profile.py +110 -206
- vnai/scope/promo.py +80 -28
- vnai/scope/state.py +38 -64
- {vnai-2.0.3.dist-info → vnai-2.0.4.dist-info}/METADATA +4 -5
- vnai-2.0.4.dist-info/RECORD +16 -0
- vnai-2.0.3.dist-info/RECORD +0 -16
- {vnai-2.0.3.dist-info → vnai-2.0.4.dist-info}/WHEEL +0 -0
- {vnai-2.0.3.dist-info → vnai-2.0.4.dist-info}/top_level.txt +0 -0
vnai/scope/profile.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
##
|
4
|
-
|
1
|
+
# vnai/scope/profile.py
|
2
|
+
# System environment detection
|
5
3
|
|
6
4
|
import os
|
7
5
|
import sys
|
@@ -17,7 +15,7 @@ import subprocess
|
|
17
15
|
from pathlib import Path
|
18
16
|
|
19
17
|
class Inspector:
|
20
|
-
|
18
|
+
"""Inspects execution environment"""
|
21
19
|
|
22
20
|
_instance = None
|
23
21
|
_lock = None
|
@@ -34,16 +32,14 @@ class Inspector:
|
|
34
32
|
return cls._instance
|
35
33
|
|
36
34
|
def _initialize(self):
|
37
|
-
|
35
|
+
"""Initialize inspector"""
|
38
36
|
self.cache = {}
|
39
|
-
self.cache_ttl = 3600
|
40
|
-
|
37
|
+
self.cache_ttl = 3600 # 1 hour cache validity
|
41
38
|
self.last_examination = 0
|
42
39
|
self.machine_id = None
|
43
40
|
self._colab_auth_triggered = False
|
44
41
|
|
45
|
-
|
46
|
-
|
42
|
+
# Paths
|
47
43
|
self.home_dir = Path.home()
|
48
44
|
self.project_dir = self.home_dir / ".vnstock"
|
49
45
|
self.project_dir.mkdir(exist_ok=True)
|
@@ -51,21 +47,18 @@ class Inspector:
|
|
51
47
|
self.id_dir.mkdir(exist_ok=True)
|
52
48
|
self.machine_id_path = self.id_dir / "machine_id.txt"
|
53
49
|
|
54
|
-
|
55
|
-
|
50
|
+
# Perform initial examination
|
56
51
|
self.examine()
|
57
52
|
|
58
53
|
def examine(self, force_refresh=False):
|
59
|
-
|
54
|
+
"""Examine current execution context"""
|
60
55
|
current_time = time.time()
|
61
56
|
|
62
|
-
|
63
|
-
|
57
|
+
# Return cached data if it's fresh enough and we're not forcing a refresh
|
64
58
|
if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
|
65
59
|
return self.cache
|
66
60
|
|
67
|
-
|
68
|
-
|
61
|
+
# Start with basic information
|
69
62
|
info = {
|
70
63
|
"timestamp": datetime.now().isoformat(),
|
71
64
|
"python_version": platform.python_version(),
|
@@ -73,15 +66,12 @@ class Inspector:
|
|
73
66
|
"platform": platform.platform()
|
74
67
|
}
|
75
68
|
|
76
|
-
|
77
|
-
|
69
|
+
# Machine identifier
|
78
70
|
info["machine_id"] = self.fingerprint()
|
79
71
|
|
80
|
-
|
81
|
-
|
72
|
+
# Environment detection
|
82
73
|
try:
|
83
|
-
|
84
|
-
|
74
|
+
# Check for Jupyter/IPython
|
85
75
|
import importlib.util
|
86
76
|
ipython_spec = importlib.util.find_spec("IPython")
|
87
77
|
|
@@ -90,8 +80,7 @@ class Inspector:
|
|
90
80
|
ipython = get_ipython()
|
91
81
|
if ipython is not None:
|
92
82
|
info["environment"] = "jupyter"
|
93
|
-
|
94
|
-
|
83
|
+
# Check for hosted notebooks
|
95
84
|
if 'google.colab' in sys.modules:
|
96
85
|
info["hosting_service"] = "colab"
|
97
86
|
elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
|
@@ -109,66 +98,53 @@ class Inspector:
|
|
109
98
|
except:
|
110
99
|
info["environment"] = "unknown"
|
111
100
|
|
112
|
-
|
113
|
-
|
101
|
+
# System resources
|
114
102
|
try:
|
115
103
|
info["cpu_count"] = os.cpu_count()
|
116
104
|
info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
|
117
105
|
except:
|
118
106
|
pass
|
119
107
|
|
120
|
-
|
121
|
-
|
108
|
+
# Check if in Google Colab
|
122
109
|
is_colab = 'google.colab' in sys.modules
|
123
110
|
if is_colab:
|
124
111
|
info["is_colab"] = True
|
125
|
-
|
126
|
-
|
112
|
+
# Setup delayed authentication if not already triggered
|
127
113
|
self.detect_colab_with_delayed_auth()
|
128
114
|
|
129
|
-
|
130
|
-
|
115
|
+
# Enhanced context information
|
131
116
|
try:
|
132
|
-
|
133
|
-
|
117
|
+
# Commercial usage detection
|
134
118
|
info["commercial_usage"] = self.enhanced_commercial_detection()
|
135
119
|
|
136
|
-
|
137
|
-
|
120
|
+
# Project context
|
138
121
|
info["project_context"] = self.analyze_project_structure()
|
139
122
|
|
140
|
-
|
141
|
-
|
123
|
+
# Git info
|
142
124
|
info["git_info"] = self.analyze_git_info()
|
143
125
|
|
144
|
-
|
145
|
-
|
126
|
+
# Working hours pattern
|
146
127
|
info["usage_pattern"] = self.detect_usage_pattern()
|
147
128
|
|
148
|
-
|
149
|
-
|
129
|
+
# Dependency analysis
|
150
130
|
info["dependencies"] = self.analyze_dependencies()
|
151
131
|
except Exception as e:
|
152
|
-
|
153
|
-
|
132
|
+
# Don't let enhanced detection failure stop basic functionality
|
154
133
|
info["detection_error"] = str(e)
|
155
134
|
|
156
|
-
|
157
|
-
|
135
|
+
# Update cache
|
158
136
|
self.cache = info
|
159
137
|
self.last_examination = current_time
|
160
138
|
|
161
139
|
return info
|
162
140
|
|
163
141
|
def fingerprint(self):
|
164
|
-
|
165
|
-
|
166
|
-
|
142
|
+
"""Generate unique environment fingerprint"""
|
143
|
+
# Always return cached machine_id if it exists
|
167
144
|
if self.machine_id:
|
168
145
|
return self.machine_id
|
169
146
|
|
170
|
-
|
171
|
-
|
147
|
+
# Try to load from file first
|
172
148
|
if self.machine_id_path.exists():
|
173
149
|
try:
|
174
150
|
with open(self.machine_id_path, "r") as f:
|
@@ -177,24 +153,19 @@ class Inspector:
|
|
177
153
|
except:
|
178
154
|
pass
|
179
155
|
|
180
|
-
|
181
|
-
|
156
|
+
# Check for Colab and setup delayed authentication
|
182
157
|
is_colab = self.detect_colab_with_delayed_auth()
|
183
158
|
|
184
|
-
|
185
|
-
|
159
|
+
# Generate a new machine ID only if necessary
|
186
160
|
try:
|
187
|
-
|
188
|
-
|
161
|
+
# Use consistent system information
|
189
162
|
system_info = platform.node() + platform.platform() + platform.machine()
|
190
163
|
self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
|
191
164
|
except:
|
192
|
-
|
193
|
-
|
165
|
+
# Fallback to UUID but only as last resort
|
194
166
|
self.machine_id = str(uuid.uuid4())
|
195
167
|
|
196
|
-
|
197
|
-
|
168
|
+
# Save to ensure consistency across calls
|
198
169
|
try:
|
199
170
|
with open(self.machine_id_path, "w") as f:
|
200
171
|
f.write(self.machine_id)
|
@@ -204,9 +175,8 @@ class Inspector:
|
|
204
175
|
return self.machine_id
|
205
176
|
|
206
177
|
def detect_hosting(self):
|
207
|
-
|
208
|
-
|
209
|
-
|
178
|
+
"""Detect if running in a hosted environment"""
|
179
|
+
# Check common environment variables for hosted environments
|
210
180
|
hosting_markers = {
|
211
181
|
"COLAB_GPU": "Google Colab",
|
212
182
|
"KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
|
@@ -220,15 +190,14 @@ class Inspector:
|
|
220
190
|
if env_var in os.environ:
|
221
191
|
return host_name
|
222
192
|
|
223
|
-
|
224
|
-
|
193
|
+
# Check for Google Colab module
|
225
194
|
if 'google.colab' in sys.modules:
|
226
195
|
return "Google Colab"
|
227
196
|
|
228
197
|
return "local"
|
229
198
|
|
230
199
|
def detect_commercial_usage(self):
|
231
|
-
|
200
|
+
"""Detect if running in commercial environment"""
|
232
201
|
commercial_indicators = {
|
233
202
|
"env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
|
234
203
|
"file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
|
@@ -236,30 +205,25 @@ class Inspector:
|
|
236
205
|
"dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
|
237
206
|
}
|
238
207
|
|
239
|
-
|
240
|
-
|
208
|
+
# Check environment variables for commercial domains
|
241
209
|
env_values = " ".join(os.environ.values()).lower()
|
242
210
|
domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
|
243
211
|
|
244
|
-
|
245
|
-
|
212
|
+
# Check if commercial-related environment variables exist
|
246
213
|
env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
|
247
214
|
|
248
|
-
|
249
|
-
|
215
|
+
# Check current directory for commercial indicators
|
250
216
|
current_dir = os.getcwd().lower()
|
251
217
|
dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
|
252
218
|
|
253
|
-
|
254
|
-
|
219
|
+
# Check files in current directory for commercial patterns
|
255
220
|
try:
|
256
221
|
files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
|
257
222
|
file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
|
258
223
|
except:
|
259
224
|
file_match = False
|
260
225
|
|
261
|
-
|
262
|
-
|
226
|
+
# Calculate probability
|
263
227
|
indicators = [domain_match, env_var_match, dir_match, file_match]
|
264
228
|
commercial_probability = sum(indicators) / len(indicators)
|
265
229
|
|
@@ -275,14 +239,13 @@ class Inspector:
|
|
275
239
|
}
|
276
240
|
|
277
241
|
def scan_packages(self):
|
278
|
-
|
242
|
+
"""Scan for installed packages by category"""
|
279
243
|
package_groups = {
|
280
244
|
"vnstock_family": [
|
281
245
|
"vnstock",
|
282
246
|
"vnstock3",
|
283
247
|
"vnstock_ezchart",
|
284
|
-
"vnstock_data_pro",
|
285
|
-
|
248
|
+
"vnstock_data_pro", # Fixed missing comma here
|
286
249
|
"vnstock_market_data_pipeline",
|
287
250
|
"vnstock_ta",
|
288
251
|
"vnii",
|
@@ -388,9 +351,8 @@ class Inspector:
|
|
388
351
|
return installed
|
389
352
|
|
390
353
|
def setup_vnstock_environment(self):
|
391
|
-
|
392
|
-
|
393
|
-
|
354
|
+
"""Set up environment for vnstock library"""
|
355
|
+
# Create environment.json file
|
394
356
|
env_file = self.id_dir / "environment.json"
|
395
357
|
env_data = {
|
396
358
|
"accepted_agreement": True,
|
@@ -407,18 +369,15 @@ class Inspector:
|
|
407
369
|
print(f"Failed to set up vnstock environment: {e}")
|
408
370
|
return False
|
409
371
|
|
410
|
-
|
411
|
-
|
372
|
+
# Update detect_colab_with_delayed_auth method in Inspector class
|
412
373
|
def detect_colab_with_delayed_auth(self, immediate=False):
|
413
|
-
|
414
|
-
|
415
|
-
|
374
|
+
"""Detect if running in Google Colab and setup authentication"""
|
375
|
+
# Check if we're in Colab without mounting drive yet
|
416
376
|
is_colab = 'google.colab' in sys.modules
|
417
377
|
|
418
378
|
if is_colab and not self._colab_auth_triggered:
|
419
379
|
if immediate:
|
420
|
-
|
421
|
-
|
380
|
+
# Immediate authentication
|
422
381
|
self._colab_auth_triggered = True
|
423
382
|
user_id = self.get_or_create_user_id()
|
424
383
|
if user_id and user_id != self.machine_id:
|
@@ -429,51 +388,42 @@ class Inspector:
|
|
429
388
|
except:
|
430
389
|
pass
|
431
390
|
else:
|
432
|
-
|
433
|
-
|
391
|
+
# Start a delayed thread to trigger authentication after user is already using the package
|
434
392
|
def delayed_auth():
|
435
|
-
|
436
|
-
|
393
|
+
# Wait for some time (e.g., 5 minutes) before attempting auth
|
437
394
|
time.sleep(300)
|
438
|
-
|
439
|
-
|
395
|
+
# Try to get authenticated user ID
|
440
396
|
user_id = self.get_or_create_user_id()
|
441
|
-
|
442
|
-
|
397
|
+
# Update machine ID with the authenticated one
|
443
398
|
if user_id and user_id != self.machine_id:
|
444
399
|
self.machine_id = user_id
|
445
|
-
|
446
|
-
|
400
|
+
# Save to the machine_id_path
|
447
401
|
try:
|
448
402
|
with open(self.machine_id_path, "w") as f:
|
449
403
|
f.write(user_id)
|
450
404
|
except:
|
451
405
|
pass
|
452
406
|
|
453
|
-
|
454
|
-
|
407
|
+
# Start the delayed authentication thread
|
455
408
|
thread = threading.Thread(target=delayed_auth, daemon=True)
|
456
409
|
thread.start()
|
457
410
|
|
458
411
|
return is_colab
|
459
412
|
|
460
413
|
def get_or_create_user_id(self):
|
461
|
-
|
414
|
+
"""Get existing user ID from Google Drive or create new one"""
|
462
415
|
if self._colab_auth_triggered:
|
463
|
-
return self.machine_id
|
464
|
-
|
416
|
+
return self.machine_id # Avoid triggering multiple times
|
465
417
|
|
466
418
|
try:
|
467
419
|
from google.colab import drive
|
468
420
|
print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
|
469
421
|
print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
|
470
422
|
|
471
|
-
|
472
|
-
|
423
|
+
# Mark that we've triggered the auth
|
473
424
|
self._colab_auth_triggered = True
|
474
425
|
|
475
|
-
|
476
|
-
|
426
|
+
# Mount Google Drive
|
477
427
|
drive.mount('/content/drive')
|
478
428
|
id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
|
479
429
|
|
@@ -487,15 +437,13 @@ class Inspector:
|
|
487
437
|
f.write(user_id)
|
488
438
|
return user_id
|
489
439
|
except Exception as e:
|
490
|
-
|
491
|
-
|
440
|
+
# Silently fail and return the existing machine ID
|
492
441
|
return self.machine_id
|
493
442
|
|
494
|
-
|
495
|
-
|
443
|
+
# Enhanced methods for project context collection
|
496
444
|
|
497
445
|
def analyze_project_structure(self):
|
498
|
-
|
446
|
+
"""Analyze project directory structure for context"""
|
499
447
|
current_dir = os.getcwd()
|
500
448
|
project_indicators = {
|
501
449
|
"commercial_app": ["app", "services", "products", "customers", "billing"],
|
@@ -504,8 +452,7 @@ class Inspector:
|
|
504
452
|
"educational": ["examples", "lectures", "assignments", "slides"]
|
505
453
|
}
|
506
454
|
|
507
|
-
|
508
|
-
|
455
|
+
# Look for key directories up to 2 levels deep (limited for privacy)
|
509
456
|
project_type = {}
|
510
457
|
for category, markers in project_indicators.items():
|
511
458
|
match_count = 0
|
@@ -515,14 +462,12 @@ class Inspector:
|
|
515
462
|
if len(markers) > 0:
|
516
463
|
project_type[category] = match_count / len(markers)
|
517
464
|
|
518
|
-
|
519
|
-
|
465
|
+
# Scan for direct child files and directories (limited depth for privacy)
|
520
466
|
try:
|
521
467
|
root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
|
522
468
|
root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
|
523
469
|
|
524
|
-
|
525
|
-
|
470
|
+
# Detect project type
|
526
471
|
file_markers = {
|
527
472
|
"python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
|
528
473
|
"data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
|
@@ -536,8 +481,7 @@ class Inspector:
|
|
536
481
|
file_project_type = ptype
|
537
482
|
break
|
538
483
|
|
539
|
-
|
540
|
-
|
484
|
+
# Scan for specific frameworks
|
541
485
|
frameworks = []
|
542
486
|
framework_markers = {
|
543
487
|
"django": ["manage.py", "settings.py"],
|
@@ -568,28 +512,24 @@ class Inspector:
|
|
568
512
|
}
|
569
513
|
|
570
514
|
def analyze_git_info(self):
|
571
|
-
|
515
|
+
"""Extract non-sensitive git repository information"""
|
572
516
|
try:
|
573
|
-
|
574
|
-
|
517
|
+
# Check if it's a git repository
|
575
518
|
result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
|
576
519
|
capture_output=True, text=True)
|
577
520
|
|
578
521
|
if result.returncode != 0:
|
579
522
|
return {"has_git": False}
|
580
523
|
|
581
|
-
|
582
|
-
|
524
|
+
# Get repository root path - ADD THIS CODE
|
583
525
|
repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
|
584
526
|
capture_output=True, text=True)
|
585
527
|
repo_path = repo_root.stdout.strip() if repo_root.stdout else None
|
586
528
|
|
587
|
-
|
588
|
-
|
529
|
+
# Extract repository name from path - ADD THIS CODE
|
589
530
|
repo_name = os.path.basename(repo_path) if repo_path else None
|
590
531
|
|
591
|
-
|
592
|
-
|
532
|
+
# Check for license file - ADD THIS CODE
|
593
533
|
has_license = False
|
594
534
|
license_type = "unknown"
|
595
535
|
if repo_path:
|
@@ -601,8 +541,7 @@ class Inspector:
|
|
601
541
|
for license_file in license_files:
|
602
542
|
if os.path.exists(license_file):
|
603
543
|
has_license = True
|
604
|
-
|
605
|
-
|
544
|
+
# Try to determine license type by scanning content
|
606
545
|
try:
|
607
546
|
with open(license_file, 'r') as f:
|
608
547
|
content = f.read().lower()
|
@@ -614,85 +553,69 @@ class Inspector:
|
|
614
553
|
license_type = "GPL"
|
615
554
|
elif "bsd " in content:
|
616
555
|
license_type = "BSD"
|
617
|
-
|
618
|
-
|
556
|
+
# Add more license type detection as needed
|
619
557
|
except:
|
620
558
|
pass
|
621
559
|
break
|
622
560
|
|
623
|
-
|
624
|
-
|
561
|
+
# Get remote URL (only domain, not full URL)
|
625
562
|
remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
|
626
563
|
capture_output=True, text=True)
|
627
564
|
|
628
565
|
remote_url = remote.stdout.strip() if remote.stdout else None
|
629
566
|
|
630
567
|
if remote_url:
|
631
|
-
|
632
|
-
|
568
|
+
# Clean the remote URL string
|
633
569
|
remote_url = remote_url.strip()
|
634
570
|
|
635
|
-
|
636
|
-
|
571
|
+
# Properly extract domain without authentication information
|
637
572
|
domain = None
|
638
573
|
if remote_url:
|
639
|
-
|
640
|
-
|
574
|
+
# For SSH URLs (git@github.com:user/repo.git)
|
641
575
|
if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
|
642
576
|
domain = remote_url.split('@')[1].split(':')[0]
|
643
|
-
|
644
|
-
|
577
|
+
# For HTTPS URLs with or without authentication
|
645
578
|
elif remote_url.startswith('http'):
|
646
|
-
|
647
|
-
|
579
|
+
# Remove authentication part if present
|
648
580
|
url_parts = remote_url.split('//')
|
649
581
|
if len(url_parts) > 1:
|
650
582
|
auth_and_domain = url_parts[1].split('/', 1)[0]
|
651
|
-
|
652
|
-
|
583
|
+
# If auth info exists (contains @), take only domain part
|
653
584
|
if '@' in auth_and_domain:
|
654
585
|
domain = auth_and_domain.split('@')[-1]
|
655
586
|
else:
|
656
587
|
domain = auth_and_domain
|
657
|
-
|
658
|
-
|
588
|
+
# Handle other URL formats
|
659
589
|
else:
|
660
|
-
|
661
|
-
|
590
|
+
# Try a general regex as fallback for unusual formats
|
662
591
|
import re
|
663
592
|
domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
|
664
593
|
if domain_match:
|
665
594
|
domain = domain_match.group(1) or domain_match.group(2)
|
666
595
|
|
667
|
-
|
668
|
-
|
596
|
+
# Extract owner and repo info securely
|
669
597
|
owner = None
|
670
598
|
repo_name = None
|
671
599
|
|
672
600
|
if domain:
|
673
|
-
|
674
|
-
|
601
|
+
# For GitHub repositories
|
675
602
|
if "github" in domain:
|
676
|
-
|
677
|
-
|
603
|
+
# SSH format: git@github.com:username/repo.git
|
678
604
|
if ':' in remote_url and '@' in remote_url:
|
679
605
|
parts = remote_url.split(':')[-1].split('/')
|
680
606
|
if len(parts) >= 2:
|
681
607
|
owner = parts[0]
|
682
608
|
repo_name = parts[1].replace('.git', '')
|
683
|
-
|
684
|
-
|
609
|
+
# HTTPS format
|
685
610
|
else:
|
686
611
|
url_parts = remote_url.split('//')
|
687
612
|
if len(url_parts) > 1:
|
688
613
|
path_parts = url_parts[1].split('/')
|
689
614
|
if len(path_parts) >= 3:
|
690
|
-
|
691
|
-
|
615
|
+
# Skip domain and authentication part
|
692
616
|
domain_part = path_parts[0]
|
693
617
|
if '@' in domain_part:
|
694
|
-
|
695
|
-
|
618
|
+
# Path starts after domain
|
696
619
|
owner_index = 1
|
697
620
|
else:
|
698
621
|
owner_index = 1
|
@@ -702,22 +625,18 @@ class Inspector:
|
|
702
625
|
if len(path_parts) > owner_index + 1:
|
703
626
|
repo_name = path_parts[owner_index + 1].replace('.git', '')
|
704
627
|
|
705
|
-
|
706
|
-
|
628
|
+
# Get commit count
|
707
629
|
commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
|
708
630
|
capture_output=True, text=True)
|
709
631
|
|
710
|
-
|
711
|
-
|
632
|
+
# Get branch count
|
712
633
|
branch_count = subprocess.run(["git", "branch", "--list"],
|
713
634
|
capture_output=True, text=True)
|
714
635
|
branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
|
715
636
|
|
716
637
|
return {
|
717
|
-
"domain": domain,
|
718
|
-
|
719
|
-
"owner": owner, ##
|
720
|
-
|
638
|
+
"domain": domain, # Only domain, not full URL
|
639
|
+
"owner": owner, # Repository owner (for GitHub)
|
721
640
|
"commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
|
722
641
|
"branch_count": branch_count,
|
723
642
|
"has_git": True,
|
@@ -728,20 +647,16 @@ class Inspector:
|
|
728
647
|
}
|
729
648
|
|
730
649
|
except Exception as e:
|
731
|
-
|
732
|
-
|
650
|
+
# Optionally log the exception for debugging
|
733
651
|
pass
|
734
652
|
return {"has_git": False}
|
735
653
|
|
736
|
-
|
737
654
|
def detect_usage_pattern(self):
|
738
|
-
|
655
|
+
"""Detect usage patterns that indicate commercial use"""
|
739
656
|
current_time = datetime.now()
|
740
657
|
|
741
|
-
|
742
|
-
|
743
|
-
is_weekday = current_time.weekday() < 5 ##
|
744
|
-
|
658
|
+
# Check if using during business hours
|
659
|
+
is_weekday = current_time.weekday() < 5 # 0-4 are Monday to Friday
|
745
660
|
hour = current_time.hour
|
746
661
|
is_business_hours = 9 <= hour <= 18
|
747
662
|
|
@@ -753,16 +668,14 @@ class Inspector:
|
|
753
668
|
}
|
754
669
|
|
755
670
|
def enhanced_commercial_detection(self):
|
756
|
-
|
671
|
+
"""More thorough commercial usage detection"""
|
757
672
|
basic = self.detect_commercial_usage()
|
758
673
|
|
759
|
-
|
760
|
-
|
674
|
+
# Additional commercial indicators
|
761
675
|
try:
|
762
676
|
project_files = os.listdir(os.getcwd())
|
763
677
|
|
764
|
-
|
765
|
-
|
678
|
+
# Look for commercial frameworks
|
766
679
|
commercial_frameworks = ["django-oscar", "opencart", "magento",
|
767
680
|
"saleor", "odoo", "shopify", "woocommerce"]
|
768
681
|
|
@@ -772,8 +685,7 @@ class Inspector:
|
|
772
685
|
framework_match = True
|
773
686
|
break
|
774
687
|
|
775
|
-
|
776
|
-
|
688
|
+
# Check for database connections
|
777
689
|
db_files = [f for f in project_files if "database" in f.lower()
|
778
690
|
or "db_config" in f.lower() or f.endswith(".db")]
|
779
691
|
has_database = len(db_files) > 0
|
@@ -781,20 +693,17 @@ class Inspector:
|
|
781
693
|
framework_match = False
|
782
694
|
has_database = False
|
783
695
|
|
784
|
-
|
785
|
-
|
696
|
+
# Domain name registration check
|
786
697
|
domain_check = self.analyze_git_info()
|
787
698
|
domain_is_commercial = False
|
788
699
|
if domain_check and domain_check.get("domain"):
|
789
700
|
commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
|
790
701
|
domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
|
791
702
|
|
792
|
-
|
793
|
-
|
703
|
+
# Check project structure
|
794
704
|
project_structure = self.analyze_project_structure()
|
795
705
|
|
796
|
-
|
797
|
-
|
706
|
+
# Calculate enhanced commercial score
|
798
707
|
indicators = [
|
799
708
|
basic["commercial_probability"],
|
800
709
|
framework_match,
|
@@ -804,12 +713,10 @@ class Inspector:
|
|
804
713
|
self.detect_usage_pattern()["business_hours_usage"]
|
805
714
|
]
|
806
715
|
|
807
|
-
|
808
|
-
|
716
|
+
# Filter out None values
|
809
717
|
indicators = [i for i in indicators if i is not None]
|
810
718
|
|
811
|
-
|
812
|
-
|
719
|
+
# Calculate score - convert booleans to 1.0 and average
|
813
720
|
if indicators:
|
814
721
|
score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
|
815
722
|
for i in indicators) / len(indicators)
|
@@ -830,20 +737,18 @@ class Inspector:
|
|
830
737
|
}
|
831
738
|
|
832
739
|
def analyze_dependencies(self):
|
833
|
-
|
740
|
+
"""Analyze package dependencies for commercial patterns"""
|
834
741
|
try:
|
835
742
|
import pkg_resources
|
836
743
|
|
837
|
-
|
838
|
-
|
744
|
+
# Commercial/enterprise package indicators
|
839
745
|
enterprise_packages = [
|
840
746
|
"snowflake-connector-python", "databricks", "azure",
|
841
747
|
"aws", "google-cloud", "stripe", "atlassian",
|
842
748
|
"salesforce", "bigquery", "tableau", "sap"
|
843
749
|
]
|
844
750
|
|
845
|
-
|
846
|
-
|
751
|
+
# Find installed packages that match enterprise indicators
|
847
752
|
commercial_deps = []
|
848
753
|
for pkg in pkg_resources.working_set:
|
849
754
|
if any(ent in pkg.key for ent in enterprise_packages):
|
@@ -857,6 +762,5 @@ class Inspector:
|
|
857
762
|
except:
|
858
763
|
return {"has_commercial_deps": False}
|
859
764
|
|
860
|
-
|
861
|
-
|
765
|
+
# Create singleton instance
|
862
766
|
inspector = Inspector()
|