vnai 2.1.8__py3-none-any.whl → 2.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py CHANGED
@@ -1,5 +1,3 @@
-# vnai/scope/profile.py
-
 import os
 import sys
 import platform
@@ -14,353 +12,275 @@ import subprocess
 from pathlib import Path

 class Inspector:
-    """Inspects execution environment"""
-
     _instance = None
     _lock = None
-
+
     def __new__(cls):
         import threading
         if cls._lock is None:
             cls._lock = threading.Lock()
-
         with cls._lock:
             if cls._instance is None:
                 cls._instance = super(Inspector, cls).__new__(cls)
                 cls._instance._initialize()
             return cls._instance
-
+
     def _initialize(self):
-        """Initialize inspector"""
         self.cache = {}
-        self.cache_ttl = 3600 # 1 hour cache validity
+        self.cache_ttl = 3600
         self.last_examination = 0
         self.machine_id = None
         self._colab_auth_triggered = False
-
-        # Paths
         self.home_dir = Path.home()
-        self.project_dir = self.home_dir / ".vnstock"
+        self.project_dir = self.home_dir /".vnstock"
         self.project_dir.mkdir(exist_ok=True)
-        self.id_dir = self.project_dir / 'id'
+        self.id_dir = self.project_dir /'id'
         self.id_dir.mkdir(exist_ok=True)
-        self.machine_id_path = self.id_dir / "machine_id.txt"
-
-        # Perform initial examination
+        self.machine_id_path = self.id_dir /"machine_id.txt"
         self.examine()
-
+
     def examine(self, force_refresh=False):
-        """Examine current execution context"""
         current_time = time.time()
-
-        # Return cached data if it's fresh enough and we're not forcing a refresh
         if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
             return self.cache
-
-        # Start with basic information
         info = {
-            "timestamp": datetime.now().isoformat(),
-            "python_version": platform.python_version(),
-            "os_name": platform.system(),
-            "platform": platform.platform()
+            "timestamp": datetime.now().isoformat(),
+            "python_version": platform.python_version(),
+            "os_name": platform.system(),
+            "platform": platform.platform()
         }
-
-        # Machine identifier
         info["machine_id"] = self.fingerprint()
-
-        # Environment detection
         try:
-            # Check for Jupyter/IPython
             import importlib.util
             ipython_spec = importlib.util.find_spec("IPython")
-
             if ipython_spec:
                 from IPython import get_ipython
                 ipython = get_ipython()
                 if ipython is not None:
-                    info["environment"] = "jupyter"
-                    # Check for hosted notebooks
-                    if 'google.colab' in sys.modules:
-                        info["hosting_service"] = "colab"
-                    elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
-                        info["hosting_service"] = "kaggle"
+                    info["environment"] ="jupyter"
+                    if'google.colab' in sys.modules:
+                        info["hosting_service"] ="colab"
+                    elif'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
+                        info["hosting_service"] ="kaggle"
                     else:
-                        info["hosting_service"] = "local_jupyter"
+                        info["hosting_service"] ="local_jupyter"
                 elif sys.stdout.isatty():
-                    info["environment"] = "terminal"
+                    info["environment"] ="terminal"
                 else:
-                    info["environment"] = "script"
+                    info["environment"] ="script"
             elif sys.stdout.isatty():
-                info["environment"] = "terminal"
+                info["environment"] ="terminal"
             else:
-                info["environment"] = "script"
+                info["environment"] ="script"
         except:
-            info["environment"] = "unknown"
-
-        # System resources
+            info["environment"] ="unknown"
         try:
             info["cpu_count"] = os.cpu_count()
             info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
         except:
             pass
-
-        # Check if in Google Colab
-        is_colab = 'google.colab' in sys.modules
+        is_colab ='google.colab' in sys.modules
         if is_colab:
             info["is_colab"] = True
-            # Setup delayed authentication if not already triggered
             self.detect_colab_with_delayed_auth()
-
-        # Enhanced context information
         try:
-            # Commercial usage detection
             info["commercial_usage"] = self.enhanced_commercial_detection()
-
-            # Project context
             info["project_context"] = self.analyze_project_structure()
-
-            # Git info
             info["git_info"] = self.analyze_git_info()
-
-            # Working hours pattern
             info["usage_pattern"] = self.detect_usage_pattern()
-
-            # Dependency analysis
             info["dependencies"] = self.analyze_dependencies()
         except Exception as e:
-            # Don't let enhanced detection failure stop basic functionality
             info["detection_error"] = str(e)
-
-        # Update cache
         self.cache = info
         self.last_examination = current_time
-
         return info
-
+
     def fingerprint(self):
-        """Generate unique environment fingerprint"""
-        # Always return cached machine_id if it exists
         if self.machine_id:
             return self.machine_id
-
-        # Try to load from file first
         if self.machine_id_path.exists():
             try:
-                with open(self.machine_id_path, "r") as f:
+                with open(self.machine_id_path,"r") as f:
                     self.machine_id = f.read().strip()
                     return self.machine_id
             except:
                 pass
-
-        # Check for Colab and setup delayed authentication
         is_colab = self.detect_colab_with_delayed_auth()
-
-        # Generate a new machine ID only if necessary
         try:
-            # Use consistent system information
            system_info = platform.node() + platform.platform() + platform.machine()
            self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
         except:
-            # Fallback to UUID but only as last resort
             self.machine_id = str(uuid.uuid4())
-
-        # Save to ensure consistency across calls
         try:
-            with open(self.machine_id_path, "w") as f:
+            with open(self.machine_id_path,"w") as f:
                 f.write(self.machine_id)
         except:
             pass
-
         return self.machine_id
-
+
     def detect_hosting(self):
-        """Detect if running in a hosted environment"""
-        # Check common environment variables for hosted environments
         hosting_markers = {
-            "COLAB_GPU": "Google Colab",
-            "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
-            "BINDER_SERVICE_HOST": "Binder",
-            "CODESPACE_NAME": "GitHub Codespaces",
-            "STREAMLIT_SERVER_HEADLESS": "Streamlit Cloud",
-            "CLOUD_SHELL": "Cloud Shell"
+            "COLAB_GPU":"Google Colab",
+            "KAGGLE_KERNEL_RUN_TYPE":"Kaggle",
+            "BINDER_SERVICE_HOST":"Binder",
+            "CODESPACE_NAME":"GitHub Codespaces",
+            "STREAMLIT_SERVER_HEADLESS":"Streamlit Cloud",
+            "CLOUD_SHELL":"Cloud Shell"
         }
-
         for env_var, host_name in hosting_markers.items():
             if env_var in os.environ:
                 return host_name
-
-        # Check for Google Colab module
-        if 'google.colab' in sys.modules:
-            return "Google Colab"
-
-        return "local"
-
+        if'google.colab' in sys.modules:
+            return"Google Colab"
+        return"local"
+
     def detect_commercial_usage(self):
-        """Detect if running in commercial environment"""
         commercial_indicators = {
-            "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
-            "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
-            "env_vars": ["COMPANY", "BUSINESS", "ENTERPRISE", "CORPORATE"],
-            "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
+            "env_domains": [".com",".io",".co","enterprise","corp","inc"],
+            "file_patterns": ["invoice","payment","customer","client","product","sale"],
+            "env_vars": ["COMPANY","BUSINESS","ENTERPRISE","CORPORATE"],
+            "dir_patterns": ["company","business","enterprise","corporate","client"]
         }
-
-        # Check environment variables for commercial domains
-        env_values = " ".join(os.environ.values()).lower()
+        env_values =" ".join(os.environ.values()).lower()
         domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
-
-        # Check if commercial-related environment variables exist
         env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
-
-        # Check current directory for commercial indicators
         current_dir = os.getcwd().lower()
         dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
-
-        # Check files in current directory for commercial patterns
         try:
             files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
             file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
         except:
             file_match = False
-
-        # Calculate probability
         indicators = [domain_match, env_var_match, dir_match, file_match]
         commercial_probability = sum(indicators) / len(indicators)
-
         return {
-            "likely_commercial": commercial_probability > 0.3,
-            "commercial_probability": commercial_probability,
-            "commercial_indicators": {
-                "domain_match": domain_match,
-                "env_var_match": env_var_match,
-                "dir_match": dir_match,
-                "file_match": file_match
+            "likely_commercial": commercial_probability > 0.3,
+            "commercial_probability": commercial_probability,
+            "commercial_indicators": {
+                "domain_match": domain_match,
+                "env_var_match": env_var_match,
+                "dir_match": dir_match,
+                "file_match": file_match
             }
         }
-
+
     def scan_packages(self):
-        """Scan for installed packages by category"""
         package_groups = {
-            "vnstock_family": [
-                "vnstock",
-                "vnstock3",
-                "vnstock_ezchart",
-                "vnstock_data_pro", # Fixed missing comma here
-                "vnstock_market_data_pipeline",
-                "vnstock_ta",
-                "vnii",
-                "vnai"
+            "vnstock_family": [
+                "vnstock",
+                "vnstock3",
+                "vnstock_ezchart",
+                "vnstock_data_pro",
+                "vnstock_market_data_pipeline",
+                "vnstock_ta",
+                "vnii",
+                "vnai"
             ],
-            "analytics": [
-                "openbb",
-                "pandas_ta"
+            "analytics": [
+                "openbb",
+                "pandas_ta"
             ],
-            "static_charts": [
-                "matplotlib",
-                "seaborn",
-                "altair"
+            "static_charts": [
+                "matplotlib",
+                "seaborn",
+                "altair"
             ],
-            "dashboard": [
-                "streamlit",
-                "voila",
-                "panel",
-                "shiny",
-                "dash"
+            "dashboard": [
+                "streamlit",
+                "voila",
+                "panel",
+                "shiny",
+                "dash"
             ],
-            "interactive_charts": [
-                "mplfinance",
-                "plotly",
-                "plotline",
-                "bokeh",
-                "pyecharts",
-                "highcharts-core",
-                "highcharts-stock",
-                "mplchart"
+            "interactive_charts": [
+                "mplfinance",
+                "plotly",
+                "plotline",
+                "bokeh",
+                "pyecharts",
+                "highcharts-core",
+                "highcharts-stock",
+                "mplchart"
             ],
-            "datafeed": [
-                "yfinance",
-                "alpha_vantage",
-                "pandas-datareader",
-                "investpy"
+            "datafeed": [
+                "yfinance",
+                "alpha_vantage",
+                "pandas-datareader",
+                "investpy"
             ],
-            "official_api": [
-                "ssi-fc-data",
-                "ssi-fctrading"
+            "official_api": [
+                "ssi-fc-data",
+                "ssi-fctrading"
             ],
-            "risk_return": [
-                "pyfolio",
-                "empyrical",
-                "quantstats",
-                "financetoolkit"
+            "risk_return": [
+                "pyfolio",
+                "empyrical",
+                "quantstats",
+                "financetoolkit"
             ],
-            "machine_learning": [
-                "scipy",
-                "sklearn",
-                "statsmodels",
-                "pytorch",
-                "tensorflow",
-                "keras",
-                "xgboost"
+            "machine_learning": [
+                "scipy",
+                "sklearn",
+                "statsmodels",
+                "pytorch",
+                "tensorflow",
+                "keras",
+                "xgboost"
             ],
-            "indicators": [
-                "stochastic",
-                "talib",
-                "tqdm",
-                "finta",
-                "financetoolkit",
-                "tulipindicators"
+            "indicators": [
+                "stochastic",
+                "talib",
+                "tqdm",
+                "finta",
+                "financetoolkit",
+                "tulipindicators"
             ],
-            "backtesting": [
-                "vectorbt",
-                "backtesting",
-                "bt",
-                "zipline",
-                "pyalgotrade",
-                "backtrader",
-                "pybacktest",
-                "fastquant",
-                "lean",
-                "ta",
-                "finmarketpy",
-                "qstrader"
+            "backtesting": [
+                "vectorbt",
+                "backtesting",
+                "bt",
+                "zipline",
+                "pyalgotrade",
+                "backtrader",
+                "pybacktest",
+                "fastquant",
+                "lean",
+                "ta",
+                "finmarketpy",
+                "qstrader"
             ],
-            "server": [
-                "fastapi",
-                "flask",
-                "uvicorn",
-                "gunicorn"
+            "server": [
+                "fastapi",
+                "flask",
+                "uvicorn",
+                "gunicorn"
             ],
-            "framework": [
-                "lightgbm",
-                "catboost",
-                "django"
+            "framework": [
+                "lightgbm",
+                "catboost",
+                "django"
             ]
         }
-
         installed = {}
-
         for category, packages in package_groups.items():
             installed[category] = []
             for pkg in packages:
                 try:
                     version = importlib.metadata.version(pkg)
-                    installed[category].append({"name": pkg, "version": version})
+                    installed[category].append({"name": pkg,"version": version})
                 except:
                     pass
-
         return installed
-
+
     def setup_vnstock_environment(self):
-        """Set up environment for vnstock library"""
-        # Create environment.json file
-        env_file = self.id_dir / "environment.json"
+        env_file = self.id_dir /"environment.json"
         env_data = {
-            "accepted_agreement": True,
-            "timestamp": datetime.now().isoformat(),
-            "machine_id": self.fingerprint()
+            "accepted_agreement": True,
+            "timestamp": datetime.now().isoformat(),
+            "machine_id": self.fingerprint()
         }
-
         try:
-            with open(env_file, "w") as f:
+            with open(env_file,"w") as f:
                 import json
                 json.dump(env_data, f)
                 return True
@@ -368,90 +288,65 @@ class Inspector:
             print(f"Failed to set up vnstock environment: {e}")
             return False

-    # Update detect_colab_with_delayed_auth method in Inspector class
     def detect_colab_with_delayed_auth(self, immediate=False):
-        """Detect if running in Google Colab and setup authentication"""
-        # Check if we're in Colab without mounting drive yet
-        is_colab = 'google.colab' in sys.modules
-
+        is_colab ='google.colab' in sys.modules
         if is_colab and not self._colab_auth_triggered:
             if immediate:
-                # Immediate authentication
                 self._colab_auth_triggered = True
                 user_id = self.get_or_create_user_id()
                 if user_id and user_id != self.machine_id:
                     self.machine_id = user_id
                     try:
-                        with open(self.machine_id_path, "w") as f:
+                        with open(self.machine_id_path,"w") as f:
                             f.write(user_id)
                     except:
                         pass
             else:
-                # Start a delayed thread to trigger authentication after user is already using the package
+
                 def delayed_auth():
-                    # Wait for some time (e.g., 5 minutes) before attempting auth
                     time.sleep(300)
-                    # Try to get authenticated user ID
                     user_id = self.get_or_create_user_id()
-                    # Update machine ID with the authenticated one
                     if user_id and user_id != self.machine_id:
                         self.machine_id = user_id
-                        # Save to the machine_id_path
                         try:
-                            with open(self.machine_id_path, "w") as f:
+                            with open(self.machine_id_path,"w") as f:
                                 f.write(user_id)
                         except:
                             pass
-
-                # Start the delayed authentication thread
                 thread = threading.Thread(target=delayed_auth, daemon=True)
                 thread.start()
-
         return is_colab

     def get_or_create_user_id(self):
-        """Get existing user ID from Google Drive or create new one"""
         if self._colab_auth_triggered:
-            return self.machine_id # Avoid triggering multiple times
-
+            return self.machine_id
         try:
             from google.colab import drive
             print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
             print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
-
-            # Mark that we've triggered the auth
             self._colab_auth_triggered = True
-
-            # Mount Google Drive
             drive.mount('/content/drive')
-            id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
-
+            id_path ='/content/drive/MyDrive/.vnstock/user_id.txt'
             if os.path.exists(id_path):
-                with open(id_path, 'r') as f:
+                with open(id_path,'r') as f:
                     return f.read().strip()
             else:
                 user_id = str(uuid.uuid4())
                 os.makedirs(os.path.dirname(id_path), exist_ok=True)
-                with open(id_path, 'w') as f:
+                with open(id_path,'w') as f:
                     f.write(user_id)
                 return user_id
         except Exception as e:
-            # Silently fail and return the existing machine ID
             return self.machine_id
-
-    # Enhanced methods for project context collection
-
+
     def analyze_project_structure(self):
-        """Analyze project directory structure for context"""
         current_dir = os.getcwd()
         project_indicators = {
-            "commercial_app": ["app", "services", "products", "customers", "billing"],
-            "financial_tool": ["portfolio", "backtesting", "trading", "strategy"],
-            "data_science": ["models", "notebooks", "datasets", "visualization"],
-            "educational": ["examples", "lectures", "assignments", "slides"]
+            "commercial_app": ["app","services","products","customers","billing"],
+            "financial_tool": ["portfolio","backtesting","trading","strategy"],
+            "data_science": ["models","notebooks","datasets","visualization"],
+            "educational": ["examples","lectures","assignments","slides"]
         }
-
-        # Look for key directories up to 2 levels deep (limited for privacy)
         project_type = {}
         for category, markers in project_indicators.items():
             match_count = 0
@@ -460,249 +355,181 @@ class Inspector:
                     match_count += 1
             if len(markers) > 0:
                 project_type[category] = match_count / len(markers)
-
-        # Scan for direct child files and directories (limited depth for privacy)
         try:
             root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
             root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
-
-            # Detect project type
             file_markers = {
-                "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
-                "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
-                "web_app": ["app.py", "wsgi.py", "manage.py", "server.py"],
-                "finance_app": ["portfolio.py", "trading.py", "backtest.py"],
+                "python_project": ["setup.py","pyproject.toml","requirements.txt"],
+                "data_science": ["notebook.ipynb",".ipynb_checkpoints"],
+                "web_app": ["app.py","wsgi.py","manage.py","server.py"],
+                "finance_app": ["portfolio.py","trading.py","backtest.py"],
             }
-
-            file_project_type = "unknown"
+            file_project_type ="unknown"
             for ptype, markers in file_markers.items():
                 if any(marker in root_files for marker in markers):
                     file_project_type = ptype
                     break
-
-            # Scan for specific frameworks
             frameworks = []
             framework_markers = {
-                "django": ["manage.py", "settings.py"],
-                "flask": ["app.py", "wsgi.py"],
-                "streamlit": ["streamlit_app.py", "app.py"],
-                "fastapi": ["main.py", "app.py"],
+                "django": ["manage.py","settings.py"],
+                "flask": ["app.py","wsgi.py"],
+                "streamlit": ["streamlit_app.py","app.py"],
+                "fastapi": ["main.py","app.py"],
             }
-
             for framework, markers in framework_markers.items():
                 if any(marker in root_files for marker in markers):
                     frameworks.append(framework)
-
         except Exception as e:
             root_files = []
             root_dirs = []
-            file_project_type = "unknown"
+            file_project_type ="unknown"
             frameworks = []
-
         return {
-            "project_dir": current_dir,
-            "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else "unknown",
-            "file_type": file_project_type,
-            "is_git_repo": ".git" in (root_dirs if 'root_dirs' in locals() else []),
-            "frameworks": frameworks,
-            "file_count": len(root_files) if 'root_files' in locals() else 0,
-            "directory_count": len(root_dirs) if 'root_dirs' in locals() else 0,
-            "type_confidence": project_type
+            "project_dir": current_dir,
+            "detected_type": max(project_type.items(), key=lambda x: x[1])[0] if project_type else"unknown",
+            "file_type": file_project_type,
+            "is_git_repo":".git" in (root_dirs if'root_dirs' in locals() else []),
+            "frameworks": frameworks,
+            "file_count": len(root_files) if'root_files' in locals() else 0,
+            "directory_count": len(root_dirs) if'root_dirs' in locals() else 0,
+            "type_confidence": project_type
         }

     def analyze_git_info(self):
-        """Extract non-sensitive git repository information"""
         try:
-            # Check if it's a git repository
-            result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
+            result = subprocess.run(["git","rev-parse","--is-inside-work-tree"],
                                     capture_output=True, text=True)
-
             if result.returncode != 0:
                 return {"has_git": False}
-
-            # Get repository root path - ADD THIS CODE
-            repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
+            repo_root = subprocess.run(["git","rev-parse","--show-toplevel"],
                                     capture_output=True, text=True)
             repo_path = repo_root.stdout.strip() if repo_root.stdout else None
-
-            # Extract repository name from path - ADD THIS CODE
             repo_name = os.path.basename(repo_path) if repo_path else None
-
-            # Check for license file - ADD THIS CODE
             has_license = False
-            license_type = "unknown"
+            license_type ="unknown"
             if repo_path:
                 license_files = [
-                    os.path.join(repo_path, "LICENSE"),
-                    os.path.join(repo_path, "LICENSE.txt"),
-                    os.path.join(repo_path, "LICENSE.md")
+                    os.path.join(repo_path,"LICENSE"),
+                    os.path.join(repo_path,"LICENSE.txt"),
+                    os.path.join(repo_path,"LICENSE.md")
                 ]
                 for license_file in license_files:
                     if os.path.exists(license_file):
                         has_license = True
-                        # Try to determine license type by scanning content
                         try:
-                            with open(license_file, 'r') as f:
+                            with open(license_file,'r') as f:
                                 content = f.read().lower()
-                                if "mit license" in content:
-                                    license_type = "MIT"
-                                elif "apache license" in content:
-                                    license_type = "Apache"
-                                elif "gnu general public" in content:
-                                    license_type = "GPL"
-                                elif "bsd " in content:
-                                    license_type = "BSD"
-                                # Add more license type detection as needed
+                                if"mit license" in content:
+                                    license_type ="MIT"
+                                elif"apache license" in content:
+                                    license_type ="Apache"
+                                elif"gnu general public" in content:
+                                    license_type ="GPL"
+                                elif"bsd " in content:
+                                    license_type ="BSD"
                         except:
                             pass
                         break
-
-            # Get remote URL (only domain, not full URL)
-            remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
+            remote = subprocess.run(["git","config","--get","remote.origin.url"],
                                     capture_output=True, text=True)
-
             remote_url = remote.stdout.strip() if remote.stdout else None
-
             if remote_url:
-                # Clean the remote URL string
                 remote_url = remote_url.strip()
-
-            # Properly extract domain without authentication information
             domain = None
             if remote_url:
-                # For SSH URLs (git@github.com:user/repo.git)
-                if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
+                if remote_url.startswith('git@') or'@' in remote_url and':' in remote_url.split('@')[1]:
                     domain = remote_url.split('@')[1].split(':')[0]
-                # For HTTPS URLs with or without authentication
                 elif remote_url.startswith('http'):
-                    # Remove authentication part if present
                     url_parts = remote_url.split('//')
                     if len(url_parts) > 1:
                         auth_and_domain = url_parts[1].split('/', 1)[0]
-                        # If auth info exists (contains @), take only domain part
-                        if '@' in auth_and_domain:
+                        if'@' in auth_and_domain:
                             domain = auth_and_domain.split('@')[-1]
                         else:
                             domain = auth_and_domain
-                # Handle other URL formats
                 else:
-                    # Try a general regex as fallback for unusual formats
                     import re
                     domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
                     if domain_match:
                         domain = domain_match.group(1) or domain_match.group(2)
-
-            # Extract owner and repo info securely
             owner = None
             repo_name = None
-
             if domain:
-                # For GitHub repositories
-                if "github" in domain:
-                    # SSH format: git@github.com:username/repo.git
-                    if ':' in remote_url and '@' in remote_url:
+                if"github" in domain:
+                    if':' in remote_url and'@' in remote_url:
                         parts = remote_url.split(':')[-1].split('/')
                         if len(parts) >= 2:
                             owner = parts[0]
-                            repo_name = parts[1].replace('.git', '')
-                    # HTTPS format
+                            repo_name = parts[1].replace('.git','')
                     else:
                         url_parts = remote_url.split('//')
                         if len(url_parts) > 1:
                             path_parts = url_parts[1].split('/')
                             if len(path_parts) >= 3:
-                                # Skip domain and authentication part
                                 domain_part = path_parts[0]
-                                if '@' in domain_part:
-                                    # Path starts after domain
+                                if'@' in domain_part:
                                     owner_index = 1
                                 else:
                                     owner_index = 1
-
                                 if len(path_parts) > owner_index:
                                     owner = path_parts[owner_index]
                                 if len(path_parts) > owner_index + 1:
-                                    repo_name = path_parts[owner_index + 1].replace('.git', '')
-
-            # Get commit count
-            commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
+                                    repo_name = path_parts[owner_index + 1].replace('.git','')
+            commit_count = subprocess.run(["git","rev-list","--count","HEAD"],
                                     capture_output=True, text=True)
-
-            # Get branch count
-            branch_count = subprocess.run(["git", "branch", "--list"],
+            branch_count = subprocess.run(["git","branch","--list"],
                                     capture_output=True, text=True)
             branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
-
             return {
-                "domain": domain, # Only domain, not full URL
-                "owner": owner, # Repository owner (for GitHub)
-                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
-                "branch_count": branch_count,
-                "has_git": True,
-                "repo_path": repo_path if 'repo_path' in locals() else None,
-                "repo_name": repo_name,
-                "has_license": has_license if 'has_license' in locals() else False,
-                "license_type": license_type if 'license_type' in locals() else "unknown"
+                "domain": domain,
+                "owner": owner,
+                "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
+                "branch_count": branch_count,
+                "has_git": True,
+                "repo_path": repo_path if'repo_path' in locals() else None,
+                "repo_name": repo_name,
+                "has_license": has_license if'has_license' in locals() else False,
+                "license_type": license_type if'license_type' in locals() else"unknown"
             }
-
         except Exception as e:
-            # Optionally log the exception for debugging
             pass
         return {"has_git": False}

     def detect_usage_pattern(self):
-        """Detect usage patterns that indicate commercial use"""
         current_time = datetime.now()
-
-        # Check if using during business hours
-        is_weekday = current_time.weekday() < 5 # 0-4 are Monday to Friday
+        is_weekday = current_time.weekday() < 5
         hour = current_time.hour
         is_business_hours = 9 <= hour <= 18
-
         return {
-            "business_hours_usage": is_weekday and is_business_hours,
-            "weekday": is_weekday,
-            "hour": hour,
-            "timestamp": current_time.isoformat()
+            "business_hours_usage": is_weekday and is_business_hours,
+            "weekday": is_weekday,
+            "hour": hour,
+            "timestamp": current_time.isoformat()
        }

     def enhanced_commercial_detection(self):
-        """More thorough commercial usage detection"""
         basic = self.detect_commercial_usage()
-
-        # Additional commercial indicators
         try:
             project_files = os.listdir(os.getcwd())
-
-            # Look for commercial frameworks
-            commercial_frameworks = ["django-oscar", "opencart", "magento",
-                                     "saleor", "odoo", "shopify", "woocommerce"]
-
+            commercial_frameworks = ["django-oscar","opencart","magento",
+                                     "saleor","odoo","shopify","woocommerce"]
             framework_match = False
             for framework in commercial_frameworks:
                 if any(framework in f for f in project_files):
                     framework_match = True
                     break
-
-            # Check for database connections
-            db_files = [f for f in project_files if "database" in f.lower()
-                        or "db_config" in f.lower() or f.endswith(".db")]
+            db_files = [f for f in project_files if"database" in f.lower()
+                        or"db_config" in f.lower() or f.endswith(".db")]
             has_database = len(db_files) > 0
         except:
             framework_match = False
             has_database = False
-
-        # Domain name registration check
         domain_check = self.analyze_git_info()
         domain_is_commercial = False
         if domain_check and domain_check.get("domain"):
-            commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
+            commercial_tlds = [".com",".io",".co",".org",".net"]
             domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
-
-        # Check project structure
         project_structure = self.analyze_project_structure()
-
-        # Calculate enhanced commercial score
         indicators = [
             basic["commercial_probability"],
             framework_match,
@@ -711,55 +538,42 @@ class Inspector:
             project_structure.get("type_confidence", {}).get("commercial_app", 0),
             self.detect_usage_pattern()["business_hours_usage"]
         ]
-
-        # Filter out None values
         indicators = [i for i in indicators if i is not None]
-
-        # Calculate score - convert booleans to 1.0 and average
         if indicators:
-            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
+            score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
                         for i in indicators) / len(indicators)
         else:
             score = 0
-
         return {
-            "commercial_probability": score,
-            "likely_commercial": score > 0.4,
-            "indicators": {
-                "basic_indicators": basic["commercial_indicators"],
-                "framework_match": framework_match,
-                "has_database": has_database,
-                "domain_is_commercial": domain_is_commercial,
-                "project_structure": project_structure.get("detected_type"),
-                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
+            "commercial_probability": score,
+            "likely_commercial": score > 0.4,
+            "indicators": {
+                "basic_indicators": basic["commercial_indicators"],
+                "framework_match": framework_match,
+                "has_database": has_database,
+                "domain_is_commercial": domain_is_commercial,
+                "project_structure": project_structure.get("detected_type"),
+                "business_hours_usage": self.detect_usage_pattern()["business_hours_usage"]
             }
         }

     def analyze_dependencies(self):
-        """Analyze package dependencies for commercial patterns"""
         try:
             import pkg_resources
-
-            # Commercial/enterprise package indicators
             enterprise_packages = [
-                "snowflake-connector-python", "databricks", "azure",
-                "aws", "google-cloud", "stripe", "atlassian",
-                "salesforce", "bigquery", "tableau", "sap"
+                "snowflake-connector-python","databricks","azure",
+                "aws","google-cloud","stripe","atlassian",
+                "salesforce","bigquery","tableau","sap"
             ]
-
-            # Find installed packages that match enterprise indicators
             commercial_deps = []
             for pkg in pkg_resources.working_set:
                 if any(ent in pkg.key for ent in enterprise_packages):
-                    commercial_deps.append({"name": pkg.key, "version": pkg.version})
-
+                    commercial_deps.append({"name": pkg.key,"version": pkg.version})
             return {
-                "has_commercial_deps": len(commercial_deps) > 0,
-                "commercial_deps_count": len(commercial_deps),
-                "commercial_deps": commercial_deps
+                "has_commercial_deps": len(commercial_deps) > 0,
+                "commercial_deps_count": len(commercial_deps),
+                "commercial_deps": commercial_deps
            }
         except:
             return {"has_commercial_deps": False}
-
-# Create singleton instance
-inspector = Inspector()
+inspector = Inspector()
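
For orientation, a minimal usage sketch of the module shown above. The names and return keys are taken from the diff itself (the module-level `inspector` singleton, `examine()`, `fingerprint()`, and `scan_packages()`), and the import path is assumed from the file's location; this is illustrative, not official vnai documentation.

    # Illustrative only: names come from the diff above, not from vnai docs.
    from vnai.scope.profile import inspector

    snapshot = inspector.examine()       # cached snapshot, refreshed after cache_ttl = 3600 s
    print(snapshot["machine_id"])        # fingerprint persisted under ~/.vnstock/id/machine_id.txt
    print(snapshot["environment"])       # "jupyter", "terminal", "script", or "unknown"
    print(inspector.scan_packages()["vnstock_family"])  # installed vnstock-family packages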