vnai 2.0.1__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py CHANGED
@@ -1,5 +1,7 @@
1
- # vnai/scope/profile.py
2
- # System environment detection
1
+ ##
2
+
3
+ ##
4
+
3
5
 
4
6
  import os
5
7
  import sys
@@ -15,7 +17,7 @@ import subprocess
15
17
  from pathlib import Path
16
18
 
17
19
  class Inspector:
18
- """Inspects execution environment"""
20
+ #--
19
21
 
20
22
  _instance = None
21
23
  _lock = None
@@ -32,14 +34,16 @@ class Inspector:
32
34
  return cls._instance
33
35
 
34
36
  def _initialize(self):
35
- """Initialize inspector"""
37
+ #--
36
38
  self.cache = {}
37
- self.cache_ttl = 3600 # 1 hour cache validity
39
+ self.cache_ttl = 3600 ##
40
+
38
41
  self.last_examination = 0
39
42
  self.machine_id = None
40
43
  self._colab_auth_triggered = False
41
44
 
42
- # Paths
45
+ ##
46
+
43
47
  self.home_dir = Path.home()
44
48
  self.project_dir = self.home_dir / ".vnstock"
45
49
  self.project_dir.mkdir(exist_ok=True)
@@ -47,18 +51,21 @@ class Inspector:
47
51
  self.id_dir.mkdir(exist_ok=True)
48
52
  self.machine_id_path = self.id_dir / "machine_id.txt"
49
53
 
50
- # Perform initial examination
54
+ ##
55
+
51
56
  self.examine()
52
57
 
53
58
  def examine(self, force_refresh=False):
54
- """Examine current execution context"""
59
+ #--
55
60
  current_time = time.time()
56
61
 
57
- # Return cached data if it's fresh enough and we're not forcing a refresh
62
+ ##
63
+
58
64
  if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
59
65
  return self.cache
60
66
 
61
- # Start with basic information
67
+ ##
68
+
62
69
  info = {
63
70
  "timestamp": datetime.now().isoformat(),
64
71
  "python_version": platform.python_version(),
@@ -66,12 +73,15 @@ class Inspector:
66
73
  "platform": platform.platform()
67
74
  }
68
75
 
69
- # Machine identifier
76
+ ##
77
+
70
78
  info["machine_id"] = self.fingerprint()
71
79
 
72
- # Environment detection
80
+ ##
81
+
73
82
  try:
74
- # Check for Jupyter/IPython
83
+ ##
84
+
75
85
  import importlib.util
76
86
  ipython_spec = importlib.util.find_spec("IPython")
77
87
 
@@ -80,7 +90,8 @@ class Inspector:
80
90
  ipython = get_ipython()
81
91
  if ipython is not None:
82
92
  info["environment"] = "jupyter"
83
- # Check for hosted notebooks
93
+ ##
94
+
84
95
  if 'google.colab' in sys.modules:
85
96
  info["hosting_service"] = "colab"
86
97
  elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
@@ -98,53 +109,66 @@ class Inspector:
98
109
  except:
99
110
  info["environment"] = "unknown"
100
111
 
101
- # System resources
112
+ ##
113
+
102
114
  try:
103
115
  info["cpu_count"] = os.cpu_count()
104
116
  info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
105
117
  except:
106
118
  pass
107
119
 
108
- # Check if in Google Colab
120
+ ##
121
+
109
122
  is_colab = 'google.colab' in sys.modules
110
123
  if is_colab:
111
124
  info["is_colab"] = True
112
- # Setup delayed authentication if not already triggered
125
+ ##
126
+
113
127
  self.detect_colab_with_delayed_auth()
114
128
 
115
- # Enhanced context information
129
+ ##
130
+
116
131
  try:
117
- # Commercial usage detection
132
+ ##
133
+
118
134
  info["commercial_usage"] = self.enhanced_commercial_detection()
119
135
 
120
- # Project context
136
+ ##
137
+
121
138
  info["project_context"] = self.analyze_project_structure()
122
139
 
123
- # Git info
140
+ ##
141
+
124
142
  info["git_info"] = self.analyze_git_info()
125
143
 
126
- # Working hours pattern
144
+ ##
145
+
127
146
  info["usage_pattern"] = self.detect_usage_pattern()
128
147
 
129
- # Dependency analysis
148
+ ##
149
+
130
150
  info["dependencies"] = self.analyze_dependencies()
131
151
  except Exception as e:
132
- # Don't let enhanced detection failure stop basic functionality
152
+ ##
153
+
133
154
  info["detection_error"] = str(e)
134
155
 
135
- # Update cache
156
+ ##
157
+
136
158
  self.cache = info
137
159
  self.last_examination = current_time
138
160
 
139
161
  return info
140
162
 
141
163
  def fingerprint(self):
142
- """Generate unique environment fingerprint"""
143
- # Always return cached machine_id if it exists
164
+ #--
165
+ ##
166
+
144
167
  if self.machine_id:
145
168
  return self.machine_id
146
169
 
147
- # Try to load from file first
170
+ ##
171
+
148
172
  if self.machine_id_path.exists():
149
173
  try:
150
174
  with open(self.machine_id_path, "r") as f:
@@ -153,19 +177,24 @@ class Inspector:
153
177
  except:
154
178
  pass
155
179
 
156
- # Check for Colab and setup delayed authentication
180
+ ##
181
+
157
182
  is_colab = self.detect_colab_with_delayed_auth()
158
183
 
159
- # Generate a new machine ID only if necessary
184
+ ##
185
+
160
186
  try:
161
- # Use consistent system information
187
+ ##
188
+
162
189
  system_info = platform.node() + platform.platform() + platform.machine()
163
190
  self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
164
191
  except:
165
- # Fallback to UUID but only as last resort
192
+ ##
193
+
166
194
  self.machine_id = str(uuid.uuid4())
167
195
 
168
- # Save to ensure consistency across calls
196
+ ##
197
+
169
198
  try:
170
199
  with open(self.machine_id_path, "w") as f:
171
200
  f.write(self.machine_id)
@@ -175,8 +204,9 @@ class Inspector:
175
204
  return self.machine_id
176
205
 
177
206
  def detect_hosting(self):
178
- """Detect if running in a hosted environment"""
179
- # Check common environment variables for hosted environments
207
+ #--
208
+ ##
209
+
180
210
  hosting_markers = {
181
211
  "COLAB_GPU": "Google Colab",
182
212
  "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
@@ -190,14 +220,15 @@ class Inspector:
190
220
  if env_var in os.environ:
191
221
  return host_name
192
222
 
193
- # Check for Google Colab module
223
+ ##
224
+
194
225
  if 'google.colab' in sys.modules:
195
226
  return "Google Colab"
196
227
 
197
228
  return "local"
198
229
 
199
230
  def detect_commercial_usage(self):
200
- """Detect if running in commercial environment"""
231
+ #--
201
232
  commercial_indicators = {
202
233
  "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
203
234
  "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
@@ -205,25 +236,30 @@ class Inspector:
205
236
  "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
206
237
  }
207
238
 
208
- # Check environment variables for commercial domains
239
+ ##
240
+
209
241
  env_values = " ".join(os.environ.values()).lower()
210
242
  domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
211
243
 
212
- # Check if commercial-related environment variables exist
244
+ ##
245
+
213
246
  env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
214
247
 
215
- # Check current directory for commercial indicators
248
+ ##
249
+
216
250
  current_dir = os.getcwd().lower()
217
251
  dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
218
252
 
219
- # Check files in current directory for commercial patterns
253
+ ##
254
+
220
255
  try:
221
256
  files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
222
257
  file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
223
258
  except:
224
259
  file_match = False
225
260
 
226
- # Calculate probability
261
+ ##
262
+
227
263
  indicators = [domain_match, env_var_match, dir_match, file_match]
228
264
  commercial_probability = sum(indicators) / len(indicators)
229
265
 
@@ -239,13 +275,14 @@ class Inspector:
239
275
  }
240
276
 
241
277
  def scan_packages(self):
242
- """Scan for installed packages by category"""
278
+ #--
243
279
  package_groups = {
244
280
  "vnstock_family": [
245
281
  "vnstock",
246
282
  "vnstock3",
247
283
  "vnstock_ezchart",
248
- "vnstock_data_pro", # Fixed missing comma here
284
+ "vnstock_data_pro", ##
285
+
249
286
  "vnstock_market_data_pipeline",
250
287
  "vnstock_ta",
251
288
  "vnii",
@@ -351,8 +388,9 @@ class Inspector:
351
388
  return installed
352
389
 
353
390
  def setup_vnstock_environment(self):
354
- """Set up environment for vnstock library"""
355
- # Create environment.json file
391
+ #--
392
+ ##
393
+
356
394
  env_file = self.id_dir / "environment.json"
357
395
  env_data = {
358
396
  "accepted_agreement": True,
@@ -369,15 +407,18 @@ class Inspector:
369
407
  print(f"Failed to set up vnstock environment: {e}")
370
408
  return False
371
409
 
372
- # Update detect_colab_with_delayed_auth method in Inspector class
410
+ ##
411
+
373
412
  def detect_colab_with_delayed_auth(self, immediate=False):
374
- """Detect if running in Google Colab and setup authentication"""
375
- # Check if we're in Colab without mounting drive yet
413
+ #--
414
+ ##
415
+
376
416
  is_colab = 'google.colab' in sys.modules
377
417
 
378
418
  if is_colab and not self._colab_auth_triggered:
379
419
  if immediate:
380
- # Immediate authentication
420
+ ##
421
+
381
422
  self._colab_auth_triggered = True
382
423
  user_id = self.get_or_create_user_id()
383
424
  if user_id and user_id != self.machine_id:
@@ -388,42 +429,51 @@ class Inspector:
388
429
  except:
389
430
  pass
390
431
  else:
391
- # Start a delayed thread to trigger authentication after user is already using the package
432
+ ##
433
+
392
434
  def delayed_auth():
393
- # Wait for some time (e.g., 5 minutes) before attempting auth
435
+ ##
436
+
394
437
  time.sleep(300)
395
- # Try to get authenticated user ID
438
+ ##
439
+
396
440
  user_id = self.get_or_create_user_id()
397
- # Update machine ID with the authenticated one
441
+ ##
442
+
398
443
  if user_id and user_id != self.machine_id:
399
444
  self.machine_id = user_id
400
- # Save to the machine_id_path
445
+ ##
446
+
401
447
  try:
402
448
  with open(self.machine_id_path, "w") as f:
403
449
  f.write(user_id)
404
450
  except:
405
451
  pass
406
452
 
407
- # Start the delayed authentication thread
453
+ ##
454
+
408
455
  thread = threading.Thread(target=delayed_auth, daemon=True)
409
456
  thread.start()
410
457
 
411
458
  return is_colab
412
459
 
413
460
  def get_or_create_user_id(self):
414
- """Get existing user ID from Google Drive or create new one"""
461
+ #--
415
462
  if self._colab_auth_triggered:
416
- return self.machine_id # Avoid triggering multiple times
463
+ return self.machine_id ##
464
+
417
465
 
418
466
  try:
419
467
  from google.colab import drive
420
468
  print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
421
469
  print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
422
470
 
423
- # Mark that we've triggered the auth
471
+ ##
472
+
424
473
  self._colab_auth_triggered = True
425
474
 
426
- # Mount Google Drive
475
+ ##
476
+
427
477
  drive.mount('/content/drive')
428
478
  id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
429
479
 
@@ -437,13 +487,15 @@ class Inspector:
437
487
  f.write(user_id)
438
488
  return user_id
439
489
  except Exception as e:
440
- # Silently fail and return the existing machine ID
490
+ ##
491
+
441
492
  return self.machine_id
442
493
 
443
- # Enhanced methods for project context collection
494
+ ##
495
+
444
496
 
445
497
  def analyze_project_structure(self):
446
- """Analyze project directory structure for context"""
498
+ #--
447
499
  current_dir = os.getcwd()
448
500
  project_indicators = {
449
501
  "commercial_app": ["app", "services", "products", "customers", "billing"],
@@ -452,7 +504,8 @@ class Inspector:
452
504
  "educational": ["examples", "lectures", "assignments", "slides"]
453
505
  }
454
506
 
455
- # Look for key directories up to 2 levels deep (limited for privacy)
507
+ ##
508
+
456
509
  project_type = {}
457
510
  for category, markers in project_indicators.items():
458
511
  match_count = 0
@@ -462,12 +515,14 @@ class Inspector:
462
515
  if len(markers) > 0:
463
516
  project_type[category] = match_count / len(markers)
464
517
 
465
- # Scan for direct child files and directories (limited depth for privacy)
518
+ ##
519
+
466
520
  try:
467
521
  root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
468
522
  root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
469
523
 
470
- # Detect project type
524
+ ##
525
+
471
526
  file_markers = {
472
527
  "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
473
528
  "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
@@ -481,7 +536,8 @@ class Inspector:
481
536
  file_project_type = ptype
482
537
  break
483
538
 
484
- # Scan for specific frameworks
539
+ ##
540
+
485
541
  frameworks = []
486
542
  framework_markers = {
487
543
  "django": ["manage.py", "settings.py"],
@@ -512,24 +568,28 @@ class Inspector:
512
568
  }
513
569
 
514
570
  def analyze_git_info(self):
515
- """Extract non-sensitive git repository information"""
571
+ #--
516
572
  try:
517
- # Check if it's a git repository
573
+ ##
574
+
518
575
  result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
519
576
  capture_output=True, text=True)
520
577
 
521
578
  if result.returncode != 0:
522
579
  return {"has_git": False}
523
580
 
524
- # Get repository root path - ADD THIS CODE
581
+ ##
582
+
525
583
  repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
526
584
  capture_output=True, text=True)
527
585
  repo_path = repo_root.stdout.strip() if repo_root.stdout else None
528
586
 
529
- # Extract repository name from path - ADD THIS CODE
587
+ ##
588
+
530
589
  repo_name = os.path.basename(repo_path) if repo_path else None
531
590
 
532
- # Check for license file - ADD THIS CODE
591
+ ##
592
+
533
593
  has_license = False
534
594
  license_type = "unknown"
535
595
  if repo_path:
@@ -541,7 +601,8 @@ class Inspector:
541
601
  for license_file in license_files:
542
602
  if os.path.exists(license_file):
543
603
  has_license = True
544
- # Try to determine license type by scanning content
604
+ ##
605
+
545
606
  try:
546
607
  with open(license_file, 'r') as f:
547
608
  content = f.read().lower()
@@ -553,69 +614,85 @@ class Inspector:
553
614
  license_type = "GPL"
554
615
  elif "bsd " in content:
555
616
  license_type = "BSD"
556
- # Add more license type detection as needed
617
+ ##
618
+
557
619
  except:
558
620
  pass
559
621
  break
560
622
 
561
- # Get remote URL (only domain, not full URL)
623
+ ##
624
+
562
625
  remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
563
626
  capture_output=True, text=True)
564
627
 
565
628
  remote_url = remote.stdout.strip() if remote.stdout else None
566
629
 
567
630
  if remote_url:
568
- # Clean the remote URL string
631
+ ##
632
+
569
633
  remote_url = remote_url.strip()
570
634
 
571
- # Properly extract domain without authentication information
635
+ ##
636
+
572
637
  domain = None
573
638
  if remote_url:
574
- # For SSH URLs (git@github.com:user/repo.git)
639
+ ##
640
+
575
641
  if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
576
642
  domain = remote_url.split('@')[1].split(':')[0]
577
- # For HTTPS URLs with or without authentication
643
+ ##
644
+
578
645
  elif remote_url.startswith('http'):
579
- # Remove authentication part if present
646
+ ##
647
+
580
648
  url_parts = remote_url.split('//')
581
649
  if len(url_parts) > 1:
582
650
  auth_and_domain = url_parts[1].split('/', 1)[0]
583
- # If auth info exists (contains @), take only domain part
651
+ ##
652
+
584
653
  if '@' in auth_and_domain:
585
654
  domain = auth_and_domain.split('@')[-1]
586
655
  else:
587
656
  domain = auth_and_domain
588
- # Handle other URL formats
657
+ ##
658
+
589
659
  else:
590
- # Try a general regex as fallback for unusual formats
660
+ ##
661
+
591
662
  import re
592
663
  domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
593
664
  if domain_match:
594
665
  domain = domain_match.group(1) or domain_match.group(2)
595
666
 
596
- # Extract owner and repo info securely
667
+ ##
668
+
597
669
  owner = None
598
670
  repo_name = None
599
671
 
600
672
  if domain:
601
- # For GitHub repositories
673
+ ##
674
+
602
675
  if "github" in domain:
603
- # SSH format: git@github.com:username/repo.git
676
+ ##
677
+
604
678
  if ':' in remote_url and '@' in remote_url:
605
679
  parts = remote_url.split(':')[-1].split('/')
606
680
  if len(parts) >= 2:
607
681
  owner = parts[0]
608
682
  repo_name = parts[1].replace('.git', '')
609
- # HTTPS format
683
+ ##
684
+
610
685
  else:
611
686
  url_parts = remote_url.split('//')
612
687
  if len(url_parts) > 1:
613
688
  path_parts = url_parts[1].split('/')
614
689
  if len(path_parts) >= 3:
615
- # Skip domain and authentication part
690
+ ##
691
+
616
692
  domain_part = path_parts[0]
617
693
  if '@' in domain_part:
618
- # Path starts after domain
694
+ ##
695
+
619
696
  owner_index = 1
620
697
  else:
621
698
  owner_index = 1
@@ -625,18 +702,22 @@ class Inspector:
625
702
  if len(path_parts) > owner_index + 1:
626
703
  repo_name = path_parts[owner_index + 1].replace('.git', '')
627
704
 
628
- # Get commit count
705
+ ##
706
+
629
707
  commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
630
708
  capture_output=True, text=True)
631
709
 
632
- # Get branch count
710
+ ##
711
+
633
712
  branch_count = subprocess.run(["git", "branch", "--list"],
634
713
  capture_output=True, text=True)
635
714
  branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
636
715
 
637
716
  return {
638
- "domain": domain, # Only domain, not full URL
639
- "owner": owner, # Repository owner (for GitHub)
717
+ "domain": domain, ##
718
+
719
+ "owner": owner, ##
720
+
640
721
  "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
641
722
  "branch_count": branch_count,
642
723
  "has_git": True,
@@ -647,17 +728,20 @@ class Inspector:
647
728
  }
648
729
 
649
730
  except Exception as e:
650
- # Optionally log the exception for debugging
731
+ ##
732
+
651
733
  pass
652
734
  return {"has_git": False}
653
735
 
654
736
 
655
737
  def detect_usage_pattern(self):
656
- """Detect usage patterns that indicate commercial use"""
738
+ #--
657
739
  current_time = datetime.now()
658
740
 
659
- # Check if using during business hours
660
- is_weekday = current_time.weekday() < 5 # 0-4 are Monday to Friday
741
+ ##
742
+
743
+ is_weekday = current_time.weekday() < 5 ##
744
+
661
745
  hour = current_time.hour
662
746
  is_business_hours = 9 <= hour <= 18
663
747
 
@@ -669,14 +753,16 @@ class Inspector:
669
753
  }
670
754
 
671
755
  def enhanced_commercial_detection(self):
672
- """More thorough commercial usage detection"""
756
+ #--
673
757
  basic = self.detect_commercial_usage()
674
758
 
675
- # Additional commercial indicators
759
+ ##
760
+
676
761
  try:
677
762
  project_files = os.listdir(os.getcwd())
678
763
 
679
- # Look for commercial frameworks
764
+ ##
765
+
680
766
  commercial_frameworks = ["django-oscar", "opencart", "magento",
681
767
  "saleor", "odoo", "shopify", "woocommerce"]
682
768
 
@@ -686,7 +772,8 @@ class Inspector:
686
772
  framework_match = True
687
773
  break
688
774
 
689
- # Check for database connections
775
+ ##
776
+
690
777
  db_files = [f for f in project_files if "database" in f.lower()
691
778
  or "db_config" in f.lower() or f.endswith(".db")]
692
779
  has_database = len(db_files) > 0
@@ -694,17 +781,20 @@ class Inspector:
694
781
  framework_match = False
695
782
  has_database = False
696
783
 
697
- # Domain name registration check
784
+ ##
785
+
698
786
  domain_check = self.analyze_git_info()
699
787
  domain_is_commercial = False
700
788
  if domain_check and domain_check.get("domain"):
701
789
  commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
702
790
  domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
703
791
 
704
- # Check project structure
792
+ ##
793
+
705
794
  project_structure = self.analyze_project_structure()
706
795
 
707
- # Calculate enhanced commercial score
796
+ ##
797
+
708
798
  indicators = [
709
799
  basic["commercial_probability"],
710
800
  framework_match,
@@ -714,10 +804,12 @@ class Inspector:
714
804
  self.detect_usage_pattern()["business_hours_usage"]
715
805
  ]
716
806
 
717
- # Filter out None values
807
+ ##
808
+
718
809
  indicators = [i for i in indicators if i is not None]
719
810
 
720
- # Calculate score - convert booleans to 1.0 and average
811
+ ##
812
+
721
813
  if indicators:
722
814
  score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
723
815
  for i in indicators) / len(indicators)
@@ -738,18 +830,20 @@ class Inspector:
738
830
  }
739
831
 
740
832
  def analyze_dependencies(self):
741
- """Analyze package dependencies for commercial patterns"""
833
+ #--
742
834
  try:
743
835
  import pkg_resources
744
836
 
745
- # Commercial/enterprise package indicators
837
+ ##
838
+
746
839
  enterprise_packages = [
747
840
  "snowflake-connector-python", "databricks", "azure",
748
841
  "aws", "google-cloud", "stripe", "atlassian",
749
842
  "salesforce", "bigquery", "tableau", "sap"
750
843
  ]
751
844
 
752
- # Find installed packages that match enterprise indicators
845
+ ##
846
+
753
847
  commercial_deps = []
754
848
  for pkg in pkg_resources.working_set:
755
849
  if any(ent in pkg.key for ent in enterprise_packages):
@@ -763,5 +857,6 @@ class Inspector:
763
857
  except:
764
858
  return {"has_commercial_deps": False}
765
859
 
766
- # Create singleton instance
860
+ ##
861
+
767
862
  inspector = Inspector()