vnai 2.0.3__py3-none-any.whl → 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vnai/scope/profile.py CHANGED
@@ -1,7 +1,5 @@
1
- ##
2
-
3
- ##
4
-
1
+ # vnai/scope/profile.py
2
+ # System environment detection
5
3
 
6
4
  import os
7
5
  import sys
@@ -17,7 +15,7 @@ import subprocess
17
15
  from pathlib import Path
18
16
 
19
17
  class Inspector:
20
- #--
18
+ """Inspects execution environment"""
21
19
 
22
20
  _instance = None
23
21
  _lock = None
@@ -34,16 +32,14 @@ class Inspector:
34
32
  return cls._instance
35
33
 
36
34
  def _initialize(self):
37
- #--
35
+ """Initialize inspector"""
38
36
  self.cache = {}
39
- self.cache_ttl = 3600 ##
40
-
37
+ self.cache_ttl = 3600 # 1 hour cache validity
41
38
  self.last_examination = 0
42
39
  self.machine_id = None
43
40
  self._colab_auth_triggered = False
44
41
 
45
- ##
46
-
42
+ # Paths
47
43
  self.home_dir = Path.home()
48
44
  self.project_dir = self.home_dir / ".vnstock"
49
45
  self.project_dir.mkdir(exist_ok=True)
@@ -51,21 +47,18 @@ class Inspector:
51
47
  self.id_dir.mkdir(exist_ok=True)
52
48
  self.machine_id_path = self.id_dir / "machine_id.txt"
53
49
 
54
- ##
55
-
50
+ # Perform initial examination
56
51
  self.examine()
57
52
 
58
53
  def examine(self, force_refresh=False):
59
- #--
54
+ """Examine current execution context"""
60
55
  current_time = time.time()
61
56
 
62
- ##
63
-
57
+ # Return cached data if it's fresh enough and we're not forcing a refresh
64
58
  if not force_refresh and (current_time - self.last_examination) < self.cache_ttl:
65
59
  return self.cache
66
60
 
67
- ##
68
-
61
+ # Start with basic information
69
62
  info = {
70
63
  "timestamp": datetime.now().isoformat(),
71
64
  "python_version": platform.python_version(),
@@ -73,15 +66,12 @@ class Inspector:
73
66
  "platform": platform.platform()
74
67
  }
75
68
 
76
- ##
77
-
69
+ # Machine identifier
78
70
  info["machine_id"] = self.fingerprint()
79
71
 
80
- ##
81
-
72
+ # Environment detection
82
73
  try:
83
- ##
84
-
74
+ # Check for Jupyter/IPython
85
75
  import importlib.util
86
76
  ipython_spec = importlib.util.find_spec("IPython")
87
77
 
@@ -90,8 +80,7 @@ class Inspector:
90
80
  ipython = get_ipython()
91
81
  if ipython is not None:
92
82
  info["environment"] = "jupyter"
93
- ##
94
-
83
+ # Check for hosted notebooks
95
84
  if 'google.colab' in sys.modules:
96
85
  info["hosting_service"] = "colab"
97
86
  elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
@@ -109,66 +98,53 @@ class Inspector:
109
98
  except:
110
99
  info["environment"] = "unknown"
111
100
 
112
- ##
113
-
101
+ # System resources
114
102
  try:
115
103
  info["cpu_count"] = os.cpu_count()
116
104
  info["memory_gb"] = round(psutil.virtual_memory().total / (1024**3), 1)
117
105
  except:
118
106
  pass
119
107
 
120
- ##
121
-
108
+ # Check if in Google Colab
122
109
  is_colab = 'google.colab' in sys.modules
123
110
  if is_colab:
124
111
  info["is_colab"] = True
125
- ##
126
-
112
+ # Setup delayed authentication if not already triggered
127
113
  self.detect_colab_with_delayed_auth()
128
114
 
129
- ##
130
-
115
+ # Enhanced context information
131
116
  try:
132
- ##
133
-
117
+ # Commercial usage detection
134
118
  info["commercial_usage"] = self.enhanced_commercial_detection()
135
119
 
136
- ##
137
-
120
+ # Project context
138
121
  info["project_context"] = self.analyze_project_structure()
139
122
 
140
- ##
141
-
123
+ # Git info
142
124
  info["git_info"] = self.analyze_git_info()
143
125
 
144
- ##
145
-
126
+ # Working hours pattern
146
127
  info["usage_pattern"] = self.detect_usage_pattern()
147
128
 
148
- ##
149
-
129
+ # Dependency analysis
150
130
  info["dependencies"] = self.analyze_dependencies()
151
131
  except Exception as e:
152
- ##
153
-
132
+ # Don't let enhanced detection failure stop basic functionality
154
133
  info["detection_error"] = str(e)
155
134
 
156
- ##
157
-
135
+ # Update cache
158
136
  self.cache = info
159
137
  self.last_examination = current_time
160
138
 
161
139
  return info
162
140
 
163
141
  def fingerprint(self):
164
- #--
165
- ##
166
-
142
+ """Generate unique environment fingerprint"""
143
+ # Always return cached machine_id if it exists
167
144
  if self.machine_id:
168
145
  return self.machine_id
169
146
 
170
- ##
171
-
147
+ # Try to load from file first
172
148
  if self.machine_id_path.exists():
173
149
  try:
174
150
  with open(self.machine_id_path, "r") as f:
@@ -177,24 +153,19 @@ class Inspector:
177
153
  except:
178
154
  pass
179
155
 
180
- ##
181
-
156
+ # Check for Colab and setup delayed authentication
182
157
  is_colab = self.detect_colab_with_delayed_auth()
183
158
 
184
- ##
185
-
159
+ # Generate a new machine ID only if necessary
186
160
  try:
187
- ##
188
-
161
+ # Use consistent system information
189
162
  system_info = platform.node() + platform.platform() + platform.machine()
190
163
  self.machine_id = hashlib.md5(system_info.encode()).hexdigest()
191
164
  except:
192
- ##
193
-
165
+ # Fallback to UUID but only as last resort
194
166
  self.machine_id = str(uuid.uuid4())
195
167
 
196
- ##
197
-
168
+ # Save to ensure consistency across calls
198
169
  try:
199
170
  with open(self.machine_id_path, "w") as f:
200
171
  f.write(self.machine_id)
@@ -204,9 +175,8 @@ class Inspector:
204
175
  return self.machine_id
205
176
 
206
177
  def detect_hosting(self):
207
- #--
208
- ##
209
-
178
+ """Detect if running in a hosted environment"""
179
+ # Check common environment variables for hosted environments
210
180
  hosting_markers = {
211
181
  "COLAB_GPU": "Google Colab",
212
182
  "KAGGLE_KERNEL_RUN_TYPE": "Kaggle",
@@ -220,15 +190,14 @@ class Inspector:
220
190
  if env_var in os.environ:
221
191
  return host_name
222
192
 
223
- ##
224
-
193
+ # Check for Google Colab module
225
194
  if 'google.colab' in sys.modules:
226
195
  return "Google Colab"
227
196
 
228
197
  return "local"
229
198
 
230
199
  def detect_commercial_usage(self):
231
- #--
200
+ """Detect if running in commercial environment"""
232
201
  commercial_indicators = {
233
202
  "env_domains": [".com", ".io", ".co", "enterprise", "corp", "inc"],
234
203
  "file_patterns": ["invoice", "payment", "customer", "client", "product", "sale"],
@@ -236,30 +205,25 @@ class Inspector:
236
205
  "dir_patterns": ["company", "business", "enterprise", "corporate", "client"]
237
206
  }
238
207
 
239
- ##
240
-
208
+ # Check environment variables for commercial domains
241
209
  env_values = " ".join(os.environ.values()).lower()
242
210
  domain_match = any(domain in env_values for domain in commercial_indicators["env_domains"])
243
211
 
244
- ##
245
-
212
+ # Check if commercial-related environment variables exist
246
213
  env_var_match = any(var in os.environ for var in commercial_indicators["env_vars"])
247
214
 
248
- ##
249
-
215
+ # Check current directory for commercial indicators
250
216
  current_dir = os.getcwd().lower()
251
217
  dir_match = any(pattern in current_dir for pattern in commercial_indicators["dir_patterns"])
252
218
 
253
- ##
254
-
219
+ # Check files in current directory for commercial patterns
255
220
  try:
256
221
  files = [f.lower() for f in os.listdir() if os.path.isfile(f)]
257
222
  file_match = any(any(pattern in f for pattern in commercial_indicators["file_patterns"]) for f in files)
258
223
  except:
259
224
  file_match = False
260
225
 
261
- ##
262
-
226
+ # Calculate probability
263
227
  indicators = [domain_match, env_var_match, dir_match, file_match]
264
228
  commercial_probability = sum(indicators) / len(indicators)
265
229
 
@@ -275,14 +239,13 @@ class Inspector:
275
239
  }
276
240
 
277
241
  def scan_packages(self):
278
- #--
242
+ """Scan for installed packages by category"""
279
243
  package_groups = {
280
244
  "vnstock_family": [
281
245
  "vnstock",
282
246
  "vnstock3",
283
247
  "vnstock_ezchart",
284
- "vnstock_data_pro", ##
285
-
248
+ "vnstock_data_pro", # Fixed missing comma here
286
249
  "vnstock_market_data_pipeline",
287
250
  "vnstock_ta",
288
251
  "vnii",
@@ -388,9 +351,8 @@ class Inspector:
388
351
  return installed
389
352
 
390
353
  def setup_vnstock_environment(self):
391
- #--
392
- ##
393
-
354
+ """Set up environment for vnstock library"""
355
+ # Create environment.json file
394
356
  env_file = self.id_dir / "environment.json"
395
357
  env_data = {
396
358
  "accepted_agreement": True,
@@ -407,18 +369,15 @@ class Inspector:
407
369
  print(f"Failed to set up vnstock environment: {e}")
408
370
  return False
409
371
 
410
- ##
411
-
372
+ # Colab detection: trigger authentication immediately or from a delayed background thread
412
373
  def detect_colab_with_delayed_auth(self, immediate=False):
413
- #--
414
- ##
415
-
374
+ """Detect if running in Google Colab and setup authentication"""
375
+ # Check if we're in Colab without mounting drive yet
416
376
  is_colab = 'google.colab' in sys.modules
417
377
 
418
378
  if is_colab and not self._colab_auth_triggered:
419
379
  if immediate:
420
- ##
421
-
380
+ # Immediate authentication
422
381
  self._colab_auth_triggered = True
423
382
  user_id = self.get_or_create_user_id()
424
383
  if user_id and user_id != self.machine_id:
@@ -429,51 +388,42 @@ class Inspector:
429
388
  except:
430
389
  pass
431
390
  else:
432
- ##
433
-
391
+ # Start a delayed thread to trigger authentication after user is already using the package
434
392
  def delayed_auth():
435
- ##
436
-
393
+ # Wait 5 minutes (300 seconds) before attempting auth
437
394
  time.sleep(300)
438
- ##
439
-
395
+ # Try to get authenticated user ID
440
396
  user_id = self.get_or_create_user_id()
441
- ##
442
-
397
+ # Update machine ID with the authenticated one
443
398
  if user_id and user_id != self.machine_id:
444
399
  self.machine_id = user_id
445
- ##
446
-
400
+ # Save to the machine_id_path
447
401
  try:
448
402
  with open(self.machine_id_path, "w") as f:
449
403
  f.write(user_id)
450
404
  except:
451
405
  pass
452
406
 
453
- ##
454
-
407
+ # Start the delayed authentication thread
455
408
  thread = threading.Thread(target=delayed_auth, daemon=True)
456
409
  thread.start()
457
410
 
458
411
  return is_colab
459
412
 
460
413
  def get_or_create_user_id(self):
461
- #--
414
+ """Get existing user ID from Google Drive or create new one"""
462
415
  if self._colab_auth_triggered:
463
- return self.machine_id ##
464
-
416
+ return self.machine_id # Avoid triggering multiple times
465
417
 
466
418
  try:
467
419
  from google.colab import drive
468
420
  print("\n📋 Kết nối tài khoản Google Drive để lưu các thiết lập của dự án.")
469
421
  print("Dữ liệu phiên làm việc với Colab của bạn sẽ bị xóa nếu không lưu trữ vào Google Drive.\n")
470
422
 
471
- ##
472
-
423
+ # Mark that we've triggered the auth
473
424
  self._colab_auth_triggered = True
474
425
 
475
- ##
476
-
426
+ # Mount Google Drive
477
427
  drive.mount('/content/drive')
478
428
  id_path = '/content/drive/MyDrive/.vnstock/user_id.txt'
479
429
 
@@ -487,15 +437,13 @@ class Inspector:
487
437
  f.write(user_id)
488
438
  return user_id
489
439
  except Exception as e:
490
- ##
491
-
440
+ # Silently fail and return the existing machine ID
492
441
  return self.machine_id
493
442
 
494
- ##
495
-
443
+ # Enhanced methods for project context collection
496
444
 
497
445
  def analyze_project_structure(self):
498
- #--
446
+ """Analyze project directory structure for context"""
499
447
  current_dir = os.getcwd()
500
448
  project_indicators = {
501
449
  "commercial_app": ["app", "services", "products", "customers", "billing"],
@@ -504,8 +452,7 @@ class Inspector:
504
452
  "educational": ["examples", "lectures", "assignments", "slides"]
505
453
  }
506
454
 
507
- ##
508
-
455
+ # Look for key directories up to 2 levels deep (limited for privacy)
509
456
  project_type = {}
510
457
  for category, markers in project_indicators.items():
511
458
  match_count = 0
@@ -515,14 +462,12 @@ class Inspector:
515
462
  if len(markers) > 0:
516
463
  project_type[category] = match_count / len(markers)
517
464
 
518
- ##
519
-
465
+ # Scan for direct child files and directories (limited depth for privacy)
520
466
  try:
521
467
  root_files = [f for f in os.listdir(current_dir) if os.path.isfile(os.path.join(current_dir, f))]
522
468
  root_dirs = [d for d in os.listdir(current_dir) if os.path.isdir(os.path.join(current_dir, d))]
523
469
 
524
- ##
525
-
470
+ # Detect project type
526
471
  file_markers = {
527
472
  "python_project": ["setup.py", "pyproject.toml", "requirements.txt"],
528
473
  "data_science": ["notebook.ipynb", ".ipynb_checkpoints"],
@@ -536,8 +481,7 @@ class Inspector:
536
481
  file_project_type = ptype
537
482
  break
538
483
 
539
- ##
540
-
484
+ # Scan for specific frameworks
541
485
  frameworks = []
542
486
  framework_markers = {
543
487
  "django": ["manage.py", "settings.py"],
@@ -568,28 +512,24 @@ class Inspector:
568
512
  }
569
513
 
570
514
  def analyze_git_info(self):
571
- #--
515
+ """Extract non-sensitive git repository information"""
572
516
  try:
573
- ##
574
-
517
+ # Check if it's a git repository
575
518
  result = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"],
576
519
  capture_output=True, text=True)
577
520
 
578
521
  if result.returncode != 0:
579
522
  return {"has_git": False}
580
523
 
581
- ##
582
-
524
+ # Get repository root path
583
525
  repo_root = subprocess.run(["git", "rev-parse", "--show-toplevel"],
584
526
  capture_output=True, text=True)
585
527
  repo_path = repo_root.stdout.strip() if repo_root.stdout else None
586
528
 
587
- ##
588
-
529
+ # Extract repository name from path
589
530
  repo_name = os.path.basename(repo_path) if repo_path else None
590
531
 
591
- ##
592
-
532
+ # Check for license file
593
533
  has_license = False
594
534
  license_type = "unknown"
595
535
  if repo_path:
@@ -601,8 +541,7 @@ class Inspector:
601
541
  for license_file in license_files:
602
542
  if os.path.exists(license_file):
603
543
  has_license = True
604
- ##
605
-
544
+ # Try to determine license type by scanning content
606
545
  try:
607
546
  with open(license_file, 'r') as f:
608
547
  content = f.read().lower()
@@ -614,85 +553,69 @@ class Inspector:
614
553
  license_type = "GPL"
615
554
  elif "bsd " in content:
616
555
  license_type = "BSD"
617
- ##
618
-
556
+ # Add more license type detection as needed
619
557
  except:
620
558
  pass
621
559
  break
622
560
 
623
- ##
624
-
561
+ # Get remote URL (used below to extract the domain)
625
562
  remote = subprocess.run(["git", "config", "--get", "remote.origin.url"],
626
563
  capture_output=True, text=True)
627
564
 
628
565
  remote_url = remote.stdout.strip() if remote.stdout else None
629
566
 
630
567
  if remote_url:
631
- ##
632
-
568
+ # Clean the remote URL string
633
569
  remote_url = remote_url.strip()
634
570
 
635
- ##
636
-
571
+ # Properly extract domain without authentication information
637
572
  domain = None
638
573
  if remote_url:
639
- ##
640
-
574
+ # For SSH URLs (git@github.com:user/repo.git)
641
575
  if remote_url.startswith('git@') or '@' in remote_url and ':' in remote_url.split('@')[1]:
642
576
  domain = remote_url.split('@')[1].split(':')[0]
643
- ##
644
-
577
+ # For HTTPS URLs with or without authentication
645
578
  elif remote_url.startswith('http'):
646
- ##
647
-
579
+ # Remove authentication part if present
648
580
  url_parts = remote_url.split('//')
649
581
  if len(url_parts) > 1:
650
582
  auth_and_domain = url_parts[1].split('/', 1)[0]
651
- ##
652
-
583
+ # If auth info exists (contains @), take only domain part
653
584
  if '@' in auth_and_domain:
654
585
  domain = auth_and_domain.split('@')[-1]
655
586
  else:
656
587
  domain = auth_and_domain
657
- ##
658
-
588
+ # Handle other URL formats
659
589
  else:
660
- ##
661
-
590
+ # Try a general regex as fallback for unusual formats
662
591
  import re
663
592
  domain_match = re.search(r'@([^:/]+)|https?://(?:[^@/]+@)?([^/]+)', remote_url)
664
593
  if domain_match:
665
594
  domain = domain_match.group(1) or domain_match.group(2)
666
595
 
667
- ##
668
-
596
+ # Extract owner and repo info securely
669
597
  owner = None
670
598
  repo_name = None
671
599
 
672
600
  if domain:
673
- ##
674
-
601
+ # For GitHub repositories
675
602
  if "github" in domain:
676
- ##
677
-
603
+ # SSH format: git@github.com:username/repo.git
678
604
  if ':' in remote_url and '@' in remote_url:
679
605
  parts = remote_url.split(':')[-1].split('/')
680
606
  if len(parts) >= 2:
681
607
  owner = parts[0]
682
608
  repo_name = parts[1].replace('.git', '')
683
- ##
684
-
609
+ # HTTPS format
685
610
  else:
686
611
  url_parts = remote_url.split('//')
687
612
  if len(url_parts) > 1:
688
613
  path_parts = url_parts[1].split('/')
689
614
  if len(path_parts) >= 3:
690
- ##
691
-
615
+ # Skip domain and authentication part
692
616
  domain_part = path_parts[0]
693
617
  if '@' in domain_part:
694
- ##
695
-
618
+ # Path starts after domain
696
619
  owner_index = 1
697
620
  else:
698
621
  owner_index = 1
@@ -702,22 +625,18 @@ class Inspector:
702
625
  if len(path_parts) > owner_index + 1:
703
626
  repo_name = path_parts[owner_index + 1].replace('.git', '')
704
627
 
705
- ##
706
-
628
+ # Get commit count
707
629
  commit_count = subprocess.run(["git", "rev-list", "--count", "HEAD"],
708
630
  capture_output=True, text=True)
709
631
 
710
- ##
711
-
632
+ # Get branch count
712
633
  branch_count = subprocess.run(["git", "branch", "--list"],
713
634
  capture_output=True, text=True)
714
635
  branch_count = len(branch_count.stdout.strip().split('\n')) if branch_count.stdout else 0
715
636
 
716
637
  return {
717
- "domain": domain, ##
718
-
719
- "owner": owner, ##
720
-
638
+ "domain": domain, # Only domain, not full URL
639
+ "owner": owner, # Repository owner (for GitHub)
721
640
  "commit_count": int(commit_count.stdout.strip()) if commit_count.stdout else 0,
722
641
  "branch_count": branch_count,
723
642
  "has_git": True,
@@ -728,20 +647,16 @@ class Inspector:
728
647
  }
729
648
 
730
649
  except Exception as e:
731
- ##
732
-
650
+ # Optionally log the exception for debugging
733
651
  pass
734
652
  return {"has_git": False}
735
653
 
736
-
737
654
  def detect_usage_pattern(self):
738
- #--
655
+ """Detect usage patterns that indicate commercial use"""
739
656
  current_time = datetime.now()
740
657
 
741
- ##
742
-
743
- is_weekday = current_time.weekday() < 5 ##
744
-
658
+ # Check if using during business hours
659
+ is_weekday = current_time.weekday() < 5 # 0-4 are Monday to Friday
745
660
  hour = current_time.hour
746
661
  is_business_hours = 9 <= hour <= 18
747
662
 
@@ -753,16 +668,14 @@ class Inspector:
753
668
  }
754
669
 
755
670
  def enhanced_commercial_detection(self):
756
- #--
671
+ """More thorough commercial usage detection"""
757
672
  basic = self.detect_commercial_usage()
758
673
 
759
- ##
760
-
674
+ # Additional commercial indicators
761
675
  try:
762
676
  project_files = os.listdir(os.getcwd())
763
677
 
764
- ##
765
-
678
+ # Look for commercial frameworks
766
679
  commercial_frameworks = ["django-oscar", "opencart", "magento",
767
680
  "saleor", "odoo", "shopify", "woocommerce"]
768
681
 
@@ -772,8 +685,7 @@ class Inspector:
772
685
  framework_match = True
773
686
  break
774
687
 
775
- ##
776
-
688
+ # Check for database connections
777
689
  db_files = [f for f in project_files if "database" in f.lower()
778
690
  or "db_config" in f.lower() or f.endswith(".db")]
779
691
  has_database = len(db_files) > 0
@@ -781,20 +693,17 @@ class Inspector:
781
693
  framework_match = False
782
694
  has_database = False
783
695
 
784
- ##
785
-
696
+ # Check whether the git remote domain contains a commercial-looking TLD
786
697
  domain_check = self.analyze_git_info()
787
698
  domain_is_commercial = False
788
699
  if domain_check and domain_check.get("domain"):
789
700
  commercial_tlds = [".com", ".io", ".co", ".org", ".net"]
790
701
  domain_is_commercial = any(tld in domain_check["domain"] for tld in commercial_tlds)
791
702
 
792
- ##
793
-
703
+ # Check project structure
794
704
  project_structure = self.analyze_project_structure()
795
705
 
796
- ##
797
-
706
+ # Calculate enhanced commercial score
798
707
  indicators = [
799
708
  basic["commercial_probability"],
800
709
  framework_match,
@@ -804,12 +713,10 @@ class Inspector:
804
713
  self.detect_usage_pattern()["business_hours_usage"]
805
714
  ]
806
715
 
807
- ##
808
-
716
+ # Filter out None values
809
717
  indicators = [i for i in indicators if i is not None]
810
718
 
811
- ##
812
-
719
+ # Calculate score - convert booleans to 1.0 and average
813
720
  if indicators:
814
721
  score = sum(1.0 if isinstance(i, bool) and i else (i if isinstance(i, (int, float)) else 0)
815
722
  for i in indicators) / len(indicators)
@@ -830,20 +737,18 @@ class Inspector:
830
737
  }
831
738
 
832
739
  def analyze_dependencies(self):
833
- #--
740
+ """Analyze package dependencies for commercial patterns"""
834
741
  try:
835
742
  import pkg_resources
836
743
 
837
- ##
838
-
744
+ # Commercial/enterprise package indicators
839
745
  enterprise_packages = [
840
746
  "snowflake-connector-python", "databricks", "azure",
841
747
  "aws", "google-cloud", "stripe", "atlassian",
842
748
  "salesforce", "bigquery", "tableau", "sap"
843
749
  ]
844
750
 
845
- ##
846
-
751
+ # Find installed packages that match enterprise indicators
847
752
  commercial_deps = []
848
753
  for pkg in pkg_resources.working_set:
849
754
  if any(ent in pkg.key for ent in enterprise_packages):
@@ -857,6 +762,5 @@ class Inspector:
857
762
  except:
858
763
  return {"has_commercial_deps": False}
859
764
 
860
- ##
861
-
765
+ # Create singleton instance
862
766
  inspector = Inspector()