sunholo 0.123.4__py3-none-any.whl → 0.124.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,6 @@ from .uuid import generate_uuid_from_object_id
14
14
  from ..custom_logging import log
15
15
  from ..utils import ConfigManager
16
16
  from ..components import get_embeddings
17
-
18
17
  class AlloyDBClient:
19
18
  """
20
19
  A class to manage interactions with an AlloyDB instance.
@@ -530,9 +529,16 @@ class AlloyDBClient:
530
529
  bool: True if connection is valid, False otherwise
531
530
  """
532
531
  try:
533
- # Simple query to check connection
534
- _ = await self.execute_sql_async("SELECT 1")
535
- return True
532
+ # For pg8000 engine, use synchronous connection
533
+ if self.engine_type == "pg8000":
534
+ # Use direct synchronous query
535
+ with self.engine.connect() as conn:
536
+ conn.execute(sqlalchemy.text("SELECT 1"))
537
+ return True
538
+ else:
539
+ # For langchain, use async connection
540
+ await self._execute_sql_async_langchain("SELECT 1")
541
+ return True
536
542
  except Exception as e:
537
543
  log.warning(f"Database connection check failed: {e}")
538
544
  return False
@@ -1,5 +1,9 @@
1
1
 
2
2
  import datetime
3
+ import re
4
+
5
+ from ..utils.mime import guess_mime_type
6
+
3
7
  from ..custom_logging import log
4
8
 
5
9
  def audit_metadata(metadata, chunk_length=None):
@@ -8,6 +12,24 @@ def audit_metadata(metadata, chunk_length=None):
8
12
  metadata['eventTime'] = datetime.datetime.now().isoformat(timespec='microseconds') + "Z"
9
13
  metadata['eventtime'] = metadata['eventTime']
10
14
 
15
+ # Extract time-based dimensions from eventTime
16
+ try:
17
+ # Handle timestamps in ISO format with Z suffix
18
+ event_time_str = metadata['eventTime']
19
+ if event_time_str.endswith('Z'):
20
+ event_time_str = event_time_str[:-1] # Remove the Z suffix
21
+
22
+ event_time = datetime.datetime.fromisoformat(event_time_str)
23
+
24
+ # Add year dimension (e.g., 2025)
25
+ metadata['year'] = str(event_time.year)
26
+ # Add yearMonth dimension (e.g., 2025-03)
27
+ metadata['yearMonth'] = f"{event_time.year}-{event_time.month:02d}"
28
+ # Add month dimension (e.g., 03)
29
+ metadata['month'] = f"{event_time.month:02d}"
30
+ except (ValueError, TypeError) as e:
31
+ log.warning(f"Could not parse eventTime for time dimensions: {metadata['eventTime']}, error: {e}")
32
+
11
33
  if 'source' not in metadata:
12
34
  if 'objectId' in metadata:
13
35
  metadata['source'] = metadata['objectId']
@@ -23,5 +45,109 @@ def audit_metadata(metadata, chunk_length=None):
23
45
 
24
46
  if 'chunk_length' not in metadata:
25
47
  metadata['chunk_length'] = chunk_length
48
+
49
+ # Extract folder paths from source field
50
+ if 'source' in metadata and metadata['source']:
51
+ source_path = metadata['source']
52
+
53
+ metadata['mime_type'] = guess_mime_type(source_path)
54
+
55
+ # Extract file extension
56
+ if '.' in source_path.split('/')[-1]:
57
+ file_extension = source_path.split('/')[-1].split('.')[-1].lower()
58
+ metadata['file_extension'] = file_extension
59
+
60
+ # Add file type category
61
+ if file_extension in ['pdf', 'doc', 'docx', 'txt', 'rtf', 'odt']:
62
+ metadata['file_type'] = 'document'
63
+ elif file_extension in ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']:
64
+ metadata['file_type'] = 'image'
65
+ elif file_extension in ['mp3', 'wav', 'ogg', 'flac', 'm4a']:
66
+ metadata['file_type'] = 'audio'
67
+ elif file_extension in ['mp4', 'avi', 'mov', 'wmv', 'mkv', 'webm']:
68
+ metadata['file_type'] = 'video'
69
+ elif file_extension in ['xls', 'xlsx', 'csv']:
70
+ metadata['file_type'] = 'spreadsheet'
71
+ elif file_extension in ['ppt', 'pptx']:
72
+ metadata['file_type'] = 'presentation'
73
+ elif file_extension in ['zip', 'rar', 'tar', 'gz', '7z']:
74
+ metadata['file_type'] = 'archive'
75
+ elif file_extension in ['html', 'htm', 'xml', 'json', 'yaml', 'yml']:
76
+ metadata['file_type'] = 'markup'
77
+ elif file_extension in ['py', 'js', 'java', 'c', 'cpp', 'cs', 'go', 'rb', 'php']:
78
+ metadata['file_type'] = 'code'
79
+ else:
80
+ metadata['file_type'] = 'other'
81
+
82
+ # Check if the source looks like a GCS path
83
+ if source_path.startswith('gs://'):
84
+ # Remove the gs:// prefix
85
+ path_without_prefix = source_path[5:]
86
+
87
+ # Split the path into components
88
+ path_components = path_without_prefix.split('/')
89
+
90
+ # The first component is the bucket name
91
+ if len(path_components) > 0:
92
+ metadata['bucket_name'] = path_components[0]
93
+
94
+ # Extract up to 5 folder levels
95
+ for i in range(1, min(6, len(path_components))):
96
+ if i < len(path_components) - 1: # Skip the last component (filename)
97
+ folder_key = f'folder_{i}'
98
+ metadata[folder_key] = path_components[i]
99
+
100
+ # Extract the object name (last component)
101
+ if len(path_components) > 1:
102
+ metadata['object_name'] = path_components[-1]
103
+
104
+ # For other URL types, try to extract paths
105
+ elif re.match(r'^(http|https|s3|file)://', source_path):
106
+ # Extract path part after domain
107
+ match = re.search(r'://[^/]+/(.+)', source_path)
108
+ if match:
109
+ path_part = match.group(1)
110
+ path_components = path_part.split('/')
111
+
112
+ # Extract up to 5 folder levels
113
+ for i in range(0, min(5, len(path_components) - 1)):
114
+ folder_key = f'folder_{i+1}'
115
+ metadata[folder_key] = path_components[i]
116
+
117
+ # Extract the object name (last component)
118
+ if path_components:
119
+ metadata['object_name'] = path_components[-1]
120
+
121
+ # Add file size category if size exists
122
+ if 'size' in metadata and isinstance(metadata['size'], (int, float)):
123
+ size_bytes = metadata['size']
124
+ if size_bytes < 10 * 1024: # < 10KB
125
+ metadata['size_category'] = 'tiny'
126
+ elif size_bytes < 1024 * 1024: # < 1MB
127
+ metadata['size_category'] = 'small'
128
+ elif size_bytes < 10 * 1024 * 1024: # < 10MB
129
+ metadata['size_category'] = 'medium'
130
+ elif size_bytes < 100 * 1024 * 1024: # < 100MB
131
+ metadata['size_category'] = 'large'
132
+ else: # >= 100MB
133
+ metadata['size_category'] = 'very_large'
134
+
135
+ # Add day of week
136
+ try:
137
+ if 'eventTime' in metadata:
138
+ event_time_str = metadata['eventTime']
139
+ if event_time_str.endswith('Z'):
140
+ event_time_str = event_time_str[:-1]
141
+
142
+ event_time = datetime.datetime.fromisoformat(event_time_str)
143
+ weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
144
+ metadata['day_of_week'] = weekday_names[event_time.weekday()]
145
+
146
+ # Add quarter information
147
+ quarter = (event_time.month - 1) // 3 + 1
148
+ metadata['quarter'] = f"Q{quarter}"
149
+ metadata['yearQuarter'] = f"{event_time.year}-Q{quarter}"
150
+ except (ValueError, TypeError) as e:
151
+ log.warning(f"Could not extract additional time metadata: {e}")
26
152
 
27
153
  return metadata
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.123.4
3
+ Version: 0.124.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
60
60
  sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
61
61
  sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
62
62
  sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
63
- sunholo/database/alloydb_client.py,sha256=pppcmPx1liMmQSiKCdpNR6BLODbvEdICAQMz2EEjxnQ,27081
63
+ sunholo/database/alloydb_client.py,sha256=OCAi7Gopry7tiOOdjka-cldghFpxl6IXWWGEANmFVII,27414
64
64
  sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
65
65
  sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
66
66
  sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
@@ -79,7 +79,7 @@ sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGz
79
79
  sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
82
- sunholo/embedder/embed_metadata.py,sha256=2ziUIdVwnbCUU8gOwQWEvkrRcyp-7IeyZfSsWNkMquA,866
82
+ sunholo/embedder/embed_metadata.py,sha256=h9_L3Mkd7Mtnr8OwV4nNRrdSKoxhqh9LnSsht6j-vIY,6600
83
83
  sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
84
84
  sunholo/excel/plugin.py,sha256=TJJdcKWyqEIce1agCJImvqvNp2CvLhzi4wUmLYHcLc8,4032
85
85
  sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.123.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.123.4.dist-info/METADATA,sha256=iJAH2MBdmtJhWAoZmyMoVQZUGHs3Q8iuJYkC_JmRhSo,10001
173
- sunholo-0.123.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
- sunholo-0.123.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.123.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.123.4.dist-info/RECORD,,
171
+ sunholo-0.124.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.124.0.dist-info/METADATA,sha256=FDOT2K4fXDiUu5jZbW8q7ozxsEAaNX-YMJiKfnLI2rM,10001
173
+ sunholo-0.124.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
+ sunholo-0.124.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.124.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.124.0.dist-info/RECORD,,