PyPI - sunholo - Versions diffs - 0.123.4__py3-none-any.whl → 0.124.0__py3-none-any.whl - Mend

sunholo 0.123.4-py3-none-any.whl → 0.124.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

sunholo/database/alloydb_client.py CHANGED Viewed

@@ -14,7 +14,6 @@ from .uuid import generate_uuid_from_object_id
 from ..custom_logging import log
 from ..utils import ConfigManager
 from ..components import get_embeddings
 class AlloyDBClient:
     """
     A class to manage interactions with an AlloyDB instance.
@@ -530,9 +529,16 @@ class AlloyDBClient:
             bool: True if connection is valid, False otherwise
         """
         try:
-            # Simple query to check connection
-            _ = await self.execute_sql_async("SELECT 1")
-            return True
+            # For pg8000 engine, use synchronous connection
+            if self.engine_type == "pg8000":
+                # Use direct synchronous query
+                with self.engine.connect() as conn:
+                    conn.execute(sqlalchemy.text("SELECT 1"))
+                return True
+            else:
+                # For langchain, use async connection
+                await self._execute_sql_async_langchain("SELECT 1")
+                return True
         except Exception as e:
             log.warning(f"Database connection check failed: {e}")
             return False

sunholo/embedder/embed_metadata.py CHANGED Viewed

@@ -1,5 +1,9 @@
 import datetime
+import re
+from ..utils.mime import guess_mime_type
 from ..custom_logging import log
 def audit_metadata(metadata, chunk_length=None):
@@ -8,6 +12,24 @@ def audit_metadata(metadata, chunk_length=None):
         metadata['eventTime'] = datetime.datetime.now().isoformat(timespec='microseconds') + "Z"
     metadata['eventtime'] = metadata['eventTime']
+    # Extract time-based dimensions from eventTime
+    try:
+        # Handle timestamps in ISO format with Z suffix
+        event_time_str = metadata['eventTime']
+        if event_time_str.endswith('Z'):
+            event_time_str = event_time_str[:-1]  # Remove the Z suffix
+        event_time = datetime.datetime.fromisoformat(event_time_str)
+        # Add year dimension (e.g., 2025)
+        metadata['year'] = str(event_time.year)
+        # Add yearMonth dimension (e.g., 2025-03)
+        metadata['yearMonth'] = f"{event_time.year}-{event_time.month:02d}"
+        # Add month dimension (e.g., 03)
+        metadata['month'] = f"{event_time.month:02d}"
+    except (ValueError, TypeError) as e:
+        log.warning(f"Could not parse eventTime for time dimensions: {metadata['eventTime']}, error: {e}")
     if 'source' not in metadata:
         if 'objectId' in metadata:
             metadata['source'] = metadata['objectId']
@@ -23,5 +45,109 @@ def audit_metadata(metadata, chunk_length=None):
     if 'chunk_length' not in metadata:
         metadata['chunk_length'] = chunk_length
+     # Extract folder paths from source field
+    if 'source' in metadata and metadata['source']:
+        source_path = metadata['source']
+        metadata['mime_type'] = guess_mime_type(source_path)
+        # Extract file extension
+        if '.' in source_path.split('/')[-1]:
+            file_extension = source_path.split('/')[-1].split('.')[-1].lower()
+            metadata['file_extension'] = file_extension
+            # Add file type category
+            if file_extension in ['pdf', 'doc', 'docx', 'txt', 'rtf', 'odt']:
+                metadata['file_type'] = 'document'
+            elif file_extension in ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']:
+                metadata['file_type'] = 'image'
+            elif file_extension in ['mp3', 'wav', 'ogg', 'flac', 'm4a']:
+                metadata['file_type'] = 'audio'
+            elif file_extension in ['mp4', 'avi', 'mov', 'wmv', 'mkv', 'webm']:
+                metadata['file_type'] = 'video'
+            elif file_extension in ['xls', 'xlsx', 'csv']:
+                metadata['file_type'] = 'spreadsheet'
+            elif file_extension in ['ppt', 'pptx']:
+                metadata['file_type'] = 'presentation'
+            elif file_extension in ['zip', 'rar', 'tar', 'gz', '7z']:
+                metadata['file_type'] = 'archive'
+            elif file_extension in ['html', 'htm', 'xml', 'json', 'yaml', 'yml']:
+                metadata['file_type'] = 'markup'
+            elif file_extension in ['py', 'js', 'java', 'c', 'cpp', 'cs', 'go', 'rb', 'php']:
+                metadata['file_type'] = 'code'
+            else:
+                metadata['file_type'] = 'other'
+        # Check if the source looks like a GCS path
+        if source_path.startswith('gs://'):
+            # Remove the gs:// prefix
+            path_without_prefix = source_path[5:]
+            # Split the path into components
+            path_components = path_without_prefix.split('/')
+            # The first component is the bucket name
+            if len(path_components) > 0:
+                metadata['bucket_name'] = path_components[0]
+            # Extract up to 5 folder levels
+            for i in range(1, min(6, len(path_components))):
+                if i < len(path_components) - 1:  # Skip the last component (filename)
+                    folder_key = f'folder_{i}'
+                    metadata[folder_key] = path_components[i]
+            # Extract the object name (last component)
+            if len(path_components) > 1:
+                metadata['object_name'] = path_components[-1]
+        # For other URL types, try to extract paths
+        elif re.match(r'^(http|https|s3|file)://', source_path):
+            # Extract path part after domain
+            match = re.search(r'://[^/]+/(.+)', source_path)
+            if match:
+                path_part = match.group(1)
+                path_components = path_part.split('/')
+                # Extract up to 5 folder levels
+                for i in range(0, min(5, len(path_components) - 1)):
+                    folder_key = f'folder_{i+1}'
+                    metadata[folder_key] = path_components[i]
+                # Extract the object name (last component)
+                if path_components:
+                    metadata['object_name'] = path_components[-1]
+    # Add file size category if size exists
+    if 'size' in metadata and isinstance(metadata['size'], (int, float)):
+        size_bytes = metadata['size']
+        if size_bytes < 10 * 1024:  # < 10KB
+            metadata['size_category'] = 'tiny'
+        elif size_bytes < 1024 * 1024:  # < 1MB
+            metadata['size_category'] = 'small'
+        elif size_bytes < 10 * 1024 * 1024:  # < 10MB
+            metadata['size_category'] = 'medium'
+        elif size_bytes < 100 * 1024 * 1024:  # < 100MB
+            metadata['size_category'] = 'large'
+        else:  # >= 100MB
+            metadata['size_category'] = 'very_large'
+    # Add day of week
+    try:
+        if 'eventTime' in metadata:
+            event_time_str = metadata['eventTime']
+            if event_time_str.endswith('Z'):
+                event_time_str = event_time_str[:-1]
+            event_time = datetime.datetime.fromisoformat(event_time_str)
+            weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
+            metadata['day_of_week'] = weekday_names[event_time.weekday()]
+            # Add quarter information
+            quarter = (event_time.month - 1) // 3 + 1
+            metadata['quarter'] = f"Q{quarter}"
+            metadata['yearQuarter'] = f"{event_time.year}-Q{quarter}"
+    except (ValueError, TypeError) as e:
+        log.warning(f"Could not extract additional time metadata: {e}")
     return metadata

{sunholo-0.123.4.dist-info → sunholo-0.124.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.123.4
+Version: 0.124.0
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0

{sunholo-0.123.4.dist-info → sunholo-0.124.0.dist-info}/RECORD RENAMED Viewed

@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
 sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
 sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
 sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
-sunholo/database/alloydb_client.py,sha256=pppcmPx1liMmQSiKCdpNR6BLODbvEdICAQMz2EEjxnQ,27081
+sunholo/database/alloydb_client.py,sha256=OCAi7Gopry7tiOOdjka-cldghFpxl6IXWWGEANmFVII,27414
 sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
 sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
 sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
@@ -79,7 +79,7 @@ sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGz
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
-sunholo/embedder/embed_metadata.py,sha256=2ziUIdVwnbCUU8gOwQWEvkrRcyp-7IeyZfSsWNkMquA,866
+sunholo/embedder/embed_metadata.py,sha256=h9_L3Mkd7Mtnr8OwV4nNRrdSKoxhqh9LnSsht6j-vIY,6600
 sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
 sunholo/excel/plugin.py,sha256=TJJdcKWyqEIce1agCJImvqvNp2CvLhzi4wUmLYHcLc8,4032
 sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.123.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.123.4.dist-info/METADATA,sha256=iJAH2MBdmtJhWAoZmyMoVQZUGHs3Q8iuJYkC_JmRhSo,10001
-sunholo-0.123.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-sunholo-0.123.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.123.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.123.4.dist-info/RECORD,,
+sunholo-0.124.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.124.0.dist-info/METADATA,sha256=FDOT2K4fXDiUu5jZbW8q7ozxsEAaNX-YMJiKfnLI2rM,10001
+sunholo-0.124.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+sunholo-0.124.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.124.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.124.0.dist-info/RECORD,,

{sunholo-0.123.4.dist-info → sunholo-0.124.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sunholo-0.123.4.dist-info → sunholo-0.124.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sunholo-0.123.4.dist-info → sunholo-0.124.0.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{sunholo-0.123.4.dist-info → sunholo-0.124.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

sunholo 0.123.4__py3-none-any.whl → 0.124.0__py3-none-any.whl

sunholo 0.123.4-py3-none-any.whl → 0.124.0-py3-none-any.whl