sunholo 0.127.0__py3-none-any.whl → 0.127.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -882,4 +882,402 @@ class AlloyDBClient:
882
882
 
883
883
  log.info(f"Inserted data into table {table_name}")
884
884
 
885
- return result
885
+ return result
886
+
887
async def get_table_columns(self, table_name, schema="public"):
    """
    Fetch column information for an existing table.

    The lookup is a single query against ``information_schema.columns``,
    executed through the pg8000 helper when ``self.engine_type`` is
    ``"pg8000"`` and through the async langchain helper otherwise.

    Args:
        table_name (str): The table name to get columns for
        schema (str): Database schema, defaults to "public"

    Returns:
        List[dict]: List of column information dictionaries with keys:
            - name: column name
            - type: PostgreSQL data type
            - is_nullable: whether the column allows NULL values
            - default: default value if any
            - max_length: character maximum length if any
        An empty list is returned (and the error is logged) when the
        query or the row processing fails; no exception is propagated.
    """
    def _as_sql_literal(text):
        # Double embedded single quotes so the value cannot terminate the
        # string literal it is interpolated into.
        # NOTE(review): assumes standard_conforming_strings is on (the
        # PostgreSQL default), i.e. backslashes are not escapes -- confirm.
        return str(text).replace("'", "''")

    try:
        safe_table = _as_sql_literal(table_name)
        safe_schema = _as_sql_literal(schema)

        query = f"""
            SELECT
                column_name,
                data_type,
                is_nullable,
                column_default,
                character_maximum_length
            FROM
                information_schema.columns
            WHERE
                table_name = '{safe_table}'
                AND table_schema = '{safe_schema}'
            ORDER BY
                ordinal_position;
        """

        if self.engine_type == "pg8000":
            result = self._execute_sql_pg8000(query)
            # The helper may hand back a cursor-like object or the rows
            # themselves; accept both.
            rows = result.fetchall() if hasattr(result, 'fetchall') else result
        else:
            rows = await self._execute_sql_async_langchain(query)

        # Row positions follow the SELECT list above.
        columns = [
            {
                "name": row[0],
                "type": row[1],
                "is_nullable": row[2] == "YES",
                "default": row[3],
                "max_length": row[4],
            }
            for row in rows
        ]

        log.info(f"Retrieved {len(columns)} columns for table '{table_name}'")
        return columns

    except Exception as e:
        # Deliberate best effort: callers receive "no columns" on failure.
        log.error(f"Error getting table columns: {e}")
        return []
942
+
943
def map_data_to_columns(self, data, column_info, case_sensitive=False):
    """
    Keep only the entries of ``data`` whose key names a known table column.

    Args:
        data (dict): Dictionary of data to map
        column_info (list): Column dictionaries (each with a "name" key),
            as produced by get_table_columns
        case_sensitive (bool): When True a key must equal a column name
            exactly; when False keys are compared lower-cased and the
            result uses the column's own spelling

    Returns:
        dict: The filtered mapping. When ``column_info`` is empty the
        original ``data`` object is returned unchanged.
    """
    if not column_info:
        return data  # Nothing to filter against

    # Exact names for case-sensitive lookups, plus lower-cased name ->
    # database spelling for case-insensitive lookups.
    exact_names = set()
    canonical_by_lower = {}
    for entry in column_info:
        name = entry["name"]
        exact_names.add(name)
        canonical_by_lower[name.lower()] = name

    if case_sensitive:
        return {key: value for key, value in data.items() if key in exact_names}

    # Resolve every key to the database spelling (None when unknown) and
    # drop the unknown ones; later duplicates overwrite earlier values.
    resolved = (
        (canonical_by_lower.get(key.lower()), value)
        for key, value in data.items()
    )
    return {name: value for name, value in resolved if name is not None}
983
+
984
def safe_convert_value(self, value, target_type):
    """
    Safely convert a value to the target PostgreSQL type.
    Handles various formats and placeholder values.

    Strings that start with "No " or equal (case-insensitively) "none",
    "n/a", "null" or "" are treated as placeholders and become None for
    every target type.

    Numeric strings may carry currency symbols and grouping separators.
    When both ',' and '.' occur, the one appearing last is taken as the
    decimal mark ("1,234.56" and "1.234,56" both give 1234.56). Several
    commas without a dot are grouping separators; a single comma is a
    decimal comma ("1,5" gives 1.5).

    Args:
        value: The value to convert
        target_type (str): PostgreSQL data type name

    Returns:
        The converted value appropriate for the target type, or None if
        conversion fails
    """
    # Local import, as in the original block; the file-level imports are
    # not visible from here.
    import re

    if value is None:
        return None

    # Handle placeholder values
    if isinstance(value, str):
        if value.startswith("No ") or value.lower() in ("none", "n/a", "null", ""):
            return None

    try:
        if target_type in ("integer", "bigint", "smallint"):
            if isinstance(value, (int, float)):
                return int(value)
            if isinstance(value, str) and value.strip():
                # Commas are grouping separators here; take the first
                # integer found in the text.
                match = re.search(r'[-+]?\d+', value.replace(',', ''))
                if match:
                    return int(match.group())
            return None

        if target_type in ("numeric", "decimal", "real", "double precision"):
            if isinstance(value, (int, float)):
                return float(value)
            if isinstance(value, str) and value.strip():
                cleaned = value.replace('$', '').replace('€', '').replace('£', '')
                last_comma = cleaned.rfind(',')
                last_dot = cleaned.rfind('.')
                if last_comma != -1 and last_dot != -1:
                    if last_comma > last_dot:
                        # "1.234,56": dots group, comma is the decimal mark
                        cleaned = cleaned.replace('.', '').replace(',', '.')
                    else:
                        # "1,234.56": commas group, dot is the decimal mark
                        cleaned = cleaned.replace(',', '')
                elif cleaned.count(',') > 1:
                    # "1,234,567": more than one comma can only be grouping
                    cleaned = cleaned.replace(',', '')
                else:
                    # "1,5": single comma is read as a decimal comma
                    cleaned = cleaned.replace(',', '.')
                # Extract the first number if there's surrounding text
                match = re.search(r'[-+]?\d+(\.\d+)?', cleaned)
                if match:
                    return float(match.group())
            return None

        if target_type == "boolean":
            if isinstance(value, bool):
                return value
            if isinstance(value, (int, float)):
                return bool(value)
            if isinstance(value, str):
                token = value.strip().lower()
                if token in ("true", "t", "yes", "y", "1"):
                    return True
                if token in ("false", "f", "no", "n", "0"):
                    return False
            return None

        if target_type.startswith("timestamp"):
            # Strings and every other type are passed through untouched;
            # conversion is left to the DB driver.
            return value

        if target_type in ("jsonb", "json"):
            if isinstance(value, (dict, list)):
                return json.dumps(value)
            if isinstance(value, str):
                # Only pass the string on when it is valid JSON
                try:
                    json.loads(value)
                except ValueError:
                    return None
                return value
            return None

        # For text and other types, convert to string
        if isinstance(value, (dict, list)):
            return json.dumps(value)
        return str(value)

    except Exception as e:
        log.debug(f"Conversion error for value '{value}' to {target_type}: {e}")
        return None
1077
+
1078
async def insert_rows_safely(self, table_name, rows, metadata=None, continue_on_error=False):
    """
    Insert rows one at a time, recording failures per row.

    Each row's keys are matched case-insensitively against the table's
    columns (from get_table_columns); unmatched keys are dropped and
    matched values go through safe_convert_value. Matching metadata
    entries are then written over the row as-is, without conversion.

    Args:
        table_name (str): The table to insert into
        rows (list): List of dictionaries containing row data
        metadata (dict, optional): Additional metadata to include in each row
        continue_on_error (bool): Whether to continue if some rows fail

    Returns:
        dict: {
            'success': bool,
            'total_rows': int,
            'inserted_rows': int,
            'failed_rows': int,
            'errors': list of errors with row data
        }
        With continue_on_error=False the first failure returns at once
        with success False. Otherwise success is True when at least one
        row was inserted.
    """
    if not rows:
        return {'success': True, 'total_rows': 0, 'inserted_rows': 0, 'failed_rows': 0, 'errors': []}

    # Lower-cased column name -> column description, for matching keys
    table_columns = await self.get_table_columns(table_name)
    columns_by_lower = {entry['name'].lower(): entry for entry in table_columns}

    summary = dict(
        success=True,
        total_rows=len(rows),
        inserted_rows=0,
        failed_rows=0,
        errors=[],
    )

    for position, record in enumerate(rows):
        try:
            prepared = {}

            # Row values: database spelling of the column, converted value
            for field, raw in record.items():
                column = columns_by_lower.get(field.lower())
                if column is None:
                    continue
                prepared[column['name']] = self.safe_convert_value(raw, column['type'])

            # Metadata is applied after the row data, so it wins on clashes
            if metadata:
                for field, raw in metadata.items():
                    column = columns_by_lower.get(field.lower())
                    if column is not None:
                        prepared[column['name']] = raw

            await self._insert_single_row(table_name, prepared)
            summary['inserted_rows'] += 1

        except Exception as e:
            summary['errors'].append({
                'row_index': position,
                'error': str(e),
                'row_data': record,
            })
            summary['failed_rows'] += 1

            log.error(f"Error inserting row {position}: {e}")

            if not continue_on_error:
                summary['success'] = False
                return summary

    # Overall success is true if any rows were inserted successfully
    summary['success'] = summary['inserted_rows'] > 0
    return summary
1160
+
1161
async def create_table_with_columns(self, table_name, column_definitions, if_not_exists=True):
    """
    Create a table with explicit column definitions.

    Table and column names are emitted as double-quoted identifiers with
    embedded double quotes doubled. Column ``type`` and ``default`` are
    inserted verbatim as SQL fragments, so they must come from trusted
    input.

    When no definition is flagged ``primary_key``, an
    ``id SERIAL PRIMARY KEY`` column is prepended. Definitions without a
    ``name`` are skipped.

    Args:
        table_name (str): The name of the table to create
        column_definitions (list): List of column definition dictionaries:
            - name: Column name
            - type: PostgreSQL data type (default 'TEXT')
            - nullable: Whether column allows NULL (default True)
            - default: Default value expression (optional)
            - primary_key: Whether this is a primary key (default False)
        if_not_exists (bool): Whether to use IF NOT EXISTS clause

    Returns:
        Result of the execution

    Raises:
        ValueError: If ``column_definitions`` is empty.
        Exception: Any error raised while executing the statement is
            logged and re-raised.
    """
    if not column_definitions:
        raise ValueError("No column definitions provided")

    def quote_ident(identifier):
        # A double quote inside a quoted identifier is written as two;
        # without this a name containing '"' would end the identifier
        # early and corrupt (or inject into) the statement.
        return '"' + str(identifier).replace('"', '""') + '"'

    column_strs = []

    # Add a serial primary key when the caller did not designate one
    has_primary_key = any(col.get('primary_key', False) for col in column_definitions)
    if not has_primary_key:
        column_strs.append("id SERIAL PRIMARY KEY")

    for col in column_definitions:
        col_name = col.get('name')
        if not col_name:
            continue

        parts = [quote_ident(col_name), col.get('type', 'TEXT')]

        if col.get('primary_key', False):
            parts.append("PRIMARY KEY")

        if not col.get('nullable', True):
            parts.append("NOT NULL")

        default = col.get('default')
        if default is not None:
            parts.append(f"DEFAULT {default}")

        column_strs.append(" ".join(parts))

    exists_clause = "IF NOT EXISTS " if if_not_exists else ""
    columns_sql = ",\n        ".join(column_strs)

    create_table_sql = f"""
    CREATE TABLE {exists_clause}{quote_ident(table_name)} (
        {columns_sql}
    )
    """

    # Execute the SQL based on engine type
    log.info(f"Creating table '{table_name}' with explicit column definitions")
    try:
        if self.engine_type == "pg8000":
            result = self._execute_sql_pg8000(create_table_sql)
        else:
            result = await self._execute_sql_async_langchain(create_table_sql)

        log.info(f"Table '{table_name}' created successfully")
        return result
    except Exception as e:
        log.error(f"Error creating table: {e}")
        raise
1238
+
1239
+ def _get_sql_type_safe(self, value):
1240
+ """
1241
+ Enhanced version of _get_sql_type with better type detection.
1242
+ Handles placeholder values and common patterns.
1243
+
1244
+ Args:
1245
+ value: The value to determine the column type
1246
+
1247
+ Returns:
1248
+ str: SQL type
1249
+ """
1250
+ if value is None:
1251
+ return "TEXT"
1252
+
1253
+ # Handle placeholder values
1254
+ if isinstance(value, str) and (value.startswith("No ") or value.lower() in ("none", "n/a", "null", "")):
1255
+ return "TEXT" # Always use TEXT for placeholder values
1256
+
1257
+ if isinstance(value, dict):
1258
+ return "JSONB"
1259
+ elif isinstance(value, list):
1260
+ return "JSONB"
1261
+ elif isinstance(value, bool):
1262
+ return "BOOLEAN"
1263
+ elif isinstance(value, int):
1264
+ return "INTEGER"
1265
+ elif isinstance(value, float):
1266
+ return "NUMERIC"
1267
+ else:
1268
+ # Check if it's a date string
1269
+ if isinstance(value, str):
1270
+ # Try to detect date formats
1271
+ value_lower = value.lower()
1272
+ if len(value) in (8, 10) and ('-' in value or '/' in value):
1273
+ # Likely a date (YYYY-MM-DD or MM/DD/YYYY)
1274
+ return "DATE"
1275
+ elif 'date' in value_lower or 'time' in value_lower:
1276
+ # Column name hint suggests it's a date
1277
+ return "TIMESTAMP"
1278
+ elif any(currency in value for currency in ('$', '€', '£')):
1279
+ # Likely a monetary value
1280
+ return "NUMERIC"
1281
+
1282
+ # Default to TEXT
1283
+ return "TEXT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.127.0
3
+ Version: 0.127.1
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
60
60
  sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
61
61
  sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
62
62
  sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
63
- sunholo/database/alloydb_client.py,sha256=Ih_9nd_5fXzki02fNhssn5Grg5-GcQrfmNjqv1wLK7A,34788
63
+ sunholo/database/alloydb_client.py,sha256=B_vCN9d2wQj77TGoyHAMryCNKljKt0ehtXNTdASqTIk,50297
64
64
  sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
65
65
  sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
66
66
  sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.127.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.127.0.dist-info/METADATA,sha256=hvXLBki1RQ_ZLhcm3Ej-qQq1-9XADsZg-F6N7oxzP1A,10084
173
- sunholo-0.127.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
- sunholo-0.127.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.127.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.127.0.dist-info/RECORD,,
171
+ sunholo-0.127.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.127.1.dist-info/METADATA,sha256=kKvoiijhfyGLL7CfJOEYZGYiCuIAZ6m2hGagaNjfCAQ,10084
173
+ sunholo-0.127.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
+ sunholo-0.127.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.127.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.127.1.dist-info/RECORD,,