sunholo 0.127.0__py3-none-any.whl → 0.127.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/database/alloydb_client.py +405 -3
- {sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/METADATA +1 -1
- {sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/RECORD +7 -7
- {sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/WHEEL +0 -0
- {sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/entry_points.txt +0 -0
- {sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/top_level.txt +0 -0
sunholo/database/alloydb_client.py

@@ -823,7 +823,7 @@ class AlloyDBClient:
         return await self._insert_single_row(table_name, regular_data, metadata)
 
 
-    async def _insert_single_row(self, table_name: str, data: dict, metadata: dict = None):
+    async def _insert_single_row(self, table_name: str, data: dict, metadata: dict = None, primary_key_column:str = "id"):
         """
         Inserts a single row of data into the specified table.
 
@@ -869,7 +869,7 @@ class AlloyDBClient:
         sql = f'''
         INSERT INTO "{table_name}" ({columns_str})
         VALUES ({placeholders_str})
-        RETURNING
+        RETURNING {primary_key_column}
         '''
 
         # Execute SQL to insert data based on engine type
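Taken with the signature change above, the RETURNING clause now names the caller-supplied key column instead of being left empty. For illustration only (the table, column list, and placeholder style below are assumed, not taken from the package), this is the shape of statement the f-string builds:

# Hypothetical values standing in for what _insert_single_row assembles internally.
table_name = "documents"
columns_str = '"title", "source"'
placeholders_str = ":title, :source"
primary_key_column = "doc_id"   # new optional argument; defaults to "id"

sql = f'''
INSERT INTO "{table_name}" ({columns_str})
VALUES ({placeholders_str})
RETURNING {primary_key_column}
'''
# The statement now returns the value of doc_id for the inserted row.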
@@ -882,4 +882,406 @@ class AlloyDBClient:
 
         log.info(f"Inserted data into table {table_name}")
 
-        return result
+        return result
+
+    async def get_table_columns(self, table_name, schema="public"):
+        """
+        Fetch column information for an existing table.
+
+        Args:
+            table_name (str): The table name to get columns for
+            schema (str): Database schema, defaults to "public"
+
+        Returns:
+            List[dict]: List of column information dictionaries with keys:
+            - name: column name
+            - type: PostgreSQL data type
+            - is_nullable: whether the column allows NULL values
+            - default: default value if any
+        """
+        try:
+            query = f"""
+            SELECT
+                column_name,
+                data_type,
+                is_nullable,
+                column_default,
+                character_maximum_length
+            FROM
+                information_schema.columns
+            WHERE
+                table_name = '{table_name}'
+                AND table_schema = '{schema}'
+            ORDER BY
+                ordinal_position;
+            """
+
+            if self.engine_type == "pg8000":
+                result = self._execute_sql_pg8000(query)
+                rows = result.fetchall() if hasattr(result, 'fetchall') else result
+            else:
+                rows = await self._execute_sql_async_langchain(query)
+
+            columns = []
+            for row in rows:
+                column_info = {
+                    "name": row[0],
+                    "type": row[1],
+                    "is_nullable": row[2] == "YES",
+                    "default": row[3],
+                    "max_length": row[4]
+                }
+                columns.append(column_info)
+
+            log.info(f"Retrieved {len(columns)} columns for table '{table_name}'")
+            return columns
+
+        except Exception as e:
+            log.error(f"Error getting table columns: {e}")
+            return []
+
+    def map_data_to_columns(self, data, column_info, case_sensitive=False):
+        """
+        Map data dictionary to available table columns, handling case sensitivity.
+
+        Args:
+            data (dict): Dictionary of data to map
+            column_info (list): List of column information dictionaries from get_table_columns
+            case_sensitive (bool): Whether to match column names case-sensitively
+
+        Returns:
+            dict: Filtered data dictionary with only columns that exist in the table
+        """
+        if not column_info:
+            return data  # No column info, return original data
+
+        # Create lookup dictionaries for columns
+        columns = {}
+        columns_lower = {}
+
+        for col in column_info:
+            col_name = col["name"]
+            columns[col_name] = col
+            columns_lower[col_name.lower()] = col_name
+
+        # Filter and map the data
+        filtered_data = {}
+        for key, value in data.items():
+            if case_sensitive:
+                # Case-sensitive matching
+                if key in columns:
+                    filtered_data[key] = value
+            else:
+                # Case-insensitive matching
+                key_lower = key.lower()
+                if key_lower in columns_lower:
+                    # Use the original column name from the database
+                    original_key = columns_lower[key_lower]
+                    filtered_data[original_key] = value
+
+        return filtered_data
+
+    def safe_convert_value(self, value, target_type):
+        """
+        Safely convert a value to the target PostgreSQL type.
+        Handles various formats and placeholder values.
+
+        Args:
+            value: The value to convert
+            target_type (str): PostgreSQL data type name
+
+        Returns:
+            The converted value appropriate for the target type, or None if conversion fails
+        """
+        if value is None:
+            return None
+
+        # Handle placeholder values
+        if isinstance(value, str):
+            if value.startswith("No ") or value.lower() in ("none", "n/a", "null", ""):
+                # Special placeholders are converted to None for most types
+                return None
+
+        try:
+            # Handle different target types
+            if target_type in ("integer", "bigint", "smallint"):
+                if isinstance(value, (int, float)):
+                    return int(value)
+                elif isinstance(value, str) and value.strip():
+                    # Try to extract a number from the string
+                    cleaned = value.replace(',', '')
+                    # Extract the first number if there's text
+                    import re
+                    match = re.search(r'[-+]?\d+', cleaned)
+                    if match:
+                        return int(match.group())
+                return None
+
+            elif target_type in ("numeric", "decimal", "real", "double precision"):
+                if isinstance(value, (int, float)):
+                    return float(value)
+                elif isinstance(value, str) and value.strip():
+                    # Remove currency symbols and try to convert
+                    cleaned = value.replace('$', '').replace('€', '').replace('£', '')
+                    cleaned = cleaned.replace(',', '.')
+                    # Extract the first number if there's text
+                    import re
+                    match = re.search(r'[-+]?\d+(\.\d+)?', cleaned)
+                    if match:
+                        return float(match.group())
+                return None
+
+            elif target_type == "boolean":
+                if isinstance(value, bool):
+                    return value
+                elif isinstance(value, (int, float)):
+                    return bool(value)
+                elif isinstance(value, str):
+                    value_lower = value.lower()
+                    if value_lower in ("true", "t", "yes", "y", "1"):
+                        return True
+                    elif value_lower in ("false", "f", "no", "n", "0"):
+                        return False
+                return None
+
+            elif target_type.startswith("timestamp"):
+                if isinstance(value, str):
+                    # For dates, keep the string format - DB driver will handle conversion
+                    return value
+                # Other types, just return as is
+                return value
+
+            elif target_type == "jsonb" or target_type == "json":
+                if isinstance(value, (dict, list)):
+                    return json.dumps(value)
+                elif isinstance(value, str):
+                    # Validate it's valid JSON
+                    try:
+                        json.loads(value)
+                        return value
+                    except:
+                        return None
+                return None
+
+            else:
+                # For text and other types, convert to string
+                if isinstance(value, (dict, list)):
+                    return json.dumps(value)
+                elif value is not None:
+                    return str(value)
+                return None
+
+        except Exception as e:
+            log.debug(f"Conversion error for value '{value}' to {target_type}: {e}")
+            return None
+
+    async def insert_rows_safely(self, table_name, rows, metadata=None, continue_on_error=False, primary_key_column="id"  # Specify the correct primary key column here
+    ):
+        """
+        Insert multiple rows into a table with error handling for individual rows.
+
+        Args:
+            table_name (str): The table to insert into
+            rows (list): List of dictionaries containing row data
+            metadata (dict, optional): Additional metadata to include in each row
+            continue_on_error (bool): Whether to continue if some rows fail
+            primary_key_column (str): The primary key in the table, default 'id'
+
+        Returns:
+            dict: {
+                'success': bool,
+                'total_rows': int,
+                'inserted_rows': int,
+                'failed_rows': int,
+                'errors': list of errors with row data
+            }
+        """
+        if not rows:
+            return {'success': True, 'total_rows': 0, 'inserted_rows': 0, 'failed_rows': 0, 'errors': []}
+
+        # Get table columns for mapping and type conversion
+        columns = await self.get_table_columns(table_name)
+        column_map = {col['name']: col for col in columns}
+        column_map_lower = {col['name'].lower(): col for col in columns}
+
+        results = {
+            'success': True,
+            'total_rows': len(rows),
+            'inserted_rows': 0,
+            'failed_rows': 0,
+            'errors': []
+        }
+
+        for i, row in enumerate(rows):
+            try:
+                # Map row data to actual table columns
+                filtered_row = {}
+
+                # First, do case-insensitive mapping
+                for key, value in row.items():
+                    key_lower = key.lower()
+                    if key_lower in column_map_lower:
+                        col_info = column_map_lower[key_lower]
+                        col_name = col_info['name']  # Use the correct case from DB
+                        col_type = col_info['type']
+
+                        # Try to convert value to the appropriate type
+                        converted_value = self.safe_convert_value(value, col_type)
+                        filtered_row[col_name] = converted_value
+
+                # Add metadata if provided
+                if metadata:
+                    for key, value in metadata.items():
+                        key_lower = key.lower()
+                        if key_lower in column_map_lower:
+                            col_name = column_map_lower[key_lower]['name']
+                            filtered_row[col_name] = value
+
+                # Insert the row
+                result = await self._insert_single_row(table_name, filtered_row, primary_key_column=primary_key_column)
+                results['inserted_rows'] += 1
+
+            except Exception as e:
+                error_info = {
+                    'row_index': i,
+                    'error': str(e),
+                    'row_data': row
+                }
+                results['errors'].append(error_info)
+                results['failed_rows'] += 1
+
+                log.error(f"Error inserting row {i}: {e}")
+
+                if not continue_on_error:
+                    results['success'] = False
+                    return results
+
+        # Overall success is true if any rows were inserted successfully
+        results['success'] = results['inserted_rows'] > 0
+        return results
+
+    async def create_table_with_columns(self, table_name, column_definitions, if_not_exists=True, primary_key_column="id"):
+        """
+        Create a table with explicit column definitions.
+
+        Args:
+            table_name (str): The name of the table to create
+            column_definitions (list): List of column definition dictionaries:
+                - name: Column name
+                - type: PostgreSQL data type
+                - nullable: Whether column allows NULL (default True)
+                - default: Default value expression (optional)
+                - primary_key: Whether this is a primary key (default False)
+            if_not_exists (bool): Whether to use IF NOT EXISTS clause
+            primary_key_column (str): default name of primary key if not specified in column_definitions
+
+
+        Returns:
+            Result of the execution
+        """
+        if not column_definitions:
+            raise ValueError("No column definitions provided")
+
+        # Generate column definition strings
+        column_strs = []
+
+        # Check if we need to add a serial primary key
+        has_primary_key = any(col.get('primary_key', False) for col in column_definitions)
+
+        if not has_primary_key:
+            # Add an ID column as primary key
+            column_strs.append(f'"{primary_key_column}" SERIAL PRIMARY KEY')
+
+        for col in column_definitions:
+            col_name = col.get('name')
+            col_type = col.get('type', 'TEXT')
+            nullable = col.get('nullable', True)
+            default = col.get('default')
+            primary_key = col.get('primary_key', False)
+
+            if not col_name:
+                continue
+
+            # Build the column definition
+            col_def = f'"{col_name}" {col_type}'
+
+            if primary_key:
+                col_def += " PRIMARY KEY"
+
+            if not nullable:
+                col_def += " NOT NULL"
+
+            if default is not None:
+                col_def += f" DEFAULT {default}"
+
+            column_strs.append(col_def)
+
+        # Create the SQL statement
+        exists_clause = "IF NOT EXISTS " if if_not_exists else ""
+        columns_sql = ",\n ".join(column_strs)
+
+        create_table_sql = f"""
+        CREATE TABLE {exists_clause}"{table_name}" (
+            {columns_sql}
+        )
+        """
+
+        # Execute the SQL based on engine type
+        log.info(f"Creating table '{table_name}' with explicit column definitions")
+        try:
+            if self.engine_type == "pg8000":
+                result = self._execute_sql_pg8000(create_table_sql)
+            else:
+                result = await self._execute_sql_async_langchain(create_table_sql)
+
+            log.info(f"Table '{table_name}' created successfully")
+            return result
+        except Exception as e:
+            log.error(f"Error creating table: {e}")
+            raise
+
+    def _get_sql_type_safe(self, value):
+        """
+        Enhanced version of _get_sql_type with better type detection.
+        Handles placeholder values and common patterns.
+
+        Args:
+            value: The value to determine the column type
+
+        Returns:
+            str: SQL type
+        """
+        if value is None:
+            return "TEXT"
+
+        # Handle placeholder values
+        if isinstance(value, str) and (value.startswith("No ") or value.lower() in ("none", "n/a", "null", "")):
+            return "TEXT"  # Always use TEXT for placeholder values
+
+        if isinstance(value, dict):
+            return "JSONB"
+        elif isinstance(value, list):
+            return "JSONB"
+        elif isinstance(value, bool):
+            return "BOOLEAN"
+        elif isinstance(value, int):
+            return "INTEGER"
+        elif isinstance(value, float):
+            return "NUMERIC"
+        else:
+            # Check if it's a date string
+            if isinstance(value, str):
+                # Try to detect date formats
+                value_lower = value.lower()
+                if len(value) in (8, 10) and ('-' in value or '/' in value):
+                    # Likely a date (YYYY-MM-DD or MM/DD/YYYY)
+                    return "DATE"
+                elif 'date' in value_lower or 'time' in value_lower:
+                    # Column name hint suggests it's a date
+                    return "TIMESTAMP"
+                elif any(currency in value for currency in ('$', '€', '£')):
+                    # Likely a monetary value
+                    return "NUMERIC"
+
+            # Default to TEXT
+            return "TEXT"
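The new helpers added above combine into a create-then-bulk-insert workflow. A minimal usage sketch follows; it assumes `client` is an already-configured AlloyDBClient instance, and the "invoices" table and its columns are invented for illustration:

# Sketch only: `client` is assumed to be an already-configured AlloyDBClient;
# the table and column names are hypothetical.
async def load_invoices(client, rows):
    # Create the table if needed; because no column is marked primary_key,
    # an "id" SERIAL PRIMARY KEY column is added automatically.
    await client.create_table_with_columns(
        "invoices",
        [
            {"name": "invoice_number", "type": "TEXT", "nullable": False},
            {"name": "amount", "type": "NUMERIC"},
            {"name": "paid", "type": "BOOLEAN", "default": "false"},
        ],
        if_not_exists=True,
    )

    # Keys are matched to real columns case-insensitively and values are coerced
    # with safe_convert_value; rows that still fail are collected in the result
    # instead of aborting the whole batch.
    result = await client.insert_rows_safely(
        "invoices",
        rows,
        metadata={"source": "backfill"},  # only written if a matching column exists
        continue_on_error=True,
    )
    return result

# Example rows with mixed casing and messy values:
# rows = [{"Invoice_Number": "INV-001", "Amount": "€1,234", "Paid": "yes"},
#         {"invoice_number": "INV-002", "amount": "n/a", "paid": 0}]
# result -> {'success': ..., 'total_rows': 2, 'inserted_rows': ..., 'failed_rows': ..., 'errors': [...]}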
{sunholo-0.127.0.dist-info → sunholo-0.127.2.dist-info}/RECORD

@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
 sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
 sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
 sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
-sunholo/database/alloydb_client.py,sha256=
+sunholo/database/alloydb_client.py,sha256=bVP91jz6gizJrzgAize5EIx64htHybDVN9UGZpGSXi4,50703
 sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
 sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
 sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.127.
-sunholo-0.127.
-sunholo-0.127.
-sunholo-0.127.
-sunholo-0.127.
-sunholo-0.127.
+sunholo-0.127.2.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.127.2.dist-info/METADATA,sha256=vflFwcPWETDwMH45GRhr5McMoghZpm8-hspdiP3qNZs,10084
+sunholo-0.127.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+sunholo-0.127.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.127.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.127.2.dist-info/RECORD,,