unit-saas 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
unit_saas/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .main import loader
2
+
3
+ __all__ = ['loader']
unit_saas/main.py ADDED
@@ -0,0 +1,475 @@
1
+ import pandas as pd
2
+ import boto3
3
+ from sqlalchemy import create_engine, text
4
+ from botocore.exceptions import NoCredentialsError, ClientError
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import importlib.util
8
+ import requests
9
+
10
+
11
def run_sql_query(connection_string: str, query: str) -> pd.DataFrame:
    """
    Execute a SQL query against a database and return the result as a Pandas DataFrame.

    Args:
        connection_string: A valid SQLAlchemy connection string, e.g. for
            PostgreSQL: 'postgresql://user:password@host:port/database'.
        query: The SQL statement to run. It is wrapped in ``text()`` and
            executed verbatim, so it must come from a trusted source.

    Returns:
        The query result as a DataFrame. On any failure the error is printed
        and an empty DataFrame is returned instead of raising.
    """
    engine = None
    try:
        # Create a SQLAlchemy engine to connect to the database
        engine = create_engine(connection_string)

        # The context manager releases the connection when the block exits
        with engine.connect() as connection:
            # Let pandas read the SQL result set straight into a DataFrame
            dataframe = pd.read_sql_query(sql=text(query), con=connection)

        print("✅ SQL query executed successfully.")
        return dataframe

    except Exception as e:
        print(f"❌ Error executing SQL query: {e}")
        # Return an empty DataFrame in case of an error
        return pd.DataFrame()

    finally:
        # A fresh engine (with its own connection pool) is built on every
        # call; dispose of it so pooled connections are closed right away
        # rather than lingering until the engine is garbage-collected.
        if engine is not None:
            engine.dispose()
35
+
36
+
37
def load_csv_from_s3(bucket: str, key: str, separator: str = ',') -> pd.DataFrame:
    """
    Download a CSV object from an AWS S3 bucket and parse it into a Pandas DataFrame.

    Args:
        bucket: Name of the S3 bucket.
        key: Full path of the file inside the bucket.
        separator: Field delimiter passed to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or an empty DataFrame when credentials are
        missing, the AWS request fails, or the content cannot be read. The
        reason is printed in each failure case.

    AWS credentials are expected to be configured already (e.g. via
    environment variables or an IAM role).
    """
    try:
        s3_object = boto3.client('s3').get_object(Bucket=bucket, Key=key)
        # The streaming body is handed to pandas as a file-like object
        loaded = pd.read_csv(s3_object.get("Body"), sep=separator)
    except NoCredentialsError:
        print("❌ AWS credentials not found. Please configure them.")
        return pd.DataFrame()
    except ClientError as aws_error:
        error_code = aws_error.response['Error']['Code']
        if error_code != 'NoSuchKey':
            print(f"❌ An unexpected AWS error occurred: {aws_error}")
        else:
            print(f"❌ The file '{key}' does not exist in bucket '{bucket}'.")
        return pd.DataFrame()
    except Exception as error:
        print(f"❌ An error occurred while loading the CSV: {error}")
        return pd.DataFrame()

    print(f"✅ Successfully loaded '{key}' from bucket '{bucket}'.")
    return loaded
69
+
70
+ # Data Cleaning & Preprocessing Functions
71
def handle_missing_values(dataframe: pd.DataFrame, column: str, strategy: str, fill_value=None) -> pd.DataFrame:
    """
    Handle missing (NaN) values in a single column of a DataFrame.

    Args:
        dataframe: Input DataFrame; it is never modified.
        column: Name of the column to clean.
        strategy: One of 'drop' (remove rows where the column is missing),
            'fill' (replace with ``fill_value``), 'mean' or 'median'
            (replace with that statistic; numeric columns only).
        fill_value: Replacement value, required for the 'fill' strategy.

    Returns:
        A new DataFrame with the strategy applied. If the column is missing,
        the strategy is invalid, or its preconditions are not met, an error
        is printed and the original DataFrame is returned unchanged.
    """
    if column not in dataframe.columns:
        print(f"❌ Error: Column '{column}' not found in the DataFrame.")
        return dataframe  # Return original dataframe

    if strategy == 'drop':
        # dropna already returns a new DataFrame
        df = dataframe.dropna(subset=[column])
    elif strategy == 'fill':
        if fill_value is None:
            print("❌ Error: 'fill' strategy requires a 'fill_value'.")
            return dataframe
        df = dataframe.copy()
        # Assign the filled column back instead of calling
        # df[column].fillna(..., inplace=True): that chained in-place form
        # acts on a temporary object and does not update df under pandas
        # Copy-on-Write.
        df[column] = df[column].fillna(fill_value)
    elif strategy in ('mean', 'median'):
        if not pd.api.types.is_numeric_dtype(dataframe[column]):
            print(f"❌ Error: Cannot calculate '{strategy}' for non-numeric column '{column}'.")
            return dataframe
        df = dataframe.copy()
        if strategy == 'mean':
            fill_val = df[column].mean()
        else:  # median
            fill_val = df[column].median()
        df[column] = df[column].fillna(fill_val)
    else:
        print(f"❌ Error: Invalid strategy '{strategy}'. Supported strategies are 'drop', 'fill', 'mean', 'median'.")
        return dataframe

    print(f"✅ Handled missing values in '{column}' using strategy '{strategy}'.")
    return df
105
+
106
def remove_duplicates(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of the DataFrame with fully duplicated rows removed.

    The input is left untouched; the number of rows dropped is printed.
    """
    deduplicated = dataframe.drop_duplicates()
    rows_removed = len(dataframe) - len(deduplicated)
    print(f"✅ Removed {rows_removed} duplicate row(s).")
    return deduplicated
116
+
117
def change_column_type(dataframe: pd.DataFrame, column: str, new_type: str) -> pd.DataFrame:
    """
    Convert one column of a DataFrame to a different data type.

    Args:
        dataframe: Input DataFrame; it is never modified.
        column: Name of the column to convert.
        new_type: One of 'integer', 'float', 'string', 'datetime'.

    Returns:
        A new DataFrame with the converted column. If the column is missing,
        the type is unsupported, or the conversion fails, an error is printed
        and the original DataFrame is returned.
    """
    if column not in dataframe.columns:
        print(f"❌ Error: Column '{column}' not found.")
        return dataframe

    # Pandas dtype for each supported name; 'datetime' goes through
    # pd.to_datetime instead of astype.
    dtype_by_name = {
        'integer': 'Int64',  # Nullable integer type
        'float': 'float64',
        'string': 'str',
    }

    result = dataframe.copy()
    try:
        source = result[column]
        if new_type == 'datetime':
            converted = pd.to_datetime(source)
        elif new_type in dtype_by_name:
            converted = source.astype(dtype_by_name[new_type])
        else:
            print(f"❌ Error: Unsupported type '{new_type}'.")
            return dataframe
        result[column] = converted
    except Exception as e:
        print(f"❌ Error converting column '{column}' to type '{new_type}': {e}")
        return dataframe

    print(f"✅ Changed column '{column}' to type '{new_type}'.")
    return result
148
+
149
def rename_columns(dataframe: pd.DataFrame, rename_map: dict) -> pd.DataFrame:
    """
    Rename one or more columns and return the result as a new DataFrame.

    Args:
        dataframe: Input DataFrame; it is never modified.
        rename_map: Mapping of old names to new names, e.g.
            {'old_name_1': 'new_name_1', 'old_name_2': 'new_name_2'}.

    Returns:
        A renamed copy, or the original DataFrame (after printing an error)
        when ``rename_map`` is not a dictionary.
    """
    if not isinstance(rename_map, dict):
        print("❌ Error: 'rename_map' must be a dictionary.")
        return dataframe

    renamed = dataframe.rename(columns=rename_map)
    requested = list(rename_map.keys())
    print(f"✅ Renamed columns: {requested}.")
    return renamed
163
+
164
def filter_rows(dataframe: pd.DataFrame, column: str, operator: str, value) -> pd.DataFrame:
    """
    Filter the rows of a DataFrame by comparing one column against a value.

    Args:
        dataframe: Input DataFrame; it is never modified.
        column: Name of the column to test.
        operator: One of '==', '!=', '>', '<', '>=', '<=', 'contains'.
        value: The value to compare against. For 'contains' it must be a
            string; it is passed to ``Series.str.contains`` with that
            method's default pattern handling, and missing entries never
            match (``na=False``).

    Returns:
        A new DataFrame holding only the matching rows. If the column is
        missing, the operator is invalid, or the column's data cannot be used
        with the operator, an error is printed and the original DataFrame is
        returned.
    """
    # Imported under an alias because the 'operator' parameter shadows the
    # standard-library module of the same name.
    import operator as _ops

    if column not in dataframe.columns:
        print(f"❌ Error: Column '{column}' not found in the DataFrame.")
        return dataframe

    comparisons = {
        '==': _ops.eq,
        '!=': _ops.ne,
        '>': _ops.gt,
        '<': _ops.lt,
        '>=': _ops.ge,
        '<=': _ops.le,
    }

    try:
        if operator == 'contains':
            if not isinstance(value, str):
                print("❌ Error: 'contains' operator requires a string value.")
                return dataframe
            mask = dataframe[column].str.contains(value, na=False)
        elif isinstance(operator, str) and operator in comparisons:
            mask = comparisons[operator](dataframe[column], value)
        else:
            print(f"❌ Error: Invalid operator '{operator}'.")
            return dataframe
    except (TypeError, AttributeError):
        # TypeError: the value cannot be ordered against the column's dtype.
        # AttributeError: 'contains' was requested on a column that has no
        # .str accessor (non-string data).
        print(f"❌ Error: The data in column '{column}' is not compatible with the operator '{operator}'. Please check the column's data type.")
        return dataframe

    print(f"✅ Filtered rows where '{column}' {operator} '{value}'.")
    # Copy so the result is independent of the caller's DataFrame
    return dataframe[mask].copy()
203
+
204
def select_columns(dataframe: pd.DataFrame, columns_to_keep: list) -> pd.DataFrame:
    """
    Keep only the listed columns of a DataFrame and discard the rest.

    Args:
        dataframe: Input DataFrame; it is never modified.
        columns_to_keep: List of column names, in the desired output order.

    Returns:
        A copy containing just those columns. If ``columns_to_keep`` is not a
        list or names a column that does not exist, an error is printed and
        the original DataFrame is returned.
    """
    if not isinstance(columns_to_keep, list):
        print("❌ Error: 'columns_to_keep' must be a list.")
        return dataframe

    # Every requested column must be present before anything is selected
    absent = [name for name in columns_to_keep if name not in dataframe.columns]
    if not absent:
        print(f"✅ Selected columns: {columns_to_keep}.")
        return dataframe[columns_to_keep].copy()

    print(f"❌ Error: The following columns were not found: {absent}")
    return dataframe
221
+
222
def join_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, on_column: str, how: str = 'inner') -> pd.DataFrame:
    """
    Merge two DataFrames on a column they have in common.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        on_column: Name of the join column; must exist in both inputs.
        how: Join method: 'inner', 'left', 'right' or 'outer'.

    Returns:
        The merged DataFrame. If the column is missing from either input, the
        join method is invalid, or the merge fails, an error is printed and
        an empty DataFrame is returned.
    """
    in_both = on_column in df1.columns and on_column in df2.columns
    if not in_both:
        print(f"❌ Error: Join column '{on_column}' not found in both DataFrames.")
        return pd.DataFrame()  # Return an empty DataFrame

    supported_joins = ['inner', 'left', 'right', 'outer']
    if how not in supported_joins:
        print(f"❌ Error: Invalid join method '{how}'. Supported methods are: {supported_joins}.")
        return pd.DataFrame()

    try:
        joined = pd.merge(df1, df2, on=on_column, how=how)
    except Exception as e:
        print(f"❌ An error occurred during the join operation: {e}")
        return pd.DataFrame()
    else:
        print(f"✅ Successfully joined DataFrames on '{on_column}' using a '{how}' join.")
        return joined
243
+
244
def group_by_aggregate(dataframe: pd.DataFrame, group_by_col: str, agg_col: str, agg_func: str) -> pd.DataFrame:
    """
    Group a DataFrame by one column and aggregate another column per group.

    Args:
        dataframe: Input DataFrame.
        group_by_col: Column whose values define the groups.
        agg_col: Column to aggregate.
        agg_func: One of 'sum', 'mean', 'count', 'std' (standard deviation),
            'min', 'max'. Every function except 'count' requires ``agg_col``
            to be numeric.

    Returns:
        A new DataFrame with one row per group, holding the group key and the
        aggregated value. On invalid input or failure an error is printed and
        an empty DataFrame is returned.
    """
    available = dataframe.columns
    if not (group_by_col in available and agg_col in available):
        print(f"❌ Error: One or both columns ('{group_by_col}', '{agg_col}') not found.")
        return pd.DataFrame()

    supported_funcs = ['sum', 'mean', 'count', 'std', 'min', 'max']
    if agg_func not in supported_funcs:
        print(f"❌ Error: Invalid aggregation function '{agg_func}'. Supported functions are: {supported_funcs}.")
        return pd.DataFrame()

    # Only 'count' is allowed on a non-numeric aggregation column
    needs_numeric = agg_func != 'count'
    if needs_numeric and not pd.api.types.is_numeric_dtype(dataframe[agg_col]):
        print(f"❌ Error: Aggregation function '{agg_func}' requires a numeric column for '{agg_col}'.")
        return pd.DataFrame()

    try:
        print(f"✅ Grouping by '{group_by_col}' and aggregating '{agg_col}' with '{agg_func}'.")
        per_group = dataframe.groupby(group_by_col)[agg_col]
        aggregated = per_group.agg(agg_func)
        # reset_index moves the group key out of the index into a column
        return aggregated.reset_index()
    except Exception as e:
        print(f"❌ An error occurred during aggregation: {e}")
        return pd.DataFrame()
272
+
273
+
274
def sort_values(dataframe: pd.DataFrame, by_column: str, ascending: bool = False) -> pd.DataFrame:
    """
    Return the DataFrame sorted by the values of one column.

    Args:
        dataframe: Input DataFrame; it is never modified.
        by_column: Name of the column to sort by.
        ascending: Sort order; the default (False) sorts in descending order.

    Returns:
        A new, sorted DataFrame, or the original DataFrame (after printing an
        error) when the column does not exist.
    """
    if by_column in dataframe.columns:
        direction = 'ascending' if ascending else 'descending'
        print(f"✅ Sorting DataFrame by '{by_column}' in {direction} order.")
        # Not in-place: DataFrame.sort_values hands back a new object
        return dataframe.sort_values(by=by_column, ascending=ascending)

    print(f"❌ Error: Column '{by_column}' not found for sorting.")
    return dataframe
285
+
286
+
287
def get_descriptive_statistics(dataframe: pd.DataFrame, column: str) -> dict:
    """
    Compute descriptive statistics for a numeric column.

    Args:
        dataframe: Input DataFrame.
        column: Name of a numeric column.

    Returns:
        The output of ``Series.describe()`` converted to a dictionary (for
        numeric data this includes count, mean, std, min and max, with the
        median reported under the '50%' key). An empty dictionary is returned,
        after printing an error, when the column is missing or not numeric.
    """
    if column not in dataframe.columns:
        print(f"❌ Error: Column '{column}' not found.")
        return {}

    series = dataframe[column]
    if not pd.api.types.is_numeric_dtype(series):
        print(f"❌ Error: Descriptive statistics can only be calculated for numeric columns. '{column}' is not numeric.")
        return {}

    print(f"✅ Calculating descriptive statistics for '{column}'.")
    summary = series.describe()  # pandas Series keyed by statistic name
    return summary.to_dict()
303
+
304
def display_stats(stats: dict, title: str = "Descriptive Statistics"):
    """
    Print a descriptive-statistics dictionary in a readable layout.

    Args:
        stats (dict): Dictionary containing descriptive statistics.
        title (str): Optional heading printed above the entries.
    """
    if not stats:
        print("❌ No statistics to display.")
        return

    # Heading, one "key: value" line per entry, then a closing rule
    report = [f"--- {title} ---"]
    report.extend(f"{name}: {number}" for name, number in stats.items())
    report.append("--------------------")
    print("\n".join(report))
320
+
321
+
322
def display_head(dataframe: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    """
    Print the first ``n`` rows of the DataFrame for a quick sanity check.

    Args:
        dataframe: DataFrame to inspect.
        n: Number of leading rows to print.

    Returns:
        The same DataFrame object that was passed in, so calls can be chained.
    """
    print(f"--- Displaying first {n} rows ---")
    preview = dataframe.head(n)
    print(preview)
    # Hand back the input itself (not the preview) to allow further chaining
    return dataframe
331
+
332
def plot_bar_chart(dataframe: pd.DataFrame, x_col: str, y_col: str, title: str):
    """
    Draw and show a bar chart of ``y_col`` against ``x_col``.

    Args:
        dataframe: Source data.
        x_col: Column providing the bar positions/labels.
        y_col: Column providing the bar heights.
        title: Chart title.

    An error is printed, and nothing is drawn, when either column is missing.
    Any exception raised while plotting is caught and printed.
    """
    columns_present = x_col in dataframe.columns and y_col in dataframe.columns
    if not columns_present:
        print(f"❌ Error: One or both columns ('{x_col}', '{y_col}') not found for plotting.")
        return

    try:
        print(f"📊 Displaying bar chart: '{title}'... (Close the plot window to continue)")
        plt.figure(figsize=(10, 6))
        categories = dataframe[x_col]
        heights = dataframe[y_col]
        plt.bar(categories, heights)
        plt.xlabel(x_col)
        plt.ylabel(y_col)
        plt.title(title)
        # Slanted tick labels stay legible when there are many categories
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()  # Leave room for the rotated labels
        plt.show()  # Opens the plot window
    except Exception as e:
        print(f"❌ An error occurred while plotting: {e}")
352
+
353
+
354
def plot_line_chart(dataframe: pd.DataFrame, x_col: str, y_col: str, title: str):
    """
    Draw and show a line chart of ``y_col`` against ``x_col``; well suited to time series.

    Args:
        dataframe: Source data.
        x_col: Column plotted along the x-axis.
        y_col: Column plotted along the y-axis.
        title: Chart title.

    An error is printed, and nothing is drawn, when either column is missing.
    Any exception raised while plotting is caught and printed.
    """
    columns_present = x_col in dataframe.columns and y_col in dataframe.columns
    if not columns_present:
        print(f"❌ Error: One or both columns ('{x_col}', '{y_col}') not found for plotting.")
        return

    try:
        print(f"📈 Displaying line chart: '{title}'... (Close the plot window to continue)")
        plt.figure(figsize=(10, 6))
        x_values = dataframe[x_col]
        y_values = dataframe[y_col]
        # A marker on every point keeps individual observations visible
        plt.plot(x_values, y_values, marker='o')
        plt.xlabel(x_col)
        plt.ylabel(y_col)
        plt.title(title)
        plt.grid(True)  # Grid lines make values easier to read off
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"❌ An error occurred while plotting: {e}")
375
+
376
+
377
def save_dataframe_to_csv(dataframe: pd.DataFrame, filename: str):
    """
    Write the DataFrame to a local CSV file.

    Args:
        dataframe: Data to persist.
        filename: Destination path of the CSV file.

    The outcome (success or the error that occurred) is printed; no
    exception is propagated to the caller.
    """
    try:
        # index=False keeps the DataFrame index out of the file
        dataframe.to_csv(filename, index=False)
    except Exception as e:
        print(f"❌ An error occurred while saving the file: {e}")
    else:
        print(f"✅ Successfully saved DataFrame to '{filename}'.")
387
+
388
+
389
# Names of the module-level helpers that a plan returned by the remote
# service is allowed to invoke. Anything else is rejected, so the service
# cannot reach arbitrary module globals through the name lookup in loader().
_LOADER_ALLOWED_FUNCTIONS = frozenset({
    'run_sql_query',
    'load_csv_from_s3',
    'handle_missing_values',
    'remove_duplicates',
    'change_column_type',
    'rename_columns',
    'filter_rows',
    'select_columns',
    'join_dataframes',
    'group_by_aggregate',
    'sort_values',
    'get_descriptive_statistics',
    'display_stats',
    'display_head',
    'plot_bar_chart',
    'plot_line_chart',
    'save_dataframe_to_csv',
})


def loader(security_token: str, frame: pd.DataFrame = None, data: dict = None, rules: str = None, address: str = "http://localhost:8000/call", timeout: float = 120.0):
    """
    Send ``rules`` to a remote planning service and execute the returned plan.

    The service is expected to answer with a JSON list of steps, each shaped
    like ``{"function": <name>, "arguments": {...}}``. Every step is run in
    order by calling the module-level helper with that name.

    Inside a step's arguments, the values of the 'dataframe' and 'stats' keys
    are placeholders: 'df' stands for the input data, and any name seen for
    the first time is bound to the result of the most recently executed step.

    Args:
        security_token: Token sent in the ``Authorization`` header.
        frame: Input DataFrame. Takes precedence over ``data``.
        data: Raw data used to build the input DataFrame when ``frame`` is
            not given.
        rules: Prompt text sent to the service.
        address: URL of the planning endpoint.
        timeout: Seconds to wait for the service before giving up.

    Raises:
        requests.RequestException: If the HTTP call fails, times out, or
            returns an error status.
        RuntimeError: If the response is not a list of step dictionaries, a
            step names a function that is not allowed, a placeholder refers
            to a result that does not exist yet, or a step raises.
    """
    if frame is not None:
        df = frame
    elif data is not None:
        df = pd.DataFrame(data)
    else:
        df = None

    # --------------------------
    # Step 1: Ask the service for a plan
    # --------------------------
    response = requests.post(
        url=address,
        json={"prompt": rules},
        headers={"Authorization": f"token {security_token}"},
        timeout=timeout,
    )
    # Fail on HTTP errors instead of trying to interpret an error body as a plan
    response.raise_for_status()
    plan = response.json()
    print("LLM Response:", plan)

    if not isinstance(plan, list):
        raise RuntimeError(
            f"Expected a list of function calls from {address}, got {type(plan).__name__}."
        )

    # --------------------------
    # Step 2: Execute functions directly
    # --------------------------
    dataframes = {'df': df}
    # Track the latest result explicitly. A dict keyed by function name keeps
    # first-insertion order, so its last value is not the latest result once
    # a function is called twice.
    last_result = None
    has_result = False

    for step in plan:
        if not isinstance(step, dict):
            raise RuntimeError(f"Each step must be a JSON object, got {type(step).__name__}.")

        func_name = step.get('function')
        # Work on a copy so the parsed response is not mutated
        args = dict(step.get('arguments') or {})

        # Map placeholders to actual objects.
        # NOTE(review): only the 'dataframe' and 'stats' arguments are
        # resolved; join_dataframes takes 'df1'/'df2', which are passed
        # through as-is - confirm what the service sends for that function.
        for key in ('dataframe', 'stats'):
            if key not in args:
                continue
            placeholder = args[key]
            if placeholder not in dataframes:
                if not has_result:
                    raise RuntimeError(
                        f"Step '{func_name}' refers to '{placeholder}', "
                        "but no earlier step has produced a result."
                    )
                dataframes[placeholder] = last_result
            args[key] = dataframes[placeholder]

        if not isinstance(func_name, str) or func_name not in _LOADER_ALLOWED_FUNCTIONS:
            raise RuntimeError(f"Function '{func_name}' is not an allowed operation.")

        # Call the function directly
        try:
            result = globals()[func_name](**args)
        except Exception as e:
            raise RuntimeError(f"Error executing {func_name} with args {args}: {e}") from e

        last_result = result
        has_result = True
457
+
458
+
459
+
460
+ # # 1. Display the head of the DataFrame to inspect it
461
+ # display_head(df, n=3)
462
+
463
+ # # 2. Plot a line chart of daily sales over time
464
+ # plot_line_chart(df, x_col='date', y_col='daily_sales', title='Daily Sales Trend')
465
+
466
+ # # 3. For a bar chart, let's first aggregate the data
467
+ # category_sales = df.groupby('category')['daily_sales'].sum().reset_index()
468
+ # print("\n--- Aggregated Sales by Category ---")
469
+ # print(category_sales)
470
+
471
+ # # Plot a bar chart of the aggregated sales
472
+ # plot_bar_chart(category_sales, x_col='category', y_col='daily_sales', title='Total Sales by Category')
473
+
474
+ # # 4. Save the aggregated data to a CSV file
475
+ # save_dataframe_to_csv(category_sales, 'category_sales_report.csv')
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: unit_saas
3
+ Version: 0.1.0
4
+ Summary: My SAAS internal package
5
+ Author: Sanidhya Agrawal
6
+ Author-email: Sanidhya <sanidhyaagrawal2002@gmail.com>
7
+ License-Expression: MIT
8
+ Requires-Python: >=3.8
9
+ Dynamic: author
10
+ Dynamic: requires-python
@@ -0,0 +1,6 @@
1
+ unit_saas/__init__.py,sha256=Ly0EhEyamLKL8b74XL-BJjp1HFQZuQVIyacafgnXFbo,48
2
+ unit_saas/main.py,sha256=RbrwsfNvH9dDUdIZm6q9elp_VRq-2IEsIbHYHmYZB04,18539
3
+ unit_saas-0.1.0.dist-info/METADATA,sha256=Qu4-7-yq8pO7tzGKqF96Wlfs0y_eLHWNfP2EYapDOBo,265
4
+ unit_saas-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ unit_saas-0.1.0.dist-info/top_level.txt,sha256=MndPMstkGls9hLt8ITrDGSDVWPINno0dFceuT2a5bEs,10
6
+ unit_saas-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ unit_saas