synapseTools 0.1.2__tar.gz → 0.1.3__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synapseTools
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Utility toolkit for data exploration, audio mel-spectrogram generation, and Spanish phonetic processing.
5
5
  Home-page: https://github.com/synapse-ai-hub/synapseTools
6
6
  Author: SYNAPSE AI SAS
@@ -84,7 +84,7 @@ Core principles:
84
84
 
85
85
  - **Practical** – simple, focused functions with sensible defaults.
86
86
  - **Composable** – integrate easily into your pipelines and notebooks.
87
- - **Minimal dependencies** – no heavy deep-learning frameworks required unless you opt for specific extras.
87
+
88
88
 
89
89
  ---
90
90
 
@@ -28,7 +28,7 @@ Core principles:
28
28
 
29
29
  - **Practical** – simple, focused functions with sensible defaults.
30
30
  - **Composable** – integrate easily into your pipelines and notebooks.
31
- - **Minimal dependencies** – no heavy deep-learning frameworks required unless you opt for specific extras.
31
+
32
32
 
33
33
  ---
34
34
 
@@ -8,7 +8,7 @@ ROOT = Path(__file__).parent
8
8
 
9
9
  setup(
10
10
  name="synapseTools",
11
- version="0.1.2",
11
+ version="0.1.3",
12
12
  description=(
13
13
  "Utility toolkit for data exploration, audio mel-spectrogram generation, "
14
14
  "and Spanish phonetic processing."
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synapseTools
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Utility toolkit for data exploration, audio mel-spectrogram generation, and Spanish phonetic processing.
5
5
  Home-page: https://github.com/synapse-ai-hub/synapseTools
6
6
  Author: SYNAPSE AI SAS
@@ -84,7 +84,7 @@ Core principles:
84
84
 
85
85
  - **Practical** – simple, focused functions with sensible defaults.
86
86
  - **Composable** – integrate easily into your pipelines and notebooks.
87
- - **Minimal dependencies** – no heavy deep-learning frameworks required unless you opt for specific extras.
87
+
88
88
 
89
89
  ---
90
90
 
@@ -61,7 +61,7 @@ def nulls(data:pd.DataFrame, column:Union[str, int]) -> None:
61
61
 
62
62
 
63
63
 
64
- def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive Statistics", color:str='violet', fig_size:tuple[int, int]=(15,5), visualization:bool=True, return_dict:bool=False) -> Union[dict, None]:
64
+ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive Statistics", color:str='violet', fig_size:tuple[int, int]=(15,5), visualization:bool=True, output_dir:str='', name:str='Outliers', save:bool=False, return_dict:bool=False) -> Union[dict, None]:
65
65
  """
66
66
  Analyzes numerical outliers in a specified column of a DataFrame, visualizing its distribution,
67
67
  boxplot, and basic statistics.
@@ -73,11 +73,17 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
73
73
  ### Args
74
74
  - data (pd.DataFrame): The input DataFrame containing the data to analyze.
75
75
  - column (str or int): The name of the column to analyze. Must contain numerical data.
76
+ - title (str): The title of the statistics subplot. Defaults to "Descriptive Statistics".
76
77
  - color (str): The color for the histogram and boxplot. Defaults to 'violet'.
77
- - fig_size (tuple[int, int]): The size of the figure for the visualizations. Defaults to (15, 4).
78
+ - fig_size (tuple[int, int]): The size of the figure for the visualizations. Defaults to (15, 5).
79
+ - visualization (bool): Whether to display the visualization. Defaults to True.
80
+ - output_dir (str): The directory where the image will be saved, if `save` is True. Defaults to an empty string (current working directory).
81
+ - name (str): The name of the saved image file (without extension). Defaults to 'Outliers'.
82
+ - save (bool): Whether to save the visualization as an image file. Defaults to False.
83
+ - return_dict (bool): Whether to return the statistics as a dictionary. Defaults to False.
78
84
 
79
85
  ### Returns
80
- - None: The function generates plots and prints statistical metrics directly.
86
+ - dict or None: If `return_dict` is True, returns a dictionary with statistics. Otherwise, returns None.
81
87
 
82
88
  ### Features
83
89
  - Converts the column to numeric, coercing non-numeric values to NaN.
@@ -91,7 +97,10 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
91
97
  import pandas as pd
92
98
  df = pd.DataFrame({'A': [1, 2, 2, 3, 100]})
93
99
  outliers(df, column='A')
94
-
100
+
101
+ To save the visualization:
102
+ outliers(df, column='A', output_dir='./plots', name='outliers_example', save=True)
103
+
95
104
  For multiple columns:
96
105
  for column in df.select_dtypes(include=['number']).columns:
97
106
  ... outliers(df, column)
@@ -118,16 +127,6 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
118
127
 
119
128
  data[column] = pd.to_numeric(data[column], errors='coerce')
120
129
 
121
-
122
- if visualization:
123
- fig, axes = plt.subplots(1, 3, figsize=fig_size)
124
- fig.suptitle(f'Analysis for column {column}')
125
- sns.histplot(data=data, x=column, kde=True, ax=axes[0], color=color)
126
- axes[0].set_title('Distribution')
127
- sns.boxplot(data=data, y=column, ax=axes[1], color=color)
128
- axes[1].set_title('Boxplot')
129
-
130
-
131
130
  serie = data[column].dropna()
132
131
  if not serie.empty:
133
132
  min = np.min(serie)
@@ -146,7 +145,14 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
146
145
 
147
146
  oq = serie[(serie > tlo) | (serie < blo)].count()
148
147
 
149
- if visualization:
148
+ if visualization or save:
149
+ fig, axes = plt.subplots(1, 3, figsize=fig_size)
150
+ fig.suptitle(f'Analysis for column {column}')
151
+ sns.histplot(data=data, x=column, kde=True, ax=axes[0], color=color)
152
+ axes[0].set_title('Distribution')
153
+ sns.boxplot(data=data, y=column, ax=axes[1], color=color)
154
+ axes[1].set_title('Boxplot')
155
+
150
156
  step = 0.082
151
157
  y = 0.9
152
158
  axes[2].text(0.1, y, f'Min: {min:.4f}', transform=axes[2].transAxes); y -= step
@@ -168,7 +174,13 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
168
174
  axes[2].set_title(title)
169
175
  axes[2].axis('off')
170
176
  plt.tight_layout()
171
- plt.show()
177
+
178
+ if save:
179
+ output_path = os.path.join(output_dir, f'{name}.png')
180
+ plt.savefig(output_path)
181
+ if visualization:
182
+ plt.show()
183
+ if visualization or save:
172
184
  plt.close(fig)
173
185
 
174
186
  if return_dict:
File without changes
File without changes
File without changes