synapseTools 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {synapsetools-0.1.2/synapseTools.egg-info → synapsetools-0.1.3}/PKG-INFO +2 -2
- {synapsetools-0.1.2 → synapsetools-0.1.3}/README.md +1 -1
- {synapsetools-0.1.2 → synapsetools-0.1.3}/setup.py +1 -1
- {synapsetools-0.1.2 → synapsetools-0.1.3/synapseTools.egg-info}/PKG-INFO +2 -2
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapse_tools/eda.py +28 -16
- {synapsetools-0.1.2 → synapsetools-0.1.3}/LICENSE +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/MANIFEST.in +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/setup.cfg +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapseTools.egg-info/SOURCES.txt +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapseTools.egg-info/dependency_links.txt +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapseTools.egg-info/requires.txt +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapseTools.egg-info/top_level.txt +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapse_tools/__init__.py +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapse_tools/mel_spectrograms.py +0 -0
- {synapsetools-0.1.2 → synapsetools-0.1.3}/synapse_tools/phonemes.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: synapseTools
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Utility toolkit for data exploration, audio mel-spectrogram generation, and Spanish phonetic processing.
|
|
5
5
|
Home-page: https://github.com/synapse-ai-hub/synapseTools
|
|
6
6
|
Author: SYNAPSE AI SAS
|
|
@@ -84,7 +84,7 @@ Core principles:
|
|
|
84
84
|
|
|
85
85
|
- **Practical** – simple, focused functions with sensible defaults.
|
|
86
86
|
- **Composable** – integrate easily into your pipelines and notebooks.
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
|
|
89
89
|
---
|
|
90
90
|
|
|
@@ -28,7 +28,7 @@ Core principles:
|
|
|
28
28
|
|
|
29
29
|
- **Practical** – simple, focused functions with sensible defaults.
|
|
30
30
|
- **Composable** – integrate easily into your pipelines and notebooks.
|
|
31
|
-
|
|
31
|
+
|
|
32
32
|
|
|
33
33
|
---
|
|
34
34
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: synapseTools
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Utility toolkit for data exploration, audio mel-spectrogram generation, and Spanish phonetic processing.
|
|
5
5
|
Home-page: https://github.com/synapse-ai-hub/synapseTools
|
|
6
6
|
Author: SYNAPSE AI SAS
|
|
@@ -84,7 +84,7 @@ Core principles:
|
|
|
84
84
|
|
|
85
85
|
- **Practical** – simple, focused functions with sensible defaults.
|
|
86
86
|
- **Composable** – integrate easily into your pipelines and notebooks.
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
|
|
89
89
|
---
|
|
90
90
|
|
|
@@ -61,7 +61,7 @@ def nulls(data:pd.DataFrame, column:Union[str, int]) -> None:
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive Statistics", color:str='violet', fig_size:tuple[int, int]=(15,5), visualization:bool=True, return_dict:bool=False) -> Union[dict, None]:
|
|
64
|
+
def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive Statistics", color:str='violet', fig_size:tuple[int, int]=(15,5), visualization:bool=True, output_dir:str='', name:str='Outliers', save:bool=False, return_dict:bool=False) -> Union[dict, None]:
|
|
65
65
|
"""
|
|
66
66
|
Analyzes numerical outliers in a specified column of a DataFrame, visualizing its distribution,
|
|
67
67
|
boxplot, and basic statistics.
|
|
@@ -73,11 +73,17 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
|
|
|
73
73
|
### Args
|
|
74
74
|
- data (pd.DataFrame): The input DataFrame containing the data to analyze.
|
|
75
75
|
- column (str or int): The name of the column to analyze. Must contain numerical data.
|
|
76
|
+
- title (str): The title of the statistics subplot. Defaults to "Descriptive Statistics".
|
|
76
77
|
- color (str): The color for the histogram and boxplot. Defaults to 'violet'.
|
|
77
|
-
- fig_size (tuple[int, int]): The size of the figure for the visualizations. Defaults to (15,
|
|
78
|
+
- fig_size (tuple[int, int]): The size of the figure for the visualizations. Defaults to (15, 5).
|
|
79
|
+
- visualization (bool): Whether to display the visualization. Defaults to True.
|
|
80
|
+
- output_dir (str): The directory where the image will be saved, if `save` is True. Defaults to an empty string (current working directory).
|
|
81
|
+
- name (str): The name of the saved image file (without extension). Defaults to 'Outliers'.
|
|
82
|
+
- save (bool): Whether to save the visualization as an image file. Defaults to False.
|
|
83
|
+
- return_dict (bool): Whether to return the statistics as a dictionary. Defaults to False.
|
|
78
84
|
|
|
79
85
|
### Returns
|
|
80
|
-
- None:
|
|
86
|
+
- dict or None: If `return_dict` is True, returns a dictionary with statistics. Otherwise, returns None.
|
|
81
87
|
|
|
82
88
|
### Features
|
|
83
89
|
- Converts the column to numeric, coercing non-numeric values to NaN.
|
|
@@ -91,7 +97,10 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
|
|
|
91
97
|
import pandas as pd
|
|
92
98
|
df = pd.DataFrame({'A': [1, 2, 2, 3, 100]})
|
|
93
99
|
outliers(df, column='A')
|
|
94
|
-
|
|
100
|
+
|
|
101
|
+
To save the visualization:
|
|
102
|
+
outliers(df, column='A', output_dir='./plots', name='outliers_example', save=True)
|
|
103
|
+
|
|
95
104
|
For multiple columns:
|
|
96
105
|
for column in df.select_dtypes(include=['number']).columns:
|
|
97
106
|
... outliers(df, column)
|
|
@@ -118,16 +127,6 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
|
|
|
118
127
|
|
|
119
128
|
data[column] = pd.to_numeric(data[column], errors='coerce')
|
|
120
129
|
|
|
121
|
-
|
|
122
|
-
if visualization:
|
|
123
|
-
fig, axes = plt.subplots(1, 3, figsize=fig_size)
|
|
124
|
-
fig.suptitle(f'Analysis for column {column}')
|
|
125
|
-
sns.histplot(data=data, x=column, kde=True, ax=axes[0], color=color)
|
|
126
|
-
axes[0].set_title('Distribution')
|
|
127
|
-
sns.boxplot(data=data, y=column, ax=axes[1], color=color)
|
|
128
|
-
axes[1].set_title('Boxplot')
|
|
129
|
-
|
|
130
|
-
|
|
131
130
|
serie = data[column].dropna()
|
|
132
131
|
if not serie.empty:
|
|
133
132
|
min = np.min(serie)
|
|
@@ -146,7 +145,14 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
|
|
|
146
145
|
|
|
147
146
|
oq = serie[(serie > tlo) | (serie < blo)].count()
|
|
148
147
|
|
|
149
|
-
if visualization:
|
|
148
|
+
if visualization or save:
|
|
149
|
+
fig, axes = plt.subplots(1, 3, figsize=fig_size)
|
|
150
|
+
fig.suptitle(f'Analysis for column {column}')
|
|
151
|
+
sns.histplot(data=data, x=column, kde=True, ax=axes[0], color=color)
|
|
152
|
+
axes[0].set_title('Distribution')
|
|
153
|
+
sns.boxplot(data=data, y=column, ax=axes[1], color=color)
|
|
154
|
+
axes[1].set_title('Boxplot')
|
|
155
|
+
|
|
150
156
|
step = 0.082
|
|
151
157
|
y = 0.9
|
|
152
158
|
axes[2].text(0.1, y, f'Min: {min:.4f}', transform=axes[2].transAxes); y -= step
|
|
@@ -168,7 +174,13 @@ def outliers(data:pd.DataFrame, column:Union[str, int], title:str="Descriptive S
|
|
|
168
174
|
axes[2].set_title(title)
|
|
169
175
|
axes[2].axis('off')
|
|
170
176
|
plt.tight_layout()
|
|
171
|
-
|
|
177
|
+
|
|
178
|
+
if save:
|
|
179
|
+
output_path = os.path.join(output_dir, f'{name}.png')
|
|
180
|
+
plt.savefig(output_path)
|
|
181
|
+
if visualization:
|
|
182
|
+
plt.show()
|
|
183
|
+
if visualization or save:
|
|
172
184
|
plt.close(fig)
|
|
173
185
|
|
|
174
186
|
if return_dict:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|