statslibx 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {statslibx-0.2.0/statslibx.egg-info → statslibx-0.2.1}/PKG-INFO +1 -2
  2. {statslibx-0.2.0 → statslibx-0.2.1}/pyproject.toml +2 -3
  3. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/__init__.py +2 -2
  4. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/__init__.py +4 -3
  5. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/descriptive.py +10 -63
  6. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/inferential.py +7 -49
  7. {statslibx-0.2.0 → statslibx-0.2.1/statslibx.egg-info}/PKG-INFO +1 -2
  8. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/SOURCES.txt +2 -1
  9. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/requires.txt +0 -1
  10. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/top_level.txt +1 -0
  11. statslibx-0.2.1/tests/test1.py +14 -0
  12. {statslibx-0.2.0 → statslibx-0.2.1}/MANIFEST.in +0 -0
  13. {statslibx-0.2.0 → statslibx-0.2.1}/README.md +0 -0
  14. {statslibx-0.2.0 → statslibx-0.2.1}/setup.cfg +0 -0
  15. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/cli.py +0 -0
  16. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/computacional.py +0 -0
  17. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/course_completion.csv +0 -0
  18. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/iris.csv +0 -0
  19. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/penguins.csv +0 -0
  20. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/sp500_companies.csv +0 -0
  21. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/titanic.csv +0 -0
  22. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/io.py +0 -0
  23. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/preprocessing/__init__.py +0 -0
  24. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx/utils.py +0 -0
  25. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/dependency_links.txt +0 -0
  26. {statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/entry_points.txt +0 -0
{statslibx-0.2.0/statslibx.egg-info → statslibx-0.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.2.0
+Version: 0.2.1
 Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
 Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
 License: MIT
@@ -16,7 +16,6 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=1.5
-Requires-Dist: polars>=0.20
 Provides-Extra: viz
 Requires-Dist: seaborn>=0.11; extra == "viz"
 Requires-Dist: plotly>=5.0; extra == "viz"
{statslibx-0.2.0 → statslibx-0.2.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "statslibx"
-version = "0.2.0"
+version = "0.2.1"
 description = "StatsLibx - Librería de estadística descriptiva, inferencial y computacional"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -27,8 +27,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "pandas>=1.5",
-    "polars>=0.20"
+    "pandas>=1.5"
 ]
 
 [project.optional-dependencies]
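Note that polars>=0.20 is dropped from the required dependencies here, while descriptive.py further down still imports polars at module level and load_dataset still exposes backend="polars". So importing statslibx 0.2.1 in an environment without polars will likely fail at import time, and users who relied on the implicit dependency now have to install polars themselves. A minimal caller-side pre-flight check, offered as a sketch rather than anything the package itself provides:

# Hypothetical pre-flight check: polars is no longer installed automatically with
# statslibx 0.2.1, but the library's modules still import it, so verify it is present.
try:
    import polars  # noqa: F401  (only checking availability)
except ImportError as exc:
    raise SystemExit(
        "polars is not pulled in by `pip install statslibx` anymore; install it manually"
    ) from exc

import statslibx
print(statslibx.__version__)  # expected: "0.2.1"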
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx/__init__.py
@@ -1,10 +1,10 @@
 """
 StatsLibx - Librería de Estadística para Python
 Autor: Emmanuel Ascendra
-Versión: 0.2.0
+Versión: 0.2.1
 """
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 __author__ = "Emmanuel Ascendra"
 
 # Importar las clases principales
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx/datasets/__init__.py
@@ -61,8 +61,9 @@ def load_dataset(
     name: str,
     backend: Literal["pandas", "polars"] = "pandas",
     return_X_y: Optional[Tuple[List[str], str]] = None,
+    sep: str = ",",
     save: Optional[bool] = False,
-    filename: Optional[str] = None
+    filename: Optional[str] = None,
 ) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
     """
     Carga un dataset interno del paquete.
@@ -101,7 +102,7 @@ def load_dataset(
     data_bytes = pkgutil.get_data("statslibx.datasets", name)
     if data_bytes is not None:
         df = (
-            pd.read_csv(io.BytesIO(data_bytes))
+            pd.read_csv(io.BytesIO(data_bytes), sep=sep)
             if backend == "pandas"
             else pl.read_csv(io.BytesIO(data_bytes))
         )
@@ -112,7 +113,7 @@ def load_dataset(
     if df is None:
         try:
             df = (
-                pd.read_csv(name)
+                pd.read_csv(name, sep=sep)
                 if backend == "pandas"
                 else pl.read_csv(name)
            )
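The new sep argument shown above is forwarded to pd.read_csv both for bundled datasets and for the file-path fallback (the polars branch does not receive it in this diff). A short usage sketch, assuming default arguments return the loaded DataFrame; iris.csv ships with the package, while the semicolon-delimited path is made up for illustration:

from statslibx import load_dataset

# Bundled dataset: the default sep="," is passed through to pd.read_csv.
iris = load_dataset("iris.csv")

# File-path fallback: a local, semicolon-delimited CSV (hypothetical path).
bank = load_dataset("data/bank.csv", sep=";")
print(bank.head())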
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx/descriptive.py
@@ -3,7 +3,6 @@ import pandas as pd
 import polars as pl
 from typing import Optional, Union, Literal, List
 from datetime import datetime
-import flet as ft
 import os
 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -81,10 +80,6 @@ class DescriptiveStats:
     """
 
     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                 sep: str = None,
-                 decimal: str = None,
-                 thousand: str = None,
-                 backend: Literal['pandas', 'polars'] = 'pandas',
                  lang: Literal['es-ES', 'en-US'] = 'es-ES'):
        """
        # Initialize DataFrame
@@ -92,9 +87,6 @@ class DescriptiveStats:
        ## **Parameters:**
 
        - **data** : Data to analyze
-        - **sep** : Column separator
-        - **decimal** : Decimal separator
-        - **thousand** : Thousand separator
        - **backend** : 'pandas' or 'polars' for processing
          (Proximamente estara habilitado polars para big data)
 
@@ -104,72 +96,26 @@ class DescriptiveStats:
        stats = DescriptiveStats(data)
        ``
        """
-
-        if isinstance(data, str) and os.path.exists(data):
-            data = DescriptiveStats.from_file(data).data
-
-        if isinstance(data, pl.DataFrame):
+        if isinstance(data, pd.DataFrame):
+            self.data = data
+        elif isinstance(data, np.ndarray):
+            self.data = pd.DataFrame(data)
+        else:
            raise TypeError(
-                "Polars aún no soportado. Use pandas.DataFrame."
+                "Data must be a pandas.DataFrame or numpy.ndarray."
            )
 
-
        if isinstance(data, np.ndarray):
            if data.ndim == 1:
                data = pd.DataFrame({'var': data})
            else:
-                data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])]) \
+                data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])],
+                                    sep=self.sep) \
                    if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
-
-        self.data = data
-        self.backend = backend
+
        self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
-        self.sep = sep
-        self.decimal = decimal
-        self.thousand = thousand
        self.lang = lang
-
-    @classmethod
-    def from_file(self, path: str):
-        """
-        Carga automática de archivos y devuelve instancia de Intelligence.
-        Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-        Automatic file upload and returns Intelligence instance.
-        Supports CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-
-        Parametros / Parameters:
-        ------------------------
-        path : str
-            Ruta del archivo
-            File path
-        """
-        if not os.path.exists(path):
-            raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
-
-        ext = os.path.splitext(path)[1].lower()
-
-        if ext == ".csv":
-            df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-
-        elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
-
-        elif ext in [".txt", ".tsv"]:
-            df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-
-        elif ext == ".json":
-            df = pd.read_json(path)
 
-        elif ext == ".parquet":
-            df = pd.read_parquet(path)
-
-        elif ext == ".feather":
-            df = pd.read_feather(path)
-
-        else:
-            raise ValueError(f"Formato no soportado / Unsupported format: {ext}")
-
-        return DescriptiveStats(df)
 
    # ============= MÉTODOS UNIVARIADOS =============
 
@@ -1262,3 +1208,4 @@ class LinearRegressionResult:
        plt.ylabel("Residuos")
        plt.title("Residuos vs Predicciones")
        plt.show()
+
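With from_file and the backend/sep/decimal/thousand parameters removed, DescriptiveStats now expects a pandas DataFrame or a NumPy array and raises TypeError for anything else, including file paths. A minimal sketch of that 0.2.1 constructor contract, with made-up column values:

import numpy as np
import pandas as pd
from statslibx import DescriptiveStats

df = pd.DataFrame({"age": [23, 31, 27], "income": [1200.0, 1540.5, 980.0]})
stats = DescriptiveStats(df)                       # DataFrame is stored as-is
print(stats.data.shape)

vec = DescriptiveStats(np.array([1.0, 2.0, 3.0]))  # 1-D array is wrapped in a DataFrame
print(vec.data.shape)

try:
    DescriptiveStats("data.csv")                   # paths are no longer accepted
except TypeError as exc:
    print(exc)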
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx/inferential.py
@@ -69,8 +69,6 @@ class InferentialStats:
     """
 
     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                 backend: Literal['pandas', 'polars'] = 'pandas',
-                 sep: str = None, decimal: str = None, thousand: str = None,
                  lang: Literal['es-ES', 'en-US'] = 'es-ES'):
        """
        Initialize DataFrame
@@ -79,17 +77,16 @@ class InferentialStats:
        -----------
        data : DataFrame o ndarray
            Data to analyze
-        backend : str
-            'pandas' or 'polars' for processing
        """
 
-        if isinstance(data, str) and os.path.exists(data):
-            data = InferentialStats.from_file(data).data
-
-        if isinstance(data, pl.DataFrame):
+        if isinstance(data, pd.DataFrame):
+            self.data = data
+        elif isinstance(data, np.ndarray):
+            self.data = pd.DataFrame(data)
+        else:
            raise TypeError(
-                "Polars aún no soportado. Use pandas.DataFrame."
-            )
+                "Data must be a pandas.DataFrame or numpy.ndarray."
+            )
 
        if isinstance(data, np.ndarray):
            if data.ndim == 1:
@@ -98,48 +95,9 @@ class InferentialStats:
                data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
 
        self.data = data
-        self.backend = backend
        self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
-        self.sep = sep
-        self.decimal = decimal
-        self.thousand = thousand
        self.lang = lang
 
-    @classmethod
-    def from_file(path: str):
-        """
-        Carga automática de archivos y devuelve instancia de Intelligence.
-        Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-        """
-
-        if not os.path.exists(path):
-            raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
-
-        ext = os.path.splitext(path)[1].lower()
-
-        if ext == ".csv":
-            df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-
-        elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
-
-        elif ext in [".txt", ".tsv"]:
-            df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-
-        elif ext == ".json":
-            df = pd.read_json(path)
-
-        elif ext == ".parquet":
-            df = pd.read_parquet(path)
-
-        elif ext == ".feather":
-            df = pd.read_feather(path)
-
-        else:
-            raise ValueError(f"Formato no soportado: {ext}")
-
-        return InferentialStats(df)
-
    # ============= INTERVALOS DE CONFIANZA =============
 
    def confidence_interval(self, column: str, confidence: float = 0.95,
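InferentialStats gets the same simplification: the constructor now takes only the data and lang, and its from_file helper is gone. A short sketch with synthetic data; confidence_interval is taken from the signature visible in the context above, and its exact return value is not shown in this diff, so it is simply printed:

import numpy as np
import pandas as pd
from statslibx import InferentialStats

rng = np.random.default_rng(0)
df = pd.DataFrame({"height": rng.normal(170, 8, size=100)})

infer = InferentialStats(df, lang="en-US")
print(infer.data.describe())

# confidence_interval(column, confidence=...) appears in the unchanged context above.
print(infer.confidence_interval("height", confidence=0.95))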
{statslibx-0.2.0 → statslibx-0.2.1/statslibx.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.2.0
+Version: 0.2.1
 Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
 Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
 License: MIT
@@ -16,7 +16,6 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=1.5
-Requires-Dist: polars>=0.20
 Provides-Extra: viz
 Requires-Dist: seaborn>=0.11; extra == "viz"
 Requires-Dist: plotly>=5.0; extra == "viz"
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/SOURCES.txt
@@ -20,4 +20,5 @@ statslibx/datasets/iris.csv
 statslibx/datasets/penguins.csv
 statslibx/datasets/sp500_companies.csv
 statslibx/datasets/titanic.csv
-statslibx/preprocessing/__init__.py
+statslibx/preprocessing/__init__.py
+tests/test1.py
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/requires.txt
@@ -1,5 +1,4 @@
 pandas>=1.5
-polars>=0.20
 
 [advanced]
 scikit-learn>=1.0
{statslibx-0.2.0 → statslibx-0.2.1}/statslibx.egg-info/top_level.txt
@@ -1,3 +1,4 @@
 dist
 figures
 statslibx
+tests
statslibx-0.2.1/tests/test1.py
@@ -0,0 +1,14 @@
+from statslibx import load_dataset, DescriptiveStats, InferentialStats
+import pandas as pd
+# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
+
+df = load_dataset(r"tests\bank (1).csv", sep=";")
+stats = DescriptiveStats(df)
+print(stats.data)
+
+infer = InferentialStats(df)
+print(infer.data)
+
+
+
+
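The new tests/test1.py reads a local tests\bank (1).csv that is not part of the sdist, so it only runs where that file exists. A pytest-style counterpart against a bundled dataset could look like the sketch below; it is not part of the package and assumes load_dataset accepts the bundled file name "iris.csv" as-is and returns a DataFrame with default arguments:

# Hypothetical pytest variant of tests/test1.py using a dataset shipped with the package.
import pandas as pd
from statslibx import load_dataset, DescriptiveStats, InferentialStats


def test_load_and_wrap_bundled_dataset():
    df = load_dataset("iris.csv")          # bundled, comma-separated CSV
    assert isinstance(df, pd.DataFrame)
    assert not df.empty

    stats = DescriptiveStats(df)
    infer = InferentialStats(df)
    assert stats.data.shape == df.shape
    assert infer.data.shape == df.shape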