utilitz 0.0.3__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {utilitz-0.0.3 → utilitz-0.2.0}/.gitignore +1 -0
- {utilitz-0.0.3 → utilitz-0.2.0}/PKG-INFO +1 -1
- utilitz-0.2.0/__upload__.cmd +3 -0
- {utilitz-0.0.3 → utilitz-0.2.0}/data/prueba.xlsx +0 -0
- utilitz-0.2.0/notebooks/.ipynb +267 -0
- {utilitz-0.0.3 → utilitz-0.2.0}/pyproject.toml +1 -1
- {utilitz-0.0.3 → utilitz-0.2.0}/src/utilitz/excel.py +42 -11
- utilitz-0.2.0/src/utilitz/path.py +54 -0
- utilitz-0.0.3/notebooks/.ipynb +0 -323
- {utilitz-0.0.3 → utilitz-0.2.0}/LICENSE +0 -0
- {utilitz-0.0.3 → utilitz-0.2.0}/README.md +0 -0
- {utilitz-0.0.3 → utilitz-0.2.0}/src/utilitz/__init__.py +0 -0
|
Binary file
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "code",
|
|
5
|
+
"execution_count": 2,
|
|
6
|
+
"id": "8d63269f",
|
|
7
|
+
"metadata": {},
|
|
8
|
+
"outputs": [],
|
|
9
|
+
"source": [
|
|
10
|
+
"import os\n",
|
|
11
|
+
"if not 'id_0123456789876543210' in locals():\n",
|
|
12
|
+
" os.chdir(os.path.split(os.getcwd())[0])\n",
|
|
13
|
+
" id_0123456789876543210 = None"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"cell_type": "code",
|
|
18
|
+
"execution_count": 3,
|
|
19
|
+
"id": "c580ee9c",
|
|
20
|
+
"metadata": {},
|
|
21
|
+
"outputs": [],
|
|
22
|
+
"source": [
|
|
23
|
+
"import numpy as np"
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"cell_type": "code",
|
|
28
|
+
"execution_count": 4,
|
|
29
|
+
"id": "9970c022",
|
|
30
|
+
"metadata": {},
|
|
31
|
+
"outputs": [],
|
|
32
|
+
"source": [
|
|
33
|
+
"import pandas as pd\n",
|
|
34
|
+
"import re\n",
|
|
35
|
+
"\n",
|
|
36
|
+
"from src.utilitz.excel import decode_column, read_excel_table"
|
|
37
|
+
]
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"cell_type": "code",
|
|
41
|
+
"execution_count": 6,
|
|
42
|
+
"id": "edfb3b9e",
|
|
43
|
+
"metadata": {},
|
|
44
|
+
"outputs": [],
|
|
45
|
+
"source": [
|
|
46
|
+
"data = read_excel_table('data/prueba.xlsx',\n",
|
|
47
|
+
" usecols='B:H',\n",
|
|
48
|
+
" findheaders=True,\n",
|
|
49
|
+
" checkcol='B',\n",
|
|
50
|
+
" patterncol=r'\\d+')\n",
|
|
51
|
+
"\n"
|
|
52
|
+
]
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"cell_type": "code",
|
|
56
|
+
"execution_count": 10,
|
|
57
|
+
"id": "1cbeb7e6",
|
|
58
|
+
"metadata": {},
|
|
59
|
+
"outputs": [
|
|
60
|
+
{
|
|
61
|
+
"data": {
|
|
62
|
+
"text/html": [
|
|
63
|
+
"<div>\n",
|
|
64
|
+
"<style scoped>\n",
|
|
65
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
|
66
|
+
" vertical-align: middle;\n",
|
|
67
|
+
" }\n",
|
|
68
|
+
"\n",
|
|
69
|
+
" .dataframe tbody tr th {\n",
|
|
70
|
+
" vertical-align: top;\n",
|
|
71
|
+
" }\n",
|
|
72
|
+
"\n",
|
|
73
|
+
" .dataframe thead th {\n",
|
|
74
|
+
" text-align: right;\n",
|
|
75
|
+
" }\n",
|
|
76
|
+
"</style>\n",
|
|
77
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
|
78
|
+
" <thead>\n",
|
|
79
|
+
" <tr style=\"text-align: right;\">\n",
|
|
80
|
+
" <th></th>\n",
|
|
81
|
+
" <th>Unnamed: 1</th>\n",
|
|
82
|
+
" <th>ProductSubcategory</th>\n",
|
|
83
|
+
" <th>Product</th>\n",
|
|
84
|
+
" <th>ProductColor</th>\n",
|
|
85
|
+
" <th>Model</th>\n",
|
|
86
|
+
" <th>Unnamed: 6</th>\n",
|
|
87
|
+
" <th>Unnamed: 7</th>\n",
|
|
88
|
+
" </tr>\n",
|
|
89
|
+
" </thead>\n",
|
|
90
|
+
" <tbody>\n",
|
|
91
|
+
" <tr>\n",
|
|
92
|
+
" <th>0</th>\n",
|
|
93
|
+
" <td>1</td>\n",
|
|
94
|
+
" <td>Road Bikes</td>\n",
|
|
95
|
+
" <td>Road-150 Red, 62</td>\n",
|
|
96
|
+
" <td>Red</td>\n",
|
|
97
|
+
" <td>Road-150</td>\n",
|
|
98
|
+
" <td>NaN</td>\n",
|
|
99
|
+
" <td>NaN</td>\n",
|
|
100
|
+
" </tr>\n",
|
|
101
|
+
" <tr>\n",
|
|
102
|
+
" <th>1</th>\n",
|
|
103
|
+
" <td>2</td>\n",
|
|
104
|
+
" <td>Mountain Bikes</td>\n",
|
|
105
|
+
" <td>Mountain-100 Silver, 44</td>\n",
|
|
106
|
+
" <td>Silver</td>\n",
|
|
107
|
+
" <td>Mountain-100</td>\n",
|
|
108
|
+
" <td>NaN</td>\n",
|
|
109
|
+
" <td>NaN</td>\n",
|
|
110
|
+
" </tr>\n",
|
|
111
|
+
" <tr>\n",
|
|
112
|
+
" <th>2</th>\n",
|
|
113
|
+
" <td>2</td>\n",
|
|
114
|
+
" <td>Mountain Bikes</td>\n",
|
|
115
|
+
" <td>Mountain-100 Silver, 44</td>\n",
|
|
116
|
+
" <td>Silver</td>\n",
|
|
117
|
+
" <td>Mountain-100</td>\n",
|
|
118
|
+
" <td>NaN</td>\n",
|
|
119
|
+
" <td>NaN</td>\n",
|
|
120
|
+
" </tr>\n",
|
|
121
|
+
" <tr>\n",
|
|
122
|
+
" <th>3</th>\n",
|
|
123
|
+
" <td>3</td>\n",
|
|
124
|
+
" <td>Road Bikes</td>\n",
|
|
125
|
+
" <td>Road-650 Black, 62</td>\n",
|
|
126
|
+
" <td>Black</td>\n",
|
|
127
|
+
" <td>Road-650</td>\n",
|
|
128
|
+
" <td>NaN</td>\n",
|
|
129
|
+
" <td>NaN</td>\n",
|
|
130
|
+
" </tr>\n",
|
|
131
|
+
" <tr>\n",
|
|
132
|
+
" <th>4</th>\n",
|
|
133
|
+
" <td>3</td>\n",
|
|
134
|
+
" <td>Mountain Bikes</td>\n",
|
|
135
|
+
" <td>Mountain-100 Silver, 44</td>\n",
|
|
136
|
+
" <td>Silver</td>\n",
|
|
137
|
+
" <td>Mountain-100</td>\n",
|
|
138
|
+
" <td>NaN</td>\n",
|
|
139
|
+
" <td>NaN</td>\n",
|
|
140
|
+
" </tr>\n",
|
|
141
|
+
" <tr>\n",
|
|
142
|
+
" <th>...</th>\n",
|
|
143
|
+
" <td>...</td>\n",
|
|
144
|
+
" <td>...</td>\n",
|
|
145
|
+
" <td>...</td>\n",
|
|
146
|
+
" <td>...</td>\n",
|
|
147
|
+
" <td>...</td>\n",
|
|
148
|
+
" <td>...</td>\n",
|
|
149
|
+
" <td>...</td>\n",
|
|
150
|
+
" </tr>\n",
|
|
151
|
+
" <tr>\n",
|
|
152
|
+
" <th>1196</th>\n",
|
|
153
|
+
" <td>599</td>\n",
|
|
154
|
+
" <td>Road Bikes</td>\n",
|
|
155
|
+
" <td>Road-150 Red, 48</td>\n",
|
|
156
|
+
" <td>Red</td>\n",
|
|
157
|
+
" <td>Road-150</td>\n",
|
|
158
|
+
" <td>NaN</td>\n",
|
|
159
|
+
" <td>NaN</td>\n",
|
|
160
|
+
" </tr>\n",
|
|
161
|
+
" <tr>\n",
|
|
162
|
+
" <th>1197</th>\n",
|
|
163
|
+
" <td>600</td>\n",
|
|
164
|
+
" <td>Road Bikes</td>\n",
|
|
165
|
+
" <td>Road-150 Red, 52</td>\n",
|
|
166
|
+
" <td>Red</td>\n",
|
|
167
|
+
" <td>Road-150</td>\n",
|
|
168
|
+
" <td>NaN</td>\n",
|
|
169
|
+
" <td>NaN</td>\n",
|
|
170
|
+
" </tr>\n",
|
|
171
|
+
" <tr>\n",
|
|
172
|
+
" <th>1198</th>\n",
|
|
173
|
+
" <td>600</td>\n",
|
|
174
|
+
" <td>Road Bikes</td>\n",
|
|
175
|
+
" <td>Road-650 Black, 48</td>\n",
|
|
176
|
+
" <td>Black</td>\n",
|
|
177
|
+
" <td>Road-650</td>\n",
|
|
178
|
+
" <td>NaN</td>\n",
|
|
179
|
+
" <td>NaN</td>\n",
|
|
180
|
+
" </tr>\n",
|
|
181
|
+
" <tr>\n",
|
|
182
|
+
" <th>1199</th>\n",
|
|
183
|
+
" <td>601</td>\n",
|
|
184
|
+
" <td>Mountain Bikes</td>\n",
|
|
185
|
+
" <td>Mountain-100 Black, 42</td>\n",
|
|
186
|
+
" <td>Black</td>\n",
|
|
187
|
+
" <td>Mountain-100</td>\n",
|
|
188
|
+
" <td>NaN</td>\n",
|
|
189
|
+
" <td>NaN</td>\n",
|
|
190
|
+
" </tr>\n",
|
|
191
|
+
" <tr>\n",
|
|
192
|
+
" <th>1200</th>\n",
|
|
193
|
+
" <td>601</td>\n",
|
|
194
|
+
" <td>Mountain Bikes</td>\n",
|
|
195
|
+
" <td>Mountain-100 Black, 44</td>\n",
|
|
196
|
+
" <td>Black</td>\n",
|
|
197
|
+
" <td>Mountain-100</td>\n",
|
|
198
|
+
" <td>NaN</td>\n",
|
|
199
|
+
" <td>NaN</td>\n",
|
|
200
|
+
" </tr>\n",
|
|
201
|
+
" </tbody>\n",
|
|
202
|
+
"</table>\n",
|
|
203
|
+
"<p>1201 rows × 7 columns</p>\n",
|
|
204
|
+
"</div>"
|
|
205
|
+
],
|
|
206
|
+
"text/plain": [
|
|
207
|
+
" Unnamed: 1 ProductSubcategory Product ProductColor \\\n",
|
|
208
|
+
"0 1 Road Bikes Road-150 Red, 62 Red \n",
|
|
209
|
+
"1 2 Mountain Bikes Mountain-100 Silver, 44 Silver \n",
|
|
210
|
+
"2 2 Mountain Bikes Mountain-100 Silver, 44 Silver \n",
|
|
211
|
+
"3 3 Road Bikes Road-650 Black, 62 Black \n",
|
|
212
|
+
"4 3 Mountain Bikes Mountain-100 Silver, 44 Silver \n",
|
|
213
|
+
"... ... ... ... ... \n",
|
|
214
|
+
"1196 599 Road Bikes Road-150 Red, 48 Red \n",
|
|
215
|
+
"1197 600 Road Bikes Road-150 Red, 52 Red \n",
|
|
216
|
+
"1198 600 Road Bikes Road-650 Black, 48 Black \n",
|
|
217
|
+
"1199 601 Mountain Bikes Mountain-100 Black, 42 Black \n",
|
|
218
|
+
"1200 601 Mountain Bikes Mountain-100 Black, 44 Black \n",
|
|
219
|
+
"\n",
|
|
220
|
+
" Model Unnamed: 6 Unnamed: 7 \n",
|
|
221
|
+
"0 Road-150 NaN NaN \n",
|
|
222
|
+
"1 Mountain-100 NaN NaN \n",
|
|
223
|
+
"2 Mountain-100 NaN NaN \n",
|
|
224
|
+
"3 Road-650 NaN NaN \n",
|
|
225
|
+
"4 Mountain-100 NaN NaN \n",
|
|
226
|
+
"... ... ... ... \n",
|
|
227
|
+
"1196 Road-150 NaN NaN \n",
|
|
228
|
+
"1197 Road-150 NaN NaN \n",
|
|
229
|
+
"1198 Road-650 NaN NaN \n",
|
|
230
|
+
"1199 Mountain-100 NaN NaN \n",
|
|
231
|
+
"1200 Mountain-100 NaN NaN \n",
|
|
232
|
+
"\n",
|
|
233
|
+
"[1201 rows x 7 columns]"
|
|
234
|
+
]
|
|
235
|
+
},
|
|
236
|
+
"execution_count": 10,
|
|
237
|
+
"metadata": {},
|
|
238
|
+
"output_type": "execute_result"
|
|
239
|
+
}
|
|
240
|
+
],
|
|
241
|
+
"source": [
|
|
242
|
+
"data[2]"
|
|
243
|
+
]
|
|
244
|
+
}
|
|
245
|
+
],
|
|
246
|
+
"metadata": {
|
|
247
|
+
"kernelspec": {
|
|
248
|
+
"display_name": "python311",
|
|
249
|
+
"language": "python",
|
|
250
|
+
"name": "python3"
|
|
251
|
+
},
|
|
252
|
+
"language_info": {
|
|
253
|
+
"codemirror_mode": {
|
|
254
|
+
"name": "ipython",
|
|
255
|
+
"version": 3
|
|
256
|
+
},
|
|
257
|
+
"file_extension": ".py",
|
|
258
|
+
"mimetype": "text/x-python",
|
|
259
|
+
"name": "python",
|
|
260
|
+
"nbconvert_exporter": "python",
|
|
261
|
+
"pygments_lexer": "ipython3",
|
|
262
|
+
"version": "3.11.13"
|
|
263
|
+
}
|
|
264
|
+
},
|
|
265
|
+
"nbformat": 4,
|
|
266
|
+
"nbformat_minor": 5
|
|
267
|
+
}
|
|
@@ -51,12 +51,12 @@ def decode_column(code):
|
|
|
51
51
|
|
|
52
52
|
def read_excel_table(io,
|
|
53
53
|
sheet_name=0,
|
|
54
|
-
usecols=None,
|
|
55
|
-
header=0,
|
|
54
|
+
usecols=None, # En el futuro detectar rangos de columnas
|
|
55
|
+
header=0, # En el futuro utilizar multiheaders
|
|
56
56
|
nrows=None,
|
|
57
|
-
checkcol=None,
|
|
57
|
+
checkcol=None, # En el futuro puede ser numérica
|
|
58
58
|
patterncol=None,
|
|
59
|
-
|
|
59
|
+
findheaders=False,
|
|
60
60
|
raw_df=None,
|
|
61
61
|
**kwargs):
|
|
62
62
|
"""
|
|
@@ -82,9 +82,9 @@ def read_excel_table(io,
|
|
|
82
82
|
patterncol : str, optional
|
|
83
83
|
Regular expression. Only rows matching this pattern in `checkcol`
|
|
84
84
|
are included.
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
findheaders : bool, default=False
|
|
86
|
+
If True, detects multiple tables in the sheet by looking for
|
|
87
|
+
non-empty cells in `checkcol`. Returns a list of DataFrames.
|
|
88
88
|
raw_df : pandas.DataFrame, optional
|
|
89
89
|
Preloaded DataFrame to avoid re-reading the Excel file.
|
|
90
90
|
**kwargs : dict
|
|
@@ -103,13 +103,44 @@ def read_excel_table(io,
|
|
|
103
103
|
>>> # Read rows in column "B" that start with digits
|
|
104
104
|
>>> df = read_excel_table("data.xlsx", checkcol="B", patterncol=r"^\\d+")
|
|
105
105
|
"""
|
|
106
|
-
if raw_df is None:
|
|
107
|
-
raw_df
|
|
106
|
+
if raw_df is not None:
|
|
107
|
+
raise ValueError('"raw_df" is not implemented yet')
|
|
108
|
+
|
|
108
109
|
if nrows is None:
|
|
109
110
|
max_nrows = float('inf')
|
|
110
111
|
|
|
111
|
-
if
|
|
112
|
-
|
|
112
|
+
if findheaders:
|
|
113
|
+
raw_df = pd.read_excel(io,
|
|
114
|
+
header=None,
|
|
115
|
+
sheet_name=sheet_name,
|
|
116
|
+
dtype=str)
|
|
117
|
+
# En el futuro se puede utilizar la primera columna de usecols
|
|
118
|
+
checkcol = 'A' if checkcol is None else checkcol
|
|
119
|
+
column = raw_df[raw_df.columns[decode_column(
|
|
120
|
+
checkcol)]].reset_index(drop=True)
|
|
121
|
+
condition = ~column.isna()
|
|
122
|
+
if patterncol:
|
|
123
|
+
condition &= column.apply(lambda x:
|
|
124
|
+
bool(re.match(patterncol, x))
|
|
125
|
+
if isinstance(x, str) else False)
|
|
126
|
+
|
|
127
|
+
headers = (column[condition.astype(int).diff() == 1].index-1).tolist()
|
|
128
|
+
if condition.iloc[0]:
|
|
129
|
+
headers = [None] + headers
|
|
130
|
+
|
|
131
|
+
return [read_excel_table(io,
|
|
132
|
+
sheet_name=sheet_name,
|
|
133
|
+
usecols=usecols,
|
|
134
|
+
header=header,
|
|
135
|
+
nrows=nrows,
|
|
136
|
+
checkcol=checkcol,
|
|
137
|
+
patterncol=patterncol,
|
|
138
|
+
findheaders=False,
|
|
139
|
+
raw_df=None,
|
|
140
|
+
**kwargs) for header in headers]
|
|
141
|
+
raw_df = pd.read_excel(io,
|
|
142
|
+
sheet_name=sheet_name,
|
|
143
|
+
dtype=str)
|
|
113
144
|
|
|
114
145
|
if checkcol is not None:
|
|
115
146
|
nrows = 0
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def locate_project(level=None, forced=False):
    """
    Change the working directory to a project root, or restore the previous one.

    The directory that was current before a successful change is remembered
    in this module's globals so that a later call can switch back to it.

    Args:
        level (str or int, optional): If str, a glob pattern: the current
            directory and then each parent in turn are searched, and the
            first directory containing a match becomes the working directory.
            If int, the number of parent directories to move up.
            If None, only the restore step (if any) is performed.
        forced (bool, optional): If True and a previous directory is stored,
            restore it first and then apply 'level' starting from there.

    Behavior:
        - A stored directory plus (no 'level' or 'forced'): the stored
          directory is restored and forgotten.
        - A stored directory, a 'level' and not 'forced': nothing happens.
        - Otherwise 'level' is resolved; if the target differs from the
          current directory, the process changes to it and stores the old one.
        - Prints info messages on directory changes.

    Raises:
        ValueError: If 'level' is neither a str nor a non-negative int.
        FileNotFoundError: If no directory matches the pattern or there are
            fewer than 'level' parent directories (including when starting
            at the filesystem root).
    """
    global_vars = globals()
    varname = '__locate__project__'
    cwd = Path.cwd()

    if varname in global_vars:
        if level is None or forced:
            # Return to the directory saved by the last successful change.
            cwd = global_vars[varname]
            os.chdir(cwd)
            del global_vars[varname]
            print(f'[INFO] Working directory restored: {cwd}')
        if not forced:
            # NOTE(review): a non-forced call with 'level' while a directory
            # is already stored is treated as a no-op, which keeps repeated
            # calls idempotent -- confirm this is the intended nesting.
            return

    if level is None:
        return

    # The starting directory followed by every ancestor up to the root.
    ancestors = (cwd, *cwd.parents)
    if isinstance(level, str):
        # First directory, walking upwards, holding something that matches.
        target = next((folder for folder in ancestors
                       if any(folder.glob(level))), None)
    elif isinstance(level, int) and level >= 0:
        target = ancestors[level] if level < len(ancestors) else None
    else:
        raise ValueError('level must be a str or a non-negative int')

    # Report a failed search unconditionally; previously a failure that
    # started at the filesystem root was silently ignored.
    if target is None:
        raise FileNotFoundError(
            f"Could not find a directory matching level '{level}' from {cwd}"
        )

    if target != cwd:
        os.chdir(target)
        global_vars[varname] = cwd
        print(f"[INFO] Working directory changed to: {target}")
|
utilitz-0.0.3/notebooks/.ipynb
DELETED
|
@@ -1,323 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"cells": [
|
|
3
|
-
{
|
|
4
|
-
"cell_type": "code",
|
|
5
|
-
"execution_count": 6,
|
|
6
|
-
"id": "feb25bd1",
|
|
7
|
-
"metadata": {},
|
|
8
|
-
"outputs": [],
|
|
9
|
-
"source": [
|
|
10
|
-
"import os\n",
|
|
11
|
-
"if not 'id_0123456789876543210' in locals():\n",
|
|
12
|
-
" os.chdir(os.path.split(os.getcwd())[0])\n",
|
|
13
|
-
" id_0123456789876543210 = None"
|
|
14
|
-
]
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"cell_type": "code",
|
|
18
|
-
"execution_count": 7,
|
|
19
|
-
"id": "12e0cde8",
|
|
20
|
-
"metadata": {},
|
|
21
|
-
"outputs": [],
|
|
22
|
-
"source": [
|
|
23
|
-
"import pandas as pd\n",
|
|
24
|
-
"import re\n",
|
|
25
|
-
"from src.utilitz.excel import decode_column, encode_column"
|
|
26
|
-
]
|
|
27
|
-
},
|
|
28
|
-
{
|
|
29
|
-
"cell_type": "code",
|
|
30
|
-
"execution_count": 3,
|
|
31
|
-
"id": "7ed1af29",
|
|
32
|
-
"metadata": {},
|
|
33
|
-
"outputs": [],
|
|
34
|
-
"source": [
|
|
35
|
-
"def read_excel_table(io,\n",
|
|
36
|
-
" sheet_name=0,\n",
|
|
37
|
-
" usecols=None,\n",
|
|
38
|
-
" header=0,\n",
|
|
39
|
-
" nrows=None,\n",
|
|
40
|
-
" checkcol=None,\n",
|
|
41
|
-
" patterncol=None,\n",
|
|
42
|
-
" findtable=False,\n",
|
|
43
|
-
" raw_df=None,\n",
|
|
44
|
-
" **kwargs):\n",
|
|
45
|
-
" if raw_df is None:\n",
|
|
46
|
-
" raw_df = pd.read_excel(io, sheet_name=sheet_name)\n",
|
|
47
|
-
" if nrows is None:\n",
|
|
48
|
-
" max_nrows = float('inf')\n",
|
|
49
|
-
"\n",
|
|
50
|
-
" if header is None and findtable:\n",
|
|
51
|
-
" pass\n",
|
|
52
|
-
"\n",
|
|
53
|
-
" if checkcol is not None:\n",
|
|
54
|
-
" nrows = 0\n",
|
|
55
|
-
" check_column = raw_df.iloc[header:, decode_column(checkcol)]\n",
|
|
56
|
-
"\n",
|
|
57
|
-
" for x in check_column:\n",
|
|
58
|
-
" if not pd.isna(x) and nrows < max_nrows:\n",
|
|
59
|
-
" if patterncol and re.match(patterncol, str(x)) or not patterncol:\n",
|
|
60
|
-
" nrows += 1\n",
|
|
61
|
-
" else:\n",
|
|
62
|
-
" break\n",
|
|
63
|
-
"\n",
|
|
64
|
-
" return pd.read_excel(io,\n",
|
|
65
|
-
" sheet_name=sheet_name,\n",
|
|
66
|
-
" usecols=usecols,\n",
|
|
67
|
-
" header=header,\n",
|
|
68
|
-
" nrows=nrows,\n",
|
|
69
|
-
" **kwargs)"
|
|
70
|
-
]
|
|
71
|
-
},
|
|
72
|
-
{
|
|
73
|
-
"cell_type": "code",
|
|
74
|
-
"execution_count": 4,
|
|
75
|
-
"id": "e1083b24",
|
|
76
|
-
"metadata": {},
|
|
77
|
-
"outputs": [
|
|
78
|
-
{
|
|
79
|
-
"data": {
|
|
80
|
-
"text/html": [
|
|
81
|
-
"<div>\n",
|
|
82
|
-
"<style scoped>\n",
|
|
83
|
-
" .dataframe tbody tr th:only-of-type {\n",
|
|
84
|
-
" vertical-align: middle;\n",
|
|
85
|
-
" }\n",
|
|
86
|
-
"\n",
|
|
87
|
-
" .dataframe tbody tr th {\n",
|
|
88
|
-
" vertical-align: top;\n",
|
|
89
|
-
" }\n",
|
|
90
|
-
"\n",
|
|
91
|
-
" .dataframe thead th {\n",
|
|
92
|
-
" text-align: right;\n",
|
|
93
|
-
" }\n",
|
|
94
|
-
"</style>\n",
|
|
95
|
-
"<table border=\"1\" class=\"dataframe\">\n",
|
|
96
|
-
" <thead>\n",
|
|
97
|
-
" <tr style=\"text-align: right;\">\n",
|
|
98
|
-
" <th></th>\n",
|
|
99
|
-
" </tr>\n",
|
|
100
|
-
" </thead>\n",
|
|
101
|
-
" <tbody>\n",
|
|
102
|
-
" </tbody>\n",
|
|
103
|
-
"</table>\n",
|
|
104
|
-
"</div>"
|
|
105
|
-
],
|
|
106
|
-
"text/plain": [
|
|
107
|
-
"Empty DataFrame\n",
|
|
108
|
-
"Columns: []\n",
|
|
109
|
-
"Index: []"
|
|
110
|
-
]
|
|
111
|
-
},
|
|
112
|
-
"execution_count": 4,
|
|
113
|
-
"metadata": {},
|
|
114
|
-
"output_type": "execute_result"
|
|
115
|
-
}
|
|
116
|
-
],
|
|
117
|
-
"source": [
|
|
118
|
-
"io = 'data/prueba.xlsx'\n",
|
|
119
|
-
"read_excel_table(io,\n",
|
|
120
|
-
" usecols='B:G', # 'B:G', 'B:G', 'B:E'\n",
|
|
121
|
-
" header=None, # 3, 706, 1150\n",
|
|
122
|
-
" # nrows=None, # 20, 30, 40\n",
|
|
123
|
-
" checkcol='B',\n",
|
|
124
|
-
" # patterncol='^S'\n",
|
|
125
|
-
" )"
|
|
126
|
-
]
|
|
127
|
-
},
|
|
128
|
-
{
|
|
129
|
-
"cell_type": "code",
|
|
130
|
-
"execution_count": 5,
|
|
131
|
-
"id": "30a1184b",
|
|
132
|
-
"metadata": {},
|
|
133
|
-
"outputs": [
|
|
134
|
-
{
|
|
135
|
-
"data": {
|
|
136
|
-
"text/html": [
|
|
137
|
-
"<div>\n",
|
|
138
|
-
"<style scoped>\n",
|
|
139
|
-
" .dataframe tbody tr th:only-of-type {\n",
|
|
140
|
-
" vertical-align: middle;\n",
|
|
141
|
-
" }\n",
|
|
142
|
-
"\n",
|
|
143
|
-
" .dataframe tbody tr th {\n",
|
|
144
|
-
" vertical-align: top;\n",
|
|
145
|
-
" }\n",
|
|
146
|
-
"\n",
|
|
147
|
-
" .dataframe thead th {\n",
|
|
148
|
-
" text-align: right;\n",
|
|
149
|
-
" }\n",
|
|
150
|
-
"</style>\n",
|
|
151
|
-
"<table border=\"1\" class=\"dataframe\">\n",
|
|
152
|
-
" <thead>\n",
|
|
153
|
-
" <tr style=\"text-align: right;\">\n",
|
|
154
|
-
" <th></th>\n",
|
|
155
|
-
" <th>OrderNo</th>\n",
|
|
156
|
-
" <th>SalesOrderLineKey</th>\n",
|
|
157
|
-
" <th>OrderQuantity</th>\n",
|
|
158
|
-
" <th>ItemCost</th>\n",
|
|
159
|
-
" <th>ItemPrice</th>\n",
|
|
160
|
-
" <th>OrderDate</th>\n",
|
|
161
|
-
" </tr>\n",
|
|
162
|
-
" </thead>\n",
|
|
163
|
-
" <tbody>\n",
|
|
164
|
-
" <tr>\n",
|
|
165
|
-
" <th>0</th>\n",
|
|
166
|
-
" <td>SO43697</td>\n",
|
|
167
|
-
" <td>43697001</td>\n",
|
|
168
|
-
" <td>1</td>\n",
|
|
169
|
-
" <td>2171.29</td>\n",
|
|
170
|
-
" <td>3578.27</td>\n",
|
|
171
|
-
" <td>2017-07-01</td>\n",
|
|
172
|
-
" </tr>\n",
|
|
173
|
-
" <tr>\n",
|
|
174
|
-
" <th>1</th>\n",
|
|
175
|
-
" <td>SO43698</td>\n",
|
|
176
|
-
" <td>43698001</td>\n",
|
|
177
|
-
" <td>1</td>\n",
|
|
178
|
-
" <td>1912.15</td>\n",
|
|
179
|
-
" <td>3399.99</td>\n",
|
|
180
|
-
" <td>2017-07-01</td>\n",
|
|
181
|
-
" </tr>\n",
|
|
182
|
-
" <tr>\n",
|
|
183
|
-
" <th>2</th>\n",
|
|
184
|
-
" <td>SO43699</td>\n",
|
|
185
|
-
" <td>43699001</td>\n",
|
|
186
|
-
" <td>1</td>\n",
|
|
187
|
-
" <td>1912.15</td>\n",
|
|
188
|
-
" <td>3399.99</td>\n",
|
|
189
|
-
" <td>2017-07-01</td>\n",
|
|
190
|
-
" </tr>\n",
|
|
191
|
-
" <tr>\n",
|
|
192
|
-
" <th>3</th>\n",
|
|
193
|
-
" <td>SO43700</td>\n",
|
|
194
|
-
" <td>43700001</td>\n",
|
|
195
|
-
" <td>1</td>\n",
|
|
196
|
-
" <td>413.15</td>\n",
|
|
197
|
-
" <td>699.10</td>\n",
|
|
198
|
-
" <td>2017-07-01</td>\n",
|
|
199
|
-
" </tr>\n",
|
|
200
|
-
" <tr>\n",
|
|
201
|
-
" <th>4</th>\n",
|
|
202
|
-
" <td>SO43701</td>\n",
|
|
203
|
-
" <td>43701001</td>\n",
|
|
204
|
-
" <td>1</td>\n",
|
|
205
|
-
" <td>1912.15</td>\n",
|
|
206
|
-
" <td>3399.99</td>\n",
|
|
207
|
-
" <td>2017-07-01</td>\n",
|
|
208
|
-
" </tr>\n",
|
|
209
|
-
" <tr>\n",
|
|
210
|
-
" <th>...</th>\n",
|
|
211
|
-
" <td>...</td>\n",
|
|
212
|
-
" <td>...</td>\n",
|
|
213
|
-
" <td>...</td>\n",
|
|
214
|
-
" <td>...</td>\n",
|
|
215
|
-
" <td>...</td>\n",
|
|
216
|
-
" <td>...</td>\n",
|
|
217
|
-
" </tr>\n",
|
|
218
|
-
" <tr>\n",
|
|
219
|
-
" <th>697</th>\n",
|
|
220
|
-
" <td>SO44659</td>\n",
|
|
221
|
-
" <td>44659001</td>\n",
|
|
222
|
-
" <td>1</td>\n",
|
|
223
|
-
" <td>2171.29</td>\n",
|
|
224
|
-
" <td>3578.27</td>\n",
|
|
225
|
-
" <td>2017-10-14</td>\n",
|
|
226
|
-
" </tr>\n",
|
|
227
|
-
" <tr>\n",
|
|
228
|
-
" <th>698</th>\n",
|
|
229
|
-
" <td>SO44660</td>\n",
|
|
230
|
-
" <td>44660001</td>\n",
|
|
231
|
-
" <td>1</td>\n",
|
|
232
|
-
" <td>2171.29</td>\n",
|
|
233
|
-
" <td>3578.27</td>\n",
|
|
234
|
-
" <td>2017-10-15</td>\n",
|
|
235
|
-
" </tr>\n",
|
|
236
|
-
" <tr>\n",
|
|
237
|
-
" <th>699</th>\n",
|
|
238
|
-
" <td>SO44661</td>\n",
|
|
239
|
-
" <td>44661001</td>\n",
|
|
240
|
-
" <td>1</td>\n",
|
|
241
|
-
" <td>413.15</td>\n",
|
|
242
|
-
" <td>699.10</td>\n",
|
|
243
|
-
" <td>2017-10-15</td>\n",
|
|
244
|
-
" </tr>\n",
|
|
245
|
-
" <tr>\n",
|
|
246
|
-
" <th>700</th>\n",
|
|
247
|
-
" <td>SO44662</td>\n",
|
|
248
|
-
" <td>44662001</td>\n",
|
|
249
|
-
" <td>1</td>\n",
|
|
250
|
-
" <td>1898.09</td>\n",
|
|
251
|
-
" <td>3374.99</td>\n",
|
|
252
|
-
" <td>2017-10-15</td>\n",
|
|
253
|
-
" </tr>\n",
|
|
254
|
-
" <tr>\n",
|
|
255
|
-
" <th>701</th>\n",
|
|
256
|
-
" <td>SO44663</td>\n",
|
|
257
|
-
" <td>44663001</td>\n",
|
|
258
|
-
" <td>1</td>\n",
|
|
259
|
-
" <td>2171.29</td>\n",
|
|
260
|
-
" <td>3578.27</td>\n",
|
|
261
|
-
" <td>2017-10-15</td>\n",
|
|
262
|
-
" </tr>\n",
|
|
263
|
-
" </tbody>\n",
|
|
264
|
-
"</table>\n",
|
|
265
|
-
"<p>702 rows × 6 columns</p>\n",
|
|
266
|
-
"</div>"
|
|
267
|
-
],
|
|
268
|
-
"text/plain": [
|
|
269
|
-
" OrderNo SalesOrderLineKey OrderQuantity ItemCost ItemPrice OrderDate\n",
|
|
270
|
-
"0 SO43697 43697001 1 2171.29 3578.27 2017-07-01\n",
|
|
271
|
-
"1 SO43698 43698001 1 1912.15 3399.99 2017-07-01\n",
|
|
272
|
-
"2 SO43699 43699001 1 1912.15 3399.99 2017-07-01\n",
|
|
273
|
-
"3 SO43700 43700001 1 413.15 699.10 2017-07-01\n",
|
|
274
|
-
"4 SO43701 43701001 1 1912.15 3399.99 2017-07-01\n",
|
|
275
|
-
".. ... ... ... ... ... ...\n",
|
|
276
|
-
"697 SO44659 44659001 1 2171.29 3578.27 2017-10-14\n",
|
|
277
|
-
"698 SO44660 44660001 1 2171.29 3578.27 2017-10-15\n",
|
|
278
|
-
"699 SO44661 44661001 1 413.15 699.10 2017-10-15\n",
|
|
279
|
-
"700 SO44662 44662001 1 1898.09 3374.99 2017-10-15\n",
|
|
280
|
-
"701 SO44663 44663001 1 2171.29 3578.27 2017-10-15\n",
|
|
281
|
-
"\n",
|
|
282
|
-
"[702 rows x 6 columns]"
|
|
283
|
-
]
|
|
284
|
-
},
|
|
285
|
-
"execution_count": 5,
|
|
286
|
-
"metadata": {},
|
|
287
|
-
"output_type": "execute_result"
|
|
288
|
-
}
|
|
289
|
-
],
|
|
290
|
-
"source": [
|
|
291
|
-
"io = 'data/prueba.xlsx'\n",
|
|
292
|
-
"read_excel_table(io,\n",
|
|
293
|
-
" usecols='B:G', # 'B:G', 'B:G', 'B:E'\n",
|
|
294
|
-
" header=3, # 3, 706, 1150\n",
|
|
295
|
-
" nrows=None, # 20, 30, 40\n",
|
|
296
|
-
" checkcol='B',\n",
|
|
297
|
-
" patterncol='^S'\n",
|
|
298
|
-
" )"
|
|
299
|
-
]
|
|
300
|
-
}
|
|
301
|
-
],
|
|
302
|
-
"metadata": {
|
|
303
|
-
"kernelspec": {
|
|
304
|
-
"display_name": "Python 3",
|
|
305
|
-
"language": "python",
|
|
306
|
-
"name": "python3"
|
|
307
|
-
},
|
|
308
|
-
"language_info": {
|
|
309
|
-
"codemirror_mode": {
|
|
310
|
-
"name": "ipython",
|
|
311
|
-
"version": 3
|
|
312
|
-
},
|
|
313
|
-
"file_extension": ".py",
|
|
314
|
-
"mimetype": "text/x-python",
|
|
315
|
-
"name": "python",
|
|
316
|
-
"nbconvert_exporter": "python",
|
|
317
|
-
"pygments_lexer": "ipython3",
|
|
318
|
-
"version": "3.11.11"
|
|
319
|
-
}
|
|
320
|
-
},
|
|
321
|
-
"nbformat": 4,
|
|
322
|
-
"nbformat_minor": 5
|
|
323
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|