xlin 0.1.19__tar.gz → 0.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xlin-0.1.19 → xlin-0.1.21}/PKG-INFO +1 -1
- {xlin-0.1.19 → xlin-0.1.21}/pyproject.toml +1 -1
- {xlin-0.1.19 → xlin-0.1.21}/xlin/jsonl.py +11 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/read_as_dataframe.py +7 -5
- {xlin-0.1.19 → xlin-0.1.21}/LICENSE +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/README.md +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/__init__.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/ischinese.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/metric.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/multiprocess_mapping.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/statistic.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/timing.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/util.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/xls2xlsx.py +0 -0
- {xlin-0.1.19 → xlin-0.1.21}/xlin/yaml.py +0 -0
@@ -246,3 +246,14 @@ def row_to_json(row: dict) -> dict:
|
|
246
246
|
|
247
247
|
return new_row
|
248
248
|
|
249
|
+
|
250
|
+
def generator_from_json(path):
|
251
|
+
jsonlist = load_json(path)
|
252
|
+
for line in jsonlist:
|
253
|
+
yield line
|
254
|
+
|
255
|
+
|
256
|
+
def generator_from_jsonl(path):
|
257
|
+
jsonlist = load_json_list(path)
|
258
|
+
for line in jsonlist:
|
259
|
+
yield line
|
@@ -13,7 +13,7 @@ from xlin.xls2xlsx import is_xslx
|
|
13
13
|
|
14
14
|
|
15
15
|
def read_as_dataframe(
|
16
|
-
filepath: Union[str, Path],
|
16
|
+
filepath: Union[str, Path, list[str], list[Path]],
|
17
17
|
sheet_name: Optional[str] = None,
|
18
18
|
fill_empty_str_to_na=True,
|
19
19
|
filter=lambda x: True,
|
@@ -21,11 +21,9 @@ def read_as_dataframe(
|
|
21
21
|
"""
|
22
22
|
读取文件为表格。如果是文件夹,则读取文件夹下的所有文件为表格并拼接
|
23
23
|
"""
|
24
|
-
|
25
|
-
if filepath.is_dir():
|
26
|
-
paths = ls(filepath, filter=filter, expand_all_subdir=True)
|
24
|
+
if isinstance(filepath, list):
|
27
25
|
df_list = []
|
28
|
-
for path in paths:
|
26
|
+
for path in filepath:
|
29
27
|
try:
|
30
28
|
df = read_as_dataframe(path, sheet_name, fill_empty_str_to_na, filter)
|
31
29
|
df["数据来源"] = path.name
|
@@ -36,6 +34,10 @@ def read_as_dataframe(
|
|
36
34
|
if fill_empty_str_to_na:
|
37
35
|
df.fillna("", inplace=True)
|
38
36
|
return df
|
37
|
+
filepath = Path(filepath)
|
38
|
+
if filepath.is_dir():
|
39
|
+
paths = ls(filepath, filter=filter, expand_all_subdir=True)
|
40
|
+
return read_as_dataframe(paths, sheet_name, fill_empty_str_to_na, filter)
|
39
41
|
filename = filepath.name
|
40
42
|
if filename.endswith(".json") or filename.endswith(".jsonl"):
|
41
43
|
try:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|