zhenpy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zhenpy-0.1.0/PKG-INFO +12 -0
- zhenpy-0.1.0/README.md +2 -0
- zhenpy-0.1.0/pyproject.toml +20 -0
- zhenpy-0.1.0/setup.cfg +4 -0
- zhenpy-0.1.0/utils/__init__.py +12 -0
- zhenpy-0.1.0/utils/load.py +51 -0
- zhenpy-0.1.0/utils/save.py +44 -0
- zhenpy-0.1.0/zhenpy.egg-info/PKG-INFO +12 -0
- zhenpy-0.1.0/zhenpy.egg-info/SOURCES.txt +10 -0
- zhenpy-0.1.0/zhenpy.egg-info/dependency_links.txt +1 -0
- zhenpy-0.1.0/zhenpy.egg-info/requires.txt +1 -0
- zhenpy-0.1.0/zhenpy.egg-info/top_level.txt +1 -0
zhenpy-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zhenpy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utility functions for loading and saving data files (CSV, TSV, JSON, JSONL, Excel)
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/lizhen/zhenpy
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: pandas
|
|
10
|
+
|
|
11
|
+
# zhenpy
|
|
12
|
+
私人使用的脚本
|
zhenpy-0.1.0/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "zhenpy"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Utility functions for loading and saving data files (CSV, TSV, JSON, JSONL, Excel)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pandas",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.urls]
|
|
17
|
+
"Homepage" = "https://github.com/lizhen/zhenpy"
|
|
18
|
+
|
|
19
|
+
[tool.setuptools.packages.find]
|
|
20
|
+
include = ["utils*"]
|
zhenpy-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from utils.load import load_jsonl, load_jsonl_dataframe, load_json_dataframe, load_file_dataframe
|
|
2
|
+
from utils.save import save_jsonl, save_json, save_dataframe
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"load_jsonl",
|
|
6
|
+
"load_jsonl_dataframe",
|
|
7
|
+
"load_json_dataframe",
|
|
8
|
+
"load_file_dataframe",
|
|
9
|
+
"save_jsonl",
|
|
10
|
+
"save_json",
|
|
11
|
+
"save_dataframe",
|
|
12
|
+
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import json
|
|
3
|
+
from pandas import DataFrame
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_jsonl(input_file, names=None) -> DataFrame:
    """Load a JSONL file into a DataFrame, with a CSV/TSV convenience path.

    The format is chosen from the file name's suffixes (case-insensitive),
    not from a substring of the whole path, so a directory called
    ``csv_exports`` no longer causes a ``.jsonl`` file to be parsed as CSV.
    Compound suffixes such as ``.csv.gz`` are still recognised.

    Args:
        input_file: Path of the file to read (``str`` or ``os.PathLike``).
        names: Optional column names forwarded to ``pandas.read_csv`` for
            CSV/TSV input. Ignored when falsy and for JSONL input.

    Returns:
        A DataFrame with one row per CSV/TSV record or per JSON line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Local import keeps this function self-contained; the module itself
    # does not import pathlib.
    from pathlib import Path

    suffixes = [suffix.lower() for suffix in Path(input_file).suffixes]
    # Only forward ``names`` when the caller actually supplied some, exactly
    # as the original conditional expression did.
    read_kwargs = {"names": names} if names else {}

    if ".csv" in suffixes:
        return pd.read_csv(input_file, **read_kwargs)
    if ".tsv" in suffixes:
        return pd.read_csv(input_file, sep="\t", **read_kwargs)

    with open(input_file, "r", encoding="utf8") as reader:
        stripped_lines = (line.strip() for line in reader)
        # Blank lines carry no record; feeding them to json.loads would raise.
        records = [json.loads(line) for line in stripped_lines if line]

    return DataFrame(records)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_jsonl_dataframe(input_file):
    """Read a JSON Lines file into a DataFrame.

    Each non-blank line is parsed as one JSON value and becomes one row.
    Blank or whitespace-only lines are skipped instead of raising.

    Args:
        input_file: Path of the UTF-8 encoded JSONL file.

    Returns:
        A DataFrame built from the list of parsed lines.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(input_file, "r", encoding="utf8") as reader:
        stripped_lines = (line.strip() for line in reader)
        # Skip empty lines: json.loads("") would raise JSONDecodeError.
        records = [json.loads(line) for line in stripped_lines if line]
    return DataFrame(records)
|
|
30
|
+
|
|
31
|
+
def load_json_dataframe(input_file):
    """Read a whole JSON document and wrap it in a DataFrame.

    Args:
        input_file: Path of the UTF-8 encoded JSON file.

    Returns:
        ``DataFrame(parsed)`` where ``parsed`` is the decoded JSON document.

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(input_file, encoding='utf8') as f:
        parsed = json.load(f)

    return DataFrame(parsed)
|
|
37
|
+
|
|
38
|
+
def load_file_dataframe(input_file, names=None) -> DataFrame:
    """Load a CSV, TSV, XLSX, JSONL or JSON file into a DataFrame.

    The loader is chosen from the file name's suffixes (case-insensitive)
    rather than from a substring of the whole path, so a directory named
    ``csv_exports`` no longer forces every file inside it through
    ``read_csv``. Compound suffixes such as ``.csv.gz`` are still recognised.

    Args:
        input_file: Path of the file to read (``str`` or ``os.PathLike``).
        names: Optional column names forwarded to ``pandas.read_csv`` for
            CSV/TSV input. Ignored when falsy and for every other format.

    Returns:
        The loaded DataFrame, or ``None`` (after printing a message) when
        the file name carries none of the supported suffixes.
    """
    # Local import keeps this function self-contained; the module itself
    # does not import pathlib.
    from pathlib import Path

    suffixes = [suffix.lower() for suffix in Path(input_file).suffixes]
    # Only forward ``names`` when the caller actually supplied some, exactly
    # as the original conditional expressions did.
    read_kwargs = {"names": names} if names else {}

    if ".csv" in suffixes:
        return pd.read_csv(input_file, **read_kwargs)
    if ".tsv" in suffixes:
        return pd.read_csv(input_file, sep="\t", **read_kwargs)
    if ".xlsx" in suffixes:
        return pd.read_excel(input_file)
    if ".jsonl" in suffixes:
        return load_jsonl_dataframe(input_file)
    if ".json" in suffixes:
        return load_json_dataframe(input_file)

    # Unsupported formats stay non-fatal so existing callers that check for
    # None keep working; the message now lists every supported suffix.
    print(
        f"File: {input_file} doesn't contain a supported suffix "
        "[csv, tsv, xlsx, jsonl, json]"
    )
    return None
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _get_suffix(file_path: str) -> str:
|
|
8
|
+
return Path(file_path).suffix.lower()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def save_jsonl(df: pd.DataFrame, output_file: str) -> None:
    """Write ``df`` to ``output_file`` as JSON Lines.

    Every row is serialised as one JSON object on its own line, UTF-8
    encoded, with non-ASCII characters written verbatim.
    """
    with open(output_file, "w", encoding="utf-8") as sink:
        rows = df.to_dict("records")
        sink.writelines(
            json.dumps(row, ensure_ascii=False) + "\n" for row in rows
        )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def save_json(df: pd.DataFrame, output_file: str) -> None:
    """Write ``df`` to ``output_file`` as one JSON array of row objects.

    The file is UTF-8 encoded and non-ASCII characters are written verbatim.
    """
    with open(output_file, "w", encoding="utf-8") as sink:
        rows = df.to_dict("records")
        json.dump(rows, fp=sink, ensure_ascii=False)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def save_dataframe(df: pd.DataFrame, output_file: str) -> None:
    """Write ``df`` to ``output_file`` in the format named by its extension.

    Recognised extensions: ``.csv``, ``.tsv``, ``.jsonl``, ``.json`` and
    ``.xlsx``. The DataFrame index is not written for CSV, TSV or XLSX.

    Raises:
        ValueError: If the extension is not one of the recognised ones.
    """
    supported = (".csv", ".tsv", ".jsonl", ".json", ".xlsx")
    suffix = _get_suffix(output_file)

    # Reject unknown extensions up front so nothing is written for them.
    if suffix not in supported:
        raise ValueError(
            f"Unsupported file extension '{suffix}' in '{output_file}'. "
            f"Supported: {', '.join(supported)}"
        )

    if suffix == ".csv":
        df.to_csv(output_file, index=False)
    elif suffix == ".tsv":
        df.to_csv(output_file, sep="\t", index=False)
    elif suffix == ".jsonl":
        save_jsonl(df, output_file)
    elif suffix == ".json":
        save_json(df, output_file)
    else:
        # Only ".xlsx" remains after the membership check above.
        df.to_excel(output_file, index=False)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zhenpy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utility functions for loading and saving data files (CSV, TSV, JSON, JSONL, Excel)
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/lizhen/zhenpy
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: pandas
|
|
10
|
+
|
|
11
|
+
# zhenpy
|
|
12
|
+
私人使用的脚本
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pandas
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
utils
|