xlin 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xlin
3
- Version: 0.1.19
3
+ Version: 0.1.21
4
4
  Summary: toolbox for LinXueyuan
5
5
  License: MIT
6
6
  Author: LinXueyuanStdio
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "xlin"
3
- version = "0.1.19"
3
+ version = "0.1.21"
4
4
  description = "toolbox for LinXueyuan"
5
5
  authors = ["LinXueyuanStdio <23211526+LinXueyuanStdio@users.noreply.github.com>"]
6
6
  license = "MIT"
@@ -246,3 +246,14 @@ def row_to_json(row: dict) -> dict:
246
246
 
247
247
  return new_row
248
248
 
249
+
250
+ def generator_from_json(path):
251
+ jsonlist = load_json(path)
252
+ for line in jsonlist:
253
+ yield line
254
+
255
+
256
+ def generator_from_jsonl(path):
257
+ jsonlist = load_json_list(path)
258
+ for line in jsonlist:
259
+ yield line
@@ -13,7 +13,7 @@ from xlin.xls2xlsx import is_xslx
13
13
 
14
14
 
15
15
  def read_as_dataframe(
16
- filepath: Union[str, Path],
16
+ filepath: Union[str, Path, list[str], list[Path]],
17
17
  sheet_name: Optional[str] = None,
18
18
  fill_empty_str_to_na=True,
19
19
  filter=lambda x: True,
@@ -21,11 +21,9 @@ def read_as_dataframe(
21
21
  """
22
22
  读取文件为表格。如果是文件夹,则读取文件夹下的所有文件为表格并拼接
23
23
  """
24
- filepath = Path(filepath)
25
- if filepath.is_dir():
26
- paths = ls(filepath, filter=filter, expand_all_subdir=True)
24
+ if isinstance(filepath, list):
27
25
  df_list = []
28
- for path in paths:
26
+ for path in filepath:
29
27
  try:
30
28
  df = read_as_dataframe(path, sheet_name, fill_empty_str_to_na, filter)
31
29
  df["数据来源"] = path.name
@@ -36,6 +34,10 @@ def read_as_dataframe(
36
34
  if fill_empty_str_to_na:
37
35
  df.fillna("", inplace=True)
38
36
  return df
37
+ filepath = Path(filepath)
38
+ if filepath.is_dir():
39
+ paths = ls(filepath, filter=filter, expand_all_subdir=True)
40
+ return read_as_dataframe(paths, sheet_name, fill_empty_str_to_na, filter)
39
41
  filename = filepath.name
40
42
  if filename.endswith(".json") or filename.endswith(".jsonl"):
41
43
  try:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes