xlin 0.1.20__py2.py3-none-any.whl → 0.1.22__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
xlin/read_as_dataframe.py CHANGED
@@ -13,7 +13,7 @@ from xlin.xls2xlsx import is_xslx
13
13
 
14
14
 
15
15
  def read_as_dataframe(
16
- filepath: Union[str, Path],
16
+ filepath: Union[str, Path, list[str], list[Path]],
17
17
  sheet_name: Optional[str] = None,
18
18
  fill_empty_str_to_na=True,
19
19
  filter=lambda x: True,
@@ -21,11 +21,9 @@ def read_as_dataframe(
21
21
  """
22
22
  读取文件为表格。如果是文件夹,则读取文件夹下的所有文件为表格并拼接
23
23
  """
24
- filepath = Path(filepath)
25
- if filepath.is_dir():
26
- paths = ls(filepath, filter=filter, expand_all_subdir=True)
24
+ if isinstance(filepath, list):
27
25
  df_list = []
28
- for path in paths:
26
+ for path in filepath:
29
27
  try:
30
28
  df = read_as_dataframe(path, sheet_name, fill_empty_str_to_na, filter)
31
29
  df["数据来源"] = path.name
@@ -36,6 +34,10 @@ def read_as_dataframe(
36
34
  if fill_empty_str_to_na:
37
35
  df.fillna("", inplace=True)
38
36
  return df
37
+ filepath = Path(filepath)
38
+ if filepath.is_dir():
39
+ paths = ls(filepath, filter=filter, expand_all_subdir=True)
40
+ return read_as_dataframe(paths, sheet_name, fill_empty_str_to_na, filter)
39
41
  filename = filepath.name
40
42
  if filename.endswith(".json") or filename.endswith(".jsonl"):
41
43
  try:
xlin/statistic.py CHANGED
@@ -19,7 +19,7 @@ def bucket_count(length: List[int], step=50, skip_zero_count=False):
19
19
  continue
20
20
  if skip_zero_count and j == 0:
21
21
  continue
22
- print(f"[{(i-1)*step}, {i*step}) {j} {sum(grouped_count[:i+1])/len(length)*100:.2f}%")
22
+ print(f"[{(i-1)*step}, {i*step}) {j} {sum(grouped_count[:i+1])/len(length)*100:.4f}%")
23
23
  x.append((i - 1) * step)
24
24
  y.append(j)
25
25
  return x, y
@@ -73,22 +73,22 @@ def draw_histogram(data: list[int], bins=30, title="Data Distribution Analysis")
73
73
  plt.plot(x_vals, kde(x_vals), color="navy", linewidth=2, label="KDE Curve")
74
74
 
75
75
  # 添加统计线
76
- plt.axvline(mean, color="red", linestyle="--", linewidth=2, label=f"Mean ({mean:.2f})")
77
- plt.axvline(median, color="green", linestyle="-.", linewidth=2, label=f"Median ({median:.2f})")
78
- plt.axvspan(mean - std, mean + std, color="orange", alpha=0.1, label=f"±1 Std.Dev ({std:.2f})")
76
+ plt.axvline(mean, color="red", linestyle="--", linewidth=2, label=f"Mean ({mean:.4f})")
77
+ plt.axvline(median, color="green", linestyle="-.", linewidth=2, label=f"Median ({median:.4f})")
78
+ plt.axvspan(mean - std, mean + std, color="orange", alpha=0.1, label=f"±1 Std.Dev ({std:.4f})")
79
79
 
80
80
  # 添加四分位线
81
- plt.axvline(q25, color="purple", linestyle=":", alpha=0.8, label=f"25th Percentile ({q25:.2f})")
82
- plt.axvline(q75, color="purple", linestyle=":", alpha=0.8, label=f"75th Percentile ({q75:.2f})")
83
- plt.axvline(q80, color="purple", linestyle=":", alpha=0.8, label=f"80th Percentile ({q80:.2f})")
84
- plt.axvline(q90, color="purple", linestyle=":", alpha=0.8, label=f"90th Percentile ({q90:.2f})")
81
+ plt.axvline(q25, color="purple", linestyle=":", alpha=0.8, label=f"25th Percentile ({q25:.4f})")
82
+ plt.axvline(q75, color="purple", linestyle=":", alpha=0.8, label=f"75th Percentile ({q75:.4f})")
83
+ plt.axvline(q80, color="purple", linestyle=":", alpha=0.8, label=f"80th Percentile ({q80:.4f})")
84
+ plt.axvline(q90, color="purple", linestyle=":", alpha=0.8, label=f"90th Percentile ({q90:.4f})")
85
85
 
86
86
  # 添加统计摘要
87
87
  stats_text = f"""\
88
- Data Range: [{data_range[0]:.2f}, {data_range[1]:.2f}]
88
+ Data Range: [{data_range[0]:.4f}, {data_range[1]:.4f}]
89
89
  Observations: {len(data):,}
90
- Standard Deviation: {std:.2f}
91
- IQR: {q75 - q25:.2f}
90
+ Standard Deviation: {std:.4f}
91
+ IQR: {q75 - q25:.4f}
92
92
  Skewness: {float((data - mean).mean()**3 / std**3):.4f}
93
93
  Kurtosis: {float((data - mean).mean()**4 / std**4):.4f}\
94
94
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xlin
3
- Version: 0.1.20
3
+ Version: 0.1.22
4
4
  Summary: toolbox for LinXueyuan
5
5
  License: MIT
6
6
  Author: LinXueyuanStdio
@@ -3,13 +3,13 @@ xlin/ischinese.py,sha256=Ia9IMQ6q-UHkdLwqS70L1fTnfSPbluFrv_I1UqsKquo,293
3
3
  xlin/jsonl.py,sha256=Ogn_9eIx1NPmI_hMvBVwuDTooJYDEJ8FTtViQ8zTVlQ,7618
4
4
  xlin/metric.py,sha256=N7wJ35y-C-IaBr1I1CJ_37lTG7gA69zmn9Xg6xSwKoI,1690
5
5
  xlin/multiprocess_mapping.py,sha256=dRXQoLaG1dK_qZ8B3bJblV0RKM2gqIeSW1EaOZbIdD0,14251
6
- xlin/read_as_dataframe.py,sha256=T8A4qk4Grof_WC_mNz4QVaWDQgJ103rUAQ8tsamm8SQ,8898
7
- xlin/statistic.py,sha256=i0Z1gbW2IYHCA0lb16w1Ncrk0Q7Q1Ttm0n4we-ki6II,9301
6
+ xlin/read_as_dataframe.py,sha256=MqY57L7Wp9UoWTRlZLSBKQNaZa-dKw51-ufrKvHKf8s,9041
7
+ xlin/statistic.py,sha256=WMZkPFJ5da0rqIJHabdjCbWmgzgCOIj_H6KM5SVF7H0,9301
8
8
  xlin/timing.py,sha256=XMT8dMcMolOMohDvAZOIM_BAiPMREhGQKnO1kc5s6PU,998
9
9
  xlin/util.py,sha256=TTWJaqF5D_r-gAZ_fj0kyHomvCagjwHXQZ2OPSgwd54,10976
10
10
  xlin/xls2xlsx.py,sha256=uSmXcDvIhi5Sq0LGidMXy0wErNBXdjaoa6EftYVjTXs,947
11
11
  xlin/yaml.py,sha256=kICi7G3Td5q2MaSXXt85qNTWoHMgjzt7pvn7r3C4dME,183
12
- xlin-0.1.20.dist-info/LICENSE,sha256=60ys6rRtc1dZOP8UjSUr9fAqhZudT3WpKe5WbMCralM,1066
13
- xlin-0.1.20.dist-info/METADATA,sha256=DW9S85CerwgeiPFFETvVEai0OmxdIcoKSt9UXvIg71s,1098
14
- xlin-0.1.20.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
15
- xlin-0.1.20.dist-info/RECORD,,
12
+ xlin-0.1.22.dist-info/LICENSE,sha256=60ys6rRtc1dZOP8UjSUr9fAqhZudT3WpKe5WbMCralM,1066
13
+ xlin-0.1.22.dist-info/METADATA,sha256=77itC4591plUaDbS6T01BmZqeZ3jkoXT9uHgsLTTeA8,1098
14
+ xlin-0.1.22.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
15
+ xlin-0.1.22.dist-info/RECORD,,
File without changes
File without changes