PyPI - xlin - Versions diffs - 0.1.15__py2.py3-none-any.whl → 0.1.17__py2.py3-none-any.whl - Mend

xlin 0.1.15py2.py3-none-any.whl → 0.1.17py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

xlin/__init__.py +1 -0
xlin/statistic.py +149 -1
xlin/timing.py +43 -0
xlin/util.py +9 -1
{xlin-0.1.15.dist-info → xlin-0.1.17.dist-info}/LICENSE +1 -1
{xlin-0.1.15.dist-info → xlin-0.1.17.dist-info}/METADATA +2 -2
xlin-0.1.17.dist-info/RECORD +15 -0
xlin-0.1.15.dist-info/RECORD +0 -14
{xlin-0.1.15.dist-info → xlin-0.1.17.dist-info}/WHEEL +0 -0

xlin/__init__.py CHANGED Viewed

@@ -4,6 +4,7 @@ from .metric import *
 from .multiprocess_mapping import *
 from .read_as_dataframe import *
 from .statistic import *
+from .timing import *
 from .util import *
 from .xls2xlsx import *
 from .yaml import *

xlin/statistic.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List
+from collections import defaultdict
 import pandas as pd
@@ -115,4 +116,151 @@ def draw_pie(numbers: List[int], title="Pie Chart of Numbers"):
     plt.pie(numbers, labels=[str(i) for i in range(len(numbers))], autopct='%1.1f%%')
     plt.title(title)
-    plt.show()
+    plt.show()
+def generate_classification_report(predictions: List[str], labels: List[str]) -> dict:
+    """
+    Build a dictionary of evaluation results for a single-label classifier.
+    The set of classes is taken from ``labels``. Any prediction that is not
+    one of those classes is counted under the synthetic class "out_of_class".
+    Args:
+        predictions: predicted label for each sample
+        labels: ground-truth label for each sample (same length as predictions)
+    Returns:
+        A dictionary with the following keys:
+        - accuracy: fraction of samples whose prediction equals the label
+          (0 when there are no samples)
+        - class_report: DataFrame with one row per class holding
+          class / precision / recall / f1_score / support
+        - confusion_matrix: DataFrame of counts (rows = true label,
+          columns = predicted label) with an extra "recall" column and an
+          extra "precision" row
+        - micro_stats: precision / recall / f1_score computed from the
+          TP / FP / FN counts summed over all classes
+        - macro_stats: unweighted mean of precision / recall / f1_score over
+          the classes found in ``labels`` ("out_of_class" is excluded)
+        - total_samples: number of samples
+        - time_generated: local timestamp formatted as "%Y-%m-%d %H:%M:%S"
+    Raises:
+        AssertionError: if predictions and labels differ in length
+    """
+    import datetime
+    def safe_ratio(numerator, denominator):
+        # An undefined metric (empty denominator) is reported as 0.
+        return numerator / denominator if denominator > 0 else 0
+    def f1_of(precision, recall):
+        return safe_ratio(2 * (precision * recall), precision + recall)
+    # Raised explicitly instead of via `assert` so the check still runs under
+    # `python -O`; the exception type is kept for backward compatibility.
+    if len(predictions) != len(labels):
+        raise AssertionError("预测结果与标签长度不一致")
+    report = {}
+    # Classes are the distinct ground-truth labels, plus one catch-all bucket
+    # for predictions outside that set.
+    classes = sorted(set(labels))
+    known_classes = set(classes)
+    error_label = "out_of_class"
+    extend_classes = classes + [error_label]
+    total = len(labels)
+    correct = sum(p == l for p, l in zip(predictions, labels))
+    # 1. Accuracy (guarded so that empty input does not divide by zero)
+    report["accuracy"] = safe_ratio(correct, total)
+    # 2. Confusion matrix, keyed by (true label, predicted label)
+    confusion = defaultdict(int)
+    for true_label, pred_label in zip(labels, predictions):
+        if pred_label not in known_classes:
+            pred_label = error_label
+        confusion[(true_label, pred_label)] += 1
+    confusion_matrix = pd.DataFrame(index=extend_classes, columns=extend_classes, data=0)
+    for (true, pred), count in confusion.items():
+        confusion_matrix.loc[true, pred] = count
+    # 3. Per-class report, accumulating the micro-average counts on the way
+    micro_tp = 0
+    micro_fp = 0
+    micro_fn = 0
+    class_stats = []
+    for cls in extend_classes:
+        tp = confusion[(cls, cls)]
+        fp = sum(confusion[(other, cls)] for other in extend_classes if other != cls)
+        fn = sum(confusion[(cls, other)] for other in extend_classes if other != cls)
+        micro_tp += tp
+        micro_fp += fp
+        micro_fn += fn
+        precision = safe_ratio(tp, tp + fp)
+        recall = safe_ratio(tp, tp + fn)
+        class_stats.append(
+            {
+                "class": cls,
+                "precision": precision,
+                "recall": recall,
+                "f1_score": f1_of(precision, recall),
+                "support": tp + fn,
+            },
+        )
+    class_df = pd.DataFrame(class_stats)
+    report["class_report"] = class_df
+    # Decorate the confusion matrix with a "recall" column and a "precision"
+    # row; the bottom-right cell has no meaning and is left empty.
+    confusion_matrix["recall"] = class_df["recall"].values.tolist()
+    p = class_df["precision"].values.tolist() + [None]
+    tail = pd.DataFrame([p], index=["precision"], columns=confusion_matrix.columns)
+    confusion_matrix = pd.concat([confusion_matrix, tail], axis=0)
+    confusion_matrix.index.name = "True \\ Label"
+    report["confusion_matrix"] = confusion_matrix
+    micro_precision = safe_ratio(micro_tp, micro_tp + micro_fp)
+    micro_recall = safe_ratio(micro_tp, micro_tp + micro_fn)
+    report["micro_stats"] = {
+        "precision": micro_precision,
+        "recall": micro_recall,
+        "f1_score": f1_of(micro_precision, micro_recall),
+    }
+    # The macro average only covers the real classes, not the catch-all bucket.
+    real_classes = class_df[class_df["class"] != error_label]
+    report["macro_stats"] = {
+        "precision": real_classes["precision"].mean(),
+        "recall": real_classes["recall"].mean(),
+        "f1_score": real_classes["f1_score"].mean(),
+    }
+    # 4. Metadata
+    report["total_samples"] = total
+    report["time_generated"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    return report
+def print_classification_report(predictions: List[str], labels: List[str]):
+    """
+    Generate a classification report and print it to standard output.
+    Prints, in order: overall accuracy, sample count and generation time,
+    the micro-averaged and macro-averaged precision / recall / F1, the
+    confusion matrix, and the per-class report.
+    Args:
+        predictions: predicted label for each sample
+        labels: ground-truth label for each sample (same length as predictions)
+    """
+    report = generate_classification_report(predictions, labels)
+    print(f"准确率: {report['accuracy']:.2%}")
+    print(f"总样本数: {report['total_samples']}, 生成时间: {report['time_generated']}")
+    print()
+    # Micro-averaged statistics. Precision is labelled "精确率" so that it is
+    # not confused with the accuracy ("准确率") line printed above.
+    print("=== 微观统计 ===")
+    micro_stats = report["micro_stats"]
+    print(f"精确率: {micro_stats['precision']:.2%}")
+    print(f"召回率: {micro_stats['recall']:.2%}")
+    print(f"F1分数: {micro_stats['f1_score']:.2%}")
+    print()
+    # Macro-averaged statistics
+    print("=== 宏观统计 ===")
+    macro_stats = report["macro_stats"]
+    print(f"精确率: {macro_stats['precision']:.2%}")
+    print(f"召回率: {macro_stats['recall']:.2%}")
+    print(f"F1分数: {macro_stats['f1_score']:.2%}")
+    print()
+    # Confusion matrix
+    print("=== 混淆矩阵 ===")
+    print(report["confusion_matrix"])
+    print()
+    # Per-class report
+    print("=== 分类报告 ===")
+    print(report["class_report"])
+    print()
+if __name__ == "__main__":
+    # Example data: "extra1" and "extra2" do not occur in `truth`, so the
+    # report counts them under the "out_of_class" bucket.
+    preds = ["cat", "dog", "cat", "dog", "extra1", "extra2"]
+    truth = ["cat", "cat", "dog", "dog", "dog", "dog"]
+    print_classification_report(preds, truth)

xlin/timing.py ADDED Viewed

@@ -0,0 +1,43 @@
+from timeit import default_timer as timer
+from functools import wraps
+import time
+class Benchmark(object):
+    """
+    Context manager that prints and records how long its ``with`` block took.
+    On exit it prints "<msg> : <elapsed> seconds", where the elapsed time is
+    rendered with the %-style format ``fmt``, and then stores the elapsed
+    seconds on ``self.time``.
+    """
+    def __init__(self, msg, fmt="%0.3g"):
+        self.fmt = fmt
+        self.msg = msg
+    def __enter__(self):
+        self.start = timer()
+        return self
+    def __exit__(self, *args):
+        elapsed = timer() - self.start
+        template = "%s : " + self.fmt + " seconds"
+        print(template % (self.msg, elapsed))
+        self.time = elapsed
+def timing(f):
+    """
+    Decorator that prints how long each successful call to ``f`` took.
+    After the wrapped function returns, one line is printed with the function
+    name, the positional and keyword arguments, and the elapsed seconds; the
+    function's return value is passed through unchanged. Nothing is printed
+    when ``f`` raises.
+    """
+    @wraps(f)
+    def wrap(*args, **kw):
+        # Use the monotonic high-resolution timer rather than wall-clock
+        # time.time(), which can jump when the system clock is adjusted.
+        ts = timer()
+        result = f(*args, **kw)
+        te = timer()
+        print(f'func:{f.__name__!r} args:[{args!r}, {kw!r}] took: {te - ts:2.4f} sec')
+        return result
+    return wrap
+class Timer:
+    """
+    Silent context manager that measures the duration of its ``with`` block.
+    ``start`` is set on entry; ``end`` and ``interval`` (``end - start``, in
+    seconds from ``time.perf_counter``) are set on exit.
+    """
+    def __enter__(self):
+        self.start = time.perf_counter()
+        return self
+    def __exit__(self, *args):
+        stop = time.perf_counter()
+        self.end = stop
+        self.interval = stop - self.start

xlin/util.py CHANGED Viewed

@@ -133,10 +133,18 @@ def cp(
             base_input_dir = input_paths[0].parent
     base_input_dir = Path(base_input_dir)
     output_dir_path = Path(output_dir_path)
+    if output_dir_path.exists() and not output_dir_path.is_dir():
+        raise Exception(f"output_dir_path exists and is not a directory: {output_dir_path}")
+    if not output_dir_path.exists():
+        output_dir_path.mkdir(parents=True, exist_ok=True)
+        logger.warning(f"创建文件夹 {output_dir_path}")
+    if not base_input_dir.exists():
+        raise Exception(f"base_input_dir does not exist: {base_input_dir}")
+    if not base_input_dir.is_dir():
+        raise Exception(f"base_input_dir is not a directory: {base_input_dir}")
     for input_path in input_paths:
         relative_path = input_path.relative_to(base_input_dir)
         output_path = output_dir_path / relative_path
-        output_path.parent.mkdir(parents=True, exist_ok=True)
         copy_file(input_path, output_path, force_overwrite, verbose)

{xlin-0.1.15.dist-info → xlin-0.1.17.dist-info}/LICENSE RENAMED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2024 兮尘
+Copyright (c) 2025 林学渊
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

{xlin-0.1.15.dist-info → xlin-0.1.17.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: xlin
-Version: 0.1.15
+Version: 0.1.17
 Summary: toolbox for LinXueyuan
 License: MIT
-Author: XiChen
+Author: LinXueyuanStdio
 Author-email: 23211526+LinXueyuanStdio@users.noreply.github.com
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 2

xlin-0.1.17.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+xlin/__init__.py,sha256=MWWCNPgJFS_oV2US52ULa4yg4Ku61qjn40NVKqcp9-c,248
+xlin/ischinese.py,sha256=Ia9IMQ6q-UHkdLwqS70L1fTnfSPbluFrv_I1UqsKquo,293
+xlin/jsonl.py,sha256=DvVM241a9VgQlp5WIMPRv-JIolT0RdSxw47IG_fc7xE,6690
+xlin/metric.py,sha256=N7wJ35y-C-IaBr1I1CJ_37lTG7gA69zmn9Xg6xSwKoI,1690
+xlin/multiprocess_mapping.py,sha256=pmzyEUYpbpIZ_ezyvWWWRpr7D7n4t3E3jW1nGXBbVck,7652
+xlin/read_as_dataframe.py,sha256=P8bOYW-zm8uGhehCldZI9ZQhHHLGqDPDbSMNWI2li6g,8885
+xlin/statistic.py,sha256=i0Z1gbW2IYHCA0lb16w1Ncrk0Q7Q1Ttm0n4we-ki6II,9301
+xlin/timing.py,sha256=XMT8dMcMolOMohDvAZOIM_BAiPMREhGQKnO1kc5s6PU,998
+xlin/util.py,sha256=TTWJaqF5D_r-gAZ_fj0kyHomvCagjwHXQZ2OPSgwd54,10976
+xlin/xls2xlsx.py,sha256=5zfcM0gmunFQOcOj9nYd9Dj0HMhU7-cPKnPIy6Ot9iU,930
+xlin/yaml.py,sha256=kICi7G3Td5q2MaSXXt85qNTWoHMgjzt7pvn7r3C4dME,183
+xlin-0.1.17.dist-info/LICENSE,sha256=60ys6rRtc1dZOP8UjSUr9fAqhZudT3WpKe5WbMCralM,1066
+xlin-0.1.17.dist-info/METADATA,sha256=Lg-wFcZRx0nvtw2tvaB6HCrLrPjRYnVELCp1Duz_IKI,1098
+xlin-0.1.17.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
+xlin-0.1.17.dist-info/RECORD,,

xlin-0.1.15.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-xlin/__init__.py,sha256=xH5nS8y2RhQ8IDMM2pVkD5W0lxEFuymUSpzSWKo-358,226
-xlin/ischinese.py,sha256=Ia9IMQ6q-UHkdLwqS70L1fTnfSPbluFrv_I1UqsKquo,293
-xlin/jsonl.py,sha256=DvVM241a9VgQlp5WIMPRv-JIolT0RdSxw47IG_fc7xE,6690
-xlin/metric.py,sha256=N7wJ35y-C-IaBr1I1CJ_37lTG7gA69zmn9Xg6xSwKoI,1690
-xlin/multiprocess_mapping.py,sha256=pmzyEUYpbpIZ_ezyvWWWRpr7D7n4t3E3jW1nGXBbVck,7652
-xlin/read_as_dataframe.py,sha256=P8bOYW-zm8uGhehCldZI9ZQhHHLGqDPDbSMNWI2li6g,8885
-xlin/statistic.py,sha256=kp2P-Hr5Kb-R3dNgUXQieG8--iitjidg7SJuSiCpKdM,4131
-xlin/util.py,sha256=RJHMBKC1xVwso3NfYXxIY3qqAfahzDDgzuU7jvNhQBA,10494
-xlin/xls2xlsx.py,sha256=5zfcM0gmunFQOcOj9nYd9Dj0HMhU7-cPKnPIy6Ot9iU,930
-xlin/yaml.py,sha256=kICi7G3Td5q2MaSXXt85qNTWoHMgjzt7pvn7r3C4dME,183
-xlin-0.1.15.dist-info/LICENSE,sha256=KX0dDCYlO4DskqMZY8qeY94EZMrDRNnNqlGLkXVlKyM,1063
-xlin-0.1.15.dist-info/METADATA,sha256=GI2Hz1o2lX6rOSEm12phfhejUx1jG3yC29tkLUen6IA,1089
-xlin-0.1.15.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
-xlin-0.1.15.dist-info/RECORD,,

{xlin-0.1.15.dist-info → xlin-0.1.17.dist-info}/WHEEL RENAMED Viewed

File without changes

xlin 0.1.15__py2.py3-none-any.whl → 0.1.17__py2.py3-none-any.whl

xlin 0.1.15py2.py3-none-any.whl → 0.1.17py2.py3-none-any.whl