yms-kan 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yms_kan/tool.py +499 -234
- yms_kan/train_eval_utils.py +59 -13
- yms_kan/version.py +1 -1
- {yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/METADATA +1 -1
- {yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/RECORD +8 -8
- {yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/WHEEL +0 -0
- {yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/licenses/LICENSE +0 -0
- {yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/top_level.txt +0 -0
yms_kan/tool.py
CHANGED
@@ -1,8 +1,155 @@
 import os
+import re
+from datetime import datetime, timezone, timedelta
+from typing import Optional, Dict, List
 
+import click
 import numpy as np
-import
+import pandas as pd
 import wandb
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
+    classification_report
+from tqdm import tqdm
+
+
+# Read two differently formatted tables from one txt file and return them as a list of dicts
+def read_multi_table_txt(file_path):
+    # Read the raw contents
+    with open(file_path, 'r') as f:
+        content = f.read()
+
+    # Split the content on table headers (each new table is assumed to start with "epoch")
+    table_blocks = re.split(r'\n(?=epoch\s)', content.strip())
+
+    # Process each table block
+    table_dicts = []
+    for block in table_blocks:
+        lines = [line.strip() for line in block.split('\n') if line.strip()]
+
+        # Parse the column names (handles tabs and mixed spaces)
+        columns = re.split(r'\s{2,}|\t', lines[0])
+
+        # Parse the data rows (handles mixed separators)
+        data = []
+        for line in lines[1:]:
+            # Split on runs of consecutive spaces/tabs with a regex
+            row = re.split(r'\s{2,}|\t', line)
+            data.append(row)
+
+        # Build a DataFrame and convert numeric values automatically
+        df = pd.DataFrame(data, columns=columns)
+        df = df.apply(pd.to_numeric, errors='coerce')  # numeric columns are detected automatically; non-numeric values become NaN
+
+        # Convert the DataFrame to a dict, keeping each column as a list
+        table_dict = df.to_dict(orient='list')
+        table_dicts.append(table_dict)
+
+    return table_dicts
+
+
+def get_current_time(format_str="%Y-%m-%d %H:%M:%S"):
+    """
+    Get the current time in UTC+8 and return it as a string in the given format
+    :param format_str: time format (defaults to "%Y-%m-%d %H:%M:%S")
+    :return: the formatted time string
+    """
+
+    # Create the UTC+8 timezone object
+    utc8_timezone = timezone(timedelta(hours=8))
+
+    # Get the current time in UTC+8
+    utc8_time = datetime.now(utc8_timezone)
+
+    # Format it as a string
+    formatted_time = utc8_time.strftime(format_str)
+    return formatted_time
+
+
+# Metric computation for the val and test phases
+def calculate_results(all_labels, all_predictions, classes, average='macro'):
+    results = {
+        'accuracy': accuracy_score(y_true=all_labels, y_pred=all_predictions),
+        'precision': precision_score(y_true=all_labels, y_pred=all_predictions, average=average),
+        'recall': recall_score(y_true=all_labels, y_pred=all_predictions, average=average),
+        'f1_score': f1_score(y_true=all_labels, y_pred=all_predictions, average=average),
+        'cm': confusion_matrix(y_true=all_labels, y_pred=all_predictions, labels=np.arange(len(classes)))
+    }
+    return results
+
+
+def calculate_metric(all_labels, all_predictions, classes, class_metric=False, average='macro avg'):
+    metric = classification_report(y_true=all_labels, y_pred=all_predictions,
+                                   target_names=classes, digits=4, output_dict=True, zero_division=0)
+    if not class_metric:
+        metric = {
+            'accuracy': metric.get('accuracy'),
+            'precision': metric.get(average).get('precision'),
+            'recall': metric.get(average).get('recall'),
+            'f1-score': metric.get(average).get('f1-score'),
+        }
+        return metric
+    else:
+        return metric
+
+
+def dict_to_classification_report(report_dict, digits=2):
+    headers = ["precision", "recall", "f1-score", "support"]
+    target_names = list(report_dict.keys())
+    target_names.remove('accuracy') if 'accuracy' in target_names else None
+    longest_last_line_heading = "weighted avg"
+    name_width = max(len(cn) for cn in target_names)
+    width = max(name_width, len(longest_last_line_heading), digits)
+    head_fmt = "{:>{width}s} " + " {:>9}" * len(headers)
+    report = head_fmt.format("", *headers, width=width)
+    report += "\n\n"
+    row_fmt = "{:>{width}s} " + " {:>9.{digits}f}" * 3 + " {:>9}\n"
+    for target_name in target_names:
+        scores = [report_dict[target_name][h] for h in headers]
+        report += row_fmt.format(target_name, *scores, width=width, digits=digits)
+    report += "\n"
+
+    average_options = ["micro avg", "macro avg", "weighted avg"]
+    if 'samples avg' in report_dict:
+        average_options.append('samples avg')
+    for average in average_options:
+        if average in report_dict:
+            scores = [report_dict[average][h] for h in headers]
+            if average == "accuracy":
+                row_fmt_accuracy = (
+                    "{:>{width}s} "
+                    + " {:>9.{digits}}" * 2
+                    + " {:>9.{digits}f}"
+                    + " {:>9}\n"
+                )
+                report += row_fmt_accuracy.format(
+                    average, "", "", *scores[2:], width=width, digits=digits
+                )
+            else:
+                report += row_fmt.format(average, *scores, width=width, digits=digits)
+
+    if 'accuracy' in report_dict:
+        row_fmt_accuracy = (
+            "{:>{width}s} "
+            + " {:>9.{digits}}" * 2
+            + " {:>9.{digits}f}"
+            + " {:>9}\n"
+        )
+        report += row_fmt_accuracy.format(
+            "accuracy", "", "", report_dict["accuracy"], "", width=width, digits=digits
+        )
+
+    return report
+
+
+# def append_metrics(metrics, metric, result, lr):
+#     metrics['train_losses'].append(result['train_loss'])
+#     metrics['val_losses'].append(result['val_loss'])
+#     metrics['accuracies'].append(metric['accuracy'])
+#     metrics['precisions'].append(metric['precision'])
+#     metrics['recalls'].append(metric['recall'])
+#     metrics['f1-scores'].append(metric['f1-score'])
+#     metrics['lrs'].append(lr)
+#     return metrics
 
 
 def initialize_results_file(results_file, result_info):
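For context, here is a minimal usage sketch of the new read_multi_table_txt helper added above. It is not part of the diff; it assumes yms-kan 0.0.3 is installed, and the file name and contents are illustrative:

from yms_kan.tool import read_multi_table_txt

# Write a small two-table file of the kind the helper expects
# (each table starts with a header line beginning with "epoch").
with open('demo_tables.txt', 'w') as f:
    f.write('epoch  train_losses  val_losses\n'
            '1      0.812345      0.912345\n'
            '2      0.712345      0.812345\n'
            'epoch  accuracies\n'
            '1      0.8123\n'
            '2      0.9123\n')

tables = read_multi_table_txt('demo_tables.txt')  # one dict per table
print(tables[0]['train_losses'])  # [0.812345, 0.712345]

Each table becomes a column-name-to-list dict, with values coerced to numbers via pd.to_numeric (non-numeric cells become NaN).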
@@ -12,9 +159,10 @@ def initialize_results_file(results_file, result_info):
     Args:
         results_file (str): Path to the results file.
         result_info (list): List of column names to write as the first line.
+        space: spacing between the column names (two spaces by default)
     """
     # Process result_info, adding two spaces after each word
-    result_info_str =
+    result_info_str = '  '.join(result_info) + '\n'
     # Check whether the file exists
     if os.path.exists(results_file):
         # If the file exists, read its first line
@@ -35,86 +183,45 @@ def initialize_results_file(results_file, result_info):
     print(f"File {results_file} has been created and result_info written to it.")
 
 
-def
-                            data_dict: dict,
-                            column_order: list,
-                            float_precision: int = 5) -> None:
+def is_similar_key(key1, key2):
     """
-
+    Check whether two keys are similar, accounting for plural-form conversions.
 
-
-
-
-
-
+    Args:
+        key1 (str): the first key
+        key2 (str): the second key
+
+    Returns:
+        bool: True if the two keys are similar (including plural-form conversions), otherwise False
     """
-
-
-    for key in data_dict:
-        if not isinstance(data_dict[key], list):
-            raise ValueError(f"Value for key '{key}' is not a list")
-        if rows is None:
-            rows = len(data_dict[key])
-        else:
-            if len(data_dict[key]) != rows:
-                raise ValueError("All lists in data_dict must have the same length")
+    if key1 == key2:
+        return True
 
-    #
-
-
-
-
-    if column_name in ['train_losses', 'val_losses']:
-        return f"{value:.{float_precision + 1}f}"
-    elif column_name == 'lrs':
-        return f"{value:.8f}"
-    else:
-        return f"{value:.{float_precision}f}"
-    elif isinstance(value, str):
-        return value
-    else:
-        return str(value)
+    # Check whether key2 is a plural form
+    if key2.endswith("ies"):
+        singular_candidate = key2.removesuffix("ies") + "y"
+        if key1 == singular_candidate:
+            return True
 
-
-
-
-
-    if dict_key not in data_dict:
-        raise ValueError(f"Missing required column: {dict_key}")
-    values = data_dict[dict_key]
-
-    max_width = len(col)
-    for val in values:
-        fmt_val = format_value(val, col)
-        max_width = max(max_width, len(fmt_val))
-    column_widths.append(max_width)
-
-    # Generate the formatted lines
-    lines = []
-    for i in range(rows):
-        row = []
-        for j, col in enumerate(column_order):
-            dict_key = 'val_accuracies' if col == 'accuracies' else col
-            val = data_dict[dict_key][i]
-            fmt_val = format_value(val, col)
-
-            # Alignment handling
-            if j == len(column_order) - 1:
-                fmt_val = fmt_val.ljust(column_widths[j])
-            else:
-                fmt_val = fmt_val.rjust(column_widths[j])
-            row.append(fmt_val)
-        lines.append("  ".join(row) + '\n')
+    if key2.endswith("es"):
+        singular_candidate = key2.removesuffix("es")
+        if key1 == singular_candidate:
+            return True
 
-
-
-
+    if key2.endswith("s"):
+        singular_candidate = key2.removesuffix("s")
+        if key1 == singular_candidate:
+            return True
+
+    return False
 
 
 def append_to_results_file(file_path: str,
                            data_dict: dict,
                            column_order: list,
-                           float_precision: int =
+                           float_precision: int = 4,
+                           more_float: int = 2,
+                           custom_column_widths: dict = None) -> None:
     """
     Generic formatted-line writer
 
@@ -123,182 +230,340 @@ def append_to_results_file(file_path: str,
         data_dict: dict containing the data, keyed by column name
         column_order: list giving the column order; its elements are dict keys
         float_precision: number of decimal places for floats (default 5)
+        more_float: extra decimal places for floats
+        custom_column_widths: dict of custom column widths, keyed by column name
     """
-    #
-
-
-
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    elif isinstance(value, str):
-        fmt_value = value
-    else:  # convert any other type to a string
-        fmt_value = str(value)
-
-    # Use the larger of the column-name length and the value length as the column width
-    column_width = max(len(col), len(fmt_value))
-    column_widths.append(column_width)
-
-    # Apply column-width alignment
-    if col == column_order[-1]:  # the last column is left-aligned
-        fmt_value = fmt_value.ljust(column_width)
-    else:
-        fmt_value = fmt_value.rjust(column_width)
-
-    formatted_data.append(fmt_value)
-
-    # Build the line and write it; columns are separated by two spaces
-    if formatted_data:
-        line = "  ".join(formatted_data) + '\n'
-        with open(file_path, 'a', encoding='utf-8') as f:
-            f.write(line)
-    else:
-        # Non-list case, handled by the original logic
-        # Compute the maximum width of each column
-        column_widths = []
-        formatted_data = []
-        for col in column_order:
-            # Handle dict-key aliases
-            dict_key = 'val_accuracies' if col == 'accuracies' else col
-            # Skip the column if the key is missing
-            if dict_key not in data_dict:
-                continue
-
-            value = data_dict[dict_key]
-
-            # Format according to data type
-            if isinstance(value, (int, np.integer)):
-                fmt_value = f"{value:d}"
-            elif isinstance(value, (float, np.floating)):
-                if col in ['train_losses', 'val_losses']:  # for the loss columns keep float_precision + 1 decimals
-                    fmt_value = f"{value:.{float_precision + 1}f}"
-                elif col == 'lrs':  # for the 'lrs' column keep 8 decimals
-                    fmt_value = f"{value:.8f}"
-                else:
-                    fmt_value = f"{value:.{float_precision}f}"
-            elif isinstance(value, str):
-                fmt_value = value
-            else:  # convert any other type to a string
-                fmt_value = str(value)
+    # Compute the maximum width of each column
+    column_widths = []
+    formatted_data = []
+    for col in column_order:
+        # Find a similar key in data_dict
+        dict_key = None
+        for key in data_dict:
+            if is_similar_key(key, col):
+                dict_key = key
+                break
+        if dict_key is None:
+            raise ValueError(f"Missing required column: {col}")
+
+        value = data_dict[dict_key]
+
+        # Format according to data type
+        if isinstance(value, (int, np.integer)):
+            fmt_value = f"{value:d}"
+        elif isinstance(value, (float, np.floating)):
+            if col in ['train_losses', 'val_losses']:  # for the loss columns keep float_precision + more_float decimals
+                fmt_value = f"{value:.{float_precision + more_float}f}"
+            elif col == 'lrs':
+                fmt_value = f"{value:.8f}"
+            else:
+                fmt_value = f"{value:.{float_precision}f}"
+        elif isinstance(value, str):
+            fmt_value = value
+        else:  # convert any other type to a string
+            fmt_value = str(value)
 
+        # Determine the column width
+        if custom_column_widths and col in custom_column_widths:
+            column_width = custom_column_widths[col]
+        else:
             # Use the larger of the column-name length and the value length as the column width
             column_width = max(len(col), len(fmt_value))
-
+        column_widths.append(column_width)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# def append_to_results_file(file_path: str,
-#                            data_dict: dict,
-#                            column_order: list,
-#                            column_widths: list = None,
-#                            float_precision: int = 5) -> None:
-#     """
-#     Generic formatted-line writer
-#
-#     Parameters:
-#         file_path: target file path
-#         data_dict: dict containing the data, keyed by column name
-#         column_order: list giving the column order; its elements are dict keys
-#         column_widths: list of per-column character widths (optional)
-#         float_precision: number of decimal places for floats (default 4)
-#     """
-#     formatted_data = []
-#
-#     # Process the data in the specified column order
-#     for i, col in enumerate(column_order):
-#         # Handle dict-key aliases
-#         if col == 'accuracies':
-#             dict_key = 'val_accuracies'
-#         else:
-#             dict_key = col
-#
-#         if dict_key not in data_dict:
-#             raise ValueError(f"Missing required column: {dict_key}")
-#
-#         value = data_dict[dict_key]
-#
-#         # Format according to data type
-#         if isinstance(value, (int, np.integer)):
-#             fmt_value = f"{value:d}"
-#         elif isinstance(value, (float, np.floating)):
-#             if col in ['train_losses', 'val_losses']:  # for the loss columns keep float_precision + 1 decimals
-#                 fmt_value = f"{value:.{float_precision + 1}f}"
-#             elif col == 'lr':  # for the 'lr' column keep 8 decimals
-#                 fmt_value = f"{value:.8f}"
-#             else:
-#                 fmt_value = f"{value:.{float_precision}f}"
-#         elif isinstance(value, str):
-#             fmt_value = value
-#         else:  # convert any other type to a string
-#             fmt_value = str(value)
-#
-#         # Apply column-width alignment
-#         if column_widths and i < len(column_widths):
-#             try:
-#                 if i == len(column_order) - 1:  # the last column is left-aligned
-#                     fmt_value = fmt_value.ljust(column_widths[i])
-#                 else:
-#                     fmt_value = fmt_value.rjust(column_widths[i])
-#             except TypeError:  # handle non-string types
-#                 if i == len(column_order) - 1:  # the last column is left-aligned
-#                     fmt_value = str(fmt_value).ljust(column_widths[i])
-#                 else:
-#                     fmt_value = str(fmt_value).rjust(column_widths[i])
-#
-#         formatted_data.append(fmt_value)
-#
-#     # Build the line and write it
-#     line = '\t'.join(formatted_data) + '\n'
-#     with open(file_path, 'a', encoding='utf-8') as f:
-#         f.write(line)
-
-
-def get_wandb_key(key_path='tools/wandb_key.txt'):
+        # Apply column-width alignment
+        if col == column_order[-1]:  # the last column is left-aligned
+            fmt_value = fmt_value.ljust(column_width)
+        else:
+            fmt_value = fmt_value.rjust(column_width)
+
+        formatted_data.append(fmt_value)
+
+    # Build the line and write it; columns are separated by two spaces
+    line = "  ".join(formatted_data) + '\n'
+    with open(file_path, 'a', encoding='utf-8') as f:
+        f.write(line)
+
+
+def get_wandb_key(key_path):
     with open(key_path, 'r', encoding='utf-8') as f:
         key = f.read()
     return key
 
 
-def
+def wandb_init(project=None, key_path=None, name=None):
     run = None
     if project is not None:
+        if key_path is None:
+            raise ValueError("When 'project' is not None, 'key_path' should also not be None.")
        wandb_key = get_wandb_key(key_path)
        wandb.login(key=wandb_key)
        run = wandb.init(project=project, name=name)
     return run
+
+
+def check_wandb_login_required():
+    """Login-check function compatible with older wandb versions"""
+    # Check the environment variable first
+    if os.environ.get("WANDB_API_KEY"):
+        return False
+
+    try:
+        api = wandb.Api()
+        # Method 1: check via settings (works on older versions)
+        if hasattr(api, "settings") and api.settings.get("entity"):
+            return False
+
+        # Method 2: verify via projects() (more broadly applicable)
+        api.projects(per_page=1)  # request only the first project of the first page
+        return False
+    except Exception as e:
+        print(f"Unexpected error detected: {str(e)}")
+        return True  # conservatively report that login is required
+
+
+def get_wandb_runs(
+        project_path: str,
+        default_name: str = "unnamed",
+        api_key: Optional[str] = None,
+        per_page: int = 1000
+) -> List[Dict[str, str]]:
+    """
+    Fetch the info (ID and Name) of every run in a given WandB project
+
+    Args:
+        project_path (str): project path in the form "username/project_name"
+        default_name (str): display name used when a run has no name (default: "unnamed")
+        api_key (str, optional): WandB API key; must be passed if the environment variable is not set
+        per_page (int): page size for paginated queries (default 1000, for projects with many runs)
+
+    Returns:
+        List[Dict]: list of run-info dicts of the form [{"id": "...", "name": "..."}]
+
+    Raises:
+        ValueError: malformed project path
+        wandb.errors.UsageError: invalid API key or not logged in
+    """
+    # Validate arguments
+    if "/" not in project_path or len(project_path.split("/")) != 2:
+        raise ValueError("The project path must be of the form 'username/project_name'")
+
+    # Log in (only when needed)
+    if api_key:
+        wandb.login(key=api_key)
+    elif not wandb.api.api_key:
+        raise wandb.errors.UsageError("An API key must be provided, or wandb.login() must be called beforehand")
+
+    # Initialize the API
+    api = wandb.Api()
+
+    try:
+        # Fetch all runs page by page (pagination is handled automatically)
+        runs = api.runs(project_path, per_page=per_page)
+        print(f'Fetched {len(runs)} runs in total')
+        return [
+            {
+                "id": run.id,
+                "name": run.name or default_name,
+                "url": run.url,  # extra field for convenience
+                "state": run.state  # include the run state
+            }
+            for run in runs
+        ]
+
+    except wandb.errors.CommError as e:
+        raise ConnectionError(f"Connection failed: {str(e)}") from e
+    except Exception as e:
+        raise RuntimeError(f"Failed to fetch run data: {str(e)}") from e
+
+
+def delete_runs(
+        project_path: str,
+        run_ids: Optional[List[str]] = None,
+        run_names: Optional[List[str]] = None,
+        delete_all: bool = False,
+        dry_run: bool = True,
+        api_key: Optional[str] = None,
+        per_page: int = 500
+) -> dict:
+    """
+    Multi-purpose WandB run deletion tool
+
+    :param project_path: project path (format: username/project_name)
+    :param run_ids: list of run IDs to delete (state is ignored)
+    :param run_names: list of run names to delete (state is ignored)
+    # :param preserve_states: list of protected states (finished/running are protected by default)
+    :param delete_all: danger mode! deletes every run (default False)
+    :param dry_run: simulation mode (default True)
+    :param api_key: WandB API key
+    :param per_page: page size for paginated queries
+    :return: dict of operation statistics
+
+    Usage scenarios:
+    1. Delete specific runs: delete_runs(..., run_ids=["abc","def"])
+    2. Default, delete failed runs: delete_runs(...)
+    3. Delete all runs: delete_runs(..., delete_all=True)
+    """
+    preserve_states: List[str] = ["finished", "running"]
+    # Validate arguments
+    if not project_path.count("/") == 1:
+        raise ValueError("The project path must be of the form username/project_name")
+    if delete_all and (run_ids or run_names):
+        raise ValueError("delete_all mode cannot be combined with the other filter arguments")
+
+    # Authenticate
+    if api_key:
+        wandb.login(key=api_key)
+    elif not wandb.api.api_key:
+        raise wandb.errors.UsageError("An API key is required, or log in beforehand")
+
+    api = wandb.Api()
+    stats = {
+        "total": 0,
+        "candidates": 0,
+        "deleted": 0,
+        "failed": 0,
+        "dry_run": dry_run
+    }
+
+    try:
+        runs = api.runs(project_path, per_page=per_page)
+        stats["total"] = len(runs)
+
+        # Determine the deletion targets
+        if delete_all:
+            targets = runs
+            click.secho("\n⚠️ Dangerous operation: every run in the project will be deleted!", fg="red", bold=True)
+        elif run_ids or run_names:
+            targets = [
+                run for run in runs
+                if run.id in (run_ids or []) or run.name in (run_names or [])
+            ]
+            print(f"\nFound {len(targets)} specified runs")
+        else:
+            targets = [run for run in runs if run.state not in preserve_states]
+            print(f"\nFound {len(targets)} runs in abnormal states")
+
+        stats["candidates"] = len(targets)
+
+        if not targets:
+            print("No runs match the criteria")
+            return stats
+
+        # Print a preview
+        print("\nSample of runs to be deleted:")
+        for run in targets[:3]:
+            state = click.style(run.state, fg="green" if run.state == "finished" else "red")
+            print(f"  • {run.id} | {run.name} | state: {state}")
+        if len(targets) > 3:
+            print(f"  ... ({len(targets)} in total)")
+
+        # Safety confirmation
+        if dry_run:
+            click.secho("\nDry-run mode: nothing will actually be deleted", fg="yellow")
+            return stats
+
+        if delete_all:
+            msg = click.style("Really delete ALL runs? This cannot be undone!", fg="red", bold=True)
+        else:
+            msg = f"Really delete {len(targets)} runs?"
+
+        if not click.confirm(msg, default=False):
+            print("Operation cancelled")
+            return stats
+
+        # Perform the deletion
+        print("\nDeletion progress:")
+        for i, run in enumerate(targets, 1):
+            try:
+                run.delete()
+                stats["deleted"] += 1
+                print(click.style(f"  [{i}/{len(targets)}] deleted {run.id}", fg="green"))
+            except Exception as e:
+                stats["failed"] += 1
+                print(click.style(f"  [{i}/{len(targets)}] failed to delete {run.id}: {str(e)}", fg="red"))
+
+        return stats
+
+    except wandb.errors.CommError as e:
+        raise ConnectionError(f"Network error: {str(e)}")
+    except Exception as e:
+        raise RuntimeError(f"Operation failed: {str(e)}")
+
+
+def get_all_artifacts_from_project(project_path, max_runs=None, run_id=None):
+    """Fetch all artifacts of a WandB project, or of one specified run
+
+    Args:
+        project_path (str): project path in the form "entity/project"
+        max_runs (int, optional): maximum number of runs to scan (only used when run_id is not given)
+        run_id (str, optional): ID of the specific run to query
+
+    Returns:
+        list: list containing all Artifact objects
+    """
+    api = wandb.Api()
+    all_artifacts = []
+    seen_artifacts = set()  # for de-duplication
+
+    try:
+        if run_id:
+            # Single-run case
+            run = api.run(f"{project_path}/{run_id}")
+            artifacts = run.logged_artifacts()
+
+            for artifact in artifacts:
+                artifact_id = f"{artifact.name}:{artifact.version}"
+                if artifact_id not in seen_artifacts:
+                    all_artifacts.append(artifact)
+                    seen_artifacts.add(artifact_id)
+
+            print(f"Found {len(all_artifacts)} artifacts in run {run_id}")
+        else:
+            # Whole-project case
+            runs = api.runs(project_path, per_page=500)
+            run_iterator = tqdm(runs[:max_runs] if max_runs else runs,
+                                desc=f"Scanning {project_path}")
+
+            for run in run_iterator:
+                try:
+                    artifacts = run.logged_artifacts()
+                    for artifact in artifacts:
+                        artifact_id = f"{artifact.name}:{artifact.version}"
+                        if artifact_id not in seen_artifacts:
+                            all_artifacts.append(artifact)
+                            seen_artifacts.add(artifact_id)
+                except Exception as run_error:
+                    print(f"Error processing run {run.id}: {str(run_error)}")
+
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        return []
+
+    return all_artifacts
+
+
+def upload_model_dataset(
+        artifact_dir: str,
+        artifact_name: str,
+        artifact_type: str) -> None:
+    run_id = f'yms_upload_{artifact_type}_' + get_current_time('%y%m%d_%H%M%S')
+    run = wandb.init(project='upload_model_dataset', name=artifact_name, id=run_id)
+    artifact = wandb.Artifact(artifact_name, artifact_type)
+    artifact.add_dir(artifact_dir)
+    run.log_artifact(artifact)
+    run.finish()
+
+
+def download_model_dataset(
+        download_name: str,
+        run_name: str,
+        artifact_type: str,
+        download_dir: str = None,
+        entity: str = 'YNA-DeepLearning'
+) -> str:
+    run_id = f'yms_download_{artifact_type}_' + get_current_time('%y%m%d_%H%M%S')
+    run = wandb.init(project='download_model_dataset', name=run_name, id=run_id)
+    artifact = run.use_artifact(entity + '/upload_model_dataset/' + download_name, type=artifact_type)
+    artifact_dir = artifact.download(root=download_dir)
+    return artifact_dir
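Taken together, the new helpers replace the old hard-coded 'val_accuracies' alias with plural-aware key matching. A hedged sketch of how they combine (not part of the diff; it assumes yms-kan 0.0.3 is installed, and the file name and metric values are illustrative):

from yms_kan.tool import initialize_results_file, append_to_results_file, is_similar_key

# is_similar_key lets plural column names match singular dict keys:
assert is_similar_key("accuracy", "accuracies")   # "ies" -> "y"
assert is_similar_key("f1-score", "f1-scores")    # trailing "s" stripped

column_order = ['epoch', 'train_losses', 'val_losses', 'accuracies', 'lrs']
initialize_results_file('results.txt', column_order)   # writes the header once
append_to_results_file('results.txt',
                       {'epoch': 1, 'train_loss': 0.4321, 'val_loss': 0.5678,
                        'accuracy': 0.9123, 'lr': 1e-3},
                       column_order,
                       custom_column_widths={'epoch': 5})

With the defaults float_precision=4 and more_float=2, the loss columns are written with 6 decimal places, 'lrs' with 8, and everything else with 4.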
yms_kan/train_eval_utils.py
CHANGED
@@ -1,25 +1,52 @@
 import math
 import os
 import sys
-from enum import Enum, auto
 
 import numpy as np
 import torch
 from matplotlib import pyplot as plt
+from sklearn.metrics import classification_report
 from torch.optim.lr_scheduler import ReduceLROnPlateau
 from tqdm import tqdm
 
 from yms_kan import LBFGS
+from yms_kan.tool import initialize_results_file, append_to_results_file
 
 
-def
-
+def calculate_metric(all_labels, all_predictions, classes, class_metric=False, average='macro avg'):
+    metric = classification_report(y_true=all_labels, y_pred=all_predictions,
+                                   target_names=classes, digits=4, output_dict=True, zero_division=0)
+    if not class_metric:
+        metric = {
+            'accuracy': metric.get('accuracy'),
+            'precision': metric.get(average).get('precision'),
+            'recall': metric.get(average).get('recall'),
+            'f1-score': metric.get(average).get('f1-score'),
+        }
+        return metric
+    else:
+        return metric
+
+
+def train_val(model, dataset: dict, batch_size, batch_size_test, save_path, txt_file=None, opt="LBFGS", epochs=100,
+              lamb=0.,
+              lamb_l1=1., label=None, class_dict=None, lamb_entropy=2., lamb_coef=0.,
               lamb_coefdiff=0., update_grid=True, grid_update_num=10, loss_fn=None, lr=1., start_grid_update_step=-1,
               stop_grid_update_step=100,
               save_fig=False, in_vars=None, out_vars=None, beta=3, save_fig_freq=1, img_folder='./video',
               singularity_avoiding=False, y_th=1000., reg_metric='edge_forward_spline_n'):
-
-
+    all_predictions = []
+    all_labels = []
+    best = -1
+    column_order = ['epoch', 'train_losses', 'val_losses', 'accuracies', 'precisions', 'recalls',
+                    'f1-scores', 'lrs']
+    custom_column_widths = {'epoch': 5, 'train_loss': 12, 'val_loss': 10, 'accuracy': 10, 'precision': 9,
+                            'recall': 7,
+                            'f1-score': 8,
+                            'lr': 3}
+    if txt_file is not None:
+        initialize_results_file(txt_file, column_order)
+
     if lamb > 0. and not model.save_act:
         print('setting lamb=0. If you want to set lamb > 0, set model.save_act=True')
 
@@ -44,8 +71,8 @@ def train_val(model, dataset: dict, batch_size, batch_size_test, opt="LBFGS", ep
 
     lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, min_lr=1e-9)
 
-    results = {'
-               'all_labels': []}
+    results = {'train_losses': [], 'val_losses': [], 'accuracies': [], 'precisions': [], 'recalls': [], 'f1-scores': [],
+               'lrs': [], 'all_predictions': [], 'all_labels': []}
 
     steps = math.ceil(dataset['train_input'].shape[0] / batch_size)
 
@@ -118,7 +145,6 @@ def train_val(model, dataset: dict, batch_size, batch_size_test, opt="LBFGS", ep
                 optimizer.step()
             train_pbar.set_postfix(loss=train_loss.item())
 
-        # print(f'{epoch}/{epochs}:train_loss:{train_loss.item()}')
         val_loss = torch.zeros(1).to(model.device)
         with torch.no_grad():
             test_indices = np.arange(dataset['test_input'].shape[0])
@@ -142,14 +168,34 @@ def train_val(model, dataset: dict, batch_size, batch_size_test, opt="LBFGS", ep
                 diffs = torch.abs(outputs - label)
                 closest_indices = torch.argmin(diffs, dim=1)
                 closest_values = label[closest_indices]
-
-
+                all_predictions.extend(closest_values.detach().cpu().numpy())
+                all_labels.extend(batch_test_label.detach().cpu().numpy())
 
+        train_lr = lr_scheduler.get_last_lr()[0]
         lr_scheduler.step(val_loss)
 
-
-
-
+        if label is not None:
+            m = calculate_metric(all_labels, all_predictions, class_dict)
+            print(m)
+            results["accuracy"].append(m["accuracy"])
+            results["precisions"].append(m["precision"])
+            results["recalls"].append(m["recall"])
+            results["f1-scores"].append(m["f1-score"])
+            results["lrs"].append(train_lr)
+            if best < m["f1-score"]:
+                best = m["f1-score"]
+                results['all_predictions'] = all_predictions
+                results['all_labels'] = all_labels
+                if save_path is not None:
+                    model.saveckpt(path=save_path + '/' + 'model')
+            if txt_file is not None:
+                m.update({'lr': train_lr, 'epoch': epoch, 'train_loss': train_loss.item(), 'val_loss': val_loss.item()})
+                append_to_results_file(txt_file, m, column_order,
+                                       custom_column_widths=custom_column_widths)
+
+        results["train_losses"].append(train_loss.item())
+        results["val_losses"].append(val_loss.item())
+        results["regularize"].append(reg_.item())
 
         if save_fig and epoch % save_fig_freq == 0:
             model.plot(folder=img_folder, in_vars=in_vars, out_vars=out_vars, title="Step {}".format(epoch),
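A hedged, runnable sketch of the calculate_metric helper duplicated into this module (it assumes yms-kan 0.0.3 and scikit-learn are installed; the labels and predictions are illustrative):

from yms_kan.train_eval_utils import calculate_metric

labels      = [0, 1, 1, 2, 2, 2]
predictions = [0, 1, 0, 2, 2, 1]

# class_metric=False collapses sklearn's classification_report
# to the four macro-averaged headline numbers:
print(calculate_metric(labels, predictions, classes=['cat', 'dog', 'bird']))
# e.g. {'accuracy': 0.6667 (approx.), 'precision': ..., 'recall': ..., 'f1-score': ...}

With class_metric=True, the full per-class report dict is returned instead.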
yms_kan/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.0.2"
+__version__ = "0.0.3"  # initial version
{yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/RECORD
CHANGED
@@ -9,14 +9,14 @@ yms_kan/experiment.py,sha256=VnZq7hmcvRk08GNI7VIpkOjkaRZBoIw1C8SU_f1KbaA,1682
 yms_kan/feynman.py,sha256=Eisf69K49s4C6UlPEi5LnNK_p5TUJQLBKxMp-sW0a9w,33687
 yms_kan/hypothesis.py,sha256=Ec20xadfgOSSWeZHQaGn-h9F2PY7LWFU3iniNI2Zd_4,23165
 yms_kan/spline.py,sha256=ZXyGwl2Sc-UrnrcuUXeUQkBOMnetaWcHrbpZaqatCvs,4345
-yms_kan/tool.py,sha256=
-yms_kan/train_eval_utils.py,sha256=
+yms_kan/tool.py,sha256=rkRpqF3EcsAq7a3k1F1zKlxfJ4U9n-FzHyNCJgN4URY,21159
+yms_kan/train_eval_utils.py,sha256=y5eI6-kJU51pKTgB3TdwGyu1QKTACwbamZ9ZOdhPogc,17184
 yms_kan/utils.py,sha256=J07L-tgmc1OfU6Tl6mGwHJRizjFN75EJK8BxejaZLUc,23860
-yms_kan/version.py,sha256=
+yms_kan/version.py,sha256=ue5T-H1rqmrk8ISYQmYosD_ZfIp5J-L-wsfBrW8sgCw,39
 yms_kan/assets/img/mult_symbol.png,sha256=2f4xUKdweft-qUbHjFI5h9-smnEtc0FWq8hNYZhPAXY,6392
 yms_kan/assets/img/sum_symbol.png,sha256=94QkMUzmEjlCq_yf14nMEQmettaq86FmlGfdl22b4XE,6210
-yms_kan-0.0.2.dist-info/licenses/LICENSE,sha256=BJXDWyF4Groqtnp4Gi9puH4aLg7A2IC3MpHmC-cSxwc,1067
-yms_kan-0.0.2.dist-info/METADATA,sha256=
-yms_kan-0.0.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-yms_kan-0.0.2.dist-info/top_level.txt,sha256=Z_JDh6yZf-EiW1eKgL6ADsN2yqEMRMspi-o29JZ1WPo,8
-yms_kan-0.0.2.dist-info/RECORD,,
+yms_kan-0.0.3.dist-info/licenses/LICENSE,sha256=BJXDWyF4Groqtnp4Gi9puH4aLg7A2IC3MpHmC-cSxwc,1067
+yms_kan-0.0.3.dist-info/METADATA,sha256=o53cYpZ1jV7K8ptCYWV5aG-jsekuKcb6wuzJW_sxsWo,240
+yms_kan-0.0.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+yms_kan-0.0.3.dist-info/top_level.txt,sha256=Z_JDh6yZf-EiW1eKgL6ADsN2yqEMRMspi-o29JZ1WPo,8
+yms_kan-0.0.3.dist-info/RECORD,,
{yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/WHEEL
File without changes
{yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/licenses/LICENSE
File without changes
{yms_kan-0.0.2.dist-info → yms_kan-0.0.3.dist-info}/top_level.txt
File without changes