utilskit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- utilskit/__init__.py +7 -0
- utilskit/classificationutils.py +143 -0
- utilskit/dataframeutils.py +273 -0
- utilskit/dbutils.py +159 -0
- utilskit/logutils.py +109 -0
- utilskit/plotutils.py +278 -0
- utilskit/timeutils.py +40 -0
- utilskit/utils.py +277 -0
- utilskit-0.1.0.dist-info/METADATA +27 -0
- utilskit-0.1.0.dist-info/RECORD +12 -0
- utilskit-0.1.0.dist-info/WHEEL +5 -0
- utilskit-0.1.0.dist-info/top_level.txt +1 -0
utilskit/__init__.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
'''
|
|
2
|
+
pip install pandas
|
|
3
|
+
'''
|
|
4
|
+
import sys
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_max_2nd_n_reliability(pred):
    """Return the runner-up class index and a reliability score per row.

    Each row of `pred` is min-max scaled to [0, 1]; the top-1 entry
    (scaled value exactly 1) is masked out, so the argmax that remains is
    the second-best class. Reliability is (1 - scaled runner-up) * 100:
    the further the runner-up trails the winner, the higher the score.

    parameters
    ----------
    pred: np.ndarray of shape (n_samples, n_classes)

    returns
    -------
    (second_idx, reliability): two arrays of length n_samples
    """
    # Min-max scale each row into [0, 1]; keepdims keeps shapes broadcastable.
    scaled = pred - np.min(pred, axis=1, keepdims=True)
    scaled = scaled / np.max(scaled, axis=1, keepdims=True)

    # Mask the top-1 prediction so it cannot win the next argmax.
    scaled = np.where(scaled == 1, -100, scaled)

    # Runner-up class per row.
    second_idx = np.argmax(scaled, axis=1)

    # Distance of the runner-up from the winner, as a percentage.
    reliability = (1 - np.max(scaled, axis=1)) * 100
    return second_idx, reliability
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def matrix2confusion(matrix, uni_label_list, round_num=4, show_percentage=True):
    """Build a labelled confusion-matrix DataFrame with per-class metrics.

    parameters
    ----------
    matrix: list[list[int]]
        Square count matrix; rows = true labels, columns = predicted labels.
    uni_label_list: list[str]
        Label names in the same order as the matrix axes.
    round_num: int
        Decimal places used when rounding the metrics.
    show_percentage: bool
        If True, accuracy/precision/recall are scaled to 0-100.

    returns
    -------
    pd.DataFrame: counts plus 'accuracy', 'precision', 'recall',
    'f1 score' and 'count' columns, with a trailing 'count' row of
    per-class prediction totals. Overall accuracy sits in the first row
    of the 'accuracy' column.
    """
    whole_sum = np.sum(matrix)
    true_sum_list = np.sum(matrix, axis=-1).tolist()
    pred_sum_list = np.sum(matrix, axis=-2).tolist()

    per_num = 100 if show_percentage else 1

    # Work on a copy so the caller's matrix is not mutated
    # (the original extend()-ed the caller's rows in place).
    rows = [list(row) for row in matrix]

    correct_sum = 0
    for i, row in enumerate(rows):
        correct_count = row[i]
        correct_sum += correct_count
        pred_sum = pred_sum_list[i]
        true_sum = true_sum_list[i]

        # precision: correct / predicted-as-class-i (None when never predicted)
        try:
            precision = np.round(correct_count / pred_sum, round_num) * per_num
        except ZeroDivisionError:
            precision = None

        # recall: correct / actually-class-i (None when the class never occurs)
        try:
            recall = np.round(correct_count / true_sum, round_num) * per_num
        except ZeroDivisionError:
            recall = None

        # f1: harmonic mean; None when undefined. ZeroDivisionError must be
        # caught too — precision == recall == 0 previously crashed here.
        try:
            f1_score = np.round(2 * precision * recall / (precision + recall), round_num)
        except (TypeError, ZeroDivisionError):
            f1_score = None

        row.extend([None, precision, recall, f1_score, true_sum])

    whole_accuracy = np.round(correct_sum / whole_sum, round_num) * per_num

    # index & columns: labels plus metric/count extensions
    index_list = uni_label_list.copy()
    index_list.append('count')
    column_list = uni_label_list.copy()
    column_list.extend(['accuracy', 'precision', 'recall', 'f1 score', 'count'])

    # trailing row: per-class prediction counts (short row — pandas pads with NaN)
    rows.append(pred_sum_list + [None] * (len(column_list) - len(index_list)))

    confusion_matrix = pd.DataFrame(rows, index=index_list, columns=column_list)
    # .loc avoids the chained-assignment pitfall of df['accuracy'][0],
    # which is deprecated and may silently write to a copy.
    confusion_matrix.loc[confusion_matrix.index[0], 'accuracy'] = whole_accuracy

    return confusion_matrix
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def make_confusion_matrix(mode, true_list, pred_list, ignore_idx=None, round_num=4, label2id_dict=None, id2label_dict=None, show_percentage=True):
    """Count (true, pred) pairs into a labelled confusion-matrix DataFrame.

    parameters
    ----------
    mode: str
        'label2id' — true/pred are label names, mapped via label2id_dict.
        'id2label' — true/pred are integer ids; labels come from id2label_dict.
    true_list, pred_list: sequences of equal length.
    ignore_idx: int | None
        In 'id2label' mode, pairs whose true id equals this value are
        skipped (e.g. padding positions).
    round_num, show_percentage: forwarded to matrix2confusion().

    returns
    -------
    pd.DataFrame from matrix2confusion().

    raises
    ------
    ValueError on an unknown mode (previously this surfaced later as an
    UnboundLocalError on uni_label_list).
    """
    if mode == 'label2id':
        uni_label_list = list(label2id_dict.keys())
    elif mode == 'id2label':
        uni_label_list = list(id2label_dict.values())
    else:
        raise ValueError(f"mode must be 'label2id' or 'id2label', got {mode!r}")

    # square zero matrix, one row/column per label
    n = len(uni_label_list)
    matrix = [[0] * n for _ in range(n)]

    # tally counts
    if mode == 'label2id':
        for t, p in zip(true_list, pred_list):
            matrix[label2id_dict[t]][label2id_dict[p]] += 1
    else:  # 'id2label'
        for t_i, p_i in zip(true_list, pred_list):
            # skip ignored ids such as padding
            if ignore_idx is not None and t_i == ignore_idx:
                continue
            matrix[int(t_i)][int(p_i)] += 1

    return matrix2confusion(
        matrix=matrix,
        uni_label_list=uni_label_list,
        round_num=round_num,
        show_percentage=show_percentage
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def reset_confusion_matrix(confusion_matrix, new_label_list, round_num=4, show_percentage=True):
    """Rebuild a confusion matrix restricted/reordered to `new_label_list`.

    The count sub-matrix for the requested labels is sliced out of an
    existing confusion_matrix DataFrame (columns first, then rows via a
    transpose) and fed back through matrix2confusion() so all metrics are
    recomputed. Exits the process when a label is unknown (original
    behaviour kept).
    """
    try:
        sub_df = confusion_matrix[new_label_list]
    except KeyError:
        print('예측 결과에 존재하지 않는 라벨명을 입력하였습니다.')
        sys.exit()

    # select the same labels on the row axis as well
    sub_df = sub_df.T[new_label_list].T

    return matrix2confusion(
        matrix=sub_df.values.tolist(),
        uni_label_list=new_label_list,
        round_num=round_num,
        show_percentage=show_percentage
    )
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import csv
|
|
7
|
+
import warnings
|
|
8
|
+
warnings.filterwarnings('ignore')
|
|
9
|
+
|
|
10
|
+
from utilskit import utils as u
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def read_df(path):
    """Read a tabular file into a DataFrame based on its extension.

    - .csv/.CSV : pandas read_csv, trying utf-8-sig then cp949; files with
      ragged rows fall back to the raw csv module, promoting row 0 to the
      header.
    - .xlsx/.xls: pandas read_excel.
    - .txt      : one row per line, single column 'string'.

    raises
    ------
    ValueError for any other extension; re-raises the UnicodeDecodeError
    when every candidate encoding fails (the original looped on 'cp949'
    forever in that case).
    """
    extension = path.split('.')[-1]
    if extension in ['csv', 'CSV']:
        switch = 'csv'
    elif extension in ['xlsx', 'xls']:
        switch = 'excel'
    elif extension in ['txt']:
        switch = 'txt'
    else:
        raise ValueError(f'{extension}은(는) 잘못되거나 지정되지 않은 확장자입니다.')

    if switch == 'csv':
        data_df = None
        decode_err = None
        for encoding in ('utf-8-sig', 'cp949'):
            try:
                data_df = pd.read_csv(path, encoding=encoding)
                break
            except UnicodeDecodeError as err:
                # wrong encoding: remember and try the next candidate
                decode_err = err
            except pd.errors.ParserError:
                # ragged row lengths: parse manually, first row = header
                with open(path, encoding=encoding) as f:
                    csv_list = list(csv.reader(f))
                data_df = pd.DataFrame(csv_list)
                data_df.columns = data_df.iloc[0].to_list()
                data_df = data_df.drop(index=data_df.index[0])  # drop header row
                break
        if data_df is None:
            # both encodings failed to decode — surface the last error
            raise decode_err
    if switch == 'excel':
        data_df = pd.read_excel(path)
    if switch == 'txt':
        line_list = []
        with open(path, 'r', encoding='utf-8-sig') as f:
            for line in f.readlines():
                line_list.append(line.replace('\n', ''))
        data_df = pd.DataFrame(line_list, columns=['string'])
    return data_df
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def utc2kor(df, time_column='time'):
    """Convert an ISO-8601 UTC time column ('...T...Z') to Korean local
    time (+9h), formatted back as 'YYYY-MM-DD HH:MM:SS' strings, and
    return the frame sorted ascending by that column.

    An empty frame is returned untouched.
    """
    if df.empty:
        return df

    # normalise 'YYYY-MM-DDTHH:MM:SSZ' -> 'YYYY-MM-DD HH:MM:SS'
    col = df[time_column].astype('str')
    col = col.str.replace('T', ' ', regex=False).str.replace('Z', '', regex=False)

    # UTC -> KST is a fixed +9 hour offset
    col = col.apply(lambda s: datetime.strptime(s, '%Y-%m-%d %H:%M:%S') + timedelta(hours=9))

    df[time_column] = col.astype('str')
    return df.sort_values(by=time_column, ascending=True)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# def dataframe_preprocessor(df,
|
|
75
|
+
# max_dict=None, min_dict=None,
|
|
76
|
+
# nan_drop_column=None,
|
|
77
|
+
# do_nan_fill_whole=False
|
|
78
|
+
# ):
|
|
79
|
+
# # ========================================================================
|
|
80
|
+
# # # 최대값 초과 --> 이상치 --> 결측치
|
|
81
|
+
# # if max_dict is not None:
|
|
82
|
+
# # df = maxadnormal2nan(df=df, max_dict=max_dict)
|
|
83
|
+
|
|
84
|
+
# # # 최소값 미만 --> 이상치 --> 결측치
|
|
85
|
+
# # if min_dict is not None:
|
|
86
|
+
# # df = minadnormal2nan(df=df, min_dict=min_dict)
|
|
87
|
+
|
|
88
|
+
# # ========================================================================
|
|
89
|
+
# # 특정 컬럼 기준 NaN 제거
|
|
90
|
+
# # if nan_drop_column is not None:
|
|
91
|
+
# # df = drop_nan(
|
|
92
|
+
# # df=df,
|
|
93
|
+
# # base_column=nan_drop_column
|
|
94
|
+
# # )
|
|
95
|
+
|
|
96
|
+
# # ========================================================================
|
|
97
|
+
# # 전후값 채우기
|
|
98
|
+
# if do_nan_fill_whole:
|
|
99
|
+
# df = df.fillna(method='ffill')
|
|
100
|
+
# df = df.fillna(method='bfill')
|
|
101
|
+
|
|
102
|
+
# return df
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def adnormal2nan(df, stan_col, max_value=None, min_value=None):
    """Replace out-of-range values in df[stan_col] with NaN, in place.

    Values strictly above max_value or strictly below min_value are
    treated as outliers. Either bound may be None to skip that check.

    Uses .loc with a boolean mask — the original chained indexing
    (df[col][mask] = ...) raises SettingWithCopyWarning and can silently
    write to a temporary copy under pandas copy-on-write.

    returns the (mutated) df for chaining.
    """
    if max_value is not None:
        df.loc[df[stan_col] > max_value, stan_col] = np.nan
    if min_value is not None:
        df.loc[df[stan_col] < min_value, stan_col] = np.nan
    return df
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def time_filling(df, start, end, time_column='time'):
    """Right-merge df onto a complete 1-second time axis from start to end.

    Seconds missing from df appear as NaN rows, so the result has one row
    per second of [start, end]. Time values are compared as strings in
    'YYYY-MM-DD HH:MM:SS' form. An empty frame is returned untouched.
    """
    if df.empty:
        return df

    # full second-resolution axis, stringified to match df's time column
    full_axis = pd.DataFrame(
        pd.date_range(start=start, end=end, freq='S'),
        columns=[time_column],
    ).astype('str')

    return pd.merge(df, full_axis, how='right')
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def drop_nan(df, stan_col):
    """Drop rows where `stan_col` is NaN.

    A missing column is not an error — the frame is returned unchanged,
    matching the original's swallowed KeyError.
    """
    if stan_col in df.columns:
        df = df.dropna(subset=[stan_col])
    return df
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def isdfvalid(df, valid_column_list):
    """Return True iff every column in valid_column_list exists in df."""
    return all(col in df.columns for col in valid_column_list)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def local_nan_correction(df, stan_col, nan_repeat=5):
    """Fill short NaN runs in df[stan_col] using neighbouring values.

    Runs of consecutive NaNs no longer than `nan_repeat` (located by
    utilskit.utils.identify_stan_repeat_section with mode='below') are
    forward-filled from the value just before the gap, then
    backward-filled from the value just after it.

    returns the (mutated) df.
    """
    values = df[stan_col].values
    start_idxs, end_idxs = u.identify_stan_repeat_section(
        ary=values,
        stan_value='nan',
        stan_repeat=nan_repeat,
        mode='below',
        reverse=False,
    )
    for si, ei in zip(start_idxs, end_idxs):
        # pull the value just before the gap forward through it ...
        df.loc[si - 1:ei, stan_col] = df.loc[si - 1:ei, stan_col].fillna(method='ffill')
        # ... then the value just after the gap backward, covering a
        # leading gap that had nothing in front of it
        df.loc[si:ei + 1, stan_col] = df.loc[si:ei + 1, stan_col].fillna(method='bfill')

    return df
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def pin_nan_correction(df, stan_col, max_diff=0.1, nan_repeat=3):
    '''
    Turn short "pin" spikes into NaN and repair them from neighbours.

    A pin is a value that is not outside any absolute range but jumps away
    from the local trend:

    Example: 20, 20, 20, 20, [ 1], 20, 20, 20, 1, 1, 2, 1
    Result:  20, 20, 20, 20, [NaN -> filled], 20, 20, 20, 1, 1, 2, 1

    NOTE(review): assumes df has a default RangeIndex (positional-looking
    .loc slices) and a non-empty column — confirm at call sites.
    '''

    # Series to correct.
    stan_ary = df[stan_col].values

    # One-step-lagged copy (first value duplicated), then the absolute
    # first difference rounded to 4 decimals.
    stan_1_list = stan_ary.tolist()
    stan_1_list.insert(0, stan_ary[0])
    stan_1_ary = np.array(stan_1_list)[:-1]
    diff_ary = np.round(stan_ary - stan_1_ary, 4)
    diff_ary = np.array(list(map(abs, diff_ary)))

    idx_list = []
    for idx, diff in enumerate(diff_ary):

        # Step smaller than max_diff: normal fluctuation, not a pin.
        if diff < max_diff:
            continue

        # Mean of the 10 samples before idx.
        before_aver = np.average(stan_ary[idx-10:idx])

        # Mean of the 10 samples after idx.
        after_aver = np.average(stan_ary[idx+1:idx+11])

        # If either window is all-NaN, mirror the other side so the
        # comparison below stays defined.
        if str(before_aver) == 'nan':
            before_aver = after_aver
        if str(after_aver) == 'nan':
            after_aver = before_aver

        # |after mean - before mean|: how much the local trend itself moved.
        aver_diff = abs(after_aver - before_aver)

        # Excess of the point-to-point step over the trend movement.
        p = np.round(diff - aver_diff, 4)

        # Big step but flat trend -> the step is a pin outlier.
        if p > max_diff:
            idx_list.append(idx)

    # NOTE(review): raises NameError when diff_ary is empty (loop never ran)
    # — confirm callers never pass an empty column.
    del idx

    # Blank out a +/-3 sample window around each detected pin.
    temp_ary = stan_ary.copy()
    if len(idx_list) > 0:
        for idx in idx_list:
            if idx < 3:
                temp_ary[:idx+3] = np.nan
            else:
                temp_ary[idx-3:idx+3] = np.nan

    # Locate the NaN runs to repair (helper from utilskit.utils).
    for_fill_start_idx_list, for_fill_end_idx_list = u.identify_stan_repeat_section(
        ary=temp_ary,
        stan_value='nan',
        stan_repeat=nan_repeat,
        mode='below',
        reverse=False
    )

    # Overwrite those spans with NaN in the frame, then forward-fill from
    # the value before each span and backward-fill from the value after.
    for fsi, fei in zip(for_fill_start_idx_list, for_fill_end_idx_list):
        df.loc[fsi:fei, stan_col] = np.nan
        df.loc[fsi-1:fei, stan_col] = df.loc[fsi-1:fei, stan_col].fillna(method='ffill')
        df.loc[fsi:fei+1, stan_col] = df.loc[fsi:fei+1, stan_col].fillna(method='bfill')

    return df
|
utilskit/dbutils.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# DB
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pymysql
|
|
4
|
+
from sqlalchemy import create_engine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# def get_info():
|
|
8
|
+
# db_host = '192.168.0.85'
|
|
9
|
+
# db_port = 3306
|
|
10
|
+
# db_user = 'theimc'
|
|
11
|
+
# db_passward = 'theimc#10!'
|
|
12
|
+
# db_name = 'BUSMONITORING'
|
|
13
|
+
# charset = 'utf8mb4'
|
|
14
|
+
# if_exists = 'append'
|
|
15
|
+
# autocommit = True
|
|
16
|
+
# return (db_host, db_port, db_user, db_passward, db_name, charset, if_exists, autocommit)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# def get_info_p5000():
|
|
20
|
+
# db_host = "59.25.131.135"
|
|
21
|
+
# db_port = 3306
|
|
22
|
+
# db_user = "ai_m"
|
|
23
|
+
# db_password = "temp"
|
|
24
|
+
# db_name = "bus"
|
|
25
|
+
# charset = 'utf8mb4'
|
|
26
|
+
# if_exists = 'append'
|
|
27
|
+
# autocommit = True
|
|
28
|
+
# return (db_host, db_port, db_user, db_password, db_name, charset, if_exists ,autocommit)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def db_connect(host, user, port, passward, name, charset='utf8mb4', if_exists='append', autocommit=True):
    """Open and return a pymysql connection to database `name`.

    NOTE(review): `if_exists` is accepted but never used here — presumably
    kept for a shared config-dict call shape; confirm before removing.
    """
    return pymysql.connect(
        host=host,
        user=user,
        port=port,
        password=passward,
        db=name,
        charset=charset,
        autocommit=autocommit
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def select_db(conn, query, where=None):
    """Run a SELECT `query` on connection `conn` and return all rows.

    parameters
    ----------
    conn: an open DB connection exposing .cursor().
    query: str — the full SQL statement to execute.
    where: unused; accepted for interface symmetry with delete_db /
        update_db. Embed any WHERE clause in `query` itself.

    returns
    -------
    The cursor.fetchall() result.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # close the cursor even when execute/fetch raises
        # (the original leaked it on any exception)
        cursor.close()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def delete_db(db_info_dict, table, where=None):
    """Delete rows from `table`, optionally restricted by a WHERE clause.

    parameters
    ----------
    db_info_dict: dict with keys 'host', 'user', 'port', 'passward',
        'name' and optionally 'charset' / 'autocommit'.
    table: str — target table name.
    where: str | None — raw WHERE clause body; all rows are deleted
        when omitted.

    The original called db_connect(db_info_dict), passing the whole dict
    as the positional `host` argument — a guaranteed TypeError. The dict
    is now unpacked to match db_connect's actual signature, and both the
    cursor and the connection are closed afterwards.

    NOTE(review): `table` and `where` are interpolated directly into the
    SQL string — never pass untrusted input here.
    """
    conn = db_connect(
        host=db_info_dict['host'],
        user=db_info_dict['user'],
        port=db_info_dict['port'],
        passward=db_info_dict['passward'],
        name=db_info_dict['name'],
        charset=db_info_dict.get('charset', 'utf8mb4'),
        autocommit=db_info_dict.get('autocommit', True),
    )
    cursor = conn.cursor()
    try:
        if where:
            query = f"""
            DELETE FROM {table}
            where {where}
            """
        else:
            query = f"""
            DELETE FROM {table}
            """
        cursor.execute(query)
    finally:
        cursor.close()
        conn.close()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def update_db(db_info_dict, table, set_, where):
    """Run an UPDATE on `table` with the given SET and WHERE clause bodies.

    parameters
    ----------
    db_info_dict: dict with keys 'host', 'user', 'port', 'passward',
        'name' and optionally 'charset' / 'autocommit'.
    table, set_, where: str — raw SQL fragments.

    Like delete_db, the original passed the whole dict into db_connect's
    positional `host` parameter (TypeError); the dict is now unpacked, and
    the cursor/connection are closed afterwards.

    NOTE(review): all three SQL fragments are interpolated directly into
    the statement — never pass untrusted input here.
    """
    conn = db_connect(
        host=db_info_dict['host'],
        user=db_info_dict['user'],
        port=db_info_dict['port'],
        passward=db_info_dict['passward'],
        name=db_info_dict['name'],
        charset=db_info_dict.get('charset', 'utf8mb4'),
        autocommit=db_info_dict.get('autocommit', True),
    )
    cursor = conn.cursor()
    try:
        query = f"""
        update {table}
        set {set_}
        where {where}
        """
        cursor.execute(query)
    finally:
        cursor.close()
        conn.close()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def pd2db(db_host, db_port, db_user, db_passward,
          db_name, charset, if_exists, autocommit,
          df, table, encoding='utf-8-sig', index=False):
    """Write DataFrame `df` into MySQL table `table` via SQLAlchemy.

    `if_exists` and `index` are forwarded to DataFrame.to_sql.

    NOTE(review): `encoding` and `autocommit` are accepted but unused
    (create_engine no longer takes `encoding`) — confirm before removing.
    """
    dsn = f"mysql+pymysql://{db_user}:{db_passward}@{db_host}:{db_port}/{db_name}?charset={charset}"
    engine = create_engine(dsn)
    connection = engine.connect()
    df.to_sql(name=table, con=engine, if_exists=if_exists, index=index)
    connection.close()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# def pd2db(db_info_dict, df, table, encoding='utf-8-sig', index=False):
|
|
101
|
+
# db_host = db_info_dict['host']
|
|
102
|
+
# db_port = db_info_dict['port']
|
|
103
|
+
# db_user = db_info_dict['user']
|
|
104
|
+
# db_passward = db_info_dict['passward']
|
|
105
|
+
# db_name = db_info_dict['name']
|
|
106
|
+
# charset = db_info_dict['charset']
|
|
107
|
+
# if_exists = db_info_dict['if_exists']
|
|
108
|
+
# autocommit = db_info_dict['autocommit']
|
|
109
|
+
# url = f"mysql+pymysql://{db_user}:{db_passward}@{db_host}:{db_port}/{db_name}?charset={charset}"
|
|
110
|
+
# # engine = create_engine(url, encoding=encoding)
|
|
111
|
+
# engine = create_engine(url)
|
|
112
|
+
# conn = engine.connect()
|
|
113
|
+
# df.to_sql(name=table, con=engine, if_exists=if_exists, index=index)
|
|
114
|
+
# conn.close()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
"""
|
|
118
|
+
def main():
|
|
119
|
+
db_host = '211.195.9.226'
|
|
120
|
+
db_port = 3306
|
|
121
|
+
db_user = 'root'
|
|
122
|
+
db_passward = 'theimc#10!'
|
|
123
|
+
db_name = 'flagship'
|
|
124
|
+
charset = 'utf8mb4'
|
|
125
|
+
table = 'test'
|
|
126
|
+
if_exists = 'append'
|
|
127
|
+
autocommit = True
|
|
128
|
+
|
|
129
|
+
df = pd.read_csv('D:/python/project/temp/example.csv', encoding='utf-8-sig')
|
|
130
|
+
|
|
131
|
+
pd2db(
|
|
132
|
+
db_user=db_user,
|
|
133
|
+
db_passward=db_passward,
|
|
134
|
+
db_host=db_host,
|
|
135
|
+
db_port=db_port,
|
|
136
|
+
db_name=db_name,
|
|
137
|
+
charset=charset,
|
|
138
|
+
df=df,
|
|
139
|
+
table=table,
|
|
140
|
+
if_exists=if_exists,
|
|
141
|
+
encoding='utf-8-sig',
|
|
142
|
+
index=False
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
if __name__ == '__main__':
|
|
147
|
+
main()
|
|
148
|
+
|
|
149
|
+
"""
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
utilskit/logutils.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
from tqdm import tqdm
|
|
4
|
+
import shutil
|
|
5
|
+
import warnings
|
|
6
|
+
warnings.filterwarnings('ignore')
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_logger(get, save_path, log_file_name, time_handler=True, console_display=False, logging_level='info'):
    '''
    Create (or fetch) a configured file logger.

    parameters
    ----------
    get: str
        Logger name passed to logging.getLogger().

    save_path: str
        Directory for the log file; created if missing.

    log_file_name: str
        File name of the log inside save_path.

    time_handler: bool (default: True)
        If True, rotate the file at midnight (30 backups, suffix YYYYMMDD);
        otherwise use a plain FileHandler.

    console_display: bool (default: False)
        Also echo records to the console.

    logging_level: str
        One of notset < debug < info < warning < error < critical.
        Unknown values leave the logger's current level untouched.

    returns
    -------
    logger: logging.Logger
        Ready-to-use logger. Calling this function again with the same
        `get` returns the same logger WITHOUT adding more handlers — the
        original stacked a new handler per call, duplicating every record.
    '''
    import logging
    from logging import handlers
    os.makedirs(save_path, exist_ok=True)

    logger = logging.getLogger(get)

    # name -> logging level, replacing the original if-chain
    level_map = {
        'critical': logging.CRITICAL,
        'error': logging.ERROR,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG,
        'notset': logging.NOTSET,
    }
    if logging_level in level_map:
        logger.setLevel(level_map[logging_level])

    # Already configured: do not stack duplicate handlers.
    if logger.handlers:
        return logger

    # formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    formatter = logging.Formatter('%(asctime)s level:%(levelname)s %(filename)s line %(lineno)d %(message)s')
    if console_display:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    if time_handler:
        # roll the file over at midnight, keeping 30 dated backups
        file_handler = handlers.TimedRotatingFileHandler(
            filename=f'{save_path}/{log_file_name}',
            when="midnight",
            interval=1,
            backupCount=30,
            encoding="utf-8")
        file_handler.suffix = '%Y%m%d'
    else:
        file_handler = logging.FileHandler(f'{save_path}/{log_file_name}')

    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    return logger
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def log_sort(log_path):
    """Archive rotated log files into '{log_path}_history/YYYY/MM/<name>/'.

    Files are expected to look like '<name>.log.<YYYYMMDD...>' (the suffix
    TimedRotatingFileHandler appends); the live '<name>.log' files, whose
    last dot-segment is 'log', stay where they are.
    """
    os.makedirs(f'{log_path}_history', exist_ok=True)

    # group rotated files by logger name, in sorted order
    log_dict = {}
    for log_file in sorted(os.listdir(log_path)):
        date = log_file.split('.')[-1]
        if date == 'log':
            # live (undated) file — leave it in place
            continue
        log_name = log_file.split('.')[0]
        log_dict.setdefault(log_name, []).append(log_file)

    for log_name, log_list in log_dict.items():
        for log_file in tqdm(log_list):
            date = log_file.split('.')[-1]
            yyyy = date[:4]
            mm = date[4:6]
            move_path = f'{log_path}_history/{yyyy}/{mm}/{log_name}'
            os.makedirs(move_path, exist_ok=True)
            shutil.move(
                f'{log_path}/{log_file}',
                f'{move_path}/{log_file}'
            )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
root_path = ''
|
|
109
|
+
log_sort(root_path)
|
utilskit/plotutils.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
|
|
5
|
+
# Friendly-name lookup tables mapping readable style names onto the
# matplotlib shorthands that plot() accepts.
COLOR_DICT = {
    'default': None, 'blue': 'b', 'green': 'g', 'red': 'r', 'cyan': 'c',
    'magenta': 'm', 'yellow': 'y', 'black': 'k', 'white': 'w',
    'orange': '#ffa500', 'pink': '#ffc0cb', 'khaki': '#f0e68c',
    'gold': '#ffd700', 'skyblue': '#87ceeb', 'navy': '#000080',
    'lightgreen': '#90ee90', 'olive': '#808000', 'violet': '#ee82ee',
    'gray': '#808080', 'brown': '#a52a2a',
}

LINE_DICT = {
    'default': '-', 'None': 'None', 'line': '-', 'dash': '--',
    'dot': ':', 'dash-dot': '-.',
}

MARKER_DICT = {
    'default': None, 'None': None, 'dot': '.', 'pixel': ',', 'circle': 'o',
    'triangle_down': 'v', 'triangle_up': '^', 'triangle_left': '<',
    'triangle_right': '>', 'tri_down': '1', 'tri_up': '2', 'tri_left': '3',
    'tri_right': '4', 'square': 's', 'pentagon': 'p', 'star': '*',
    'hexagon1': 'h', 'hexagon2': 'H', 'plus': '+', 'x': 'x',
    'diamond': 'D', 'thin_diamond': 'd',
}


def get_style(line_style='default', line_size='default', line_color='default',
              marker_style='default', marker_size='default', marker_color='default',
              marker_border_size='default', marker_border_color='default'):
    """Translate friendly style names into matplotlib plot() arguments.

    Unknown names fall back exactly as the lookup tables' 'default'
    entries do ('-' for the line style, None for colours and markers);
    'default' sizes become None so matplotlib picks its own.

    returns
    -------
    (ls, lw, c, marker, ms, mfc, mew, mec) — the keyword values for
    pyplot.plot().
    """
    ls = LINE_DICT.get(line_style, '-')
    lw = None if line_size == 'default' else line_size
    c = COLOR_DICT.get(line_color)
    marker = MARKER_DICT.get(marker_style)
    ms = None if marker_size == 'default' else marker_size
    mfc = COLOR_DICT.get(marker_color)
    mew = None if marker_border_size == 'default' else marker_border_size
    mec = COLOR_DICT.get(marker_border_color)
    return ls, lw, c, marker, ms, mfc, mew, mec
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def draw_plot(title, x, y, title_font_size=13, x_font_size=13, y_font_size=13,
              line_style='default', line_size='default', line_color='default',
              marker_style='default', marker_size='default', marker_color='default',
              marker_border_size='default', marker_border_color='default',
              add_x_list=None, add_y_list=None, add_color_list=None,
              fig_size=None, x_range=None, y_range=None,
              focus_start_list=None, focus_end_list=None, focus_color_list=None, alpha_list=None,
              label=None, save_path=None):
    """Render one line plot through the pyplot state machine, optionally
    with extra series and highlighted x-spans, and optionally save it to
    '{save_path}/{title}.png'.

    Style arguments are friendly names resolved through get_style();
    'default' keeps matplotlib's own defaults. The figure is always
    closed at the end (plt.close('all')), so nothing is displayed —
    this is a save-to-disk helper.

    NOTE(review): when `label` is given, the axis labels are the
    hard-coded strings 'xlabel'/'ylabel' — confirm that is intended.
    """

    # resolve friendly style names into matplotlib shorthands
    ls, lw, c, marker, ms, mfc, mew, mec = get_style(
        line_style=line_style,
        line_size=line_size,
        line_color=line_color,
        marker_style=marker_style,
        marker_size=marker_size,
        marker_color=marker_color,
        marker_border_size=marker_border_size,
        marker_border_color=marker_border_color
    )

    if fig_size:
        plt.figure(figsize=fig_size)

    # axis limits (each range is a (min, max) pair)
    if x_range:
        x_min = x_range[0]
        x_max = x_range[1]
        plt.xlim(x_min, x_max)  # (min, max)
    if y_range:
        y_min = y_range[0]
        y_max = y_range[1]
        plt.ylim(y_min, y_max)

    plt.title(title, fontdict={'fontsize':title_font_size})

    # tick-label sizes on both axes
    plt.xticks(fontsize=x_font_size)
    plt.yticks(fontsize=y_font_size)

    # main series
    plt.plot(
        x, y,
        ls=ls,
        lw=lw,
        c=c,
        marker=marker,
        ms=ms,
        mfc=mfc,
        mew=mew,
        mec=mec,
        label=label
    )

    # legend and axis labels only when a label was supplied
    if label is not None:
        plt.xlabel('xlabel')
        plt.ylabel('ylabel')
        plt.legend()

    # optional extra series, reusing the main line/marker style;
    # per-series colors come from add_color_list when given
    if add_x_list is not None and add_y_list is not None:

        if add_color_list is None:
            for add_x, add_y in zip(add_x_list, add_y_list):
                plt.plot(
                    add_x, add_y,
                    ls=ls,
                    marker=marker
                )
        else:
            for add_x, add_y, add_color in zip(add_x_list, add_y_list, add_color_list):
                _, _, add_c, _, _, _, _, _ = get_style(line_color=add_color)
                plt.plot(
                    add_x, add_y,
                    ls=ls,
                    marker=marker,
                    c=add_c
                )

    # highlighted vertical spans (defaults: gray at alpha 0.2)
    if focus_start_list is not None and focus_end_list is not None:
        if focus_color_list is None:
            focus_color_list = ['gray'] * len(focus_start_list)
        if alpha_list is None:
            alpha_list = [0.2] * len(focus_start_list)
        for focus_start, focus_end, focus_c, alpha in zip(focus_start_list, focus_end_list, focus_color_list, alpha_list):
            plt.axvspan(focus_start, focus_end, facecolor=focus_c, alpha=alpha)

    if save_path:
        os.makedirs(save_path, exist_ok=True)
        plt.savefig(f'{save_path}/{title}.png')

    plt.close('all')
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def draw_subplot(image_title, sub_row_idx, sub_col_idx,
                 title_list, x_list, y_list,
                 title_font_size=13, x_font_size=13, y_font_size=13,
                 x_range_list=None, y_range_list=None,
                 fig_size=None,
                 focus_start_list=None, focus_end_list=None,
                 focus_color_list=None, alpha_list=None,
                 label=None, save_path=None):
    '''
    Draw a grid of line subplots and optionally save the figure as a png.

    parameters
    ----------
    image_title: str
        file name (without extension) used when saving.
    sub_row_idx, sub_col_idx: int
        number of subplot rows / columns passed to plt.subplots.
    title_list, x_list, y_list: list
        per-subplot title, x data and y data (one entry per axes).
    title_font_size, x_font_size, y_font_size: int
        font sizes for the title and the x/y tick labels.
    x_range_list, y_range_list: list or None
        optional per-subplot (min, max) axis limits.
    fig_size: tuple or None
        forwarded to plt.subplots(figsize=...).
    focus_start_list, focus_end_list, focus_color_list, alpha_list:
        optional vertical highlight spans, drawn on every subplot.
    label:
        unused; kept for signature compatibility with draw_plot.
    save_path: str or None
        directory to save '<image_title>.png' into; created if missing.
    '''
    if fig_size is not None:
        fig, axs = plt.subplots(sub_row_idx, sub_col_idx, figsize=fig_size)
    else:
        fig, axs = plt.subplots(sub_row_idx, sub_col_idx)

    # Bug fix: plt.subplots returns a bare Axes (no .flat) when rows == cols == 1.
    axes = axs.flat if hasattr(axs, 'flat') else [axs]

    for i, ax in enumerate(axes):
        ax.plot(x_list[i], y_list[i])
        ax.set_title(title_list[i], fontsize=title_font_size)

        if x_range_list is not None:
            # Bug fix: set_xlim() has no fontsize kwarg (it raised TypeError);
            # tick label sizes are applied below via tick_params.
            ax.set_xlim(x_range_list[i])

        if y_range_list is not None:
            ax.set_ylim(y_range_list[i])
        ax.tick_params(axis='x', labelsize=x_font_size)
        ax.tick_params(axis='y', labelsize=y_font_size)

        # Highlight the requested vertical spans on every subplot.
        if focus_start_list is not None and focus_end_list is not None:
            if focus_color_list is None:
                focus_color_list = ['gray'] * len(focus_start_list)
            if alpha_list is None:
                alpha_list = [0.2] * len(focus_start_list)
            for focus_start, focus_end, focus_c, alpha in zip(focus_start_list, focus_end_list, focus_color_list, alpha_list):
                ax.axvspan(focus_start, focus_end, facecolor=focus_c, alpha=alpha)

    plt.tight_layout()

    if save_path:
        os.makedirs(save_path, exist_ok=True)
        plt.savefig(f'{save_path}/{image_title}.png')

    plt.close(fig)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
|
utilskit/timeutils.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from datetime import date, datetime, timedelta
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# 오늘 날짜 추출
|
|
6
|
+
def get_now(format_string='년-월-일 시:분:초'):
    """Return the current local time formatted via Korean placeholder tokens.

    Every Korean token in *format_string* (년/월/일/시/분/초) is translated to
    the matching strftime directive before formatting; any other character
    is passed through unchanged.
    """
    token_map = {
        '년': '%Y',
        '월': '%m',
        '일': '%d',
        '시': '%H',
        '분': '%M',
        '초': '%S',
    }
    for token, directive in token_map.items():
        format_string = format_string.replace(token, directive)
    return datetime.now().strftime(format_string)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def time_measure(start):
    """Return the elapsed time since *start* (a time.time() stamp).

    The result is an (hours, minutes, seconds) tuple of ints; fractional
    seconds are truncated.
    """
    elapsed = int(time.time() - start)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    return hours, minutes, seconds
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_date_list(schedule, year, mon_list, start_day_list, end_day_list):
    """Build a list of 'YYYY-MM-DD' date strings.

    When *schedule* is truthy, returns a single-element list holding today's
    date (the commented-out timedelta suggests it was once yesterday).
    Otherwise expands every month in *mon_list* from its start day to its
    end day, both inclusive, looked up by month number in the two day lists.
    """
    if schedule:
        return [str(date.today())]

    return [
        f'{year}-{mon:02d}-{day:02d}'
        for mon in mon_list
        for day in range(start_day_list[mon - 1], end_day_list[mon - 1] + 1)
    ]
|
utilskit/utils.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
'''
|
|
2
|
+
pip install xlrd
|
|
3
|
+
'''
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import shutil
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import json
|
|
10
|
+
import time
|
|
11
|
+
import csv
|
|
12
|
+
from tqdm import tqdm
|
|
13
|
+
from datetime import date, datetime, timedelta
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def save_yaml(path, obj):
    """Serialize *obj* to a YAML file at *path*, preserving key order.

    PyYAML is imported lazily so the module works without it installed
    as long as this helper is never called.
    """
    import yaml

    with open(path, 'w') as stream:
        yaml.dump(obj, stream, sort_keys=False)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_yaml(path):
    """Deserialize and return the YAML document stored at *path*.

    PyYAML is imported lazily so the module works without it installed
    as long as this helper is never called.
    """
    import yaml

    with open(path, 'r') as stream:
        return yaml.load(stream, Loader=yaml.FullLoader)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def envs_setting(random_seed):
    '''
    Pin every RNG backend for reproducible runs.

    parameters
    ----------
    random_seed: int
        seed applied to torch (CPU and CUDA), numpy and the stdlib random
        module.

    returns
    -------
    None. Side effects: seeds all RNGs and switches cuDNN into
    deterministic mode (benchmark off).
    '''
    import random

    import numpy as np
    import torch
    import torch.backends.cudnn as cudnn

    torch.manual_seed(random_seed)
    # The CUDA calls are safe no-ops when no GPU is present.
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    np.random.seed(random_seed)
    random.seed(random_seed)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def normalize_1D(ary):
    '''
    Min-max normalize a 1-D array into the [0, 1] range.

    parameters
    ----------
    ary: array-like
        1-D sequence to normalize.

    returns
    -------
    numpy float array scaled so min -> 0 and max -> 1. A constant array
    maps to all zeros (instead of 0/0 -> NaN), an empty array returns an
    empty float array, and non-1-D input prints a message and returns
    None (preserving the original contract).
    '''
    ary = np.array(ary)

    # ndim check also rejects 0-d (scalar) input, which the old
    # len(shape) > 1 test let through.
    if ary.ndim != 1:
        print('1 차원 데이터만 입력 가능')
        return None

    # Bug fix: np.min on an empty array raises; return an empty result.
    if ary.size == 0:
        return ary.astype(float)

    shifted = ary - np.min(ary)
    span = np.max(shifted)
    if span == 0:
        # Bug fix: constant input previously produced NaN via 0/0.
        return np.zeros_like(shifted, dtype=float)
    return shifted / span
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_error_info():
    """Return the formatted traceback of the exception currently being handled.

    Must be called from inside an ``except`` block; otherwise
    traceback.format_exc() reports that no exception is active.
    """
    import traceback

    return traceback.format_exc()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def read_jsonl(data_path):
    """Load a JSON / JSONL file, retrying with cp949 when utf-8-sig fails.

    Parsing itself is delegated to validate_data(); the only difference
    between the two attempts is the text encoding used to open the file.
    """
    try:
        return validate_data(data_path=data_path, encoding='utf-8-sig')
    except UnicodeDecodeError:
        return validate_data(data_path=data_path, encoding='cp949')
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def validate_data(data_path, encoding):
    '''
    Parse *data_path* as a JSON document, falling back to JSONL.

    First tries json.load on the whole file (the parsed document becomes
    the single element of the returned list). If that raises
    JSONDecodeError, the file is re-read line by line and every line that
    ends with '}' is parsed as its own JSON object.

    parameters
    ----------
    data_path: str
        path of the file to read.
    encoding: str
        text encoding used to open the file.

    returns
    -------
    list of parsed JSON values.
    '''
    data_list = []
    try:
        with open(data_path, 'r', encoding=encoding) as f:
            data_list.append(json.load(f))
    except json.decoder.JSONDecodeError:
        with open(data_path, 'r', encoding=encoding) as f:
            for line in f:
                # Bug fix: the old code discarded the strip() result and
                # indexed line[-1], which raised IndexError on blank lines.
                line = line.strip()
                if line.endswith('}'):
                    data_list.append(json.loads(line))
    return data_list
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def tensor2array(x_tensor):
    """Detach *x_tensor* from the autograd graph, move it to CPU and return a numpy array."""
    return x_tensor.detach().cpu().numpy()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def save_tensor(x_tensor, mode):
    """Dump a batched tensor to csv for eyeball debugging.

    mode 1: saves x_tensor[0] to ./temp.csv.
    mode 2: treats x_tensor[0] as (i, j, k) and saves each k-slice to
            ./temp{idx}.csv.
    In both modes values with |v| > 2 are rounded to integers and the
    rest to 3 decimals, and the frame plus the full tensor shape are
    printed.
    """
    x_ary = tensor2array(x_tensor=x_tensor)

    def _fmt(block):
        # Coarse rounding for large magnitudes keeps the csv readable.
        return np.where(np.absolute(block) > 2, np.round(block, 0), np.round(block, 3))

    if mode == 1:
        frame = pd.DataFrame(_fmt(x_ary[0]))
        frame.to_csv('./temp.csv', index=False, encoding='utf-8-sig')
        print(frame)
        print(x_ary.shape)

    if mode == 2:
        sample = x_ary[0]
        i, j, k = sample.shape
        print(i, j, k)
        for idx in range(k):
            plane = np.squeeze(sample[:, :, idx:idx + 1])
            frame = pd.DataFrame(_fmt(plane))
            frame.to_csv(f'./temp{idx}.csv', index=False, encoding='utf-8-sig')
            print(frame)
            print(x_ary.shape)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def identify_repeat_section(ary, stan_num, include_nan=False):
    '''
    Find runs where the same value repeats at least *stan_num* times.

    parameters
    ----------
    ary: sequence
        values to scan; elements are compared via their str() form.
    stan_num: int
        minimum run length for a section to be reported.
    include_nan: bool
        when False (default), repeats of 'nan' values are not counted,
        so nan runs never qualify.

    returns
    -------
    (start_idx_list, end_idx_list): parallel lists of inclusive indices
    marking each qualifying run.
    '''
    start_idx_list = []
    end_idx_list = []

    # Bug fix: empty input previously reached the final check with `idx`
    # unbound (NameError). Mirrors the guard in identify_stan_repeat_section.
    if len(ary) == 0:
        return start_idx_list, end_idx_list

    start_idx = 0
    flag = 1  # length of the run currently being tracked
    for idx, value in enumerate(ary):
        value_str = str(value)

        # First element only seeds the previous-value tracker.
        if idx == 0:
            pre_value = value_str
            continue

        if value_str == pre_value:
            # Same as previous: extend the run (count nan only when allowed).
            if include_nan or value_str != 'nan':
                flag += 1
        else:
            # Run ended at idx-1: record it when long enough, then restart.
            if flag >= stan_num:
                start_idx_list.append(start_idx)
                end_idx_list.append(idx - 1)
            start_idx = idx
            flag = 1
        pre_value = value_str

    # Close out a run that reaches the end of the array.
    if flag >= stan_num:
        start_idx_list.append(start_idx)
        end_idx_list.append(idx)
    return start_idx_list, end_idx_list
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def identify_stan_repeat_section(ary, stan_value, stan_repeat, mode, reverse=False):
    '''
    Find sections of *ary* where the reference value *stan_value* repeats.

    Elements are compared through str(), so stan_value should be the string
    form of the value (e.g. 'nan'). mode 'above' keeps runs whose length is
    >= stan_repeat, 'below' keeps runs whose length is <= stan_repeat; any
    other mode prints a message and raises KeyError. When reverse is True,
    the complement sections (start/end index pairs of everything OUTSIDE
    the matched runs) are returned instead.

    returns
    -------
    (start_idx_list, end_idx_list): parallel lists of inclusive indices.
    '''
    nan_start_idx = 0
    nan_start_idx_list = []
    nan_end_idx_list = []
    flag = 1
    if len(ary) == 0:
        return [], []
    for idx, value in enumerate(ary):

        value_str = str(value)

        # First element only seeds the previous-value tracker.
        if idx == 0:
            pre_value = value_str
            continue

        # Current value matches the reference value.
        if value_str == stan_value:
            # Previous also matched: the run continues.
            if pre_value == stan_value:
                flag += 1
            # Previous did not match: a new run starts here.
            else:
                flag = 1
                # Remember where this run begins.
                nan_start_idx = idx

        # Current value does not match the reference value.
        else:
            # Previous matched: a run just ended at idx-1.
            if pre_value == stan_value:
                # Record the run when its length satisfies *mode*.
                if mode == 'above':
                    if flag >= stan_repeat:
                        nan_start_idx_list.append(nan_start_idx)
                        nan_end_idx_list.append(idx-1)
                elif mode == 'below':
                    if flag <= stan_repeat:
                        nan_start_idx_list.append(nan_start_idx)
                        nan_end_idx_list.append(idx-1)
                else:
                    print('mode 를 above 또는 이하 below 중 하나로 지정해주세요')
                    raise KeyError()
        pre_value = value_str

    # Close out a run that reaches the end of the array.
    if value_str == stan_value:
        if mode == 'above':
            if flag >= stan_repeat:
                nan_start_idx_list.append(nan_start_idx)
                nan_end_idx_list.append(idx)
        elif mode == 'below':
            if flag <= stan_repeat:
                nan_start_idx_list.append(nan_start_idx)
                nan_end_idx_list.append(idx)
        else:
            print('mode 를 above 또는 이하 below 중 하나로 지정해주세요')
            raise KeyError()

    if reverse:
        # Invert: return the gaps between matched runs instead.
        rev_start_idx_list = [0]
        rev_end_idx_list = [len(ary)-1]
        for ns_idx, ne_idx in zip(nan_start_idx_list, nan_end_idx_list):
            if ns_idx == 0:
                # Run starts at the array head: replace the leading gap start.
                rev_start_idx_list.pop(0)
                rev_start_idx_list.append(ne_idx+1)
                continue
            if ne_idx == len(ary)-1:
                # Run reaches the array tail: replace the trailing gap end.
                rev_end_idx_list.pop(-1)
                rev_end_idx_list.append(ns_idx-1)
                continue
            rev_start_idx_list.append(ne_idx+1)
            rev_end_idx_list.insert(-1, ns_idx-1)
        return rev_start_idx_list, rev_end_idx_list

    return nan_start_idx_list, nan_end_idx_list
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: utilskit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: description
|
|
5
|
+
Author: Kimyh
|
|
6
|
+
Author-email: kim_yh663927@naver.com
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: matplotlib==3.10.3
|
|
13
|
+
Requires-Dist: numpy==2.2.6
|
|
14
|
+
Requires-Dist: pandas==2.3.1
|
|
15
|
+
Requires-Dist: PyMySQL==1.1.1
|
|
16
|
+
Requires-Dist: SQLAlchemy==2.0.41
|
|
17
|
+
Requires-Dist: tqdm==4.67.1
|
|
18
|
+
Dynamic: author
|
|
19
|
+
Dynamic: author-email
|
|
20
|
+
Dynamic: classifier
|
|
21
|
+
Dynamic: description
|
|
22
|
+
Dynamic: description-content-type
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
baseline
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
utilskit/__init__.py,sha256=RdcxmxKmxbWEgIFBG8hTg-6NJE4_ylNnavxpWrPICFI,176
|
|
2
|
+
utilskit/classificationutils.py,sha256=QTOYCKRoveb2rqB06Lj26VLujgt_I2kJJdhuk7QEVLA,4320
|
|
3
|
+
utilskit/dataframeutils.py,sha256=C1DZn7JfRC7D7vhuTCttOAVfX5WKt9Kn8S0yF9agArE,9303
|
|
4
|
+
utilskit/dbutils.py,sha256=NGb6DDRpWmmg25fAkVQxYp76zg61Iykj9pmglZp01Bo,3993
|
|
5
|
+
utilskit/logutils.py,sha256=SVlXImmuEHTG6i6Qe95OIQbmCicZdL0kX03CBiQU7xI,3302
|
|
6
|
+
utilskit/plotutils.py,sha256=sCb8j32ieQvblZksoerzIor91y_FYwAdC0-Zugfs5Pk,8052
|
|
7
|
+
utilskit/timeutils.py,sha256=l2VFqlUG_Bbyyr6rmWiFGf9b2KF6i65MjM7Dhs2tiBg,1257
|
|
8
|
+
utilskit/utils.py,sha256=4UkDhs2PDaxN17MiQ8TXtviy_cV6l8Z-YUt6jeZ3ueo,7880
|
|
9
|
+
utilskit-0.1.0.dist-info/METADATA,sha256=-9P8nqc5xzWqBfUw0ckN4nTEjhEEMDuYXGFWnsy9U7w,705
|
|
10
|
+
utilskit-0.1.0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
|
|
11
|
+
utilskit-0.1.0.dist-info/top_level.txt,sha256=bi7zXh9RMItInj0Kdx2-Owt3AFtUHm__qZ40kPbCukg,9
|
|
12
|
+
utilskit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
utilskit
|