PyPI - utilskit - Versions diffs - 0.1.0__tar.gz → 0.1.2__tar.gz - Mend

utilskit 0.1.0tar.gz → 0.1.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{utilskit-0.1.0 → utilskit-0.1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: utilskit
-Version: 0.1.0
+Version: 0.1.2
 Summary: description
 Author: Kimyh
 Author-email: kim_yh663927@naver.com
@@ -24,4 +24,9 @@ Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
-baseline
+0.1.2
+- repeatutils 의 get_repeat_section 에서 하나의 값이 여러 구간에서 반복될때 마지막 구간만 나오는 부분 수정
+- repeatutils 의 get_repeat_section 및 get_stan_repeat_section 에서 추출되는 구간의 마지막 값이 +1 이 되는 부분 수정
+0.1.1
+repeatutils.py 추가
+utils.py 에서 repeat 관련 함수 제거

utilskit-0.1.2/README.md ADDED Viewed

@@ -0,0 +1,6 @@
+0.1.2
+- repeatutils 의 get_repeat_section 에서 하나의 값이 여러 구간에서 반복될때 마지막 구간만 나오는 부분 수정
+- repeatutils 의 get_repeat_section 및 get_stan_repeat_section 에서 추출되는 구간의 마지막 값이 +1 이 되는 부분 수정
+0.1.1
+repeatutils.py 추가
+utils.py 에서 repeat 관련 함수 제거

{utilskit-0.1.0 → utilskit-0.1.2}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="utilskit",  # 패키지 이름 (pip install 시 사용될 이름)
-    version="0.1.0",    # 버전
+    version="0.1.2",    # 버전
     packages=find_packages(),  # textbasic 폴더 내 모든 패키지 포함
     include_package_data=True,  # 이 설정을 통해 패키지 내 데이터 파일을 포함시킬 수 있음
     package_data={

utilskit-0.1.2/utilskit/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# from . import classificationutils
+# from . import dataframeutils
+# from . import dbutils
+# from . import logutils
+# from . import plotutils
+# from . import timeutils
+# from . import utils

utilskit-0.1.2/utilskit/classificationutils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .classificationutils import *

{utilskit-0.1.0/utilskit → utilskit-0.1.2/utilskit/classificationutils}/classificationutils.py RENAMED Viewed

@@ -5,6 +5,7 @@ import sys
 import pandas as pd
 import numpy as np
+__all__ = ["matrix2confusion"]
 def get_max_2nd_n_reliability(pred):
     pred_min = np.expand_dims(np.min(pred, axis=1), axis=1)

{utilskit-0.1.0 → utilskit-0.1.2}/utilskit/dataframeutils.py RENAMED Viewed

@@ -113,7 +113,6 @@ def adnormal2nan(df, stan_col, max_value=None, min_value=None):
 def time_filling(df, start, end, time_column='time'):
     if df.empty:
         return df
     time_range = pd.date_range(start=start, end=end, freq='S')
     time_range_df = pd.DataFrame(time_range, columns=[time_column])
     time_range_df = time_range_df.astype('str')
@@ -160,6 +159,92 @@ def local_nan_correction(df, stan_col, nan_repeat=5):
     return df
+# def pin_nan_correction(df, stan_col, max_diff=0.1, nan_repeat=3):
+#     '''
+#     이상치 범위에 속하지 않지만
+#     데이터 흐름상 이상치로 볼 필요가 있는 국소 범위의 값들을 결측치로 변경하는 함수
+#     예시: 20, 20, 20, 20, [  1], 20, 20, 20, 1, 1, 2, 1
+#     결과: 20, 20, 20, 20, [NaN], 20, 20, 20, 1, 1, 2, 1
+#     '''
+#     # 기준 컬럼 데이터 추출
+#     stan_ary = df[stan_col].values
+#     # 현재 값에서 이전값을 뺀 데이터 ary 를 생성
+#     stan_1_list = stan_ary.tolist()
+#     stan_1_list.insert(0, stan_ary[0])
+#     stan_1_ary = np.array(stan_1_list)[:-1]
+#     diff_ary = np.round(stan_ary - stan_1_ary, 4)
+#     diff_ary = np.array(list(map(abs, diff_ary)))
+#     #==
+#     # print()
+#     idx_list = []
+#     for idx, diff in enumerate(diff_ary):
+#         # 앞뒤 차이값이 최대 차이값 보다 작은 경우
+#         if diff < max_diff:
+#             continue
+#         # idx 위치 이전 10개 데이터에 대한 평균
+#         before_aver = np.average(stan_ary[idx-10:idx])
+#         # idx 위치 이후 10개 데이터에 대한 평균
+#         after_aver = np.average(stan_ary[idx+1:idx+11])
+#         # 구간 내 nan 이 존재하는 경우 앞뒤 평균을 동일시
+#         if str(before_aver) == 'nan':
+#             before_aver = after_aver
+#         if str(after_aver) == 'nan':
+#             after_aver = before_aver
+#         # 앞뒤 평균값 간의 차이값 절대값 계산
+#         aver_diff = abs(after_aver - before_aver)
+#         # 바로 앞 뒤의 차이값과 평균값 간 차이값의 차이값 p 계산
+#         p = np.round(diff - aver_diff, 4)
+#         # p 가 최대 차이값 보다 큰 경우 이상치로 판단
+#         if p > max_diff:
+#             idx_list.append(idx)
+#         # print(f'{idx:5d}, {before_aver:.2f}, {diff:.2f}, {after_aver:.2f}, {aver_diff:.2f}')
+#         # print(p)
+#     # print(idx_list)
+#     del idx
+#     # pin idx 가 존재하는 경우 해당 범위를 nan 으로 대체
+#     temp_ary = stan_ary.copy()
+#     if len(idx_list) > 0:
+#         for idx in idx_list:
+#             if idx < 3:
+#                 temp_ary[:idx+3] = np.nan
+#             else:
+#                 temp_ary[idx-3:idx+3] = np.nan
+#     # nan 의 위치 구하기
+#     for_fill_start_idx_list, for_fill_end_idx_list = u.identify_stan_repeat_section(
+#         ary=temp_ary,
+#         stan_value='nan',
+#         stan_repeat=nan_repeat,
+#         mode='below',
+#         reverse=False
+#     )
+#     # 해당 부분을 NaN 값으로 변환
+#     for fsi, fei in zip(for_fill_start_idx_list, for_fill_end_idx_list):
+#         df.loc[fsi:fei, stan_col] = np.nan
+#         df.loc[fsi-1:fei, stan_col] = df.loc[fsi-1:fei, stan_col].fillna(method='ffill')
+#         df.loc[fsi:fei+1, stan_col] = df.loc[fsi:fei+1, stan_col].fillna(method='bfill')
+#     return df
 def pin_nan_correction(df, stan_col, max_diff=0.1, nan_repeat=3):
     '''
     이상치 범위에 속하지 않지만
@@ -214,36 +299,6 @@ def pin_nan_correction(df, stan_col, max_diff=0.1, nan_repeat=3):
         # print(p)
     # print(idx_list)
     del idx
-    # idx_list = [0, 1, 11094, 11095, 12894, 12895, 12896, 12897, 35710, 35711]
-    # for i in idx_list:
-    #     print(stan_ary[i])
-    # del i
-    #==
-    # print('-----------------------------------------')
-    # print(np.round(stan_ary[0:10], 4).tolist())
-    # print(np.round(stan_1_ary[0:10], 4).tolist())
-    # print(np.round(diff_ary[0:10], 4).tolist())
-    # print('-----------------------------------------')
-    # print(np.round(stan_ary[11090:11100], 4).tolist())
-    # print(np.round(stan_1_ary[11090:11100], 4).tolist())
-    # print(np.round(diff_ary[11090:11100], 4).tolist())
-    # print('-----------------------------------------')
-    # print(np.round(stan_ary[12890:12900], 4).tolist())
-    # print(np.round(stan_1_ary[12890:12900], 4).tolist())
-    # print(np.round(diff_ary[12890:12900], 4).tolist())
-    # print('-----------------------------------------')
-    # print(np.round(stan_ary[17945:17955], 4).tolist())
-    # print(np.round(stan_1_ary[17945:17955], 4).tolist())
-    # print(np.round(diff_ary[17945:17955], 4).tolist())
-    # print('-----------------------------------------')
-    # print(np.round(stan_ary[-10:], 4).tolist())
-    # print(np.round(stan_1_ary[-10:], 4).tolist())
-    # print(np.round(diff_ary[-10:], 4).tolist())
     # pin idx 가 존재하는 경우 해당 범위를 nan 으로 대체
     temp_ary = stan_ary.copy()
@@ -255,7 +310,7 @@ def pin_nan_correction(df, stan_col, max_diff=0.1, nan_repeat=3):
                 temp_ary[idx-3:idx+3] = np.nan
     # nan 의 위치 구하기
-    for_fill_start_idx_list, for_fill_end_idx_list = u.identify_stan_repeat_section(
+    for_fill_start_idx_list, for_fill_end_idx_list = um.identify_stan_repeat_section(
         ary=temp_ary,
         stan_value='nan',
         stan_repeat=nan_repeat,

utilskit-0.1.2/utilskit/repeatutils.py ADDED Viewed

@@ -0,0 +1,251 @@
+import sys
+import os
+import numpy as np
+def issame(value1, value2):
+    '''
+    Return True when two scalar values should be treated as the same value.
+    The rules are applied in this order:
+    - values that compare equal with ``==`` are the same
+    - if either value is a str, both are compared by their ``str()`` form,
+      so 1 matches '1' and a float NaN matches 'nan'
+    - otherwise the values are the same only when both are NaN
+    Values that ``np.isnan`` cannot handle (for example None) are reported
+    as different instead of raising TypeError.
+    '''
+    if value1 == value2:
+        return True
+    # At least one side is a str: compare the string forms.
+    if isinstance(value1, str) or isinstance(value2, str):
+        return str(value1) == str(value2)  # ex) 1 vs '1' -> True, 1 vs 'nan' -> False
+    # Neither side is a str. NaN never equals itself, so test for it explicitly.
+    try:
+        return bool(np.isnan(value1) and np.isnan(value2))
+    except TypeError:
+        # np.isnan rejects non-numeric input such as None; such a value is not NaN.
+        return False
+# def get_repeat_section2(data, repeat_num, refer_value=None, except_nan=True):
+#     '''
+#     '''
+#     raw_ary = np.array(data)
+#     ary = raw_ary.copy()
+#     same_tf = (ary[:-1] == ary[1:])
+#     is_nan = np.isnan(ary)
+#     for i, j, k, l in zip(ary[:-1], ary[1:], same_tf, is_nan):
+#         print(i, j, k, l)
+#     a = np.where(same_tf==1)
+#     print(a)
+#     sys.exit()
+#     value_list = []
+#     start_idx_list = []
+#     end_idx_list = []
+#     start_idx = 0
+#     # pre_value = 'nan'
+#     repeat_num = 1
+#     for idx, value in enumerate(ary):
+#         # 가장 처음인 경우
+#         if idx == 0:
+#             pre_value = value
+#             continue
+#         # 현재 값이 이전 값과 동일할때
+#         if issame(value, pre_value):
+#             repeat_num += 1
+#         # 현재 값이 이전 값과 다를때
+#         else:
+#             if repeat_num >= stan_repeat:
+#                 value_list.append(pre_value)
+#                 start_idx_list.append(start_idx)
+#                 end_idx_list.append(idx-1)
+#             # 시작 지점 갱신 & 반복횟수 초기화
+#             start_idx = idx
+#             repeat_num = 1
+#         pre_value = value
+#     # 마지막 값이 이전 값과 같을때
+#     if issame(value, pre_value):
+#         if repeat_num >= stan_repeat:
+#             value_list.append(value)
+#             start_idx_list.append(start_idx)
+#             end_idx_list.append(idx)
+#     # --------------------------------------
+#     # 함수 수정중
+#     # 결과 정리
+#     result = {'nan':None}
+#     for v, si, ei in zip(value_list, start_idx_list, end_idx_list):
+#         result[str(v)] = (si, ei)
+#     if except_nan:
+#         del result['nan']
+#     return result
+def get_repeat_section(ary, stan_repeat, except_nan=True):
+    '''
+    Find every run of identical consecutive values in ``ary`` that is at
+    least ``stan_repeat`` elements long.
+    parameters
+    ----------
+    ary: iterable
+        values to scan; neighbours are compared with issame(), so
+        consecutive NaN values count as one run
+    stan_repeat: int
+        minimum run length for a run to be reported
+    except_nan: bool
+        when True, the runs stored under the key 'nan' are removed
+    returns
+    -------
+    dict mapping ``str(value)`` to a list of (start_idx, end_idx) tuples,
+    both indexes inclusive, in the order the runs appear.
+    An empty ``ary`` gives an empty dict.
+    '''
+    result = {}
+    start_idx = 0
+    repeat_num = 1
+    pre_value = None
+    last_idx = -1
+    for idx, value in enumerate(ary):
+        last_idx = idx
+        # The first element only opens the first run.
+        if idx == 0:
+            pre_value = value
+            continue
+        if issame(value, pre_value):
+            repeat_num += 1
+        else:
+            # The run ended on the previous element; keep it if long enough.
+            if repeat_num >= stan_repeat:
+                result.setdefault(str(pre_value), []).append((start_idx, idx-1))
+            # Open a new run at the current position.
+            start_idx = idx
+            repeat_num = 1
+        pre_value = value
+    # Nothing was iterated: there are no runs to report.
+    if last_idx < 0:
+        return result
+    # The run that reaches the end of the data is still open; close it here.
+    if repeat_num >= stan_repeat:
+        result.setdefault(str(pre_value), []).append((start_idx, last_idx))
+    if except_nan:
+        # pop() instead of del: there may be no NaN run at all.
+        result.pop('nan', None)
+    return result
+def get_stan_repeat_section(ary, stan_value, stan_repeat, mode, reverse=False):
+    '''
+    Locate the runs of ``stan_value`` in ``ary`` whose length satisfies
+    ``stan_repeat`` and return their start / end indexes.
+    parameters
+    ----------
+    ary: sequence
+        values to scan; must support len() and iteration
+    stan_value:
+        reference value; elements are matched with issame(), so the string
+        'nan' matches NaN elements
+    stan_repeat: int
+        run length threshold
+    mode: str
+        'a' (above) keeps runs of length >= stan_repeat,
+        'b' (below) keeps runs of length <= stan_repeat
+    reverse: bool
+        when True, return the sections NOT covered by the kept runs instead
+    returns
+    -------
+    list of (start_idx, end_idx) tuples, both indexes inclusive.
+    An empty ``ary`` gives an empty list.
+    raises
+    ------
+    KeyError
+        when mode is neither 'a' nor 'b'
+    '''
+    # Validate up front so a wrong mode fails regardless of the data.
+    if mode not in ('a', 'b'):
+        raise KeyError('mode 를 a (above:이상) 또는 b (below:이하) 중 하나로 지정해주세요')
+    total = len(ary)
+    if total == 0:
+        return []
+    # 1) Collect every maximal run of stan_value as (start, end).
+    runs = []
+    run_start = None
+    for idx, value in enumerate(ary):
+        if issame(value, stan_value):
+            if run_start is None:
+                run_start = idx
+        elif run_start is not None:
+            # The run ended on the previous element.
+            runs.append((run_start, idx-1))
+            run_start = None
+    # A run that reaches the last element is still open here.
+    if run_start is not None:
+        runs.append((run_start, total-1))
+    # 2) Keep the runs whose length passes the threshold for this mode.
+    if mode == 'a':
+        result = [(si, ei) for si, ei in runs if ei - si + 1 >= stan_repeat]
+    else:
+        result = [(si, ei) for si, ei in runs if ei - si + 1 <= stan_repeat]
+    # 3) Optionally return the gaps between the kept runs instead.
+    if reverse:
+        rev_result = []
+        cursor = 0  # first index not yet covered by a kept run
+        for si, ei in result:
+            if si > cursor:
+                rev_result.append((cursor, si-1))
+            cursor = ei + 1
+        # Trailing gap; skipped when a kept run ends on the last element.
+        if cursor <= total - 1:
+            rev_result.append((cursor, total-1))
+        result = rev_result
+    return result

utilskit-0.1.2/utilskit/utils.py ADDED Viewed

@@ -0,0 +1,151 @@
+'''
+pip install xlrd
+'''
+import numpy as np
+import pandas as pd
+import shutil
+import os
+import sys
+import json
+import time
+import csv
+from tqdm import tqdm
+from datetime import date, datetime, timedelta
+def save_yaml(path, obj):
+    '''
+    Dump ``obj`` as YAML into the file at ``path`` without sorting keys.
+    The file is opened in write mode, so existing content is replaced.
+    '''
+    # yaml is imported here, so importing this module does not require it.
+    import yaml
+    with open(path, 'w') as stream:
+        yaml.dump(obj, stream, sort_keys=False)
+def load_yaml(path):
+    '''
+    Read the YAML file at ``path`` and return the parsed object.
+    '''
+    # yaml is imported here, so importing this module does not require it.
+    import yaml
+    # NOTE(review): yaml.load with FullLoader is not meant for untrusted
+    # files; consider yaml.safe_load if the input can come from outside.
+    with open(path, 'r') as stream:
+        loaded = yaml.load(stream, Loader=yaml.FullLoader)
+    return loaded
+def envs_setting(random_seed):
+    '''
+    Environment setup: fix the random seeds.
+    parameters
+    ----------
+    random_seed: int
+        seed value to apply
+    returns
+    -------
+    None. Seeds torch (CPU and CUDA), numpy and random with the same
+    value, and sets cudnn.benchmark = False / cudnn.deterministic = True.
+    '''
+    import random
+    import numpy as np
+    import torch
+    from torch.backends import cudnn
+    # Python and numpy generators
+    random.seed(random_seed)
+    np.random.seed(random_seed)
+    # torch generators: CPU, current CUDA device, all CUDA devices
+    torch.manual_seed(random_seed)
+    torch.cuda.manual_seed(random_seed)
+    torch.cuda.manual_seed_all(random_seed)
+    # cudnn flags
+    cudnn.deterministic = True
+    cudnn.benchmark = False
+def normalize_1D(ary):
+    '''
+    Min-max normalize 1-D data into the range 0 ~ 1.
+    parameters
+    ----------
+    ary: array-like
+        1-D data to normalize
+    returns
+    -------
+    numpy array scaled to 0 ~ 1.
+    When every value is identical (zero range) an array of zeros is
+    returned instead of dividing by zero.
+    When the input has more than one dimension a message is printed and
+    None is returned.
+    '''
+    ary = np.array(ary)
+    if len(ary.shape) > 1:
+        print('1 차원 데이터만 입력 가능')
+        return None
+    # Shift so the minimum becomes 0, then scale by the remaining maximum.
+    shifted = np.subtract(ary, np.min(ary))
+    value_range = np.max(shifted)
+    if value_range == 0:
+        # Constant input: 0 / 0 would give NaN, so report all zeros.
+        return np.zeros(np.shape(shifted), dtype=float)
+    return np.divide(shifted, value_range)
+def get_error_info():
+    '''
+    Return the output of traceback.format_exc() as a string, i.e. the
+    formatted traceback of the exception currently being handled.
+    '''
+    from traceback import format_exc
+    return format_exc()
+def read_jsonl(data_path):
+    '''
+    Load a JSON / JSONL file through validate_data().
+    The file is read as 'utf-8-sig' first; if that raises
+    UnicodeDecodeError it is read again as 'cp949'.
+    Returns the list produced by validate_data().
+    '''
+    try:
+        return validate_data(data_path=data_path, encoding='utf-8-sig')
+    except UnicodeDecodeError:
+        # Second attempt with the other encoding.
+        return validate_data(data_path=data_path, encoding='cp949')
+def validate_data(data_path, encoding):
+    '''
+    Read ``data_path`` as JSON, falling back to line-by-line JSONL.
+    parameters
+    ----------
+    data_path: str
+        file to read
+    encoding: str
+        text encoding used to open the file
+    returns
+    -------
+    list
+        - the file is one valid JSON document: a one-element list holding
+          the parsed document
+        - otherwise: one parsed object per line that ends with '}';
+          blank lines and lines not ending with '}' are skipped
+    raises
+    ------
+    UnicodeDecodeError when the file cannot be decoded with ``encoding``;
+    json.decoder.JSONDecodeError when a line ending with '}' is not valid JSON.
+    '''
+    data_list = []
+    try:
+        with open(data_path, 'r', encoding=encoding) as f:
+            prodigy_data_list = json.load(f)
+        data_list.append(prodigy_data_list)
+    except json.decoder.JSONDecodeError:
+        # Not a single JSON document: treat the file as JSONL.
+        with open(data_path, 'r', encoding=encoding) as f:
+            for line in f:
+                # Drop the newline and surrounding whitespace before testing.
+                line = line.strip()
+                # endswith() is safe on an empty line, unlike line[-1].
+                if line.endswith('}'):
+                    data_list.append(json.loads(line))
+    return data_list
+def tensor2array(x_tensor):
+    '''
+    Convert a tensor to a numpy array by chaining
+    detach() -> cpu() -> numpy() on ``x_tensor``.
+    '''
+    return x_tensor.detach().cpu().numpy()
+def save_tensor(x_tensor, mode):
+    '''
+    Dump the first item of a tensor to CSV file(s) in the current
+    directory and print it.
+    mode == 1: ``x_ary[0]`` is written to ./temp.csv
+    mode == 2: ``x_ary[0]`` must be 3-D; each slice along its last axis is
+               squeezed and written to ./temp{idx}.csv
+    Before writing, values whose absolute value is above 2 are rounded to
+    0 decimals and all others to 3 decimals.
+    Any other mode only performs the tensor -> array conversion.
+    '''
+    full_ary = tensor2array(x_tensor=x_tensor)
+    def _compact(values):
+        # Large magnitudes lose their decimals, small ones keep three.
+        return np.where(np.absolute(values) > 2, np.round(values, 0), np.round(values, 3))
+    if mode == 1:
+        frame = pd.DataFrame(_compact(full_ary[0]))
+        frame.to_csv('./temp.csv', index=False, encoding='utf-8-sig')
+        print(frame)
+        print(full_ary.shape)
+    if mode == 2:
+        first_item = full_ary[0]
+        rows, cols, depth = first_item.shape
+        print(rows, cols, depth)
+        for idx in range(depth):
+            # Keep the slice form idx:idx+1 so np.squeeze sees the same shape.
+            plane = np.squeeze(first_item[:, :, idx:idx+1])
+            frame = pd.DataFrame(_compact(plane))
+            frame.to_csv(f'./temp{idx}.csv', index=False, encoding='utf-8-sig')
+            print(frame)
+        print(full_ary.shape)

{utilskit-0.1.0 → utilskit-0.1.2}/utilskit.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: utilskit
-Version: 0.1.0
+Version: 0.1.2
 Summary: description
 Author: Kimyh
 Author-email: kim_yh663927@naver.com
@@ -24,4 +24,9 @@ Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
-baseline
+0.1.2
+- repeatutils 의 get_repeat_section 에서 하나의 값이 여러 구간에서 반복될때 마지막 구간만 나오는 부분 수정
+- repeatutils 의 get_repeat_section 및 get_stan_repeat_section 에서 추출되는 구간의 마지막 값이 +1 이 되는 부분 수정
+0.1.1
+repeatutils.py 추가
+utils.py 에서 repeat 관련 함수 제거

{utilskit-0.1.0 → utilskit-0.1.2}/utilskit.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,15 +2,17 @@ MANIFEST.in
 README.md
 setup.py
 utilskit/__init__.py
-utilskit/classificationutils.py
 utilskit/dataframeutils.py
 utilskit/dbutils.py
 utilskit/logutils.py
 utilskit/plotutils.py
+utilskit/repeatutils.py
 utilskit/timeutils.py
 utilskit/utils.py
 utilskit.egg-info/PKG-INFO
 utilskit.egg-info/SOURCES.txt
 utilskit.egg-info/dependency_links.txt
 utilskit.egg-info/requires.txt
-utilskit.egg-info/top_level.txt
+utilskit.egg-info/top_level.txt
+utilskit/classificationutils/__init__.py
+utilskit/classificationutils/classificationutils.py

utilskit-0.1.0/README.md DELETED Viewed

	@@ -1 +0,0 @@
1	- baseline

utilskit-0.1.0/utilskit/__init__.py DELETED Viewed

@@ -1,7 +0,0 @@
-from . import classificationutils
-from . import dataframeutils
-from . import dbutils
-from . import logutils
-from . import plotutils
-from . import timeutils
-from . import utils

utilskit-0.1.0/utilskit/utils.py DELETED Viewed

@@ -1,277 +0,0 @@
-'''
-pip install xlrd
-'''
-import numpy as np
-import pandas as pd
-import shutil
-import os
-import sys
-import json
-import time
-import csv
-from tqdm import tqdm
-from datetime import date, datetime, timedelta
-def save_yaml(path, obj):
-    import yaml
-    with open(path, 'w') as f:
-        yaml.dump(obj, f, sort_keys=False)
-def load_yaml(path):
-    import yaml
-    with open(path, 'r') as f:
-        return yaml.load(f, Loader=yaml.FullLoader)
-def envs_setting(random_seed):
-    '''
-    난수지정 등의 환경설정
-    parameters
-    ----------
-    random_seed: int
-        설정할 random seed
-    returns
-    -------
-    torch, numpy, random 등에 대한 랜덤 시드 고정
-    '''
-    import torch
-    import torch.backends.cudnn as cudnn
-    import random
-    import numpy as np
-    # seed
-    torch.manual_seed(random_seed)
-    torch.cuda.manual_seed(random_seed)
-    torch.cuda.manual_seed_all(random_seed)
-    cudnn.benchmark = False
-    cudnn.deterministic = True
-    np.random.seed(random_seed)
-    random.seed(random_seed)
-def normalize_1D(ary):
-    '''
-    1차원데이터를 0~1 사이 값으로 normalize 하는 함수
-    parameters
-    ----------
-    ary: numpy array
-        noramlize 를 적용할 1차원 array
-    returns
-    -------
-    0 ~ 1 사이로 noramalize 된 array
-    '''
-    ary = np.array(ary)
-    if len(ary.shape) > 1:
-        return print('1 차원 데이터만 입력 가능')
-    ary_min = np.min(ary)
-    ary_min = np.subtract(ary, ary_min)
-    ary_max = np.max(ary_min)
-    ary_norm = np.divide(ary_min, ary_max)
-    return ary_norm
-def get_error_info():
-    import traceback
-    traceback_string = traceback.format_exc()
-    return traceback_string
-def read_jsonl(data_path):
-    try:
-        data_list = validate_data(
-            data_path=data_path,
-            encoding='utf-8-sig'
-        )
-    except UnicodeDecodeError:
-        data_list = validate_data(
-            data_path=data_path,
-            encoding='cp949'
-        )
-    return data_list
-def validate_data(data_path, encoding):
-    data_list = []
-    try:
-        with open(data_path, 'r', encoding=encoding) as f:
-            prodigy_data_list = json.load(f)
-        data_list.append(prodigy_data_list)
-    except json.decoder.JSONDecodeError:
-        with open(data_path, 'r', encoding=encoding) as f:
-            for line in f:
-                line = line.replace('\n', '')
-                line.strip()
-                if line[-1] == '}':
-                    json_line = json.loads(line)
-                    data_list.append(json_line)
-    return data_list
-def tensor2array(x_tensor):
-    x_ary = x_tensor.detach().cpu().numpy()
-    return x_ary
-def save_tensor(x_tensor, mode):
-    x_ary = tensor2array(x_tensor=x_tensor)
-    if mode == 1:
-        b = x_ary[0]
-        # b = np.round(b, 3)
-        b = np.where(np.absolute(b) > 2, np.round(b, 0), np.round(b, 3))
-        df = pd.DataFrame(b)
-        df.to_csv(f'./temp.csv', index=False, encoding='utf-8-sig')
-        print(df)
-        print(x_ary.shape)
-    if mode == 2:
-        ary = x_ary[0]
-        i, j, k = ary.shape
-        print(i, j, k)
-        for idx in range(k):
-            a = np.squeeze(ary[:, :, idx:idx+1])
-            a = np.where(np.absolute(a) > 2, np.round(a, 0), np.round(a, 3))
-            df = pd.DataFrame(a)
-            df.to_csv(f'./temp{idx}.csv', index=False, encoding='utf-8-sig')
-            print(df)
-        print(x_ary.shape)
-def identify_repeat_section(ary, stan_num, include_nan=False):
-    '''
-    데이터 array 에서 특정 숫자가 정해놓은 반복 횟수 (stan_repeat) 만큼 반복되면
-    그 구간의 시작, 끝 위치 index 값을 추출한다.
-    '''
-    start_idx_list = []
-    end_idx_list = []
-    start_idx = 0
-    # pre_value = 'nan'
-    flag = 1
-    for idx, value in enumerate(ary):
-        value_str = str(value)
-        # 가장 처음인 경우
-        if idx == 0:
-            pre_value = value_str
-            continue
-        # 현재 값이 이전 값과 동일할때
-        if value_str == pre_value:
-            # 현재 값이 nan 이 아닌 경우 만
-            if include_nan:
-                flag += 1
-            else:
-                if value_str != 'nan':
-                    flag += 1
-        # 현재 값이 이전 값과 다를때
-        else:
-            if flag >= stan_num:
-                start_idx_list.append(start_idx)
-                end_idx_list.append(idx-1)
-            # 시작 지점 갱신
-            start_idx = idx
-            flag = 1
-        pre_value = value_str
-    if flag >= stan_num:
-        start_idx_list.append(start_idx)
-        end_idx_list.append(idx)
-    return start_idx_list, end_idx_list
-def identify_stan_repeat_section(ary, stan_value, stan_repeat, mode, reverse=False):
-    '''
-    ary 에서 기준값(stan_value)이 지정한 횟수(stan_repeat)
-    이상(above) 또는 이하(below) 만큼 반복되는 구간의 시작, 끝 위치 index 값을 추출하는 함수
-    reverse 를 True 로 지정하면 해당 각 구간의 끝->시작, 시작->끝 으로 반전된다.
-    '''
-    nan_start_idx = 0
-    nan_start_idx_list = []
-    nan_end_idx_list = []
-    flag = 1
-    if len(ary) == 0:
-        return [], []
-    for idx, value in enumerate(ary):
-        value_str = str(value)
-        # 가장 처음인 경우
-        if idx == 0:
-            pre_value = value_str
-            continue
-        # 현재 값이 stan 일 때
-        if value_str == stan_value:
-            # 이전값이 nan 인경우
-            if pre_value == stan_value:
-                flag += 1
-            # 이전 값이 nan 이 아닌 경우
-            else:
-                flag = 1
-                # idx 시작 위치 지정
-                nan_start_idx = idx
-        # 현재 값이 nan 이 아닐 때
-        else:
-            # 이전 값이 nan 인 경우
-            if pre_value == stan_value:
-                # idx 끝 위치 지정
-                if mode == 'above':
-                    if flag >= stan_repeat:
-                        nan_start_idx_list.append(nan_start_idx)
-                        nan_end_idx_list.append(idx-1)
-                elif mode == 'below':
-                    if flag <= stan_repeat:
-                        nan_start_idx_list.append(nan_start_idx)
-                        nan_end_idx_list.append(idx-1)
-                else:
-                    print('mode 를 above 또는 이하 below 중 하나로 지정해주세요')
-                    raise KeyError()
-        pre_value = value_str
-    if value_str == stan_value:
-        if mode == 'above':
-            if flag >= stan_repeat:
-                nan_start_idx_list.append(nan_start_idx)
-                nan_end_idx_list.append(idx)
-        elif mode == 'below':
-            if flag <= stan_repeat:
-                nan_start_idx_list.append(nan_start_idx)
-                nan_end_idx_list.append(idx)
-        else:
-            print('mode 를 above 또는 이하 below 중 하나로 지정해주세요')
-            raise KeyError()
-    if reverse:
-        rev_start_idx_list = [0]
-        rev_end_idx_list = [len(ary)-1]
-        for ns_idx, ne_idx in zip(nan_start_idx_list, nan_end_idx_list):
-            if ns_idx == 0:
-                rev_start_idx_list.pop(0)
-                rev_start_idx_list.append(ne_idx+1)
-                continue
-            if ne_idx == len(ary)-1:
-                rev_end_idx_list.pop(-1)
-                rev_end_idx_list.append(ns_idx-1)
-                continue
-            rev_start_idx_list.append(ne_idx+1)
-            rev_end_idx_list.insert(-1, ns_idx-1)
-        return rev_start_idx_list, rev_end_idx_list
-    return nan_start_idx_list, nan_end_idx_list