utilskit 0.1.3__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {utilskit-0.1.3 → utilskit-0.2.1}/PKG-INFO +11 -5
  2. {utilskit-0.1.3 → utilskit-0.2.1}/README.md +9 -4
  3. {utilskit-0.1.3 → utilskit-0.2.1}/setup.py +3 -2
  4. utilskit-0.2.1/test/test.py +358 -0
  5. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit/classificationutils/classificationutils.py +30 -8
  6. utilskit-0.2.1/utilskit/dataframeutils/__init__.py +1 -0
  7. utilskit-0.2.1/utilskit/dataframeutils/dataframeutils.py +255 -0
  8. utilskit-0.2.1/utilskit/logutils/__init__.py +1 -0
  9. utilskit-0.2.1/utilskit/logutils/logutils.py +215 -0
  10. utilskit-0.2.1/utilskit/plotutils/__init__.py +1 -0
  11. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit/plotutils/plotutils.py +57 -38
  12. utilskit-0.2.1/utilskit/repeatutils/__init__.py +1 -0
  13. utilskit-0.2.1/utilskit/repeatutils/repeatutils.py +502 -0
  14. utilskit-0.2.1/utilskit/timeutils/__init__.py +1 -0
  15. utilskit-0.2.1/utilskit/timeutils/timeutils.py +48 -0
  16. utilskit-0.2.1/utilskit/utils/__init__.py +1 -0
  17. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit/utils/utils.py +7 -17
  18. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit.egg-info/PKG-INFO +11 -5
  19. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit.egg-info/SOURCES.txt +1 -0
  20. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit.egg-info/requires.txt +1 -0
  21. utilskit-0.1.3/utilskit/dataframeutils/dataframeutils.py +0 -328
  22. utilskit-0.1.3/utilskit/dbutils/__init__.py +0 -0
  23. utilskit-0.1.3/utilskit/logutils/__init__.py +0 -0
  24. utilskit-0.1.3/utilskit/logutils/logutils.py +0 -109
  25. utilskit-0.1.3/utilskit/plotutils/__init__.py +0 -0
  26. utilskit-0.1.3/utilskit/repeatutils/__init__.py +0 -0
  27. utilskit-0.1.3/utilskit/repeatutils/repeatutils.py +0 -251
  28. utilskit-0.1.3/utilskit/timeutils/__init__.py +0 -0
  29. utilskit-0.1.3/utilskit/timeutils/timeutils.py +0 -40
  30. utilskit-0.1.3/utilskit/utils/__init__.py +0 -0
  31. {utilskit-0.1.3 → utilskit-0.2.1}/MANIFEST.in +0 -0
  32. {utilskit-0.1.3 → utilskit-0.2.1}/setup.cfg +0 -0
  33. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit/__init__.py +0 -0
  34. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit/classificationutils/__init__.py +0 -0
  35. {utilskit-0.1.3/utilskit/dataframeutils → utilskit-0.2.1/utilskit/dbutils}/__init__.py +0 -0
  36. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit/dbutils/dbutils.py +0 -0
  37. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit.egg-info/dependency_links.txt +0 -0
  38. {utilskit-0.1.3 → utilskit-0.2.1}/utilskit.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: utilskit
3
- Version: 0.1.3
3
+ Version: 0.2.1
4
4
  Summary: description
5
5
  Author: Kimyh
6
6
  Author-email: kim_yh663927@naver.com
@@ -15,6 +15,7 @@ Requires-Dist: pandas==2.3.1
15
15
  Requires-Dist: PyMySQL==1.1.1
16
16
  Requires-Dist: SQLAlchemy==2.0.41
17
17
  Requires-Dist: tqdm==4.67.1
18
+ Requires-Dist: xlrd==2.0.2
18
19
  Dynamic: author
19
20
  Dynamic: author-email
20
21
  Dynamic: classifier
@@ -24,9 +25,14 @@ Dynamic: requires-dist
24
25
  Dynamic: requires-python
25
26
  Dynamic: summary
26
27
 
27
- 0.1.2
28
+ # 0.2.1
29
+ - repeatutils 에 get_section 함수 추가
30
+ # 0.2.0
31
+ - 정식 최초 배포버전
32
+ - 각 함수의 사용성 강화 및 비활성 함수 지정
33
+ # 0.1.2
28
34
  - repeatutils 의 get_repeat_section 에서 하나의 값이 여러 구간에서 반복될때 마지막 구간만 나오는 부분 수정
29
35
  - repeatutils 의 get_repeat_section 및 get_stan_repeat_section 에서 추출되는 구간의 마지막 값이 +1 이 되는 부분 수정
30
- 0.1.1
31
- repeatutils.py 추가
32
- utils.py 에서 repeat 관련 함수 제거
36
+ # 0.1.1
37
+ - repeatutils.py 추가
38
+ - utils.py 에서 repeat 관련 함수 제거
@@ -1,6 +1,11 @@
1
- 0.1.2
1
+ # 0.2.1
2
+ - repeatutils 에 get_section 함수 추가
3
+ # 0.2.0
4
+ - 정식 최초 배포버전
5
+ - 각 함수의 사용성 강화 및 비활성 함수 지정
6
+ # 0.1.2
2
7
  - repeatutils 의 get_repeat_section 에서 하나의 값이 여러 구간에서 반복될때 마지막 구간만 나오는 부분 수정
3
8
  - repeatutils 의 get_repeat_section 및 get_stan_repeat_section 에서 추출되는 구간의 마지막 값이 +1 이 되는 부분 수정
4
- 0.1.1
5
- repeatutils.py 추가
6
- utils.py 에서 repeat 관련 함수 제거
9
+ # 0.1.1
10
+ - repeatutils.py 추가
11
+ - utils.py 에서 repeat 관련 함수 제거
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="utilskit", # 패키지 이름 (pip install 시 사용될 이름)
5
- version="0.1.3", # 버전
5
+ version="0.2.1", # 버전
6
6
  packages=find_packages(), # textbasic 폴더 내 모든 패키지 포함
7
7
  include_package_data=True, # 이 설정을 통해 패키지 내 데이터 파일을 포함시킬 수 있음
8
8
  package_data={
@@ -13,7 +13,8 @@ setup(
13
13
  "pandas==2.3.1",
14
14
  "PyMySQL==1.1.1",
15
15
  "SQLAlchemy==2.0.41",
16
- "tqdm==4.67.1"
16
+ "tqdm==4.67.1",
17
+ "xlrd==2.0.2"
17
18
  ],
18
19
  # install_requires=[
19
20
  # "pandas>=1.3.0,<2.0.0", # 버전 범위 설정 방법
@@ -0,0 +1,358 @@
1
+ import sys
2
+ import os
3
+ # import time
4
+
5
+ sys.path.append('/home/kimyh/library/utilskit')
6
+ def main():
7
+ from utilskit import classificationutils as clu
8
+ label2id_dict = {
9
+ '고양이':0,
10
+ '개':1
11
+ }
12
+ t = ['고양이', '개', '개', '고양이', '고양이', '개']
13
+ p = ['개', '개', '고양이', '고양이', '고양이', '개']
14
+ id2label_dict = {
15
+ 0:'고양이',
16
+ 1:'개'
17
+ }
18
+ t = [1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]
19
+ p = [1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1]
20
+
21
+ print(type(id2label_dict))
22
+ cm = clu.confusion_matrix(
23
+ class_dict=id2label_dict,
24
+ true_list=t,
25
+ pred_list=p,
26
+ ignore_idx=None,
27
+ round_num=2,
28
+ percentage=True
29
+ )
30
+ print(cm)
31
+ cm.to_csv('cm.csv', encoding='utf-8-sig')
32
+
33
+ def main2():
34
+ import pandas as pd
35
+ import numpy as np
36
+ from datetime import datetime, timedelta
37
+ from utilskit import dataframeutils as dfu
38
+ start_time = datetime.strptime('2025-07-22 10:05:15', '%Y-%m-%d %H:%M:%S')
39
+ end_time = start_time + timedelta(seconds=5)
40
+ time_range = pd.date_range(start=start_time, end=end_time, freq='S')
41
+ value_ary = np.random.randint(10, 20, len(time_range))
42
+ df = pd.DataFrame({
43
+ 'time':time_range,
44
+ 'value':value_ary
45
+ })
46
+ print(df)
47
+ df = dfu.utc2kor(
48
+ dataframe=df,
49
+ column='time',
50
+ extend=True
51
+ )
52
+ print(df)
53
+
54
+
55
+ def main3():
56
+ import numpy as np
57
+ import pandas as pd
58
+ from utilskit import dataframeutils as dfu
59
+
60
+ value_ary1 = [1, 6, 3, 8, 5]
61
+ value_ary2 = [5, 7, 2, 6, 9]
62
+ df = pd.DataFrame({'col1':value_ary1, 'col2':value_ary2})
63
+ df = dfu.adnormal2nan(
64
+ dataframe=df,
65
+ column='col1',
66
+ max_value=7,
67
+ min_value=2
68
+ )
69
+ print(df)
70
+
71
+
72
+ def main4():
73
+ import numpy as np
74
+ import pandas as pd
75
+ from datetime import datetime
76
+ from utilskit import dataframeutils as dfu
77
+
78
+ time_ary = ['2024-05-11 03:45:12', '2024-05-11 03:45:15', '2024-05-11 03:45:16']
79
+ value_ary = [1, 5, 6]
80
+ df = pd.DataFrame({
81
+ 'time':time_ary,
82
+ 'value':value_ary
83
+ })
84
+ print(df)
85
+ df = dfu.time_filling(
86
+ dataframe=df,
87
+ start='2024-05-11 03:45:10',
88
+ end='2024-05-11 03:45:20',
89
+ column='time'
90
+ )
91
+ print(df)
92
+ df = pd.DataFrame([1, 2, 3, 4], columns=['value'])
93
+ if dfu.isdfvalid(df, ['value']):
94
+ print('컬럼이 전부 존재합니다.')
95
+
96
+
97
+ def main5():
98
+ import pandas as pd
99
+ import numpy as np
100
+ from utilskit import dataframeutils as dfu
101
+
102
+ value_ary1 = [1, np.nan, np.nan, 2, 3, np.nan, np.nan, np.nan]
103
+ value_ary2 = np.random.randint(0, 10, size=len(value_ary1))
104
+ df = pd.DataFrame({
105
+ 'value1':value_ary1,
106
+ 'value2':value_ary2
107
+ })
108
+ print(df)
109
+ df = dfu.fill_repeat_nan(
110
+ dataframe=df,
111
+ column='value1',
112
+ repeat=3
113
+ )
114
+ print(df)
115
+
116
+
117
+ def main6():
118
+ import pandas as pd
119
+ import numpy as np
120
+ from utilskit import dataframeutils as dfu
121
+
122
+ # value_ary1 = [20, 20, 20, 20, 2, 20, 20, 20, 1, 1, 2, 1]
123
+ # # value_ary2 = np.random.randint(0, 10, size=len(value_ary1))
124
+ # df = pd.DataFrame({
125
+ # 'value1':value_ary1
126
+ # })
127
+ # # print(df)
128
+ # df = dfu.pin2nan(
129
+ # dataframe=df,
130
+ # column='value1',
131
+ # max_diff=0.1,
132
+ # repeat=3
133
+ # )
134
+ # print(df)
135
+
136
+ import pandas as pd
137
+
138
+ data = [19, 19, 20, 20, 1, 21, 21, 22, 1, 1, 2, 1]
139
+ df = pd.DataFrame({'val': data})
140
+
141
+ # 이전, 현재, 다음 값을 비교하기 위해 shift를 활용
142
+ df['prev'] = df['val'].shift(1)
143
+ df['next'] = df['val'].shift(-1)
144
+ print(df)
145
+
146
+ # [1]만 추출: 이전값과 다음값이 모두 1이 아니면서 현재값이 1인 경우
147
+ isolated_ones = df[(df['val'] == 1) & (df['prev'] != 1) & (df['next'] != 1)]
148
+
149
+ print(isolated_ones)
150
+
151
+ def main7():
152
+ from utilskit import logutils as lu
153
+ log = lu.get_logger(
154
+ log_path='./log3',
155
+ log_name='whole',
156
+ rollover=True
157
+ )
158
+
159
+ log.debug("DEBUG 메시지입니다.")
160
+ log.info("INFO 메시지입니다.")
161
+ log.warning("WARNING 메시지입니다.")
162
+ log.error("ERROR 메시지입니다.")
163
+ log.critical("CRITICAL 메시지입니다.")
164
+ lu.log_sort('./log3')
165
+
166
+
167
+ def main8():
168
+ import numpy as np
169
+ from utilskit import plotutils as plu
170
+ np.random.seed(42)
171
+ x = np.arange(100)
172
+ data = np.random.randint(5, 20, size=100)
173
+ data1 = np.random.randint(5, 20, size=100)
174
+ data2 = np.random.randint(5, 20, size=100)
175
+
176
+ plu.draw_plot(
177
+ title='whole2',
178
+ x=x,
179
+ y=data,
180
+ fig_size=(30, 8),
181
+ x_range=(-10, 120),
182
+ y_range=(0, 25),
183
+ x_label='x data',
184
+ y_label='y data',
185
+ legend=True,
186
+ title_font=25,
187
+ x_font=20,
188
+ y_font=20,
189
+ x_label_font=23,
190
+ y_label_font=23,
191
+ line_style='dash',
192
+ line_size=3,
193
+ marker_style='circle',
194
+ marker_size=10,
195
+ marker_color='white',
196
+ marker_border_size=2,
197
+ marker_border_color='black',
198
+ add_x_list=[x, x],
199
+ add_y_list=[data1, data2],
200
+ add_color_list=['red', 'violet'],
201
+ focus_list=[(22, 27), (42, 53), (70, 76)],
202
+ focus_color_list=['red', 'red', 'blue'],
203
+ alpha_list=[0.1, 0.5, 1],
204
+ save_path='./image'
205
+ )
206
+
207
+ def main9():
208
+ import numpy as np
209
+ from utilskit import plotutils as plu
210
+ np.random.seed(42)
211
+ x = np.arange(100)
212
+ data = np.random.randint(5, 20, size=100)
213
+ data1 = np.random.randint(50, 90, size=100)
214
+ data2 = np.random.randint(180, 190, size=100)
215
+
216
+ plu.draw_subplot(
217
+ sub_title_list=['data', 'data1', 'data2'],
218
+ x_list=[x, x, x],
219
+ y_list=[data, data1, data2],
220
+ # sub_row_idx=3,
221
+ # sub_col_idx=1,
222
+ # fig_size=(30, 5*3),
223
+ # x_range_list=[(0, 100), (-10, 110), (-20, 120)],
224
+ # y_range_list=[(-10, 100), (-10, 100), (150, 240)],
225
+ # title_font=25,
226
+ # x_font=15,
227
+ # y_font=5,
228
+ focus_list=[(22, 27), (42, 53), (70, 76)],
229
+ focus_color_list=['red', 'red', 'green'],
230
+ alpha_list=[0.2, 0.2, 0.2],
231
+ save_path='./sub_image',
232
+ save_name='sub-focus'
233
+ )
234
+
235
+
236
+ def main10():
237
+ import numpy as np
238
+ from utilskit import repeatutils as rpu
239
+ data = np.array(
240
+ [
241
+ 1, 1, 1, 1, 1, # 0 ~ 4
242
+ 2, 2, 2, 2, # 5 ~ 8
243
+ 3, 3, # 9 ~ 10
244
+ 4, 4, 4, # 11 ~ 13
245
+ np.nan, np.nan, np.nan, np.nan, # 14 ~ 17
246
+ 1, 1, 1, 1, # 18 ~ 21
247
+ 3, 3, 3, # 22 ~ 24
248
+ np.nan, np.nan, np.nan, np.nan, np.nan, # 25 ~ 29
249
+ 1, 1, 1, 1, 1, 1, 1, # 30 ~ 36
250
+ np.nan # 37
251
+ ]
252
+ )
253
+ data = ['아', '아', '아', '아', '아', '바', '바']
254
+ print(data)
255
+ repeat_section = rpu.get_repeat_section(
256
+ data=data,
257
+ repeat=4,
258
+ except_nan=False
259
+ )
260
+ print(repeat_section)
261
+
262
+
263
+ def main11():
264
+ import numpy as np
265
+ from utilskit import repeatutils as rpu
266
+ data = np.array(
267
+ [
268
+ 1, 1, 1, 1, 1, # 0 ~ 4
269
+ 2, 2, 2, 2, # 5 ~ 8
270
+ 3, 3, # 9 ~ 10
271
+ 4, 4, 4, # 11 ~ 13
272
+ np.nan, np.nan, np.nan, np.nan, # 14 ~ 17
273
+ 1, 1, 1, 1, # 18 ~ 21
274
+ 3, 3, 3, # 22 ~ 24
275
+ np.nan, np.nan, np.nan, np.nan, np.nan, # 25 ~ 29
276
+ 1, 1, 1, 1, 1, 1, 1, # 30 ~ 36
277
+ np.nan # 37
278
+ ]
279
+ )
280
+ repeat_section = rpu.get_stan_repeat_section(
281
+ data=data,
282
+ value=1,
283
+ repeat=4,
284
+ mode='a',
285
+ reverse=True
286
+ )
287
+ print(repeat_section)
288
+
289
+
290
+ def main12():
291
+ from utilskit import timeutils as tiu
292
+ now = tiu.get_now('년|분|시|월|초|일')
293
+
294
+ import time
295
+ hh, mm, ss = tiu.time_measure(-1)
296
+ print(f'입력된 값은 {hh}시간 {mm}분 {ss}초 입니다.')
297
+
298
+ date_list = tiu.get_date_list(
299
+ year=2025,
300
+ mon_list=[2],
301
+ start_day_list=[25],
302
+ end_day_list=[33]
303
+ )
304
+ print(date_list)
305
+
306
+
307
+ def main13():
308
+ from utilskit import utils as u
309
+ u.envs_setting()
310
+
311
+ a = 1
312
+ b = '2'
313
+ try:
314
+ c = a + b
315
+ except TypeError:
316
+ error_info = u.get_error_info()
317
+ print(error_info)
318
+
319
+
320
+ def main14():
321
+ import numpy as np
322
+ from utilskit import repeatutils as rpu
323
+ data = np.array(
324
+ [
325
+ 1, 1, 1, 1, 1, # 0 ~ 4
326
+ 2, 2, 2, 2, # 5 ~ 8
327
+ 3, 3, # 9 ~ 10
328
+ 4, 4, 4, # 11 ~ 13
329
+ np.nan, np.nan, np.nan, np.nan, # 14 ~ 17
330
+ 1, 1, 1, 1, # 18 ~ 21
331
+ 3, 4, 5, # 22 ~ 24
332
+ np.nan, np.nan, np.nan, np.nan, np.nan, # 25 ~ 29
333
+ 1, 1, 1, 1, 1, 1, 1, # 30 ~ 36
334
+ np.nan, np.nan, np.nan # 37 ~ 39
335
+ ]
336
+ )
337
+ result = rpu.get_section(
338
+ data,
339
+ repeat=4,
340
+ # mode='e', # 반복횟수 이상
341
+ # key='nan', # 최대 최소 범위 지정시 무시된다.
342
+ max_key=3, # 최대 3
343
+ min_key=2, # 최소 2
344
+ # between=True, # 사잇값
345
+ # max_equal=False, # 3 이하
346
+ # min_equal=True, # 2 이상
347
+ # except_nan=False, # 최대 최소 범위 지정시 무시된다.
348
+ # reverse=True # 반전 없음
349
+ )
350
+ print(result)
351
+
352
+
353
+ if __name__ == '__main__':
354
+ from utilskit import logutils as lu
355
+ log = lu.get_logger()
356
+ log.info('인포메이션')
357
+ log.error('에러로그')
358
+ main14()
@@ -5,7 +5,7 @@ import sys
5
5
  import pandas as pd
6
6
  import numpy as np
7
7
 
8
- __all__ = ["matrix2confusion"]
8
+ __all__ = ["confusion_matrix"]
9
9
 
10
10
  def get_max_2nd_n_reliability(pred):
11
11
  pred_min = np.expand_dims(np.min(pred, axis=1), axis=1)
@@ -79,16 +79,38 @@ def matrix2confusion(matrix, uni_label_list, round_num=4, show_percentage=True):
79
79
 
80
80
  # confusion matrix
81
81
  confusion_matrix = pd.DataFrame(matrix, index=index_list, columns=column_list)
82
- confusion_matrix['accuracy'][0] = whole_accuracy
82
+ # confusion_matrix['accuracy'][0] = whole_accuracy
83
+ confusion_matrix.iloc[0, confusion_matrix.columns.get_loc('accuracy')] = whole_accuracy
83
84
 
84
85
  return confusion_matrix
85
86
 
86
87
 
87
- def make_confusion_matrix(mode, true_list, pred_list, ignore_idx=None, round_num=4, label2id_dict=None, id2label_dict=None, show_percentage=True):
88
+ def confusion_matrix(class_dict, true_list, pred_list,
89
+ ignore_idx=None, round_num=2, percentage=True):
90
+
91
+ # 모드, 데이터, dict 간 호환성 검증
92
+ key_list = list(class_dict.keys())
93
+ value_list = list(class_dict.values())
94
+ try:
95
+ _ = int(value_list[0]) # value 값이 id (정수) 인 경우
96
+ mode = 'label2id'
97
+ except ValueError:
98
+ try:
99
+ _ = int(key_list[0])
100
+ except ValueError:
101
+ raise ValueError('id 값은 정수형이어야합니다.')
102
+ mode = 'id2label'
103
+
104
+ t_unique_list = np.unique(true_list).tolist()
105
+ p_unique_list = np.unique(pred_list).tolist()
106
+ if not set(t_unique_list).issubset(key_list) or not set(p_unique_list).issubset(key_list):
107
+ raise ValueError(f'입력된 정답 데이터({t_unique_list}) 또는 예측 데이터({p_unique_list}) 가 클래스 사전의 key({key_list}) 값과 일치하지 않습니다.')
108
+
109
+
88
110
  if mode == 'label2id':
89
- uni_label_list = list(label2id_dict.keys())
111
+ uni_label_list = key_list.copy()
90
112
  elif mode == 'id2label':
91
- uni_label_list = list(id2label_dict.values())
113
+ uni_label_list = value_list.copy()
92
114
 
93
115
  # matrix
94
116
  matrix = []
@@ -100,8 +122,8 @@ def make_confusion_matrix(mode, true_list, pred_list, ignore_idx=None, round_num
100
122
  # count
101
123
  if mode == 'label2id':
102
124
  for t, p in zip(true_list, pred_list):
103
- t_i = label2id_dict[t]
104
- p_i = label2id_dict[p]
125
+ t_i = class_dict[t]
126
+ p_i = class_dict[p]
105
127
  matrix[t_i][p_i] += 1
106
128
 
107
129
  elif mode == 'id2label':
@@ -117,7 +139,7 @@ def make_confusion_matrix(mode, true_list, pred_list, ignore_idx=None, round_num
117
139
  matrix=matrix,
118
140
  uni_label_list=uni_label_list,
119
141
  round_num=round_num,
120
- show_percentage=show_percentage
142
+ show_percentage=percentage
121
143
  )
122
144
 
123
145
  return confusion_matrix
@@ -0,0 +1 @@
1
+ from .dataframeutils import *