vos-data-utils 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/PKG-INFO +2 -2
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/setup.py +2 -1
- vos-data-utils-0.0.4/vdutils/__init__.py +35 -0
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/bjd.py +40 -10
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/bjdconnector.py +322 -55
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/convaddr.py +121 -26
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/cordate.py +1 -1
- vos-data-utils-0.0.4/vdutils/data/__init__.py +14 -0
- vos-data-utils-0.0.4/vdutils/genpnu.py +623 -0
- {vos-data-utils-0.0.2/vdutils → vos-data-utils-0.0.4/vdutils/library}/__init__.py +2 -19
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/library/data.py +51 -1
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/tests/test_convaddr.py +1 -3
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vdutils/tests/test_cordate.py +6 -1
- vos-data-utils-0.0.4/vdutils/tests/test_genpnu.py +1004 -0
- vos-data-utils-0.0.4/vdutils/tests/test_vid.py +247 -0
- vos-data-utils-0.0.4/vdutils/tests/tests.py +28 -0
- vos-data-utils-0.0.4/vdutils/vid.py +177 -0
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vos_data_utils.egg-info/PKG-INFO +2 -2
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vos_data_utils.egg-info/SOURCES.txt +3 -13
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vos_data_utils.egg-info/requires.txt +1 -0
- vos-data-utils-0.0.2/vdutils/data/__init__.py +0 -13
- vos-data-utils-0.0.2/vdutils/data/bjd.txt +0 -49844
- vos-data-utils-0.0.2/vdutils/data/bjd_changed.txt +0 -8579
- vos-data-utils-0.0.2/vdutils/data/bjd_connectors.pkl +0 -0
- vos-data-utils-0.0.2/vdutils/data/bjd_current.txt +0 -20560
- vos-data-utils-0.0.2/vdutils/data/bjd_frequency_dictionary.txt +0 -11290
- vos-data-utils-0.0.2/vdutils/data/bjd_smallest.txt +0 -9786
- vos-data-utils-0.0.2/vdutils/data/date_dictionary.txt +0 -738978
- vos-data-utils-0.0.2/vdutils/data/full_bjd_connectors.pkl +0 -0
- vos-data-utils-0.0.2/vdutils/data/multiple_word_sgg_list.txt +0 -65
- vos-data-utils-0.0.2/vdutils/data/pnu/bjd_20230701.pkl +0 -0
- vos-data-utils-0.0.2/vdutils/data/pnu/bjd_20240101.pkl +0 -0
- vos-data-utils-0.0.2/vdutils/data/pnu/bjd_20240118.pkl +0 -0
- vos-data-utils-0.0.2/vdutils/pnu.py +0 -221
- vos-data-utils-0.0.2/vdutils/tests/__init__.py +0 -0
- vos-data-utils-0.0.2/vdutils/tests/tests.py +0 -18
- vos-data-utils-0.0.2/vdutils/vid.py +0 -119
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/README.md +0 -0
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/setup.cfg +0 -0
- {vos-data-utils-0.0.2/vdutils/library → vos-data-utils-0.0.4/vdutils/tests}/__init__.py +0 -0
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vos_data_utils.egg-info/dependency_links.txt +0 -0
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vos_data_utils.egg-info/entry_points.txt +0 -0
- {vos-data-utils-0.0.2 → vos-data-utils-0.0.4}/vos_data_utils.egg-info/top_level.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vos-data-utils
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: description
|
|
5
|
-
Author:
|
|
5
|
+
Author: ValueOfSpace
|
|
6
6
|
Author-email: dev@valueofspace.com
|
|
7
7
|
Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
doc = """
|
|
2
|
+
vos-data-utils - a utility library for ValueOfSpace data work with Python
|
|
3
|
+
==========================================================================
|
|
4
|
+
|
|
5
|
+
Main Features
|
|
6
|
+
-------------
|
|
7
|
+
Correction function for dates related to building permit, commencement, and completion in the Building Permit Information
|
|
8
|
+
Support for current legal district data and change history
|
|
9
|
+
Correction function to align with the current legal district by reflecting changes in legal district data
|
|
10
|
+
Conversion function to transform address strings into Parcel Number (PNU)
|
|
11
|
+
Generation function for unique transaction case IDs in ValueofSpace
|
|
12
|
+
"""
|
|
13
|
+
version = "0.0.4"
|
|
14
|
+
author = "ValueOfSpace"
|
|
15
|
+
description = "description"
|
|
16
|
+
license = "MIT"
|
|
17
|
+
|
|
18
|
+
def __version__():
|
|
19
|
+
return version
|
|
20
|
+
|
|
21
|
+
def __author__():
|
|
22
|
+
return author
|
|
23
|
+
|
|
24
|
+
def __description__():
|
|
25
|
+
return description
|
|
26
|
+
|
|
27
|
+
def __license__():
|
|
28
|
+
return license
|
|
29
|
+
|
|
30
|
+
def __doc__():
|
|
31
|
+
return doc
|
|
32
|
+
|
|
33
|
+
def __test__():
|
|
34
|
+
from vdutils.tests.tests import __run_test__
|
|
35
|
+
__run_test__()
|
|
@@ -5,7 +5,6 @@ import pickle
|
|
|
5
5
|
from typing import (
|
|
6
6
|
List,
|
|
7
7
|
Dict,
|
|
8
|
-
Literal,
|
|
9
8
|
Optional
|
|
10
9
|
)
|
|
11
10
|
import requests
|
|
@@ -14,20 +13,25 @@ import pkg_resources
|
|
|
14
13
|
from collections import defaultdict
|
|
15
14
|
from dotenv import load_dotenv
|
|
16
15
|
from dataclasses import dataclass
|
|
17
|
-
from vdutils import Log
|
|
16
|
+
from vdutils.library import Log
|
|
18
17
|
from vdutils.library.data import (
|
|
19
18
|
ADD_BJD_CHANGED_DICTIONARY,
|
|
20
19
|
CORRECT_ERROR_BJD,
|
|
21
20
|
ADD_BJD,
|
|
22
21
|
DELETE_BJD
|
|
23
22
|
)
|
|
23
|
+
from vdutils.data import (
|
|
24
|
+
__sep__,
|
|
25
|
+
__index__,
|
|
26
|
+
__encoding__
|
|
27
|
+
)
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
@dataclass
|
|
27
31
|
class Bjd():
|
|
28
32
|
|
|
29
|
-
|
|
30
33
|
def __init__(self):
|
|
34
|
+
|
|
31
35
|
load_dotenv()
|
|
32
36
|
try: self.api_key: str = os.environ['BJD_API_KEY']
|
|
33
37
|
except: self.api_key = None
|
|
@@ -35,14 +39,15 @@ class Bjd():
|
|
|
35
39
|
except: self.api_key = None
|
|
36
40
|
try: self.api_key: str = os.environ['BJD_API_SUB_URL'] # https://www.data.go.kr/data/15063424/fileData.do#layer-api-guide API 목록 중 국토교통부_전국 법정동_20230710 GET
|
|
37
41
|
except: self.api_key = None
|
|
42
|
+
|
|
43
|
+
self.sep = __sep__
|
|
44
|
+
self.index = __index__
|
|
45
|
+
self.encoding = __encoding__
|
|
38
46
|
self.update_dir_path: str = "vdutils/data/update" # 행정안전부 https://www.mois.go.kr/frt/bbs/type001/commonSelectBoardList.do?bbsId=BBSMSTR_000000000052
|
|
39
47
|
self.api_page: int = 0
|
|
40
48
|
self.api_per_page: int = 1024
|
|
41
49
|
self.bjd_api_dictionary: Dict[str, Dict[str, str]] = None
|
|
42
50
|
self.bjd_api_df: pd.DataFrame = None
|
|
43
|
-
self.output_sep: Literal['\t'] = '\t'
|
|
44
|
-
self.output_encoding: str = 'utf-8'
|
|
45
|
-
self.output_index: bool = False
|
|
46
51
|
self.file_name_bjd = pkg_resources.resource_filename(
|
|
47
52
|
"vdutils",
|
|
48
53
|
"data/bjd.txt"
|
|
@@ -108,6 +113,7 @@ class Bjd():
|
|
|
108
113
|
break
|
|
109
114
|
return res_dic
|
|
110
115
|
|
|
116
|
+
|
|
111
117
|
def _correct_error(
|
|
112
118
|
self,
|
|
113
119
|
api_dic: Dict[str, Dict[str, str]]
|
|
@@ -124,6 +130,7 @@ class Bjd():
|
|
|
124
130
|
|
|
125
131
|
return api_dic
|
|
126
132
|
|
|
133
|
+
|
|
127
134
|
def _update_data(
|
|
128
135
|
self,
|
|
129
136
|
res_dic: Dict[str, Dict[str, str]]
|
|
@@ -142,6 +149,7 @@ class Bjd():
|
|
|
142
149
|
res_dic.update(update_dic)
|
|
143
150
|
return res_dic
|
|
144
151
|
|
|
152
|
+
|
|
145
153
|
def _correct_prev_bjd_cd(
|
|
146
154
|
self,
|
|
147
155
|
sido_nm: str,
|
|
@@ -155,12 +163,14 @@ class Bjd():
|
|
|
155
163
|
else:
|
|
156
164
|
return prev_bjd_cd
|
|
157
165
|
|
|
166
|
+
|
|
158
167
|
@staticmethod
|
|
159
168
|
def _update_sejongsi(sgg_nm):
|
|
160
169
|
if sgg_nm == '세종시':
|
|
161
170
|
return None
|
|
162
171
|
return sgg_nm
|
|
163
172
|
|
|
173
|
+
|
|
164
174
|
def _split_sgg_nm(
|
|
165
175
|
self,
|
|
166
176
|
sgg_nm: Optional[str]
|
|
@@ -193,6 +203,7 @@ class Bjd():
|
|
|
193
203
|
else:
|
|
194
204
|
return sgg_nm
|
|
195
205
|
|
|
206
|
+
|
|
196
207
|
@staticmethod
|
|
197
208
|
def _clean_bjd_nm(
|
|
198
209
|
bjd_nm: Optional[str]
|
|
@@ -208,6 +219,7 @@ class Bjd():
|
|
|
208
219
|
return bjd_nm
|
|
209
220
|
return ''
|
|
210
221
|
|
|
222
|
+
|
|
211
223
|
def _get_full_bjd_nm(
|
|
212
224
|
self,
|
|
213
225
|
sido_nm: Optional[str],
|
|
@@ -224,6 +236,7 @@ class Bjd():
|
|
|
224
236
|
full_bjd_nm = re.sub(r'\s+', ' ', full_bjd_nm) # 공백이 여러 칸인 것을 한 칸으로 변경
|
|
225
237
|
return full_bjd_nm
|
|
226
238
|
|
|
239
|
+
|
|
227
240
|
def _make_dataframe(self, res_dic) -> pd.DataFrame:
|
|
228
241
|
res_df = pd.DataFrame(res_dic).T.sort_values('법정동코드').reset_index().drop(columns='index').replace(0, None).replace('0', None)
|
|
229
242
|
res_df['과거법정동코드'] = res_df[[
|
|
@@ -247,7 +260,8 @@ class Bjd():
|
|
|
247
260
|
]].apply(lambda x: self._get_full_bjd_nm(*x), axis=1)
|
|
248
261
|
res_df = res_df.reset_index(drop=True)
|
|
249
262
|
return res_df
|
|
250
|
-
|
|
263
|
+
|
|
264
|
+
|
|
251
265
|
def _create_bjd(self):
|
|
252
266
|
"""
|
|
253
267
|
국토교통부 전국 법정동 API 수집하여 딕셔너리와 데이터프레임으로 가공하는 기능
|
|
@@ -261,6 +275,7 @@ class Bjd():
|
|
|
261
275
|
self.bjd_api_df = res_df
|
|
262
276
|
self.logger.info("Success Created Bjd Dataframe and Dictionary from Public Data API")
|
|
263
277
|
|
|
278
|
+
|
|
264
279
|
def _save_bjd(self):
|
|
265
280
|
if self.bjd_api_df is None:
|
|
266
281
|
self._create_bjd()
|
|
@@ -276,7 +291,8 @@ class Bjd():
|
|
|
276
291
|
f.writelines('\n'.join(self.multiple_word_sgg_list))
|
|
277
292
|
f.close()
|
|
278
293
|
self.logger.info("Success Saved Multiple Sigungu List To Text File")
|
|
279
|
-
|
|
294
|
+
|
|
295
|
+
|
|
280
296
|
def _do_all_bjd_job(self):
|
|
281
297
|
"""
|
|
282
298
|
국토교통부 전국 법정동 API 수집하여 법정동 관련 파일을 모두 생성하는 기능
|
|
@@ -319,10 +335,12 @@ class Bjd():
|
|
|
319
335
|
@dataclass
|
|
320
336
|
class CurrentBjd(Bjd):
|
|
321
337
|
|
|
338
|
+
|
|
322
339
|
def __init__(self):
|
|
323
340
|
super().__init__()
|
|
324
341
|
self.current_bjd_df: pd.DataFrame = None
|
|
325
342
|
|
|
343
|
+
|
|
326
344
|
def _create_current_bjd(self):
|
|
327
345
|
"""
|
|
328
346
|
국토교통부 전국 법정동 API 현재 존재하는 법정동만 데이터프레임으로 가공하는 기능
|
|
@@ -333,6 +351,7 @@ class CurrentBjd(Bjd):
|
|
|
333
351
|
self.current_bjd_df = self.bjd_api_df.loc[self.bjd_api_df['삭제일자'].isnull()]
|
|
334
352
|
self.logger.info("Success Created Current Bjd Dataframe")
|
|
335
353
|
|
|
354
|
+
|
|
336
355
|
def _save_current_bjd(self):
|
|
337
356
|
if self.current_bjd_df is None:
|
|
338
357
|
self._create_current_bjd()
|
|
@@ -348,10 +367,12 @@ class CurrentBjd(Bjd):
|
|
|
348
367
|
@dataclass
|
|
349
368
|
class ChangedBjd(Bjd):
|
|
350
369
|
|
|
370
|
+
|
|
351
371
|
def __init__(self):
|
|
352
372
|
super().__init__()
|
|
353
373
|
self.changed_bjd_df: pd.DataFrame = None
|
|
354
374
|
|
|
375
|
+
|
|
355
376
|
def _get_prev_bjd_nm(
|
|
356
377
|
self,
|
|
357
378
|
prev_bjd_cd: Optional[str] # 법정동코드_변경전
|
|
@@ -376,6 +397,7 @@ class ChangedBjd(Bjd):
|
|
|
376
397
|
except:
|
|
377
398
|
return None
|
|
378
399
|
|
|
400
|
+
|
|
379
401
|
def _get_prev_value(
|
|
380
402
|
self,
|
|
381
403
|
prev_bjd_cd: Optional[str], # 법정동코드_변경전
|
|
@@ -413,6 +435,7 @@ class ChangedBjd(Bjd):
|
|
|
413
435
|
bjd_nm_changed = f"{' '.join(changed_list_prev)} > {' '.join(changed_list_curr)}"
|
|
414
436
|
return f'[법정동코드 변경내역] {bjd_cd_changed} | [법정동명 변경내역]: {bjd_nm_changed}'
|
|
415
437
|
|
|
438
|
+
|
|
416
439
|
def _create_changed_bjd(self):
|
|
417
440
|
if self.bjd_api_df is None:
|
|
418
441
|
self._create_bjd()
|
|
@@ -451,7 +474,8 @@ class ChangedBjd(Bjd):
|
|
|
451
474
|
]].apply(lambda x: self._find_diff(*x), axis=1)
|
|
452
475
|
self.changed_bjd_df = self.changed_bjd_df.sort_values(['생성일자_변경후'], na_position='first')
|
|
453
476
|
self.logger.info("Success Created Changed Bjd Dataframe")
|
|
454
|
-
|
|
477
|
+
|
|
478
|
+
|
|
455
479
|
def _save_changed_bjd(self):
|
|
456
480
|
if self.changed_bjd_df is None:
|
|
457
481
|
self._create_changed_bjd()
|
|
@@ -467,10 +491,12 @@ class ChangedBjd(Bjd):
|
|
|
467
491
|
@dataclass
|
|
468
492
|
class SmallestBjd(CurrentBjd):
|
|
469
493
|
|
|
494
|
+
|
|
470
495
|
def __init__(self):
|
|
471
496
|
super().__init__()
|
|
472
497
|
self.smallest_bjd_list: List[str] = None
|
|
473
498
|
|
|
499
|
+
|
|
474
500
|
def _create_smallest_bjd(self):
|
|
475
501
|
if self.current_bjd_df is None:
|
|
476
502
|
self._create_current_bjd()
|
|
@@ -482,6 +508,7 @@ class SmallestBjd(CurrentBjd):
|
|
|
482
508
|
self.smallest_bjd_list = sorted(list(smallest_bjd_set))
|
|
483
509
|
self.logger.info("Success Created Smallest Bjd Name List")
|
|
484
510
|
|
|
511
|
+
|
|
485
512
|
def _save_smallest_bjd(self):
|
|
486
513
|
if self.smallest_bjd_list is None:
|
|
487
514
|
self._create_smallest_bjd()
|
|
@@ -494,10 +521,12 @@ class SmallestBjd(CurrentBjd):
|
|
|
494
521
|
@dataclass
|
|
495
522
|
class BjdFrequencyDictionary(CurrentBjd):
|
|
496
523
|
|
|
524
|
+
|
|
497
525
|
def __init__(self):
|
|
498
526
|
super().__init__()
|
|
499
527
|
self.bjd_frequency_dictionary: Dict[str, int] = None
|
|
500
|
-
|
|
528
|
+
|
|
529
|
+
|
|
501
530
|
def _create_bjd_frequency_dictionary(self):
|
|
502
531
|
if self.current_bjd_df is None:
|
|
503
532
|
self._create_current_bjd()
|
|
@@ -509,6 +538,7 @@ class BjdFrequencyDictionary(CurrentBjd):
|
|
|
509
538
|
|
|
510
539
|
self.logger.info("Success Created Bjd Frequency Dictionary")
|
|
511
540
|
|
|
541
|
+
|
|
512
542
|
def _save_bjd_frequency_dictionary(self):
|
|
513
543
|
if self.bjd_frequency_dictionary is None:
|
|
514
544
|
self._create_bjd_frequency_dictionary()
|