vos-data-utils 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vos-data-utils might be problematic. Click here for more details.
- vdutils/__init__.py +21 -45
- vdutils/bjd.py +40 -10
- vdutils/bjdconnector.py +322 -55
- vdutils/convaddr.py +121 -26
- vdutils/cordate.py +1 -1
- vdutils/data/__init__.py +10 -9
- vdutils/genpnu.py +623 -0
- vdutils/library/__init__.py +42 -0
- vdutils/library/data.py +51 -1
- vdutils/tests/test_convaddr.py +1 -3
- vdutils/tests/test_cordate.py +6 -1
- vdutils/tests/test_genpnu.py +1004 -0
- vdutils/tests/test_vid.py +247 -0
- vdutils/tests/tests.py +15 -5
- vdutils/vid.py +157 -99
- {vos_data_utils-0.0.2.dist-info → vos_data_utils-0.0.4.dist-info}/METADATA +3 -2
- vos_data_utils-0.0.4.dist-info/RECORD +21 -0
- vdutils/data/bjd.txt +0 -49844
- vdutils/data/bjd_changed.txt +0 -8579
- vdutils/data/bjd_connectors.pkl +0 -0
- vdutils/data/bjd_current.txt +0 -20560
- vdutils/data/bjd_frequency_dictionary.txt +0 -11290
- vdutils/data/bjd_smallest.txt +0 -9786
- vdutils/data/date_dictionary.txt +0 -738978
- vdutils/data/full_bjd_connectors.pkl +0 -0
- vdutils/data/multiple_word_sgg_list.txt +0 -65
- vdutils/data/pnu/bjd_20230701.pkl +0 -0
- vdutils/data/pnu/bjd_20240101.pkl +0 -0
- vdutils/data/pnu/bjd_20240118.pkl +0 -0
- vdutils/pnu.py +0 -221
- vos_data_utils-0.0.2.dist-info/RECORD +0 -31
- {vos_data_utils-0.0.2.dist-info → vos_data_utils-0.0.4.dist-info}/WHEEL +0 -0
- {vos_data_utils-0.0.2.dist-info → vos_data_utils-0.0.4.dist-info}/entry_points.txt +0 -0
- {vos_data_utils-0.0.2.dist-info → vos_data_utils-0.0.4.dist-info}/top_level.txt +0 -0
vdutils/bjdconnector.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import pickle
|
|
2
|
-
import pkg_resources
|
|
3
1
|
import pandas as pd
|
|
4
2
|
from typing import (
|
|
5
3
|
List,
|
|
@@ -7,23 +5,24 @@ from typing import (
|
|
|
7
5
|
Optional
|
|
8
6
|
)
|
|
9
7
|
from dataclasses import dataclass
|
|
8
|
+
from vdutils.data import _get_folder_names
|
|
9
|
+
from vdutils.library import Log
|
|
10
10
|
from vdutils.convaddr import ConvAddr
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
@dataclass
|
|
14
14
|
class BjdObject():
|
|
15
15
|
|
|
16
|
-
CA = ConvAddr()
|
|
17
|
-
current_sido_list = CA.sido_list
|
|
18
|
-
current_sgg_list = CA.sgg_list
|
|
19
|
-
current_emd_list = CA.emd_list
|
|
20
|
-
current_ri_list = CA.ri_list
|
|
21
|
-
multiple_word_sgg_list = CA.multiple_word_sgg_list
|
|
22
|
-
|
|
23
16
|
def __init__(
|
|
24
17
|
self,
|
|
25
18
|
bjd_cd: str,
|
|
26
|
-
full_bjd_nm: str
|
|
19
|
+
full_bjd_nm: str,
|
|
20
|
+
sido_list: List[str],
|
|
21
|
+
sgg_list: List[str],
|
|
22
|
+
emd_list: List[str],
|
|
23
|
+
ri_list: List[str],
|
|
24
|
+
multiple_word_sgg_list: List[str],
|
|
25
|
+
base_dt: str
|
|
27
26
|
):
|
|
28
27
|
self.bjd_cd: str = bjd_cd
|
|
29
28
|
self.full_bjd_nm: str = full_bjd_nm
|
|
@@ -41,22 +40,31 @@ class BjdObject():
|
|
|
41
40
|
self.emd_cd: Optional[str] = None
|
|
42
41
|
self.ri_cd: Optional[str] = None
|
|
43
42
|
self.bjd_nm: str = None
|
|
43
|
+
self.base_dt: str = base_dt
|
|
44
|
+
self.base_dt_print: str = f"{self.base_dt[:4]}-{self.base_dt[4:6]}-{self.base_dt[6:8]}"
|
|
45
|
+
self.sido_list: List[str] = sido_list
|
|
46
|
+
self.sgg_list: List[str] = sgg_list
|
|
47
|
+
self.emd_list: List[str] = emd_list
|
|
48
|
+
self.ri_list: List[str] = ri_list
|
|
49
|
+
self.multiple_word_sgg_list: List[str] = multiple_word_sgg_list
|
|
44
50
|
self._prepare()
|
|
45
51
|
|
|
52
|
+
|
|
46
53
|
def _get_bjd_typ(
|
|
47
54
|
self,
|
|
48
55
|
bjd_nm
|
|
49
56
|
):
|
|
50
57
|
for bjd_typ_nm, bjd_typ_group in [
|
|
51
|
-
("sido", self.
|
|
52
|
-
("sgg", self.
|
|
53
|
-
("emd", self.
|
|
54
|
-
("ri", self.
|
|
58
|
+
("sido", self.sido_list),
|
|
59
|
+
("sgg", self.sgg_list),
|
|
60
|
+
("emd", self.emd_list),
|
|
61
|
+
("ri", self.ri_list)
|
|
55
62
|
]:
|
|
56
63
|
if bjd_nm in bjd_typ_group:
|
|
57
64
|
return bjd_typ_nm
|
|
58
65
|
raise ValueError(f"{bjd_nm}: Invalid input")
|
|
59
66
|
|
|
67
|
+
|
|
60
68
|
def _get_typ(self):
|
|
61
69
|
if self.ri_nm is not None: return "ri"
|
|
62
70
|
elif self.emd_nm is not None: return "emd"
|
|
@@ -64,6 +72,7 @@ class BjdObject():
|
|
|
64
72
|
elif self.sido_nm is not None: return "sido"
|
|
65
73
|
else: raise ValueError()
|
|
66
74
|
|
|
75
|
+
|
|
67
76
|
def _get_bjd_nm(self):
|
|
68
77
|
if self.typ == "sido": return self.sido_nm
|
|
69
78
|
elif self.typ == "sgg": return self.sgg_nm
|
|
@@ -71,6 +80,7 @@ class BjdObject():
|
|
|
71
80
|
elif self.typ == "ri": return self.ri_nm
|
|
72
81
|
else: raise ValueError()
|
|
73
82
|
|
|
83
|
+
|
|
74
84
|
def _get_bjd_cd(self):
|
|
75
85
|
if self.sido_nm is not None:
|
|
76
86
|
if self.sido_nm == "세종특별자치시": self.sido_cd = self.bjd_cd[:5] + ("0" * 5)
|
|
@@ -82,6 +92,7 @@ class BjdObject():
|
|
|
82
92
|
if self.ri_nm is not None:
|
|
83
93
|
self.ri_cd = self.bjd_cd
|
|
84
94
|
|
|
95
|
+
|
|
85
96
|
def _get_is_existed_bjd(self):
|
|
86
97
|
if self.sido_nm is not None:
|
|
87
98
|
self.sido = True
|
|
@@ -92,12 +103,14 @@ class BjdObject():
|
|
|
92
103
|
if self.ri_nm is not None:
|
|
93
104
|
self.ri = True
|
|
94
105
|
|
|
106
|
+
|
|
95
107
|
def _split_full_bjd_nm(self):
|
|
96
108
|
for multiple_sgg_nm in self.multiple_word_sgg_list:
|
|
97
109
|
if multiple_sgg_nm in self.full_bjd_nm:
|
|
98
110
|
return [multiple_sgg_nm] + self.full_bjd_nm.replace(multiple_sgg_nm, "").split()
|
|
99
111
|
return self.full_bjd_nm.split()
|
|
100
112
|
|
|
113
|
+
|
|
101
114
|
def _prepare(self):
|
|
102
115
|
bjd_nm_list = self._split_full_bjd_nm()
|
|
103
116
|
for bjd_nm in bjd_nm_list:
|
|
@@ -112,6 +125,7 @@ class BjdObject():
|
|
|
112
125
|
self._get_bjd_cd()
|
|
113
126
|
self._get_is_existed_bjd()
|
|
114
127
|
|
|
128
|
+
|
|
115
129
|
def _print(self):
|
|
116
130
|
print(f"bjd_cd: {self.bjd_cd}")
|
|
117
131
|
print(f"bjd_nm: {self.bjd_nm}")
|
|
@@ -129,6 +143,7 @@ class BjdObject():
|
|
|
129
143
|
print(f"sgg_cd: {self.sgg_cd}")
|
|
130
144
|
print(f"emd_cd: {self.emd_cd}")
|
|
131
145
|
print(f"ri_cd: {self.ri_cd}")
|
|
146
|
+
print(f"base_dt: {self.base_dt_print}")
|
|
132
147
|
|
|
133
148
|
|
|
134
149
|
@dataclass
|
|
@@ -141,10 +156,17 @@ class BjdConnector():
|
|
|
141
156
|
"ri": "emd"
|
|
142
157
|
}
|
|
143
158
|
|
|
159
|
+
|
|
144
160
|
def __init__(
|
|
145
161
|
self,
|
|
146
162
|
bjd_cd: str,
|
|
147
|
-
full_bjd_nm: str
|
|
163
|
+
full_bjd_nm: str,
|
|
164
|
+
sido_list: List[str],
|
|
165
|
+
sgg_list: List[str],
|
|
166
|
+
emd_list: List[str],
|
|
167
|
+
ri_list: List[str],
|
|
168
|
+
multiple_word_sgg_list: List[str],
|
|
169
|
+
base_dt: str,
|
|
148
170
|
):
|
|
149
171
|
self.typ: str = None
|
|
150
172
|
self.bjd_cd: str = bjd_cd
|
|
@@ -159,18 +181,33 @@ class BjdConnector():
|
|
|
159
181
|
self.bottom_bjd_nm: List[str] = []
|
|
160
182
|
self.bottom_bjd: List[BjdConnector()] = []
|
|
161
183
|
self.is_smallest: bool = None
|
|
184
|
+
self.base_dt: str = base_dt
|
|
185
|
+
self.base_dt_print: str = f"{self.base_dt[:4]}-{self.base_dt[4:6]}-{self.base_dt[6:8]}"
|
|
186
|
+
self.sido_list: List[str] = sido_list
|
|
187
|
+
self.sgg_list: List[str] = sgg_list
|
|
188
|
+
self.emd_list: List[str] = emd_list
|
|
189
|
+
self.ri_list: List[str] = ri_list
|
|
190
|
+
self.multiple_word_sgg_list: List[str] = multiple_word_sgg_list
|
|
162
191
|
self._update_metadata()
|
|
163
192
|
self._update_top_bjd()
|
|
164
193
|
|
|
194
|
+
|
|
165
195
|
def _update_metadata(self):
    """Build the ``BjdObject`` for this connector and mirror its fields.

    The object is constructed from this connector's code, full name, name
    lists and base date. It is stored as ``self.metadata``, and its ``typ``
    and ``bjd_nm`` are copied onto the connector.
    """
    forwarded = (
        "bjd_cd",
        "full_bjd_nm",
        "sido_list",
        "sgg_list",
        "emd_list",
        "ri_list",
        "multiple_word_sgg_list",
        "base_dt",
    )
    metadata = BjdObject(**{name: getattr(self, name) for name in forwarded})
    self.metadata = metadata
    self.typ = metadata.typ
    self.bjd_nm = metadata.bjd_nm
|
|
173
209
|
|
|
210
|
+
|
|
174
211
|
def _find_top_relation(
|
|
175
212
|
self,
|
|
176
213
|
typ,
|
|
@@ -186,6 +223,7 @@ class BjdConnector():
|
|
|
186
223
|
metadata=metadata
|
|
187
224
|
) # 상위 법정동이 없을 상위 법정동의 상위를 탐색
|
|
188
225
|
|
|
226
|
+
|
|
189
227
|
def _update_top_bjd(self):
|
|
190
228
|
typ = self.typ
|
|
191
229
|
metadata = self.metadata
|
|
@@ -213,9 +251,11 @@ class BjdConnector():
|
|
|
213
251
|
else:
|
|
214
252
|
print(self.metadata._print())
|
|
215
253
|
|
|
254
|
+
|
|
216
255
|
def _update_connected_bjd(self):
|
|
217
256
|
pass
|
|
218
257
|
|
|
258
|
+
|
|
219
259
|
def _print(self):
|
|
220
260
|
print(f"typ: {self.typ}")
|
|
221
261
|
print(f"bjd_cd: {self.bjd_cd}")
|
|
@@ -230,20 +270,88 @@ class BjdConnector():
|
|
|
230
270
|
print(f"bottom_bjd_nm: {self.bottom_bjd_nm}")
|
|
231
271
|
print(f"bottom_bjd: {self.bottom_bjd}")
|
|
232
272
|
print(f"is_smallest: {self.is_smallest}")
|
|
273
|
+
print(f"base_dt: {self.base_dt_print}")
|
|
233
274
|
|
|
234
275
|
|
|
235
276
|
@dataclass
|
|
236
277
|
class BjdConnectorGraph():
|
|
237
278
|
|
|
238
|
-
CA = ConvAddr()
|
|
239
|
-
bjd_current_df = CA.bjd_df
|
|
240
279
|
|
|
241
|
-
def __init__(
|
|
280
|
+
def __init__(
    self,
    base_dt: Optional[str] = None,
    is_inherit: bool = False
):
    """Validate the requested date, load district data and build the graph.

    Args:
        base_dt: Requested reference date as an 8-digit ``YYYYMMDD``
            string, or ``None`` to let ``_get_base_dt`` choose one.
        is_inherit: Stored on the instance and consulted by
            ``_get_base_dt``.

    Raises:
        TypeError: If ``base_dt`` is given but is not a string.
        ValueError: If ``base_dt`` is not all digits or is not exactly
            8 characters long.
    """
    # Flat validation chain: ``None`` skips every check, otherwise the first
    # failing check raises.
    if base_dt is None:
        pass
    elif not isinstance(base_dt, str):
        raise TypeError("type of object('base_dt') must be string")
    elif not base_dt.isdigit():
        raise ValueError("object('base_dt') should be a string consisting of numbers")
    elif len(base_dt) != 8:
        raise ValueError("object('base_dt') should be a string consisting of exactly 8(YYYYMMDD) digits")

    self.base_dt = base_dt
    self.is_inherit = is_inherit
    self.logger = Log('BjdConnectorGraph').stream_handler("INFO")
    self.bjd_connectors: Dict[str, BjdConnector] = dict()
    self._get_base_dt()

    # Pull the district table and the name lists from ConvAddr for the
    # resolved date.
    conv_addr = ConvAddr(base_dt=self.base_dt, is_inherit=True)
    self.bjd_current_df = conv_addr.bjd_df
    for list_name in (
        "sido_list",
        "sgg_list",
        "emd_list",
        "ri_list",
        "multiple_word_sgg_list",
    ):
        setattr(self, list_name, getattr(conv_addr, list_name))

    # Build order matters: create every connector, link them, then verify.
    self._creates_bjd_connectors()
    self._update_bjd_connectors()
    self._check_bjd_connectors_bottom_counts()
|
|
246
314
|
|
|
315
|
+
|
|
316
|
+
def _find_latest_base_dt(
|
|
317
|
+
self,
|
|
318
|
+
base_dts: List[str]
|
|
319
|
+
) -> str:
|
|
320
|
+
|
|
321
|
+
"""
|
|
322
|
+
입력된 날짜(YYYYMMDD)와 법정동 데이터 시점 리스트와 비교하여 입력된 날짜보다 과거 시점 중 최신 시점을 반환
|
|
323
|
+
"""
|
|
324
|
+
|
|
325
|
+
for date in base_dts:
|
|
326
|
+
if date < self.base_dt:
|
|
327
|
+
return date
|
|
328
|
+
|
|
329
|
+
# 입력된 날짜보다 작은 날짜가 없을 경우
|
|
330
|
+
self.logger.info("입력된 날짜보다 이전 시점의 법정동 데이터가 존재하지 않습니다. 보유한 데이터중 최신 데이터를 적용합니다.")
|
|
331
|
+
return base_dts[0]
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _get_base_dt(self):
|
|
335
|
+
|
|
336
|
+
"""
|
|
337
|
+
입력된 날짜(YYYYMMDD)와 법정동 데이터 시점 리스트와 비교하여 입력된 날짜보다 과거 시점 중 최신 시점을 반환 \n
|
|
338
|
+
입력된 날짜(YYYYMMDD)가 없으면 데이터 시점 리스트 중 최신 시점을 반환
|
|
339
|
+
"""
|
|
340
|
+
|
|
341
|
+
if self.is_inherit:
|
|
342
|
+
return self.base_dt
|
|
343
|
+
|
|
344
|
+
base_dts = _get_folder_names(base_folder_path='vdutils/data/bjd')
|
|
345
|
+
base_dts = sorted(base_dts, reverse=True)
|
|
346
|
+
try:
|
|
347
|
+
if self.base_dt is None:
|
|
348
|
+
self.base_dt = base_dts[0]
|
|
349
|
+
else:
|
|
350
|
+
self.base_dt = self._find_latest_base_dt(base_dts=base_dts)
|
|
351
|
+
finally:
|
|
352
|
+
self.logger.info(f"적용 법정동 데이터 시점: {self.base_dt}")
|
|
353
|
+
|
|
354
|
+
|
|
247
355
|
def _creates_bjd_connectors(self):
|
|
248
356
|
for _ in self.bjd_current_df[["법정동코드", "법정동명"]].itertuples():
|
|
249
357
|
bjd_cd = str(_.법정동코드)
|
|
@@ -251,9 +359,16 @@ class BjdConnectorGraph():
|
|
|
251
359
|
|
|
252
360
|
self.bjd_connectors[bjd_cd] = BjdConnector(
|
|
253
361
|
bjd_cd=bjd_cd,
|
|
254
|
-
full_bjd_nm=full_bjd_nm
|
|
362
|
+
full_bjd_nm=full_bjd_nm,
|
|
363
|
+
sido_list=self.sido_list,
|
|
364
|
+
sgg_list=self.sgg_list,
|
|
365
|
+
emd_list=self.emd_list,
|
|
366
|
+
ri_list=self.ri_list,
|
|
367
|
+
multiple_word_sgg_list=self.multiple_word_sgg_list,
|
|
368
|
+
base_dt=self.base_dt
|
|
255
369
|
)
|
|
256
370
|
|
|
371
|
+
|
|
257
372
|
def _update_bjd_connectors(self):
|
|
258
373
|
for bjd_cd, bjd_connector in self.bjd_connectors.items():
|
|
259
374
|
if len(bjd_connector.top_bjd_cd):
|
|
@@ -276,6 +391,7 @@ class BjdConnectorGraph():
|
|
|
276
391
|
if len(bjd_connector.bottom_bjd_cd) == 0: bjd_connector.is_smallest = True
|
|
277
392
|
else: bjd_connector.is_smallest = False
|
|
278
393
|
|
|
394
|
+
|
|
279
395
|
def _check_bjd_connectors_bottom_counts(self):
|
|
280
396
|
for bjd_cd, bjd_connector in self.bjd_connectors.items():
|
|
281
397
|
if len(bjd_connector.bottom_bjd_cd) != len(bjd_connector.bottom_bjd):
|
|
@@ -288,19 +404,21 @@ class BjdConnectorGraph():
|
|
|
288
404
|
@dataclass
|
|
289
405
|
class FullBjdConnector():
|
|
290
406
|
|
|
291
|
-
CA = ConvAddr()
|
|
292
|
-
BCG = BjdConnectorGraph()
|
|
293
|
-
bjd_connectors = BCG.bjd_connectors
|
|
294
|
-
multiple_word_sgg_list = CA.multiple_word_sgg_list
|
|
295
|
-
|
|
296
407
|
def __init__(
|
|
297
408
|
self,
|
|
298
409
|
full_bjd_cd: str,
|
|
299
410
|
full_bjd_nm: str,
|
|
300
411
|
created_dt: str,
|
|
301
412
|
deleted_dt: str,
|
|
302
|
-
before_bjd_cd: str
|
|
413
|
+
before_bjd_cd: str,
|
|
414
|
+
bjd_connectors,
|
|
415
|
+
multiple_word_sgg_list,
|
|
416
|
+
base_dt: str
|
|
303
417
|
):
|
|
418
|
+
self.base_dt = base_dt
|
|
419
|
+
self.base_dt_print: str = f"{self.base_dt[:4]}-{self.base_dt[4:6]}-{self.base_dt[6:8]}"
|
|
420
|
+
self.bjd_connectors = bjd_connectors
|
|
421
|
+
self.multiple_word_sgg_list = multiple_word_sgg_list
|
|
304
422
|
self.full_bjd_cd: str = full_bjd_cd
|
|
305
423
|
self.full_bjd_nm: str = full_bjd_nm
|
|
306
424
|
self.is_exist: bool = None
|
|
@@ -327,8 +445,15 @@ class FullBjdConnector():
|
|
|
327
445
|
self.emd_bjd_connector: Optional[BjdConnector] = None
|
|
328
446
|
self.ri_bjd_connector: Optional[BjdConnector] = None
|
|
329
447
|
self.is_exist = self._get_is_exist()
|
|
448
|
+
|
|
449
|
+
# CA = ConvAddr(base_dt=self.base_dt)
|
|
450
|
+
# BCG = BjdConnectorGraph(base_dt=self.base_dt, is_inherit=True)
|
|
451
|
+
# self.bjd_connectors = BCG.bjd_connectors
|
|
452
|
+
# self.multiple_word_sgg_list = BCG.multiple_word_sgg_list
|
|
453
|
+
|
|
330
454
|
self._get_bjd_connectors()
|
|
331
455
|
|
|
456
|
+
|
|
332
457
|
def _split_full_bjd_nm(
|
|
333
458
|
self
|
|
334
459
|
):
|
|
@@ -339,10 +464,12 @@ class FullBjdConnector():
|
|
|
339
464
|
return self.full_bjd_nm.split()
|
|
340
465
|
return []
|
|
341
466
|
|
|
467
|
+
|
|
342
468
|
def _get_is_exist(self):
|
|
343
469
|
if self.deleted_dt is not None: return False
|
|
344
470
|
return True
|
|
345
471
|
|
|
472
|
+
|
|
346
473
|
def _get_bjd_cd_and_nm_from_typ(
|
|
347
474
|
self,
|
|
348
475
|
bjd_connector
|
|
@@ -353,6 +480,7 @@ class FullBjdConnector():
|
|
|
353
480
|
setattr(self, f"{typ}_nm", bjd_connector.bjd_nm)
|
|
354
481
|
setattr(self, f"{typ}_bjd_connector", bjd_connector)
|
|
355
482
|
|
|
483
|
+
|
|
356
484
|
def _get_each_bjd_connector(
|
|
357
485
|
self,
|
|
358
486
|
bjd_connector_list: List[BjdConnector],
|
|
@@ -373,6 +501,7 @@ class FullBjdConnector():
|
|
|
373
501
|
if len(included_bjds) == 0:
|
|
374
502
|
raise ValueError(f"{[bjd_connector.bjd for bjd_connector in bjd_connector_list]}, Not in This Full Bjd Name List {full_bjd_nm_list}")
|
|
375
503
|
|
|
504
|
+
|
|
376
505
|
def _get_bjd_connectors(self):
|
|
377
506
|
full_bjd_nm_list = self._split_full_bjd_nm()
|
|
378
507
|
start_bjd_connector = self.bjd_connectors[self.full_bjd_cd] # 가장 작은 단위의 법정동
|
|
@@ -381,6 +510,7 @@ class FullBjdConnector():
|
|
|
381
510
|
except:
|
|
382
511
|
print(f"Error Full Bjd Name List: {full_bjd_nm_list}, Bjd Code: {self.full_bjd_cd}")
|
|
383
512
|
|
|
513
|
+
|
|
384
514
|
def _print(self):
|
|
385
515
|
print(f"full_bjd_cd: {self.full_bjd_cd}")
|
|
386
516
|
print(f"full_bjd_nm: {self.full_bjd_nm}")
|
|
@@ -406,25 +536,90 @@ class FullBjdConnector():
|
|
|
406
536
|
print(f"sgg_bjd_connector: {self.sgg_bjd_connector}")
|
|
407
537
|
print(f"emd_bjd_connector: {self.emd_bjd_connector}")
|
|
408
538
|
print(f"ri_bjd_connector: {self.ri_bjd_connector}")
|
|
539
|
+
print(f"base_dt: {self.base_dt_print}")
|
|
409
540
|
|
|
410
541
|
|
|
411
542
|
@dataclass
|
|
412
543
|
class FullBjdGConnectorGraph():
|
|
413
544
|
|
|
414
|
-
CA = ConvAddr()
|
|
415
|
-
BCG = BjdConnectorGraph()
|
|
416
|
-
bjd_df = CA.bjd_df
|
|
417
|
-
bjd_connectors = BCG.bjd_connectors
|
|
418
545
|
|
|
419
|
-
def __init__(
|
|
546
|
+
def __init__(
    self,
    base_dt: Optional[str] = None,
    is_inherit: bool = False
):
    """Validate the requested date, load data and build the full graph.

    Args:
        base_dt: Requested reference date as an 8-digit ``YYYYMMDD``
            string, or ``None`` to let ``_get_base_dt`` choose one.
        is_inherit: Stored on the instance and consulted by
            ``_get_base_dt``.

    Raises:
        TypeError: If ``base_dt`` is given but is not a string.
        ValueError: If ``base_dt`` is not all digits or is not exactly
            8 characters long.
    """
    # Flat validation chain: ``None`` skips every check, otherwise the first
    # failing check raises.
    if base_dt is None:
        pass
    elif not isinstance(base_dt, str):
        raise TypeError("type of object('base_dt') must be string")
    elif not base_dt.isdigit():
        raise ValueError("object('base_dt') should be a string consisting of numbers")
    elif len(base_dt) != 8:
        raise ValueError("object('base_dt') should be a string consisting of exactly 8(YYYYMMDD) digits")

    self.base_dt = base_dt
    self.is_inherit = is_inherit
    self.logger = Log('FullBjdGConnectorGraph').stream_handler("INFO")
    self.full_bjd_connectors: Dict[str, FullBjdConnector] = dict()
    self._get_base_dt()

    # Both helpers receive the already-resolved date.
    conv_addr = ConvAddr(base_dt=self.base_dt, is_inherit=True)
    connector_graph = BjdConnectorGraph(base_dt=self.base_dt, is_inherit=True)
    self.bjd_df = conv_addr.bjd_df
    self.bjd_connectors = connector_graph.bjd_connectors
    self.multiple_word_sgg_list = conv_addr.multiple_word_sgg_list

    # Create every full connector first, then link before/after relations.
    self._creates_full_bjd_connectors()
    self._update_before_and_after()
|
|
423
577
|
|
|
578
|
+
|
|
579
|
+
def _find_latest_base_dt(
|
|
580
|
+
self,
|
|
581
|
+
base_dts: List[str]
|
|
582
|
+
) -> str:
|
|
583
|
+
|
|
584
|
+
"""
|
|
585
|
+
입력된 날짜(YYYYMMDD)와 법정동 데이터 시점 리스트와 비교하여 입력된 날짜보다 과거 시점 중 최신 시점을 반환
|
|
586
|
+
"""
|
|
587
|
+
|
|
588
|
+
for date in base_dts:
|
|
589
|
+
if date < self.base_dt:
|
|
590
|
+
return date
|
|
591
|
+
|
|
592
|
+
# 입력된 날짜보다 작은 날짜가 없을 경우
|
|
593
|
+
self.logger.info("입력된 날짜보다 이전 시점의 법정동 데이터가 존재하지 않습니다. 보유한 데이터중 최신 데이터를 적용합니다.")
|
|
594
|
+
return base_dts[0]
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _get_base_dt(self):
|
|
598
|
+
|
|
599
|
+
"""
|
|
600
|
+
입력된 날짜(YYYYMMDD)와 법정동 데이터 시점 리스트와 비교하여 입력된 날짜보다 과거 시점 중 최신 시점을 반환 \n
|
|
601
|
+
입력된 날짜(YYYYMMDD)가 없으면 데이터 시점 리스트 중 최신 시점을 반환
|
|
602
|
+
"""
|
|
603
|
+
|
|
604
|
+
if self.is_inherit:
|
|
605
|
+
return self.base_dt
|
|
606
|
+
|
|
607
|
+
base_dts = _get_folder_names(base_folder_path='vdutils/data/bjd')
|
|
608
|
+
base_dts = sorted(base_dts, reverse=True)
|
|
609
|
+
try:
|
|
610
|
+
if self.base_dt is None:
|
|
611
|
+
self.base_dt = base_dts[0]
|
|
612
|
+
else:
|
|
613
|
+
self.base_dt = self._find_latest_base_dt(base_dts=base_dts)
|
|
614
|
+
finally:
|
|
615
|
+
self.logger.info(f"적용 법정동 데이터 시점: {self.base_dt}")
|
|
616
|
+
|
|
617
|
+
|
|
424
618
|
@staticmethod
|
|
425
619
|
def _replace_nan_with_none(df: pd.DataFrame):
|
|
426
620
|
return df.where(pd.notna(df), None)
|
|
427
621
|
|
|
622
|
+
|
|
428
623
|
def _creates_full_bjd_connectors(self):
|
|
429
624
|
self.bjd_df = self.bjd_df[[
|
|
430
625
|
"과거법정동코드",
|
|
@@ -446,9 +641,13 @@ class FullBjdGConnectorGraph():
|
|
|
446
641
|
full_bjd_nm=full_bjd_nm,
|
|
447
642
|
created_dt=created_dt,
|
|
448
643
|
deleted_dt=deleted_dt,
|
|
449
|
-
before_bjd_cd=before_bjd_cd
|
|
644
|
+
before_bjd_cd=before_bjd_cd,
|
|
645
|
+
bjd_connectors=self.bjd_connectors,
|
|
646
|
+
multiple_word_sgg_list=self.multiple_word_sgg_list,
|
|
647
|
+
base_dt=self.base_dt
|
|
450
648
|
)
|
|
451
649
|
|
|
650
|
+
|
|
452
651
|
def _update_before_and_after(self):
|
|
453
652
|
for bjd_cd, full_bjd_connector in self.full_bjd_connectors.items():
|
|
454
653
|
if full_bjd_connector.before_bjd_cd is not None \
|
|
@@ -462,29 +661,93 @@ class ConvAddrByBjdConnector():
|
|
|
462
661
|
|
|
463
662
|
bjd_connectors = None
|
|
464
663
|
full_bjd_connectors = None
|
|
465
|
-
file_name_bjd_connectors = pkg_resources.resource_filename(
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
)
|
|
469
|
-
file_name_full_bjd_connectors = pkg_resources.resource_filename(
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
)
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
664
|
+
# file_name_bjd_connectors = pkg_resources.resource_filename(
|
|
665
|
+
# "vdutils",
|
|
666
|
+
# "data/bjd_connectors.pkl"
|
|
667
|
+
# )
|
|
668
|
+
# file_name_full_bjd_connectors = pkg_resources.resource_filename(
|
|
669
|
+
# "vdutils",
|
|
670
|
+
# "data/full_bjd_connectors.pkl"
|
|
671
|
+
# )
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def __init__(
    self,
    base_dt: Optional[str] = None,
    is_inherit: bool = False
):
    """Validate the requested date, resolve it and load the connector data.

    Args:
        base_dt: Requested reference date as an 8-digit ``YYYYMMDD``
            string, or ``None`` to let ``_get_base_dt`` choose one.
        is_inherit: Stored on the instance and consulted by
            ``_get_base_dt``.

    Raises:
        TypeError: If ``base_dt`` is given but is not a string.
        ValueError: If ``base_dt`` is not all digits or is not exactly
            8 characters long.
    """
    # Flat validation chain: ``None`` skips every check, otherwise the first
    # failing check raises.
    if base_dt is None:
        pass
    elif not isinstance(base_dt, str):
        raise TypeError("type of object('base_dt') must be string")
    elif not base_dt.isdigit():
        raise ValueError("object('base_dt') should be a string consisting of numbers")
    elif len(base_dt) != 8:
        raise ValueError("object('base_dt') should be a string consisting of exactly 8(YYYYMMDD) digits")

    self.base_dt = base_dt
    self.is_inherit = is_inherit
    self.logger = Log('ConvAddrByBjdConnector').stream_handler("INFO")
    self._get_base_dt()
    self.load_data()

    # Display form YYYY-MM-DD, built from the resolved date.
    resolved = self.base_dt
    self.base_dt_print: str = f"{resolved[:4]}-{resolved[4:6]}-{resolved[6:8]}"
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
def _find_latest_base_dt(
|
|
700
|
+
self,
|
|
701
|
+
base_dts: List[str]
|
|
702
|
+
) -> str:
|
|
703
|
+
|
|
704
|
+
"""
|
|
705
|
+
입력된 날짜(YYYYMMDD)와 법정동 데이터 시점 리스트와 비교하여 입력된 날짜보다 과거 시점 중 최신 시점을 반환
|
|
706
|
+
"""
|
|
707
|
+
|
|
708
|
+
for date in base_dts:
|
|
709
|
+
if date < self.base_dt:
|
|
710
|
+
return date
|
|
711
|
+
|
|
712
|
+
# 입력된 날짜보다 작은 날짜가 없을 경우
|
|
713
|
+
self.logger.info("입력된 날짜보다 이전 시점의 법정동 데이터가 존재하지 않습니다. 보유한 데이터중 최신 데이터를 적용합니다.")
|
|
714
|
+
return base_dts[0]
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _get_base_dt(self):
|
|
718
|
+
|
|
719
|
+
"""
|
|
720
|
+
입력된 날짜(YYYYMMDD)와 법정동 데이터 시점 리스트와 비교하여 입력된 날짜보다 과거 시점 중 최신 시점을 반환 \n
|
|
721
|
+
입력된 날짜(YYYYMMDD)가 없으면 데이터 시점 리스트 중 최신 시점을 반환
|
|
722
|
+
"""
|
|
723
|
+
|
|
724
|
+
if self.is_inherit:
|
|
725
|
+
return self.base_dt
|
|
726
|
+
|
|
727
|
+
base_dts = _get_folder_names(base_folder_path='vdutils/data/bjd')
|
|
728
|
+
base_dts = sorted(base_dts, reverse=True)
|
|
729
|
+
try:
|
|
730
|
+
if self.base_dt is None:
|
|
731
|
+
self.base_dt = base_dts[0]
|
|
732
|
+
else:
|
|
733
|
+
self.base_dt = self._find_latest_base_dt(base_dts=base_dts)
|
|
734
|
+
finally:
|
|
735
|
+
self.logger.info(f"적용 법정동 데이터 시점: {self.base_dt}")
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
def load_data(self):
    """Populate ``bjd_connectors`` and ``full_bjd_connectors`` if unset.

    A ``FullBjdGConnectorGraph`` is built for ``self.base_dt`` only when at
    least one of the two attributes is still ``None``. Attributes that
    already hold a value are left untouched.
    """
    needs_connectors = self.bjd_connectors is None
    needs_full_connectors = self.full_bjd_connectors is None
    if not (needs_connectors or needs_full_connectors):
        # Both are already loaded; skip building the graph entirely.
        return

    graph = FullBjdGConnectorGraph(base_dt=self.base_dt, is_inherit=True)
    if needs_connectors:
        self.bjd_connectors = graph.bjd_connectors
    if needs_full_connectors:
        self.full_bjd_connectors = graph.full_bjd_connectors
|
|
750
|
+
|
|
488
751
|
|
|
489
752
|
def _get_bjd_connectors(
|
|
490
753
|
self,
|
|
@@ -501,6 +764,7 @@ class ConvAddrByBjdConnector():
|
|
|
501
764
|
return result
|
|
502
765
|
return None
|
|
503
766
|
|
|
767
|
+
|
|
504
768
|
def _get_correct_bjd_connector(
|
|
505
769
|
self,
|
|
506
770
|
addr: str
|
|
@@ -510,12 +774,14 @@ class ConvAddrByBjdConnector():
|
|
|
510
774
|
and bjd_connector.bjd_nm in addr:
|
|
511
775
|
return self._get_bjd_connectors(addr, bjd_connector)
|
|
512
776
|
|
|
777
|
+
|
|
513
778
|
def _get_full_bjd_connector_by_bjd_connector(
|
|
514
779
|
self,
|
|
515
780
|
bjd_connector: BjdConnector
|
|
516
781
|
):
|
|
517
782
|
return self.full_bjd_connectors[bjd_connector.bjd_cd]
|
|
518
783
|
|
|
784
|
+
|
|
519
785
|
def _get_recently_full_bjd_connector(
|
|
520
786
|
self,
|
|
521
787
|
full_bjd_connector: FullBjdConnector
|
|
@@ -527,6 +793,7 @@ class ConvAddrByBjdConnector():
|
|
|
527
793
|
return self._get_recently_full_bjd_connector(full_bjd_connector.after[0])
|
|
528
794
|
return full_bjd_connector
|
|
529
795
|
|
|
796
|
+
|
|
530
797
|
def get_full_bjd_connector_by_address(
|
|
531
798
|
self,
|
|
532
799
|
addr: Optional[str]
|