vos-data-utils 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vdutils/__init__.py +1 -1
- vdutils/bjdconnector.py +3 -3
- vdutils/convaddr.py +33 -1
- vdutils/data/date/date_dictionary.txt +40788 -40485
- vdutils/genpnu.py +496 -139
- vdutils/library/data.py +14 -1
- vdutils/tests/test_convaddr.py +34 -0
- vdutils/tests/test_genpnu.py +697 -250
- {vos_data_utils-1.0.6.dist-info → vos_data_utils-1.0.7.dist-info}/METADATA +269 -83
- {vos_data_utils-1.0.6.dist-info → vos_data_utils-1.0.7.dist-info}/RECORD +13 -13
- {vos_data_utils-1.0.6.dist-info → vos_data_utils-1.0.7.dist-info}/WHEEL +0 -0
- {vos_data_utils-1.0.6.dist-info → vos_data_utils-1.0.7.dist-info}/entry_points.txt +0 -0
- {vos_data_utils-1.0.6.dist-info → vos_data_utils-1.0.7.dist-info}/top_level.txt +0 -0
vdutils/genpnu.py
CHANGED
|
@@ -6,13 +6,17 @@ from typing import (
|
|
|
6
6
|
List,
|
|
7
7
|
Dict,
|
|
8
8
|
Tuple,
|
|
9
|
+
Union,
|
|
9
10
|
Optional
|
|
10
11
|
)
|
|
11
12
|
from dataclasses import dataclass
|
|
12
13
|
from collections import defaultdict
|
|
13
14
|
from vdutils.library.data import (
|
|
14
15
|
SGG_SPLIT_LIST,
|
|
15
|
-
LAST_NM_REFINE_MAP
|
|
16
|
+
LAST_NM_REFINE_MAP,
|
|
17
|
+
LOAD_PATTERN_FOR_SLICE,
|
|
18
|
+
JIBUN_PATTERN_FOR_SLICE,
|
|
19
|
+
ETC_LAND_PATTERN_FOR_SPLIT
|
|
16
20
|
)
|
|
17
21
|
from vdutils.library import Log
|
|
18
22
|
from vdutils.data import (
|
|
@@ -21,6 +25,7 @@ from vdutils.data import (
|
|
|
21
25
|
__encoding__,
|
|
22
26
|
_get_folder_names
|
|
23
27
|
)
|
|
28
|
+
from vdutils.convaddr import ConvAddr
|
|
24
29
|
|
|
25
30
|
|
|
26
31
|
@dataclass
|
|
@@ -46,6 +51,7 @@ class GenPnu():
|
|
|
46
51
|
self.sep = __sep__
|
|
47
52
|
self.index: bool = __index__
|
|
48
53
|
self.encoding: str = __encoding__
|
|
54
|
+
self.origin_base_dt: str = base_dt
|
|
49
55
|
self.base_dt: Optional[str] = base_dt
|
|
50
56
|
self.bjd_current_df: pd.DataFrame() = None
|
|
51
57
|
self.bjd_current_nm_cd_dic = defaultdict(list)
|
|
@@ -66,6 +72,7 @@ class GenPnu():
|
|
|
66
72
|
"data/bjd"
|
|
67
73
|
)
|
|
68
74
|
|
|
75
|
+
|
|
69
76
|
def _find_latest_base_dt(
|
|
70
77
|
self,
|
|
71
78
|
base_dts: List[str]
|
|
@@ -99,7 +106,7 @@ class GenPnu():
|
|
|
99
106
|
else:
|
|
100
107
|
self.base_dt = self._find_latest_base_dt(base_dts=base_dts)
|
|
101
108
|
finally:
|
|
102
|
-
self.logger.info(f"적용 법정동 데이터 시점: {self.base_dt}")
|
|
109
|
+
self.logger.info(f"[GenPnu] 적용 법정동 데이터 시점: {self.base_dt}")
|
|
103
110
|
|
|
104
111
|
|
|
105
112
|
def _get_file_names(self):
|
|
@@ -173,6 +180,16 @@ class GenPnu():
|
|
|
173
180
|
self.logger.error(e)
|
|
174
181
|
|
|
175
182
|
|
|
183
|
+
def _import_convaddr(self):
    """
    Create the ConvAddr helper and cache the lookups derived from it.

    On success this sets:
        self.convaddr: a ConvAddr built with ``self.origin_base_dt`` (the
            base date exactly as the caller supplied it).
        self.bjd_smallest_list: ``self.convaddr.bjd_smallest_list``.
        self.convert_pnu_dic: ``self.convaddr.bjd_current_dic`` inverted
            (value -> key), skipping the entry whose key is the literal
            header text "법정동코드".

    Any failure is logged and swallowed. In that case the two lookups stay
    as empty containers, so address parsing later reports "not found"
    instead of failing on a missing attribute.
    """

    # Defaults first: the address-parsing methods read these attributes
    # unconditionally, even when ConvAddr could not be constructed.
    self.bjd_smallest_list = []
    self.convert_pnu_dic = {}
    try:
        self.convaddr = ConvAddr(base_dt=self.origin_base_dt)
        self.bjd_smallest_list = self.convaddr.bjd_smallest_list
        self.convert_pnu_dic = {
            bjd_nm: bjd_cd
            for bjd_cd, bjd_nm in self.convaddr.bjd_current_dic.items()
            if bjd_cd != "법정동코드"
        }
    except Exception as e:
        # Best-effort by design: GenPnu stays usable for code/name lookups
        # even if the address converter is unavailable.
        self.logger.error("Failed to import vdutils.convaddr module")
        self.logger.error(e)
|
|
191
|
+
|
|
192
|
+
|
|
176
193
|
def _prepare(self):
|
|
177
194
|
self._get_bjd_current_df(
|
|
178
195
|
file_name_bjd_current=self.file_name_bjd_current,
|
|
@@ -182,6 +199,7 @@ class GenPnu():
|
|
|
182
199
|
)
|
|
183
200
|
self._create_bjd_current_nm_cd_dic()
|
|
184
201
|
self._create_bjd_dic()
|
|
202
|
+
self._import_convaddr()
|
|
185
203
|
|
|
186
204
|
|
|
187
205
|
def get_bjd_cd(
|
|
@@ -214,14 +232,14 @@ class GenPnu():
|
|
|
214
232
|
if not re.match("^[가-힣0-9 ]+$", bjd_nm):
|
|
215
233
|
raise ValueError("object('bjd_nm') should consist of only Korean characters and numbers")
|
|
216
234
|
|
|
235
|
+
not_a_valid_district_response: Dict[str, Any] = {
|
|
236
|
+
"error": True,
|
|
237
|
+
"bjd_cd": None,
|
|
238
|
+
"deleted_dt": None,
|
|
239
|
+
"base_dt": self.base_dt_print,
|
|
240
|
+
"msg": f"'{bjd_nm}' is not a valid legal district name"
|
|
241
|
+
}
|
|
217
242
|
try:
|
|
218
|
-
not_a_valid_district_response: Dict[str, Any] = {
|
|
219
|
-
"error": True,
|
|
220
|
-
"bjd_cd": None,
|
|
221
|
-
"deleted_dt": None,
|
|
222
|
-
"base_dt": self.base_dt_print,
|
|
223
|
-
"msg": f"'{bjd_nm}' is not a valid legal district name"
|
|
224
|
-
}
|
|
225
243
|
bjd_nm = " ".join(bjd_nm.split())
|
|
226
244
|
|
|
227
245
|
if bjd_nm in self.bjd_current_nm_cd_dic:
|
|
@@ -289,7 +307,8 @@ class GenPnu():
|
|
|
289
307
|
"full_bjd_nm": Optional[str],
|
|
290
308
|
"created_dt": Optional[str],
|
|
291
309
|
"deleted_dt": Optional[str],
|
|
292
|
-
"base_dt": str
|
|
310
|
+
"base_dt": str,
|
|
311
|
+
"msg": str
|
|
293
312
|
}
|
|
294
313
|
"""
|
|
295
314
|
|
|
@@ -302,25 +321,25 @@ class GenPnu():
|
|
|
302
321
|
if len(bjd_cd) != 10:
|
|
303
322
|
raise ValueError("object('bjd_cd') should be a string consisting of exactly 10 digits")
|
|
304
323
|
|
|
324
|
+
not_a_valid_district_response: Dict[str, Any] = {
|
|
325
|
+
"error": True,
|
|
326
|
+
"sido_nm": None,
|
|
327
|
+
"sgg_nm": None,
|
|
328
|
+
"emd_nm": None,
|
|
329
|
+
"ri_nm": None,
|
|
330
|
+
"full_bjd_nm": None,
|
|
331
|
+
"created_dt": None,
|
|
332
|
+
"deleted_dt": None,
|
|
333
|
+
"base_dt": self.base_dt_print,
|
|
334
|
+
"msg": f"'{bjd_cd}' is not a valid legal district code"
|
|
335
|
+
}
|
|
305
336
|
try:
|
|
306
|
-
not_a_valid_district_response: Dict[str, Any] = {
|
|
307
|
-
"error": True,
|
|
308
|
-
"sido_nm": None,
|
|
309
|
-
"sgg_nm": None,
|
|
310
|
-
"emd_nm": None,
|
|
311
|
-
"ri_nm": None,
|
|
312
|
-
"full_bjd_nm": None,
|
|
313
|
-
"created_dt": None,
|
|
314
|
-
"deleted_dt": None,
|
|
315
|
-
"base_dt": self.base_dt_print,
|
|
316
|
-
"msg": f"'{bjd_cd}' is not a valid legal district code"
|
|
317
|
-
}
|
|
318
337
|
if bjd_cd in self.bjd_dic:
|
|
319
338
|
return {"error": False, **self.bjd_dic[bjd_cd], "base_dt": self.base_dt_print, "msg": ""}
|
|
320
339
|
else:
|
|
321
|
-
return
|
|
340
|
+
return not_a_valid_district_response
|
|
322
341
|
except Exception as e:
|
|
323
|
-
return {
|
|
342
|
+
return {**not_a_valid_district_response, "msg": str(e)}
|
|
324
343
|
|
|
325
344
|
|
|
326
345
|
@staticmethod
|
|
@@ -346,10 +365,11 @@ class GenPnu():
|
|
|
346
365
|
ValueError: If the 'jibun' object is not of the specified format.
|
|
347
366
|
|
|
348
367
|
Returns:
|
|
349
|
-
bool
|
|
368
|
+
dict: Validation result containing 'error' (bool) and 'msg' (str).
|
|
350
369
|
"""
|
|
351
370
|
|
|
352
|
-
msg = ""
|
|
371
|
+
msg = "Invalid 'jibun' format. Please follow the specified format."
|
|
372
|
+
"""
|
|
353
373
|
Invalid 'jibun' format. Please follow the specified format.
|
|
354
374
|
|
|
355
375
|
The address should include '산' and only contain digits except for '산' and '-'.
|
|
@@ -365,17 +385,17 @@ class GenPnu():
|
|
|
365
385
|
or jibun == "" \
|
|
366
386
|
or jibun is None \
|
|
367
387
|
or jibun[0] in ["B", "가", "지"]:
|
|
368
|
-
return True
|
|
388
|
+
return {"error": True, "msg": msg}
|
|
369
389
|
if "*" in jibun:
|
|
370
|
-
return True
|
|
390
|
+
return {"error": True, "msg": msg}
|
|
371
391
|
|
|
372
392
|
jibun = jibun.replace(" ", "")
|
|
373
393
|
pattern = re.compile(r'^(산\s*)?\d{1,4}-\d{1,4}$|^(산\s*)?\d{1,4}$|^\d{1,4}-\d{1,4}$|^\d{1,4}$')
|
|
374
394
|
|
|
375
395
|
if not bool(pattern.match(jibun)):
|
|
376
|
-
|
|
396
|
+
return {"error": True, "msg": msg}
|
|
377
397
|
|
|
378
|
-
return
|
|
398
|
+
return {"error": False, "msg": ""}
|
|
379
399
|
|
|
380
400
|
|
|
381
401
|
@staticmethod
|
|
@@ -401,7 +421,7 @@ class GenPnu():
|
|
|
401
421
|
def _get_jibun_datas(
|
|
402
422
|
jibun: str
|
|
403
423
|
) -> Tuple[str, str, str]:
|
|
404
|
-
|
|
424
|
+
|
|
405
425
|
"""
|
|
406
426
|
입력된 지번 문자열(지번 문자열 적합성 확인된 입력값)에서 본번과 부번을 분리하여 번, 지 코드를 반환
|
|
407
427
|
"""
|
|
@@ -427,6 +447,110 @@ class GenPnu():
|
|
|
427
447
|
return bunji_cd, bun, ji
|
|
428
448
|
|
|
429
449
|
|
|
450
|
+
@staticmethod
|
|
451
|
+
def _validate_pnu(
|
|
452
|
+
pnu: Optional[str]
|
|
453
|
+
) -> dict:
|
|
454
|
+
"""
|
|
455
|
+
입력된 PNU 문자열이 올바른 형식인지 정규식을 이용하여 검증하여 반환
|
|
456
|
+
|
|
457
|
+
Args:
|
|
458
|
+
pnu (str): Validates the format of the given PNU.
|
|
459
|
+
The PNU must be a 19-character string consisting only of digits.
|
|
460
|
+
|
|
461
|
+
Raises:
|
|
462
|
+
ValueError: If the 'pnu' object is not of the specified format.
|
|
463
|
+
|
|
464
|
+
Returns:
|
|
465
|
+
dict: Validation result containing 'error' (bool) and 'msg' (str).
|
|
466
|
+
"""
|
|
467
|
+
|
|
468
|
+
msg = "Invalid 'pnu' format. Please follow the specified format."
|
|
469
|
+
"""
|
|
470
|
+
Invalid 'pnu' format. Please follow the specified format.
|
|
471
|
+
|
|
472
|
+
The PNU must be a 19-character string consisting only of digits.
|
|
473
|
+
Example:
|
|
474
|
+
1234567890123456789
|
|
475
|
+
"""
|
|
476
|
+
|
|
477
|
+
if not pnu or not isinstance(pnu, str):
|
|
478
|
+
return {"error": True, "msg": msg}
|
|
479
|
+
|
|
480
|
+
pattern = re.compile(r'^\d{19}$')
|
|
481
|
+
if not pattern.match(pnu):
|
|
482
|
+
return {"error": True, "msg": msg}
|
|
483
|
+
|
|
484
|
+
return {"error": False, "msg": ""}
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _create_generate_pnu_result(
|
|
488
|
+
self,
|
|
489
|
+
error=True,
|
|
490
|
+
pnu=None,
|
|
491
|
+
bjd_cd=None,
|
|
492
|
+
mountain_cd=None,
|
|
493
|
+
bunji_cd=None,
|
|
494
|
+
bjd_datas=None,
|
|
495
|
+
bun=None,
|
|
496
|
+
ji=None,
|
|
497
|
+
msg=""
|
|
498
|
+
) -> Dict[str, Any]:
|
|
499
|
+
|
|
500
|
+
"""
|
|
501
|
+
결과 딕셔너리를 생성하는 유틸리티 함수
|
|
502
|
+
|
|
503
|
+
Returns:
|
|
504
|
+
Dict[str, Any]: {
|
|
505
|
+
"error": bool,
|
|
506
|
+
"pnu": Optional[str],
|
|
507
|
+
"bjd_cd": Optional[str],
|
|
508
|
+
"mountain_cd": Optional[str],
|
|
509
|
+
"bunji_cd": Optional[str],
|
|
510
|
+
"bjd_datas": {
|
|
511
|
+
"error": bool,
|
|
512
|
+
"sido_nm": Optional[str],
|
|
513
|
+
"sgg_nm": Optional[str],
|
|
514
|
+
"emd_nm": Optional[str],
|
|
515
|
+
"ri_nm": Optional[str],
|
|
516
|
+
"full_bjd_nm": Optional[str],
|
|
517
|
+
"created_dt": Optional[str],
|
|
518
|
+
"deleted_dt": Optional[str],
|
|
519
|
+
"base_dt": str,
|
|
520
|
+
"msg": str,
|
|
521
|
+
},
|
|
522
|
+
"bun": Optional[str],
|
|
523
|
+
"ji": Optional[str],
|
|
524
|
+
"base_dt": str,
|
|
525
|
+
"msg": str
|
|
526
|
+
}
|
|
527
|
+
"""
|
|
528
|
+
|
|
529
|
+
return {
|
|
530
|
+
"error": error,
|
|
531
|
+
"pnu": pnu,
|
|
532
|
+
"bjd_cd": bjd_cd,
|
|
533
|
+
"mountain_cd": mountain_cd,
|
|
534
|
+
"bunji_cd": bunji_cd,
|
|
535
|
+
"bjd_datas": bjd_datas or {
|
|
536
|
+
"error": True,
|
|
537
|
+
"sido_nm": None,
|
|
538
|
+
"sgg_nm": None,
|
|
539
|
+
"emd_nm": None,
|
|
540
|
+
"ri_nm": None,
|
|
541
|
+
"full_bjd_nm": None,
|
|
542
|
+
"created_dt": None,
|
|
543
|
+
"deleted_dt": None,
|
|
544
|
+
"base_dt": self.base_dt_print,
|
|
545
|
+
"msg": ""
|
|
546
|
+
},
|
|
547
|
+
"bun": bun,
|
|
548
|
+
"ji": ji,
|
|
549
|
+
"base_dt": self.base_dt_print,
|
|
550
|
+
"msg": msg
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
|
|
430
554
|
def generate_pnu(
|
|
431
555
|
self,
|
|
432
556
|
bjd_cd: str,
|
|
@@ -452,20 +576,30 @@ class GenPnu():
|
|
|
452
576
|
TypeError: If the 'jibun' object is not of type string.
|
|
453
577
|
ValueError: If the 'bjd_cd' object does not consist of digits only.
|
|
454
578
|
ValueError: If the 'bjd_cd' object does not consist of exactly 10 digits.
|
|
455
|
-
ValueError: If the 'jibun' object is not of the specified format.
|
|
456
579
|
|
|
457
580
|
Returns:
|
|
458
581
|
Dict[str, Any]: {
|
|
459
582
|
"error": bool,
|
|
460
|
-
"pnu": str,
|
|
461
|
-
"bjd_cd": str,
|
|
462
|
-
"mountain_cd": str,
|
|
463
|
-
"bunji_cd": str,
|
|
464
|
-
"bjd_datas":
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
583
|
+
"pnu": Optional[str],
|
|
584
|
+
"bjd_cd": Optional[str],
|
|
585
|
+
"mountain_cd": Optional[str],
|
|
586
|
+
"bunji_cd": Optional[str],
|
|
587
|
+
"bjd_datas": {
|
|
588
|
+
"error": bool,
|
|
589
|
+
"sido_nm": Optional[str],
|
|
590
|
+
"sgg_nm": Optional[str],
|
|
591
|
+
"emd_nm": Optional[str],
|
|
592
|
+
"ri_nm": Optional[str],
|
|
593
|
+
"full_bjd_nm": Optional[str],
|
|
594
|
+
"created_dt": Optional[str],
|
|
595
|
+
"deleted_dt": Optional[str],
|
|
596
|
+
"base_dt": str,
|
|
597
|
+
"msg": str,
|
|
598
|
+
},
|
|
599
|
+
"bun": Optional[str],
|
|
600
|
+
"ji": Optional[str],
|
|
601
|
+
"base_dt": str,
|
|
602
|
+
"msg": str
|
|
469
603
|
}
|
|
470
604
|
"""
|
|
471
605
|
|
|
@@ -481,55 +615,59 @@ class GenPnu():
|
|
|
481
615
|
if len(bjd_cd) != 10:
|
|
482
616
|
raise ValueError("object('bjd_cd') should be a string consisting of exactly 10 digits")
|
|
483
617
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
msg
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
618
|
+
is_valid_jibun = self._validate_jibun(jibun)
|
|
619
|
+
if is_valid_jibun.get("error") is True:
|
|
620
|
+
return self._create_generate_pnu_result(
|
|
621
|
+
error=True,
|
|
622
|
+
msg=is_valid_jibun.get("msg")
|
|
623
|
+
)
|
|
624
|
+
|
|
625
|
+
try:
|
|
626
|
+
bjd_datas = self.get_bjd_data(bjd_cd)
|
|
627
|
+
if bjd_datas.get("error") is True:
|
|
628
|
+
return self._create_generate_pnu_result(
|
|
629
|
+
error=True,
|
|
630
|
+
bjd_datas=bjd_datas,
|
|
631
|
+
msg=bjd_datas.get("msg")
|
|
632
|
+
)
|
|
633
|
+
|
|
634
|
+
else:
|
|
635
|
+
jibun = jibun.replace(" ", "")
|
|
636
|
+
jibun, mountain_cd = self._get_mountain_cd(jibun)
|
|
637
|
+
bunji_cd, bun, ji = self._get_jibun_datas(jibun)
|
|
638
|
+
is_valid_pnu = self._validate_pnu(f"{bjd_cd}{mountain_cd}{bunji_cd}")
|
|
639
|
+
|
|
640
|
+
if is_valid_pnu.get("error") is True:
|
|
641
|
+
return self._create_generate_pnu_result(
|
|
642
|
+
error=True,
|
|
643
|
+
pnu=f"{bjd_cd}{mountain_cd}{bunji_cd}",
|
|
644
|
+
bjd_cd=bjd_cd,
|
|
645
|
+
mountain_cd=mountain_cd,
|
|
646
|
+
bunji_cd=bunji_cd,
|
|
647
|
+
bjd_datas=bjd_datas,
|
|
648
|
+
bun=bun,
|
|
649
|
+
ji=ji,
|
|
650
|
+
msg=is_valid_pnu.get("msg")
|
|
651
|
+
)
|
|
505
652
|
|
|
506
653
|
else:
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
"bjd_cd": bjd_cd,
|
|
525
|
-
"mountain_cd": mountain_cd,
|
|
526
|
-
"bunji_cd": bunji_cd,
|
|
527
|
-
"bjd_datas": bjd_datas,
|
|
528
|
-
"bun": bun,
|
|
529
|
-
"ji": ji,
|
|
530
|
-
"msg": msg,
|
|
531
|
-
"base_dt": self.base_dt_print
|
|
532
|
-
}
|
|
654
|
+
return self._create_generate_pnu_result(
|
|
655
|
+
error=False,
|
|
656
|
+
pnu=f"{bjd_cd}{mountain_cd}{bunji_cd}",
|
|
657
|
+
bjd_cd=bjd_cd,
|
|
658
|
+
mountain_cd=mountain_cd,
|
|
659
|
+
bunji_cd=bunji_cd,
|
|
660
|
+
bjd_datas=bjd_datas,
|
|
661
|
+
bun=bun,
|
|
662
|
+
ji=ji,
|
|
663
|
+
msg=""
|
|
664
|
+
)
|
|
665
|
+
|
|
666
|
+
except Exception as e:
|
|
667
|
+
self._create_generate_pnu_result(
|
|
668
|
+
error=True,
|
|
669
|
+
msg=str(e)
|
|
670
|
+
)
|
|
533
671
|
|
|
534
672
|
|
|
535
673
|
def generate_pnu_from_bjd_nm(
|
|
@@ -539,7 +677,7 @@ class GenPnu():
|
|
|
539
677
|
) -> Dict[str, Any]:
|
|
540
678
|
|
|
541
679
|
"""
|
|
542
|
-
입력된 문자열(
|
|
680
|
+
입력된 문자열(법정동명, 지번)을 필지관리번호(pnu)로 변환하여 반환
|
|
543
681
|
|
|
544
682
|
Args:
|
|
545
683
|
bjd_nm (str): The input should be a string consisting of Korean administrative district names.
|
|
@@ -556,20 +694,30 @@ class GenPnu():
|
|
|
556
694
|
TypeError: If the 'bjd_nm' object is not of type string.
|
|
557
695
|
TypeError: If the 'jibun' object is not of type string.
|
|
558
696
|
ValueError: If the 'bjd_nm' object is not consist of only Korean characters and numbers.
|
|
559
|
-
ValueError: If the 'jibun' object is not of the specified format.
|
|
560
697
|
|
|
561
698
|
Returns:
|
|
562
699
|
Dict[str, Any]: {
|
|
563
700
|
"error": bool,
|
|
564
|
-
"pnu": str,
|
|
565
|
-
"bjd_cd": str,
|
|
566
|
-
"mountain_cd": str,
|
|
567
|
-
"bunji_cd": str,
|
|
568
|
-
"bjd_datas":
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
701
|
+
"pnu": Optional[str],
|
|
702
|
+
"bjd_cd": Optional[str],
|
|
703
|
+
"mountain_cd": Optional[str],
|
|
704
|
+
"bunji_cd": Optional[str],
|
|
705
|
+
"bjd_datas": {
|
|
706
|
+
"error": bool,
|
|
707
|
+
"sido_nm": Optional[str],
|
|
708
|
+
"sgg_nm": Optional[str],
|
|
709
|
+
"emd_nm": Optional[str],
|
|
710
|
+
"ri_nm": Optional[str],
|
|
711
|
+
"full_bjd_nm": Optional[str],
|
|
712
|
+
"created_dt": Optional[str],
|
|
713
|
+
"deleted_dt": Optional[str],
|
|
714
|
+
"base_dt": str,
|
|
715
|
+
"msg": str,
|
|
716
|
+
},
|
|
717
|
+
"bun": Optional[str],
|
|
718
|
+
"ji": Optional[str],
|
|
719
|
+
"base_dt": str,
|
|
720
|
+
"msg": str
|
|
573
721
|
}
|
|
574
722
|
"""
|
|
575
723
|
|
|
@@ -582,45 +730,254 @@ class GenPnu():
|
|
|
582
730
|
if not re.match("^[가-힣0-9 ]+$", bjd_nm):
|
|
583
731
|
raise ValueError("object('bjd_nm') should consist of only Korean characters and numbers")
|
|
584
732
|
|
|
585
|
-
|
|
733
|
+
try:
|
|
586
734
|
res = self.get_bjd_cd(bjd_nm=bjd_nm)
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
735
|
+
if res.get("error") is True:
|
|
736
|
+
return self._create_generate_pnu_result(
|
|
737
|
+
error=res.get("error"),
|
|
738
|
+
msg=res.get("msg")
|
|
739
|
+
)
|
|
740
|
+
else:
|
|
741
|
+
return self.generate_pnu(
|
|
742
|
+
bjd_cd=res.get("bjd_cd"),
|
|
743
|
+
jibun=jibun
|
|
744
|
+
)
|
|
745
|
+
except Exception as e:
|
|
746
|
+
return self._create_generate_pnu_result(
|
|
747
|
+
error=True,
|
|
748
|
+
msg=str(e)
|
|
749
|
+
)
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def _clean_bracket_and_content(
|
|
753
|
+
self,
|
|
754
|
+
string: str
|
|
755
|
+
) -> str:
|
|
756
|
+
|
|
757
|
+
"""
|
|
758
|
+
주소문자열의 ([]) 와 같은 괄호 및 괄호안 문자 제거하는 기능
|
|
759
|
+
"""
|
|
760
|
+
|
|
761
|
+
pattern = r'[\(\[].*?[\)\]]'
|
|
762
|
+
return re.sub(pattern, '', string).rstrip()
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def _split_etc_main_address(
    self,
    main_address: str,
    detail_address: str
) -> Dict[str, Any]:
    """
    Separate the "main etc" part from the detail part of an address.

    ``ETC_LAND_PATTERN_FOR_SPLIT`` is removed from ``detail_address``.
    What remains (stripped) becomes the cleaned detail address, and the
    text that was cut away becomes ``etc_main_address``. If nothing
    remains after the removal, ``detail_address`` is returned unchanged
    and ``etc_main_address`` is ``None``.

    NOTE(review): the pattern is defined in vdutils.library.data and is
    not visible here; presumably it matches "and N other parcels"
    wording. Confirm against that module.

    Args:
        main_address (str): Main address, passed through unchanged.
        detail_address (str): Text that followed the main address.

    Returns:
        Dict[str, Any]: {
            "error": bool,
            "main_address": Optional[str],
            "etc_main_address": Optional[str],
            "detail_address": Optional[str],
            "base_dt": str,
            "msg": str
        }
        On an unexpected exception ``error`` is True, the address fields
        are ``None`` and ``msg`` carries the exception text.
    """

    not_a_valid_district_response: Dict[str, Any] = {
        "error": True,
        "main_address": None,
        "etc_main_address": None,
        "detail_address": None,
        "base_dt": self.base_dt_print,
        "msg": f"'{detail_address}' is failed to split main address and main etc address"
    }
    try:
        # Run the substitution once and reuse it for the test and the value.
        remainder = re.sub(ETC_LAND_PATTERN_FOR_SPLIT, "", detail_address)
        if remainder:
            clean_detail_address = remainder.strip()
            # Whatever the pattern removed is the "main etc" fragment.
            etc_main_address = detail_address.replace(clean_detail_address, "")
            return {
                "error": False,
                "main_address": main_address,
                "etc_main_address": etc_main_address,
                "detail_address": clean_detail_address,
                "base_dt": self.base_dt_print,
                "msg": ""
            }
        return {
            "error": False,
            "main_address": main_address,
            "etc_main_address": None,
            "detail_address": detail_address,
            "base_dt": self.base_dt_print,
            "msg": ""
        }
    except Exception as e:
        return {**not_a_valid_district_response, "msg": str(e)}
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
def _split_main_and_detail_address(
    self,
    address: str
) -> Dict[str, Any]:
    """
    Split an address into main, "main etc" and detail parts.

    Bracketed text is stripped into a working copy. If that copy matches
    ``LOAD_PATTERN_FOR_SLICE`` the address is rejected. Otherwise the
    first ``JIBUN_PATTERN_FOR_SLICE`` match marks the end of the main
    address: everything up to and including the match is the main
    address, everything after it is the detail text, which is handed to
    ``_split_etc_main_address`` for the final split.

    NOTE(review): both patterns come from vdutils.library.data and are
    not visible here. "load address" in the rejection message presumably
    means a road-name address; the message is kept unchanged because
    callers may compare it.

    Args:
        address (str): Full address string.

    Returns:
        Dict[str, Any]: {
            "error": bool,
            "main_address": Optional[str],
            "etc_main_address": Optional[str],
            "detail_address": Optional[str],
            "base_dt": str,
            "msg": str
        }
        ``error`` is True (address fields ``None``) when the address is
        rejected, no lot-number match is found, or an exception occurs.
    """

    not_a_valid_district_response: Dict[str, Any] = {
        "error": True,
        "main_address": None,
        "etc_main_address": None,
        "detail_address": None,
        "base_dt": self.base_dt_print,
        "msg": f"'{address}' is failed to split main address and detail address"
    }
    try:
        cleaned_address = self._clean_bracket_and_content(string=address)
        if re.search(LOAD_PATTERN_FOR_SLICE, cleaned_address):
            return {**not_a_valid_district_response, "msg": f"'{address}' is load address"}

        match = re.search(JIBUN_PATTERN_FOR_SLICE, cleaned_address)
        if not match:
            return not_a_valid_district_response

        # Cut at the matched text rather than deleting the detail text with
        # str.replace: replace() removes every occurrence, which corrupts
        # the main address when the detail text also appears earlier.
        head, matched, tail = address.partition(match.group(0))
        if not matched:
            # The match exists only in the bracket-stripped copy (it spans
            # a removed bracket), so the original cannot be cut there.
            return not_a_valid_district_response

        main_address = (head + matched).strip()
        detail_address = tail.strip()
        return self._split_etc_main_address(main_address=main_address, detail_address=detail_address)
    except Exception as e:
        return {**not_a_valid_district_response, "msg": str(e)}
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
def _is_not_in_smallest_bjd(
|
|
843
|
+
self,
|
|
844
|
+
rest_main_address: str
|
|
845
|
+
) -> bool:
|
|
846
|
+
|
|
847
|
+
"""
|
|
848
|
+
입력된 지번 문자열(법정동명이 제외된 지번 문자열)에서 최소단위 법정동이 포함되어 있는지 확인하여 반환
|
|
849
|
+
"""
|
|
850
|
+
|
|
851
|
+
try:
|
|
852
|
+
check_word = rest_main_address.split()[0]
|
|
853
|
+
except:
|
|
854
|
+
check_word = rest_main_address
|
|
855
|
+
for bjd in self.bjd_smallest_list:
|
|
856
|
+
if check_word == bjd:
|
|
857
|
+
return False
|
|
858
|
+
return True
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _extract_bjd_from_address(
|
|
862
|
+
self,
|
|
863
|
+
main_address: str
|
|
864
|
+
) -> Union[str, None]:
|
|
865
|
+
|
|
866
|
+
"""
|
|
867
|
+
입력된 지번 문자열에서 번지 상세주소가 제외된 지번의 법정동 문자열을 추출하여 반환
|
|
868
|
+
법정동 딕셔너리에서 입력된 지번 문자열내에 존재하는 법정동이 있는지 조회하고 존재할 경우,
|
|
869
|
+
입력된 지번 문자열에서 매칭된 법정동을 잘라낸 나머지 문자열에 최소 단위 법정동명이 존재하는지 확인해서 최소 단위 법정동 레벨인지를 확인
|
|
870
|
+
매칭된 법정동이 최소 단위 법정동이면 반환
|
|
871
|
+
|
|
872
|
+
Args:
|
|
873
|
+
addr (str): The input korean address string.
|
|
874
|
+
|
|
875
|
+
Raises:
|
|
876
|
+
TypeError: If the 'address' object is not of type string.
|
|
877
|
+
|
|
878
|
+
Returns:
|
|
879
|
+
Union[str, None]
|
|
880
|
+
"""
|
|
881
|
+
|
|
882
|
+
if not isinstance(main_address, str):
|
|
883
|
+
raise TypeError("type of object('address') must be string")
|
|
884
|
+
|
|
885
|
+
try:
|
|
886
|
+
for key in self.convert_pnu_dic.keys():
|
|
887
|
+
if key in main_address and self.convert_pnu_dic[key][5:] != '00000':
|
|
888
|
+
rest_main_address = main_address.replace(key, '')
|
|
889
|
+
if main_address.split(key)[1][0] == ' ' \
|
|
890
|
+
and self._is_not_in_smallest_bjd(rest_main_address=rest_main_address):
|
|
891
|
+
return key
|
|
892
|
+
return None
|
|
893
|
+
|
|
894
|
+
except Exception as e:
|
|
895
|
+
return None
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def generate_pnu_from_address(
    self,
    address: str,
) -> Dict[str, Any]:
    """
    Convert a full lot-number address string into a parcel number (PNU).

    The address is split into main and detail parts, the legal-district
    name is located inside the main part, the text left after removing
    that name is taken as the lot number (jibun), and both are passed to
    ``generate_pnu_from_bjd_nm``.

    Args:
        address (str): Address containing a legal-district name followed
            by a lot number. The lot number is validated downstream and
            is expected in one of these forms:
                With mountain and sub-number: 산 0000-0000
                With mountain and no sub-number: 산 0000
                Without mountain and with sub-number: 0000-0000
                Without mountain and without sub-number: 0000

    Raises:
        TypeError: If the 'address' object is not of type string.
            Every other failure, including exceptions raised by the
            helpers, is reported through the returned dict.

    Returns:
        Dict[str, Any]: {
            "error": bool,
            "pnu": Optional[str],
            "bjd_cd": Optional[str],
            "mountain_cd": Optional[str],
            "bunji_cd": Optional[str],
            "bjd_datas": {
                "error": bool,
                "sido_nm": Optional[str],
                "sgg_nm": Optional[str],
                "emd_nm": Optional[str],
                "ri_nm": Optional[str],
                "full_bjd_nm": Optional[str],
                "created_dt": Optional[str],
                "deleted_dt": Optional[str],
                "base_dt": str,
                "msg": str,
            },
            "bun": Optional[str],
            "ji": Optional[str],
            "base_dt": str,
            "msg": str
        }
    """

    if not isinstance(address, str):
        raise TypeError("type of object('address') must be string")

    try:
        is_split_address = self._split_main_and_detail_address(address=address)
        if is_split_address.get("error") is True:
            return self._create_generate_pnu_result(
                error=is_split_address.get("error"),
                msg=is_split_address.get("msg")
            )

        main_address = is_split_address.get("main_address")
        bjd_nm = self._extract_bjd_from_address(main_address=main_address)
        if bjd_nm is None:
            return self._create_generate_pnu_result(
                error=True,
                msg=f"Failed to extract bjd name from address: {address}"
            )

        bjd_nm = bjd_nm.strip()
        jibun = main_address.replace(bjd_nm, "").strip()
        # str.replace never returns None, so test for an empty remainder
        # to detect an address that has no lot number.
        if not jibun:
            return self._create_generate_pnu_result(
                error=True,
                msg=f"Failed to extract valid jibun from address: {address}"
            )

        return self.generate_pnu_from_bjd_nm(bjd_nm=bjd_nm, jibun=jibun)

    except Exception as e:
        return self._create_generate_pnu_result(
            error=True,
            msg=str(e)
        )
|