vos-data-utils 1.0.9__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vdutils/__init__.py +1 -1
- vdutils/bjd.py +27 -6
- vdutils/convaddr.py +67 -13
- vdutils/data/bjd/20250630/bjd.txt +49878 -0
- vdutils/data/bjd/20250630/bjd_changed.txt +13730 -0
- vdutils/data/bjd/20250630/bjd_current.txt +20557 -0
- vdutils/data/bjd/20250630/bjd_frequency_dictionary.txt +11301 -0
- vdutils/data/bjd/20250630/bjd_smallest.txt +9794 -0
- vdutils/data/bjd/20250630/multiple_word_sgg_list.txt +65 -0
- vdutils/data/date/date_dictionary.txt +40455 -40514
- vdutils/genpnu.py +10 -4
- vdutils/library/data.py +5129 -0
- vdutils/tests/test_genpnu.py +5 -5
- {vos_data_utils-1.0.9.dist-info → vos_data_utils-1.0.10.dist-info}/METADATA +23 -1
- {vos_data_utils-1.0.9.dist-info → vos_data_utils-1.0.10.dist-info}/RECORD +18 -12
- {vos_data_utils-1.0.9.dist-info → vos_data_utils-1.0.10.dist-info}/WHEEL +0 -0
- {vos_data_utils-1.0.9.dist-info → vos_data_utils-1.0.10.dist-info}/entry_points.txt +0 -0
- {vos_data_utils-1.0.9.dist-info → vos_data_utils-1.0.10.dist-info}/top_level.txt +0 -0
vdutils/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ Correction function to align with the current legal district by reflecting chang
|
|
|
10
10
|
Conversion function to transform address strings into Parcel Number (PNU)
|
|
11
11
|
Generation function for unique transaction case IDs in ValueofSpace
|
|
12
12
|
"""
|
|
13
|
-
version = "1.0.9"
|
|
13
|
+
version = "1.0.10"
|
|
14
14
|
author = "ValueOfSpace"
|
|
15
15
|
description = "description"
|
|
16
16
|
license = "MIT License"
|
vdutils/bjd.py
CHANGED
|
@@ -205,18 +205,38 @@ class Bjd():
|
|
|
205
205
|
return sgg_nm
|
|
206
206
|
|
|
207
207
|
|
|
208
|
+
@staticmethod
|
|
209
|
+
def _convert_bjd_nm(
|
|
210
|
+
bjd_nm: Optional[str]
|
|
211
|
+
) -> str:
|
|
212
|
+
"""
|
|
213
|
+
평사리의 한자 표기를 올바른 형태로 변환한다
|
|
214
|
+
bjd_nm is None 일 경우 ''을 반환한다
|
|
215
|
+
"""
|
|
216
|
+
|
|
217
|
+
if bjd_nm is not None and bjd_nm in ['평(坪)사리', '평(平)사리']:
|
|
218
|
+
# 평사리의 한자 표기 변환 (예: 평(坪)사리 -> 평사리(坪沙), 평(平)사리 -> 평사리(平沙))
|
|
219
|
+
bjd_nm = re.sub(r'평\(坪\)사리', '평사리(坪沙)', bjd_nm)
|
|
220
|
+
bjd_nm = re.sub(r'평\(平\)사리', '평사리(平沙)', bjd_nm)
|
|
221
|
+
return bjd_nm
|
|
222
|
+
return bjd_nm
|
|
223
|
+
|
|
224
|
+
|
|
208
225
|
@staticmethod
|
|
209
226
|
def _clean_bjd_nm(
|
|
210
227
|
bjd_nm: Optional[str]
|
|
211
228
|
) -> str:
|
|
212
229
|
"""
|
|
213
|
-
행정구역명에서 한글과 숫자를 제외하고 삭제하는 기능
|
|
230
|
+
행정구역명에서 한글과 숫자를 제외하고 삭제하는 기능
|
|
231
|
+
한자 표기가 포함된 괄호는 유지한다
|
|
214
232
|
bjd_nm is None 일 경우 ''을 반환한다
|
|
215
233
|
"""
|
|
216
234
|
|
|
217
235
|
if bjd_nm is not None:
|
|
218
|
-
|
|
219
|
-
|
|
236
|
+
# 한자 표기가 포함된 괄호는 유지하고, 다른 특수문자만 제거
|
|
237
|
+
# 한자 범위: \u4e00-\u9fff (CJK Unified Ideographs)
|
|
238
|
+
bjd_nm = re.sub(r'\([^)]*[^\u4e00-\u9fff)][^)]*\)', '', bjd_nm)
|
|
239
|
+
bjd_nm = re.sub(r'[^ 0-9ㄱ-ㅎ가-힣()\u4e00-\u9fff]+', '', bjd_nm)
|
|
220
240
|
return bjd_nm
|
|
221
241
|
return ''
|
|
222
242
|
|
|
@@ -249,6 +269,7 @@ class Bjd():
|
|
|
249
269
|
res_df['시군구명'] = res_df['시군구명'].apply(lambda x: self._clean_bjd_nm(x))
|
|
250
270
|
res_df['읍면동명'] = res_df['읍면동명'].apply(lambda x: self._clean_bjd_nm(x))
|
|
251
271
|
res_df['리명'] = res_df['리명'].apply(lambda x: self._clean_bjd_nm(x))
|
|
272
|
+
res_df['리명'] = res_df['리명'].apply(lambda x: self._convert_bjd_nm(x)) # NOTE 평사리 한자 표기 변환
|
|
252
273
|
res_df['시도명'] = res_df['시도명'].apply(lambda x: x.replace(' ', ''))
|
|
253
274
|
# res_df['시군구명'] = res_df['시군구명'].apply(lambda x: x.replace(' ', ''))
|
|
254
275
|
res_df['읍면동명'] = res_df['읍면동명'].apply(lambda x: x.replace(' ', ''))
|
|
@@ -288,7 +309,7 @@ class Bjd():
|
|
|
288
309
|
)
|
|
289
310
|
self.logger.info("Success Saved Bjd Dataframe To Text File")
|
|
290
311
|
if self.multiple_word_sgg_list is not None:
|
|
291
|
-
with open(self.file_name_multiple_word_sgg_list, 'w') as f:
|
|
312
|
+
with open(self.file_name_multiple_word_sgg_list, 'w', encoding=self.encoding) as f:
|
|
292
313
|
f.writelines('\n'.join(self.multiple_word_sgg_list))
|
|
293
314
|
f.close()
|
|
294
315
|
self.logger.info("Success Saved Multiple Sigungu List To Text File")
|
|
@@ -513,7 +534,7 @@ class SmallestBjd(CurrentBjd):
|
|
|
513
534
|
def _save_smallest_bjd(self):
|
|
514
535
|
if self.smallest_bjd_list is None:
|
|
515
536
|
self._create_smallest_bjd()
|
|
516
|
-
with open(self.file_name_bjd_smallest, 'w') as f:
|
|
537
|
+
with open(self.file_name_bjd_smallest, 'w', encoding=self.encoding) as f:
|
|
517
538
|
f.writelines('\n'.join(self.smallest_bjd_list))
|
|
518
539
|
f.close()
|
|
519
540
|
self.logger.info("Success Saved Changed Smallest Bjd Name List To Text File")
|
|
@@ -544,7 +565,7 @@ class BjdFrequencyDictionary(CurrentBjd):
|
|
|
544
565
|
if self.bjd_frequency_dictionary is None:
|
|
545
566
|
self._create_bjd_frequency_dictionary()
|
|
546
567
|
bjd_frequency_list = list((key, value) for key, value in self.bjd_frequency_dictionary.items())
|
|
547
|
-
with open(self.file_name_bjd_frequency_dictionary, 'w') as f:
|
|
568
|
+
with open(self.file_name_bjd_frequency_dictionary, 'w', encoding=self.encoding) as f:
|
|
548
569
|
vstr = ''
|
|
549
570
|
sep = ','
|
|
550
571
|
for line in bjd_frequency_list:
|
vdutils/convaddr.py
CHANGED
|
@@ -161,13 +161,13 @@ class ConvAddr():
|
|
|
161
161
|
|
|
162
162
|
def _prepare(self):
|
|
163
163
|
|
|
164
|
-
with open(self.file_name_bjd_current, 'r') as file_bjd_current:
|
|
164
|
+
with open(self.file_name_bjd_current, 'r', encoding=self.encoding) as file_bjd_current:
|
|
165
165
|
self.bjd_current_dic: Dict[str, str] = dict((line.split('\t')[2], line.split('\t')[9].replace('\n', '')) for line in file_bjd_current)
|
|
166
166
|
|
|
167
|
-
with open(self.file_name_bjd_smallest, 'r') as file_bjd_smallest:
|
|
167
|
+
with open(self.file_name_bjd_smallest, 'r', encoding=self.encoding) as file_bjd_smallest:
|
|
168
168
|
self.bjd_smallest_list: List[str] = [line.strip() for line in file_bjd_smallest]
|
|
169
169
|
|
|
170
|
-
with open(self.file_name_multiple_word_sgg_list, 'r') as file_multiple_word_sgg_list:
|
|
170
|
+
with open(self.file_name_multiple_word_sgg_list, 'r', encoding=self.encoding) as file_multiple_word_sgg_list:
|
|
171
171
|
self.multiple_word_sgg_list: List[str] = [line.strip() for line in file_multiple_word_sgg_list]
|
|
172
172
|
|
|
173
173
|
bjd_df: pd.DataFrame = pd.read_csv(
|
|
@@ -177,7 +177,13 @@ class ConvAddr():
|
|
|
177
177
|
encoding=self.encoding,
|
|
178
178
|
dtype={
|
|
179
179
|
'과거법정동코드': str,
|
|
180
|
-
'법정동코드': str
|
|
180
|
+
'법정동코드': str,
|
|
181
|
+
'생성일자': object,
|
|
182
|
+
'삭제일자': object,
|
|
183
|
+
'시도명': str,
|
|
184
|
+
'시군구명': str,
|
|
185
|
+
'읍면동명': str,
|
|
186
|
+
'리명': str
|
|
181
187
|
})
|
|
182
188
|
self.bjd_df = bjd_df
|
|
183
189
|
self.sido_list: List[str] = list(sido for sido in bjd_df['시도명'].unique() if isinstance(sido, str))
|
|
@@ -189,7 +195,17 @@ class ConvAddr():
|
|
|
189
195
|
self.file_name_bjd_current,
|
|
190
196
|
sep=self.sep,
|
|
191
197
|
engine='python',
|
|
192
|
-
encoding=self.encoding
|
|
198
|
+
encoding=self.encoding,
|
|
199
|
+
dtype={
|
|
200
|
+
'과거법정동코드': str,
|
|
201
|
+
'법정동코드': str,
|
|
202
|
+
'생성일자': object,
|
|
203
|
+
'삭제일자': object,
|
|
204
|
+
'시도명': str,
|
|
205
|
+
'시군구명': str,
|
|
206
|
+
'읍면동명': str,
|
|
207
|
+
'리명': str
|
|
208
|
+
}
|
|
193
209
|
)
|
|
194
210
|
self.bjd_current_bjd_nm_list: List[str] = list(bjd_nm for bjd_nm in bjd_current_df['법정동명'] if bjd_nm is not None)
|
|
195
211
|
bjd_current_df['시도시군구명'] = bjd_current_df[['시도명', '시군구명']].apply(lambda x: self._concat_sido_sgg(*x), axis=1)
|
|
@@ -207,8 +223,15 @@ class ConvAddr():
|
|
|
207
223
|
encoding=self.encoding,
|
|
208
224
|
dtype={
|
|
209
225
|
'과거법정동코드': str,
|
|
210
|
-
'법정동코드': str
|
|
226
|
+
'법정동코드': str,
|
|
227
|
+
'생성일자_변경전': object,
|
|
228
|
+
'삭제일자_변경전': object,
|
|
229
|
+
'생성일자_변경후': object,
|
|
230
|
+
'삭제일자_변경후': object,
|
|
231
|
+
'법정동명_변경전': str,
|
|
232
|
+
'법정동명_변경후': str
|
|
211
233
|
})
|
|
234
|
+
self.bjd_changed_df = bjd_changed_df
|
|
212
235
|
sub_bjd_changed_df = bjd_changed_df.loc[
|
|
213
236
|
(bjd_changed_df['법정동명_변경후'].isnull()==False) &
|
|
214
237
|
(bjd_changed_df['법정동명_변경전'].isnull()==False)
|
|
@@ -324,6 +347,21 @@ class ConvAddr():
|
|
|
324
347
|
return changed_bjd_list
|
|
325
348
|
|
|
326
349
|
|
|
350
|
+
def check_is_current_bjd(
|
|
351
|
+
self,
|
|
352
|
+
bjd_nm: str
|
|
353
|
+
) -> bool:
|
|
354
|
+
|
|
355
|
+
"""
|
|
356
|
+
입력된 법정동명이 현재 법정동명 리스트에 포함되어있는지 확인하여 반환
|
|
357
|
+
"""
|
|
358
|
+
|
|
359
|
+
if bjd_nm in self.bjd_current_bjd_nm_list:
|
|
360
|
+
return True
|
|
361
|
+
else:
|
|
362
|
+
return False
|
|
363
|
+
|
|
364
|
+
|
|
327
365
|
def correct_changed_bjd(
|
|
328
366
|
self,
|
|
329
367
|
addr: str,
|
|
@@ -358,20 +396,36 @@ class ConvAddr():
|
|
|
358
396
|
raise ValueError("bjd_changed_dic is None")
|
|
359
397
|
|
|
360
398
|
origin_addr: str = addr
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
399
|
+
last_changed_bjd_nm: str = None
|
|
400
|
+
max_iterations = 10 # 무한 루프 방지
|
|
401
|
+
iteration = 0
|
|
402
|
+
|
|
403
|
+
while iteration < max_iterations:
|
|
404
|
+
changed_list: List[str] = list()
|
|
405
|
+
for old_bjd_nm in self.bjd_changed_old_bjd_nm_list:
|
|
406
|
+
if old_bjd_nm in addr:
|
|
407
|
+
changed_list.append(old_bjd_nm)
|
|
408
|
+
|
|
409
|
+
changed_list = self.union_similar_changed_bjd(changed_list)
|
|
410
|
+
if not changed_list: # 더 이상 변경할 것이 없으면 종료
|
|
411
|
+
break
|
|
365
412
|
|
|
366
|
-
changed_list = self.union_similar_changed_bjd(changed_list)
|
|
367
|
-
if changed_list:
|
|
368
413
|
for changed_bjd_nm in changed_list:
|
|
369
414
|
after_changed_bjd_nm = self.bjd_changed_dic[changed_bjd_nm]
|
|
415
|
+
last_changed_bjd_nm = after_changed_bjd_nm
|
|
370
416
|
addr = addr.replace(changed_bjd_nm, after_changed_bjd_nm)
|
|
371
417
|
if is_log:
|
|
372
|
-
self.logger.info(f'{origin_addr}')
|
|
373
418
|
self.logger.info(f'해당 법정동명은 변경되었습니다. 변경전 : [ {changed_bjd_nm} ] 변경후 : [ {after_changed_bjd_nm} ]')
|
|
374
419
|
|
|
420
|
+
iteration += 1
|
|
421
|
+
|
|
422
|
+
if last_changed_bjd_nm is not None:
|
|
423
|
+
if self.check_is_current_bjd(bjd_nm=last_changed_bjd_nm):
|
|
424
|
+
if is_log:
|
|
425
|
+
self.logger.info(f'해당 법정동명은 현재 법정동명입니다.')
|
|
426
|
+
else:
|
|
427
|
+
if is_log:
|
|
428
|
+
self.logger.warning(f'해당 법정동명은 현재 법정동명이 아닙니다.')
|
|
375
429
|
return addr
|
|
376
430
|
|
|
377
431
|
|