ssdlab-region-parser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 SSDLab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.1
2
+ Name: ssdlab-region-parser
3
+ Version: 0.1.0
4
+ Summary: Chinese administrative region parser; does not include Hong Kong, Macau and Taiwan yet.
5
+ Author: ChufanHe
6
+ Author-email: sthechufan@gmail.com
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: pandas>=2.0
11
+ Requires-Dist: jieba>=0.42.1
12
+ Requires-Dist: openpyxl>=3.1.0
13
+
14
+ # region_parser
15
+ Chinese administrative region parser; does not include Hong Kong, Macau and Taiwan yet.
@@ -0,0 +1,2 @@
1
+ # region_parser
2
+ Chinese administrative region parser; does not include Hong Kong, Macau and Taiwan yet.
@@ -0,0 +1,24 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ssdlab-region-parser"
7
+ version = "0.1.0"
8
+ description = "Chinese administrative region parser; does not include Hong Kong, Macau and Taiwan yet."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "pandas>=2.0",
13
+ "jieba>=0.42.1",
14
+ "openpyxl>=3.1.0"
15
+ ]
16
+
17
+ [tool.setuptools]
18
+ package-dir = {"" = "src"}
19
+
20
+ [tool.setuptools.packages.find]
21
+ where = ["src"]
22
+
23
+ [tool.setuptools.package-data]
24
+ "region_parser.data" = ["*.xlsx"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,26 @@
1
"""Setuptools build script for the ssdlab-region-parser distribution.

Mirrors the metadata declared in pyproject.toml; kept for tooling that
still invokes setup.py directly.
"""

from setuptools import find_packages, setup

# The README doubles as the PyPI long description.
with open("README.md", "r", encoding="utf-8") as readme:
    LONG_DESCRIPTION = readme.read()

setup(
    name="ssdlab-region-parser",
    version="0.1.0",
    author="ChufanHe",
    author_email="sthechufan@gmail.com",
    description="Chinese administrative region parser, not include Hong Kong, Macau and Taiwan yet.",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    # src-layout: importable packages live under src/.
    package_dir={"": "src"},
    packages=find_packages(where="src"),
    python_requires=">=3.10",
    install_requires=[
        "pandas>=2.0",
        "jieba>=0.42.1",
        "openpyxl>=3.1.0",
    ],
    # Ship the bundled Excel reference workbook with the package.
    package_data={"region_parser.data": ["*.xlsx"]},
    include_package_data=True,
)
@@ -0,0 +1,3 @@
1
+ from .parser import CN_RegionParser
2
+
3
+ __all__ = ["CN_RegionParser"]
@@ -0,0 +1,75 @@
1
+ from dataclasses import dataclass, field
2
+ import pandas as pd
3
+
4
+
5
@dataclass
class CN_RegionParseResult:
    """Result of parsing one text: standardized region names/codes plus diagnostics.

    All six name/code fields default to pd.NA when the corresponding level
    could not be resolved.
    """

    # Resolved standard names and administrative codes.
    province_name: object = pd.NA
    province_code: object = pd.NA
    city_name: object = pd.NA
    city_code: object = pd.NA
    county_name: object = pd.NA
    county_code: object = pd.NA

    # Diagnostic fields; not exported by to_dict()/to_series().
    matched_words: list[str] = field(default_factory=list)
    kept_words: list[str] = field(default_factory=list)
    dropped_words: list[str] = field(default_factory=list)
    best_match: object = pd.NA

    def to_dict(self) -> dict:
        """Return the six name/code fields keyed by their Chinese labels."""
        return {
            "省级名称": self.province_name,
            "省级代码": self.province_code,
            "城市名称": self.city_name,
            "城市代码": self.city_code,
            "区县名称": self.county_name,
            "区县代码": self.county_code,
        }

    def to_series(self) -> pd.Series:
        """Return to_dict() as a pandas Series."""
        return pd.Series(self.to_dict())

    def to_tuple(self) -> tuple:
        """Return (province, code, city, code, county, code) as a tuple."""
        return tuple(self.to_list())

    def to_list(self) -> list:
        """Return [province, code, city, code, county, code]."""
        return [
            self.province_name,
            self.province_code,
            self.city_name,
            self.city_code,
            self.county_name,
            self.county_code,
        ]

    def to_str(self) -> str:
        """Return a single-line human-readable summary of names and codes."""
        return (
            f"省级名称:{self.province_name} 省份代码:{self.province_code} | "
            f"城市名称:{self.city_name} 城市代码:{self.city_code} | "
            f"区县名称:{self.county_name} 区县代码:{self.county_code}"
        )
60
+
61
+
62
@dataclass
class CN_RegionResources:
    """Bundle of lookup structures built once from the region reference table."""

    # Raw reference table loaded from the bundled Excel workbook.
    df_region_code: pd.DataFrame
    # Keyword lists per administrative level.
    province_keywords: list[str]
    city_keywords: list[str]
    county_keywords: list[str]
    # All keywords across levels, plus a set for O(1) membership tests.
    region_keywords: list[str]
    word_set: set[str]
    # keyword -> standard name, one mapping per level plus the merged mapping.
    province_dict: dict
    city_dict: dict
    county_dict: dict
    region_mapping_dict: dict
    # Table carrying the 'prov|city|county' path column, and keyword -> paths index.
    df_paths: pd.DataFrame
    word_to_paths: dict
@@ -0,0 +1,330 @@
1
+ import jieba
2
+ jieba.setLogLevel(jieba.logging.WARN)
3
+ import pandas as pd
4
+
5
+ from .resources import build_region_resources, load_keywords_into_jieba
6
+ from .models import CN_RegionParseResult
7
+
8
+
9
class CN_RegionParser:
    """Parse free-form Chinese text into standardized province/city/county regions.

    Pipeline: segment the input with jieba (three cut modes), keep only words
    that are known region keywords, filter them to a mutually consistent
    administrative chain, pick the most specific match, then resolve official
    names and codes from the bundled reference table.
    """

    def __init__(self):
        # Load the reference table and register every region keyword with
        # jieba so segmentation keeps region names intact.
        self.resources = build_region_resources()
        load_keywords_into_jieba(self.resources.region_keywords)

    @staticmethod
    def _deduplicate_keep_order(words: list[str]) -> list[str]:
        """Strip each word and drop NA/blank/duplicate entries, keeping first-seen order."""
        seen: set[str] = set()
        unique: list[str] = []
        for raw in words:
            if pd.isna(raw):
                continue
            word = str(raw).strip()
            if word and word not in seen:
                seen.add(word)
                unique.append(word)
        return unique

    def _extract_with(self, text: str, tokenize) -> str:
        """Tokenize *text* with *tokenize* and return known region words space-joined.

        Returns pd.NA for NA/blank input or when no token is a region keyword.
        Shared implementation behind the three extract_region* variants.
        """
        if pd.isna(text):
            return pd.NA
        text = str(text).strip()
        if not text:
            return pd.NA
        matched = [w for w in tokenize(text) if w in self.resources.word_set]
        matched = self._deduplicate_keep_order(matched)
        return " ".join(matched) if matched else pd.NA

    def extract_region(self, text: str) -> str:
        """Extract region keywords using jieba's precise mode."""
        return self._extract_with(text, jieba.lcut)

    def extract_region_lcut_all(self, text: str) -> str:
        """Extract region keywords using jieba's full mode (all possible words)."""
        return self._extract_with(text, lambda t: jieba.lcut(t, cut_all=True))

    def extract_region_lcut_for_search(self, text: str) -> str:
        """Extract region keywords using jieba's search-engine mode."""
        return self._extract_with(text, jieba.lcut_for_search)

    def extract_candidates(self, text: str) -> dict:
        """Run all three cut modes and merge their matches, order-preserving."""
        precise = self.extract_region(text)
        full = self.extract_region_lcut_all(text)
        search = self.extract_region_lcut_for_search(text)

        def as_list(joined) -> list[str]:
            return joined.split() if pd.notna(joined) else []

        merged = self._deduplicate_keep_order(
            as_list(precise) + as_list(full) + as_list(search)
        )
        return {
            "extract_region": precise,
            "extract_region_lcut_all": full,
            "extract_region_lcut_for_search": search,
            "merged_words": merged,
        }

    def can_coexist_in_same_path(self, w1: str, w2: str) -> bool:
        """True when both words appear somewhere along at least one common region path."""
        paths1 = self.resources.word_to_paths.get(w1, set())
        paths2 = self.resources.word_to_paths.get(w2, set())
        return len(paths1 & paths2) > 0

    def keep_words_by_chain(self, words: list[str]) -> dict:
        """Greedily keep words whose region paths intersect all previously kept words.

        Returns kept/dropped word lists plus the surviving path intersection.
        Words with no known path are always dropped.
        """
        if not words:
            return {"kept_words": [], "dropped_words": [], "common_paths": set()}

        kept: list[str] = []
        dropped: list[str] = []
        current_paths = None  # running intersection of paths of all kept words

        for word in words:
            word_paths = self.resources.word_to_paths.get(word, set())
            if not word_paths:
                dropped.append(word)
                continue
            if current_paths is None:
                kept.append(word)
                current_paths = set(word_paths)
                continue
            overlap = current_paths & word_paths
            if overlap:
                kept.append(word)
                current_paths = overlap
            else:
                dropped.append(word)

        return {
            "kept_words": kept,
            "dropped_words": dropped,
            "common_paths": current_paths if current_paths is not None else set(),
        }

    def parse(self, text: str):
        """Parse *text* and return a CN_RegionParseResult with names and codes."""
        candidates = self.extract_candidates(text)
        filtered = self.keep_words_by_chain(candidates["merged_words"])

        best = self.find_best_match(
            kept_words=filtered["kept_words"],
            common_paths=filtered["common_paths"],
        )
        prov, city, county = self.fill_region_by_best(
            best=best,
            context_list=filtered["kept_words"],
        )
        region_info = self.expand_region_codes(prov, city, county)

        return CN_RegionParseResult(
            province_name=region_info["省级名称"],
            province_code=region_info["省级代码"],
            city_name=region_info["城市名称"],
            city_code=region_info["城市代码"],
            county_name=region_info["区县名称"],
            county_code=region_info["区县代码"],
        )

    def map_words_to_standard_names(self, words: list[str]) -> list[str]:
        """Map keywords to standard names (unknown words pass through), deduped in order."""
        mapped: list[str] = []
        for word in words:
            std = self.resources.region_mapping_dict.get(word, word)
            if std not in mapped:
                mapped.append(std)
        return mapped

    @staticmethod
    def _split_path(path: str) -> tuple[str, str, str]:
        """Split a 'prov|city|county' path into a 3-tuple, padding missing parts with ''."""
        parts = str(path).split("|")
        parts = parts + [""] * (3 - len(parts))
        return parts[0], parts[1], parts[2]

    def _level_name_sets(self) -> tuple[set[str], set[str], set[str]]:
        """Return (province, city, county) standard-name sets from the reference table."""
        df = self.resources.df_region_code
        return (
            set(df["省级名称"].dropna().astype(str)),
            set(df["城市名称"].dropna().astype(str)),
            set(df["区县名称"].dropna().astype(str)),
        )

    def find_best_match(self, kept_words: list[str], common_paths: set) -> object:
        """Choose the single most specific standard region name from *kept_words*."""
        if not kept_words:
            return pd.NA

        std_words = self.map_words_to_standard_names(kept_words)
        province_names, city_names, county_names = self._level_name_sets()

        provs = [w for w in std_words if w in province_names]
        cities = [w for w in std_words if w in city_names]
        counties = [w for w in std_words if w in county_names]

        # Case 1: exactly one consistent path — return its finest populated level.
        if len(common_paths) == 1:
            prov, city, county = self._split_path(next(iter(common_paths)))
            if county:
                return county
            if city:
                return city
            if prov:
                return prov

        # Cases 2-4: a single unambiguous hit at some level, finest level first.
        if len(counties) == 1:
            return counties[0]
        if len(cities) == 1:
            return cities[0]
        if len(provs) == 1:
            return provs[0]

        # Case 5: multiple candidates — scan from the end of the word list for
        # the finest level (later words tend to be the more specific mention).
        for level_names in (county_names, city_names, province_names):
            for word in reversed(std_words):
                if word in level_names:
                    return word

        return pd.NA

    @staticmethod
    def _pick_best_row(rows: pd.DataFrame, std_context: list[str], score_cols: list[str]):
        """Return the row whose *score_cols* values overlap *std_context* the most.

        Falls back to the first row when nothing scores (shared by the city-
        and county-level disambiguation in fill_region_by_best).
        """
        best_score = -1
        best_row = None
        for _, row in rows.iterrows():
            score = sum(1 for col in score_cols if row[col] in std_context)
            if score > best_score:
                best_score = score
                best_row = row
        return best_row if best_row is not None else rows.iloc[0]

    def fill_region_by_best(self, best: object, context_list: list[str]) -> tuple[object, object, object]:
        """Expand *best* (a standard name at any level) into (province, city, county).

        Ambiguous city/county names (same name under several parents) are
        disambiguated by scoring candidate rows against the context words.
        """
        if pd.isna(best):
            return pd.NA, pd.NA, pd.NA

        df = self.resources.df_region_code
        std_context = self.map_words_to_standard_names(context_list)
        province_names, city_names, county_names = self._level_name_sets()

        # Province-level match: nothing finer can be inferred.
        if best in province_names:
            return best, pd.NA, pd.NA

        # City-level match: recover the province; county stays NA.
        if best in city_names:
            rows = df[df["城市名称"] == best]
            if rows.empty:
                return pd.NA, best, pd.NA
            row = self._pick_best_row(rows, std_context, ["省级名称", "城市名称"])
            return row["省级名称"], row["城市名称"], pd.NA

        # County-level match: recover the full province/city/county chain.
        if best in county_names:
            rows = df[df["区县名称"] == best]
            if rows.empty:
                return pd.NA, pd.NA, best
            row = self._pick_best_row(rows, std_context, ["省级名称", "城市名称", "区县名称"])
            return row["省级名称"], row["城市名称"], row["区县名称"]

        return pd.NA, pd.NA, pd.NA

    def expand_region_codes(self, prov: object, city: object, county: object) -> dict:
        """Resolve administrative codes for the given name combination.

        Looks up the most specific combination present in the reference table;
        names are echoed back as given, codes default to pd.NA when no row matches.
        """
        df = self.resources.df_region_code
        result = {
            "省级名称": prov,
            "省级代码": pd.NA,
            "城市名称": city,
            "城市代码": pd.NA,
            "区县名称": county,
            "区县代码": pd.NA,
        }

        if pd.notna(prov) and pd.notna(city) and pd.notna(county):
            rows = df[
                (df["省级名称"] == prov)
                & (df["城市名称"] == city)
                & (df["区县名称"] == county)
            ]
            if not rows.empty:
                row = rows.iloc[0]
                result["省级代码"] = row.get("省份代码", pd.NA)
                result["城市代码"] = row.get("城市代码", pd.NA)
                result["区县代码"] = row.get("区县代码", pd.NA)
            return result

        if pd.notna(prov) and pd.notna(city):
            rows = df[(df["省级名称"] == prov) & (df["城市名称"] == city)]
            if not rows.empty:
                row = rows.iloc[0]
                result["省级代码"] = row.get("省份代码", pd.NA)
                result["城市代码"] = row.get("城市代码", pd.NA)
                # BUGFIX: the original fell through to the province-only lookup
                # below, which re-ran the query and overwrote 省级代码 with the
                # code from the province's first row regardless of the city.
                return result

        if pd.notna(prov):
            rows = df[df["省级名称"] == prov]
            if not rows.empty:
                result["省级代码"] = rows.iloc[0].get("省份代码", pd.NA)

        return result
@@ -0,0 +1,136 @@
1
+ from importlib.resources import files
2
+ import pandas as pd
3
+ import jieba
4
+
5
+ from .models import CN_RegionResources
6
+
7
+
8
# Dotted package path that ships the bundled Excel reference workbook, and
# the workbook's filename inside it (read via importlib.resources.files).
# NOTE(review): SOURCES.txt lists src/ssdlab_region_parser/... packages, not
# region_parser — confirm this dotted path matches the installed package name.
+ RESOURCE_PACKAGE = "region_parser.data"
9
+ REGION_EXCEL_NAME = "CN_Region_Reference_Library.xlsx"
10
+
11
+
12
def split_keywords(series: pd.Series) -> list[str]:
    """Flatten a '|'-delimited keyword column into unique keywords, longest first.

    NA cells are skipped; keywords are stripped, blanks dropped, duplicates
    removed in first-seen order, then stably sorted by length (descending).
    """
    seen: set[str] = set()
    keywords: list[str] = []
    for cell in series.dropna().astype(str):
        for raw in cell.split("|"):
            word = raw.strip()
            if word and word not in seen:
                seen.add(word)
                keywords.append(word)
    # Longest first so longer region names take precedence downstream.
    keywords.sort(key=len, reverse=True)
    return keywords
24
+
25
+
26
def load_region_code_table() -> pd.DataFrame:
    """Read the bundled China region reference workbook into a DataFrame."""
    workbook = files(RESOURCE_PACKAGE).joinpath(REGION_EXCEL_NAME)
    return pd.read_excel(workbook)
29
+
30
+
31
def build_word_to_paths(df_paths: pd.DataFrame) -> dict:
    """Map every region keyword to the set of 'prov|city|county' paths it occurs on.

    Reads the three level keyword columns of *df_paths*; '|'-joined cells are
    split, stripped, and blank/NA entries skipped.
    """
    mapping: dict = {}
    keyword_columns = ("省级关键词", "市级关键词", "区县关键词")
    for _, row in df_paths.iterrows():
        path = row["path"]
        for column in keyword_columns:
            cell = row.get(column)
            if pd.isna(cell):
                continue
            for raw in str(cell).split("|"):
                keyword = raw.strip()
                if keyword:
                    mapping.setdefault(keyword, set()).add(path)
    return mapping
51
+
52
+
53
def _explode_keywords(df: pd.DataFrame, name_col: str, kw_col: str, code_col: str) -> dict:
    """Explode a '|'-joined keyword column and map each keyword to its standard name.

    Keywords are stripped, blanks dropped, duplicate rows collapsed. On keyword
    collisions the last occurrence wins (dict insertion order), matching the
    original per-level pipeline.
    """
    exploded = (
        df[[name_col, kw_col, code_col]]
        .dropna(subset=[kw_col])
        .assign(**{kw_col: lambda x: x[kw_col].str.split("|", regex=False)})
        .explode(kw_col)
        .assign(**{kw_col: lambda x: x[kw_col].astype(str).str.strip()})
    )
    exploded = (
        exploded[exploded[kw_col] != ""]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    return exploded.set_index(kw_col)[name_col].to_dict()


def build_region_resources() -> CN_RegionResources:
    """Load the reference table and build every lookup structure the parser needs.

    Returns a fully populated CN_RegionResources; called once per parser.
    """
    df_region_code = load_region_code_table()

    province_keywords = split_keywords(df_region_code["省级关键词"])
    city_keywords = split_keywords(df_region_code["市级关键词"])
    county_keywords = split_keywords(df_region_code["区县关键词"])

    # Longest first so longer region names take precedence during matching.
    region_keywords = sorted(
        set(province_keywords + city_keywords + county_keywords),
        key=len,
        reverse=True,
    )
    word_set = set(region_keywords)

    # keyword -> standard name, one mapping per administrative level
    # (the original repeated this explode pipeline three times inline).
    province_dict = _explode_keywords(df_region_code, "省级名称", "省级关键词", "省份代码")
    city_dict = _explode_keywords(df_region_code, "城市名称", "市级关键词", "城市代码")
    county_dict = _explode_keywords(df_region_code, "区县名称", "区县关键词", "区县代码")

    # Merged mapping; on key collisions county overrides city overrides
    # province, matching the original {**province, **city, **county} order.
    region_mapping_dict = {**province_dict, **city_dict, **county_dict}

    df_paths = df_region_code[
        ["省级名称", "城市名称", "区县名称", "省级关键词", "市级关键词", "区县关键词"]
    ].copy()
    # Canonical 'prov|city|county' path string used for consistency checks.
    df_paths["path"] = (
        df_paths["省级名称"].fillna("") + "|" +
        df_paths["城市名称"].fillna("") + "|" +
        df_paths["区县名称"].fillna("")
    )
    word_to_paths = build_word_to_paths(df_paths)

    return CN_RegionResources(
        df_region_code=df_region_code,
        province_keywords=province_keywords,
        city_keywords=city_keywords,
        county_keywords=county_keywords,
        region_keywords=region_keywords,
        word_set=word_set,
        province_dict=province_dict,
        city_dict=city_dict,
        county_dict=county_dict,
        region_mapping_dict=region_mapping_dict,
        df_paths=df_paths,
        word_to_paths=word_to_paths,
    )
132
+
133
+
134
def load_keywords_into_jieba(words: list[str]) -> None:
    """Register every region keyword with jieba so segmentation keeps them whole."""
    for keyword in words:
        jieba.add_word(keyword, freq=1000)
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.1
2
+ Name: ssdlab-region-parser
3
+ Version: 0.1.0
4
+ Summary: Chinese administrative region parser; does not include Hong Kong, Macau and Taiwan yet.
5
+ Author: ChufanHe
6
+ Author-email: sthechufan@gmail.com
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: pandas>=2.0
11
+ Requires-Dist: jieba>=0.42.1
12
+ Requires-Dist: openpyxl>=3.1.0
13
+
14
+ # region_parser
15
+ Chinese administrative region parser; does not include Hong Kong, Macau and Taiwan yet.
@@ -0,0 +1,24 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ src/SSDLab_region_parser/__init__.py
6
+ src/SSDLab_region_parser/models.py
7
+ src/SSDLab_region_parser/parser.py
8
+ src/SSDLab_region_parser/resources.py
9
+ src/SSDLab_region_parser.egg-info/PKG-INFO
10
+ src/SSDLab_region_parser.egg-info/SOURCES.txt
11
+ src/SSDLab_region_parser.egg-info/dependency_links.txt
12
+ src/SSDLab_region_parser.egg-info/requires.txt
13
+ src/SSDLab_region_parser.egg-info/top_level.txt
14
+ src/SSDLab_region_parser/data/__init__.py
15
+ src/ssdlab_region_parser/__init__.py
16
+ src/ssdlab_region_parser/models.py
17
+ src/ssdlab_region_parser/parser.py
18
+ src/ssdlab_region_parser/resources.py
19
+ src/ssdlab_region_parser.egg-info/PKG-INFO
20
+ src/ssdlab_region_parser.egg-info/SOURCES.txt
21
+ src/ssdlab_region_parser.egg-info/dependency_links.txt
22
+ src/ssdlab_region_parser.egg-info/requires.txt
23
+ src/ssdlab_region_parser.egg-info/top_level.txt
24
+ src/ssdlab_region_parser/data/__init__.py
@@ -0,0 +1,3 @@
1
+ pandas>=2.0
2
+ jieba>=0.42.1
3
+ openpyxl>=3.1.0