ustrade 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ustrade/__init__.py +29 -2
- ustrade/client.py +83 -3
- ustrade/codes.py +2 -3
- {ustrade-0.4.0.dist-info → ustrade-0.5.0.dist-info}/METADATA +9 -3
- ustrade-0.5.0.dist-info/RECORD +12 -0
- {ustrade-0.4.0.dist-info → ustrade-0.5.0.dist-info}/WHEEL +1 -1
- ustrade-0.4.0.dist-info/RECORD +0 -12
- {ustrade-0.4.0.dist-info → ustrade-0.5.0.dist-info}/licenses/LICENSE.txt +0 -0
- {ustrade-0.4.0.dist-info → ustrade-0.5.0.dist-info}/top_level.txt +0 -0
ustrade/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ from .codes import HSCode
|
|
|
5
5
|
from .errors import *
|
|
6
6
|
|
|
7
7
|
from importlib import metadata
|
|
8
|
+
from typing import Literal
|
|
8
9
|
|
|
9
10
|
try:
|
|
10
11
|
__version__ = metadata.version("ustrade")
|
|
@@ -158,6 +159,31 @@ def get_product(hs: str) -> HSCode:
|
|
|
158
159
|
"""
|
|
159
160
|
return _get_default_client().get_product(hs)
|
|
160
161
|
|
|
162
|
+
def search_for_code(keyword: str | list[str],
                    mode: Literal["OR", "AND"] = "OR",
                    in_codes: str | None = None) -> pd.DataFrame:
    """
    Search the HS code descriptions for one or more keywords.

    Module-level convenience wrapper: the call is forwarded unchanged to
    `search_for_code` on the default client.

    Args:
        keyword (str | list[str]):
            A single keyword or a list of keywords.
        mode (Literal["OR", "AND"]):
            How several keywords are combined. Default is "OR".
            "OR" returns every code associated with at least one keyword.
            "AND" returns only the codes associated with all the keywords.
        in_codes (str | None):
            The code chapter or heading to look in. The default, None,
            searches across all chapters.

    Returns:
        pd.DataFrame:
            A dataframe containing the list of associated codes.

    Raises:
        CodeNotFoundError:
            If `in_codes` is given but is not a known code.

    Examples:
        >>> ut.search_for_code(keyword="oil", in_codes="27")
    """
    return _get_default_client().search_for_code(keyword, mode, in_codes)
|
|
186
|
+
|
|
161
187
|
|
|
162
188
|
__all__ = [
|
|
163
189
|
"CensusClient",
|
|
@@ -171,5 +197,6 @@ __all__ = [
|
|
|
171
197
|
"get_country_by_iso2",
|
|
172
198
|
"get_desc_from_code",
|
|
173
199
|
"get_children_codes",
|
|
174
|
-
"get_product"
|
|
175
|
-
|
|
200
|
+
"get_product",
|
|
201
|
+
"search_for_code",
|
|
202
|
+
]
|
ustrade/client.py
CHANGED
|
@@ -2,7 +2,11 @@ import requests
|
|
|
2
2
|
import socket
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
import pandas as pd
|
|
5
|
+
import re
|
|
5
6
|
from urllib.parse import urlencode
|
|
7
|
+
from typing import Literal
|
|
8
|
+
import unicodedata
|
|
9
|
+
|
|
6
10
|
from . import countries
|
|
7
11
|
from .countries import Country
|
|
8
12
|
from . import codes
|
|
@@ -23,7 +27,7 @@ class CensusClient:
|
|
|
23
27
|
self.BASE_URL = "api.census.gov"
|
|
24
28
|
self.BASE_PORT = 443
|
|
25
29
|
|
|
26
|
-
self._hs_codes, self._codes_by_hs_codes = codes._load_codes()
|
|
30
|
+
self._hs_codes, self._codes_by_hs_codes, self._desc_by_hs_codes = codes._load_codes()
|
|
27
31
|
self._code_tree = codes.build_tree_from_codes(self._hs_codes)
|
|
28
32
|
|
|
29
33
|
self.col_mapping = {
|
|
@@ -397,8 +401,9 @@ class CensusClient:
|
|
|
397
401
|
"""
|
|
398
402
|
Returns the description of the specified HS code
|
|
399
403
|
|
|
400
|
-
|
|
401
|
-
hs (str):
|
|
404
|
+
Args:
|
|
405
|
+
hs (str):
|
|
406
|
+
the HS code (ex: '1806')
|
|
402
407
|
"""
|
|
403
408
|
if isinstance(hs, str):
|
|
404
409
|
if hs in self._codes_by_hs_codes:
|
|
@@ -477,6 +482,81 @@ class CensusClient:
|
|
|
477
482
|
raise InvalidCodeError(
|
|
478
483
|
f"Code must be a str or a HSCode instance - received a {type(code).__name__!r}"
|
|
479
484
|
)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _normalize_kw(self, s: str) -> str:
|
|
488
|
+
s = s.lower()
|
|
489
|
+
s = unicodedata.normalize("NFKD", s)
|
|
490
|
+
s = "".join(c for c in s if not unicodedata.combining(c))
|
|
491
|
+
s = re.sub(r"[^a-z0-9\s]+", " ", s)
|
|
492
|
+
return re.sub(r"\s+", " ", s).strip()
|
|
493
|
+
|
|
494
|
+
def _tokenize(self, s: str) -> list[str]:
|
|
495
|
+
return self._normalize_kw(s).split()
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def search_for_code(self, keyword : str | list[str],
|
|
499
|
+
mode : Literal["OR", "AND"] = "OR",
|
|
500
|
+
in_codes : str = None) -> pd.DataFrame:
|
|
501
|
+
"""
|
|
502
|
+
Research keywords in the HS Code description base.
|
|
503
|
+
|
|
504
|
+
Args:
|
|
505
|
+
keyword (str | list[str]):
|
|
506
|
+
a single keyword or a list of keywords.
|
|
507
|
+
mode (Literal["OR", "AND"]):
|
|
508
|
+
exclusive or inclusive search if keyword is a list. Default uses "OR".
|
|
509
|
+
"OR" mode will return every code associated with at least one word of the list.
|
|
510
|
+
"AND" mode will return only the codes associated with all the words of the list.
|
|
511
|
+
in_codes (str):
|
|
512
|
+
the code chapter or heading to look in. Default None will search across all chapters.
|
|
513
|
+
|
|
514
|
+
Returns:
|
|
515
|
+
pd.Dataframe:
|
|
516
|
+
A dataframe containing the list of associated codes.
|
|
517
|
+
|
|
518
|
+
Examples:
|
|
519
|
+
>>> ut.search_for_code(keyword = "oil", in_codes = "27")
|
|
520
|
+
|
|
521
|
+
"""
|
|
522
|
+
if in_codes is not None:
|
|
523
|
+
if in_codes not in self._codes_by_hs_codes:
|
|
524
|
+
raise CodeNotFoundError(f"Error : {in_codes} was not found as a valid code.")
|
|
525
|
+
|
|
526
|
+
keywords = self._tokenize(keyword) if isinstance(keyword, str) else list(keyword)
|
|
527
|
+
keywords = [self._normalize_kw(k) for k in keywords]
|
|
528
|
+
|
|
529
|
+
results_code = []
|
|
530
|
+
results_desc = []
|
|
531
|
+
|
|
532
|
+
for desc, code in self._desc_by_hs_codes.items():
|
|
533
|
+
if in_codes is not None and not code.hscode.startswith(in_codes):
|
|
534
|
+
continue
|
|
535
|
+
|
|
536
|
+
tokens = self._tokenize(desc)
|
|
537
|
+
|
|
538
|
+
def keyword_matches(k: str) -> bool:
|
|
539
|
+
return any(tok.startswith(k) for tok in tokens)
|
|
540
|
+
|
|
541
|
+
if mode == "OR":
|
|
542
|
+
ok = any(keyword_matches(k) for k in keywords)
|
|
543
|
+
else:
|
|
544
|
+
ok = all(keyword_matches(k) for k in keywords)
|
|
545
|
+
|
|
546
|
+
if ok:
|
|
547
|
+
results_code.append(code.hscode)
|
|
548
|
+
results_desc.append(code.description)
|
|
549
|
+
|
|
550
|
+
return pd.DataFrame({
|
|
551
|
+
"Description": results_desc,
|
|
552
|
+
"Code": results_code
|
|
553
|
+
})
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
|
|
480
560
|
|
|
481
561
|
|
|
482
562
|
|
ustrade/codes.py
CHANGED
|
@@ -19,7 +19,7 @@ class HSCode:
|
|
|
19
19
|
return self.parent
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
def _load_codes() -> tuple[list[HSCode], dict[str, HSCode]]:
|
|
22
|
+
def _load_codes() -> tuple[list[HSCode], dict[str, HSCode], dict[str, HSCode]]:
|
|
23
23
|
csv_path = files(__package__) / "data" / "harmonized-system.csv"
|
|
24
24
|
codes: list[HSCode] = []
|
|
25
25
|
with csv_path.open(encoding="utf-8") as f:
|
|
@@ -37,7 +37,7 @@ def _load_codes() -> tuple[list[HSCode], dict[str, HSCode]]:
|
|
|
37
37
|
)
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
return codes, {c.hscode: c for c in codes}
|
|
40
|
+
return codes, {c.hscode: c for c in codes}, {c.description: c for c in codes}
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
def _get_parent(code: str) -> str | None:
|
|
@@ -70,4 +70,3 @@ def build_tree_from_codes(codes: list[HSCode]) -> dict[str, HSCode]:
|
|
|
70
70
|
parent_node.children.append(node.hscode)
|
|
71
71
|
|
|
72
72
|
return code_dict
|
|
73
|
-
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ustrade
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Python client for the U.S. Census Bureau International Trade API
|
|
5
5
|
Author: Fantin Sibony
|
|
6
6
|
License-Expression: MIT
|
|
@@ -10,6 +10,8 @@ Description-Content-Type: text/markdown
|
|
|
10
10
|
License-File: LICENSE.txt
|
|
11
11
|
Requires-Dist: requests
|
|
12
12
|
Requires-Dist: pandas
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest; extra == "dev"
|
|
13
15
|
Dynamic: license-file
|
|
14
16
|
|
|
15
17
|
# ustrade
|
|
@@ -20,7 +22,9 @@ Dynamic: license-file
|
|
|
20
22
|
<p align="left">
|
|
21
23
|
<img src="https://img.shields.io/badge/python-3.10%2B-blue" />
|
|
22
24
|
<img src="https://img.shields.io/badge/status-active-success" />
|
|
25
|
+
<img src= "https://img.shields.io/pypi/v/ustrade" />
|
|
23
26
|
<img src="https://img.shields.io/badge/license-MIT-green" />
|
|
27
|
+
<img src="https://static.pepy.tech/personalized-badge/ustrade?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads" />
|
|
24
28
|
</p>
|
|
25
29
|
|
|
26
30
|
---
|
|
@@ -34,8 +38,10 @@ Dynamic: license-file
|
|
|
34
38
|
- `"4279"` (Census code)
|
|
35
39
|
- HS codes lookup + product descriptions
|
|
36
40
|
- Standardized DataFrame output with clean column names
|
|
37
|
-
|
|
38
|
-
-
|
|
41
|
+
|
|
42
|
+
For interactive exploration and non-Python usage, a Streamlit dashboard is available for this library at [https://ustrade.streamlit.app](https://ustrade.streamlit.app).
|
|
43
|
+
|
|
44
|
+
Source code: [https://github.com/fantinsib/ustrade_dashboard](https://github.com/fantinsib/ustrade_dashboard)
|
|
39
45
|
|
|
40
46
|
---
|
|
41
47
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
ustrade/__init__.py,sha256=NBsxO-lvNGvQ-iK3Qfmvg-6YsuGrdqq1QuhGoSYdHEQ,7016
|
|
2
|
+
ustrade/client.py,sha256=k_NyEN_IrZcIG7SfbfEqbAA6pKz3Er8ufjhFEVCm48k,19263
|
|
3
|
+
ustrade/codes.py,sha256=OA8thItlx0vczrzwmdPO3Skf3COwfGYE4K4hAW69oB8,1953
|
|
4
|
+
ustrade/countries.py,sha256=8CagTTccgs_Obr3dSpTffPKcikVe7eUdObcer-azIbU,783
|
|
5
|
+
ustrade/errors.py,sha256=AIKPwwb1fUmtvZkwqNpVAXevwD5jSz2NN8_iYzpM1Dk,802
|
|
6
|
+
ustrade/data/country_codes.csv,sha256=xwpr9MmBsIB78lIiKGDCaWDbGaO1kyDz0Vxwcxu1TTU,5794
|
|
7
|
+
ustrade/data/harmonized-system.csv,sha256=537p4c_RWkLR-t6ywGGNEPa57ZrsDl1mgm3H750xJqU,850576
|
|
8
|
+
ustrade-0.5.0.dist-info/licenses/LICENSE.txt,sha256=dwk5PMNyALOsBcKsVyrMjtoD0LvXr-Lu61cMjFJJn7I,1066
|
|
9
|
+
ustrade-0.5.0.dist-info/METADATA,sha256=HBXw6S5RRworsbL-8VeluS2q9ZIMw7d2mwCpY4bU5co,5084
|
|
10
|
+
ustrade-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
11
|
+
ustrade-0.5.0.dist-info/top_level.txt,sha256=sGFKm5WBJQE8rpKswtEFbRvBNzrON1Dr5UCdQptW2GE,8
|
|
12
|
+
ustrade-0.5.0.dist-info/RECORD,,
|
ustrade-0.4.0.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
ustrade/__init__.py,sha256=uOIWAm1QLJKPm0ajouB4t_NNh822k3g3xc8M6AITma0,5968
|
|
2
|
-
ustrade/client.py,sha256=txGvXKFSC88T74HmBd5Q9b9L-7Bvh_eu1sEX3SWB16k,16611
|
|
3
|
-
ustrade/codes.py,sha256=-4f54TUF9Dq93PYuzJli6d5HyGlWWmYlHqngb1GJe40,1900
|
|
4
|
-
ustrade/countries.py,sha256=8CagTTccgs_Obr3dSpTffPKcikVe7eUdObcer-azIbU,783
|
|
5
|
-
ustrade/errors.py,sha256=AIKPwwb1fUmtvZkwqNpVAXevwD5jSz2NN8_iYzpM1Dk,802
|
|
6
|
-
ustrade/data/country_codes.csv,sha256=xwpr9MmBsIB78lIiKGDCaWDbGaO1kyDz0Vxwcxu1TTU,5794
|
|
7
|
-
ustrade/data/harmonized-system.csv,sha256=537p4c_RWkLR-t6ywGGNEPa57ZrsDl1mgm3H750xJqU,850576
|
|
8
|
-
ustrade-0.4.0.dist-info/licenses/LICENSE.txt,sha256=dwk5PMNyALOsBcKsVyrMjtoD0LvXr-Lu61cMjFJJn7I,1066
|
|
9
|
-
ustrade-0.4.0.dist-info/METADATA,sha256=IhiWRMtf6vFlsmD88cITFl1F0f5HE_OmlZ4vGalWbsA,4602
|
|
10
|
-
ustrade-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
ustrade-0.4.0.dist-info/top_level.txt,sha256=sGFKm5WBJQE8rpKswtEFbRvBNzrON1Dr5UCdQptW2GE,8
|
|
12
|
-
ustrade-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|