ustrade 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ustrade/__init__.py +175 -0
- ustrade/client.py +490 -0
- ustrade/codes.py +73 -0
- ustrade/countries.py +35 -0
- ustrade/data/country_codes.csv +241 -0
- ustrade/data/harmonized-system.csv +6941 -0
- ustrade/errors.py +47 -0
- ustrade-0.4.0.dist-info/METADATA +189 -0
- ustrade-0.4.0.dist-info/RECORD +12 -0
- ustrade-0.4.0.dist-info/WHEEL +5 -0
- ustrade-0.4.0.dist-info/licenses/LICENSE.txt +21 -0
- ustrade-0.4.0.dist-info/top_level.txt +1 -0
ustrade/__init__.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from .countries import Country
|
|
3
|
+
from .client import CensusClient
|
|
4
|
+
from .codes import HSCode
|
|
5
|
+
from .errors import *
|
|
6
|
+
|
|
7
|
+
from importlib import metadata
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
__version__ = metadata.version("ustrade")
|
|
11
|
+
except metadata.PackageNotFoundError:
|
|
12
|
+
__version__ = "0.0.0"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
_default_client: CensusClient | None = None
|
|
16
|
+
|
|
17
|
+
def _get_default_client() -> CensusClient:
    """Return the module-wide CensusClient, creating it on first use.

    The instance is stored in the module-level ``_default_client`` so that
    every convenience function in this module shares the same client.
    """
    global _default_client
    client = _default_client
    if client is None:
        # First call: build the client once and remember it.
        client = CensusClient()
        _default_client = client
    return client
|
|
22
|
+
|
|
23
|
+
def get_imports(country : str| Country | list[str | Country], product : str|list[str], date : str)-> pd.DataFrame:
    """
    Return US import data for the given country/product pair(s) for one month.

    Delegates to ``CensusClient.get_imports`` on the shared default client.

    Args:
        country (str | Country | list[str | Country]): ISO2 code, full name,
            Census Bureau code, or a Country object (or a list of these).
        product (str | list[str]): HS code(s).
        date (str): the month, in format 'YYYY-MM'.

    Returns:
        pd.DataFrame: the frame produced by the default client.

    Examples:
        >>> ut.get_imports(["France", "GB"], ["12", "13"], "2018-03")
        >>> ut.get_imports("GB", "12", "2018-03")
    """
    client = _get_default_client()
    return client.get_imports(country, product, date)
|
|
36
|
+
|
|
37
|
+
def get_exports(country : str| Country | list[str | Country], product : str|list[str], date : str)-> pd.DataFrame:
    """
    Return US export data for the given country/product pair(s) for one month.

    Delegates to ``CensusClient.get_exports`` on the shared default client.

    Args:
        country (str | Country | list[str | Country]): ISO2 code, full name,
            Census Bureau code, or a Country object (or a list of these).
        product (str | list[str]): HS code(s).
        date (str): the month, in format 'YYYY-MM'.

    Returns:
        pd.DataFrame: the frame produced by the default client.

    Examples:
        >>> ut.get_exports(["France", "GB"], ["08", "09"], "2018-03")
        >>> ut.get_exports("GB", "08", "2018-03")
    """
    client = _get_default_client()
    return client.get_exports(country, product, date)
|
|
50
|
+
|
|
51
|
+
def get_imports_on_period(country : str| Country | list[str | Country], product : str|list[str], start: str, end: str)->pd.DataFrame:
    """
    Return US import data over a range of months.

    Delegates to ``CensusClient.get_imports_on_period`` on the shared default
    client.

    Args:
        country (str | Country | list[str | Country]):
            ISO2 code, full name, Census Bureau code, or a Country object.
        product (str | list[str]):
            HS code(s).
        start (str):
            First month of the range, in format "YYYY-MM".
        end (str):
            Last month of the range, in format "YYYY-MM".

    Returns:
        pd.DataFrame: the frame produced by the default client.

    Examples:
        >>> ut.get_imports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
        >>> from ustrade import CensusClient
        >>> c = CensusClient(timeout=120)
        >>> c.get_imports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")

    Notes:
        - Queries can take time to load.
        - Consider increasing `timeout` (build your own CensusClient as shown above).
        - Data is only available from 2010-01.
    """
    client = _get_default_client()
    return client.get_imports_on_period(country=country, product=product, start=start, end=end)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_exports_on_period(country : str| Country | list[str | Country], product : str|list[str], start: str, end: str)->pd.DataFrame:
    """
    Return US export data over a range of months.

    Delegates to ``CensusClient.get_exports_on_period`` on the shared default
    client.

    Args:
        country (str | Country | list[str | Country]):
            ISO2 code, full name, Census Bureau code, or a Country object.
        product (str | list[str]):
            HS code(s).
        start (str):
            First month of the range, in format "YYYY-MM".
        end (str):
            Last month of the range, in format "YYYY-MM".

    Returns:
        pd.DataFrame: the frame produced by the default client.

    Examples:
        >>> ut.get_exports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
        >>> from ustrade import CensusClient
        >>> c = CensusClient(timeout=120)
        >>> c.get_exports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")

    Notes:
        - Queries can take time to load.
        - Consider increasing `timeout` (build your own CensusClient as shown above).
        - Data is only available from 2010-01.
    """
    client = _get_default_client()
    return client.get_exports_on_period(country=country, product=product, start=start, end=end)
|
|
105
|
+
|
|
106
|
+
def get_country_by_name(country: str)-> Country:
    """
    Look up a country by its full name using the shared default client.

    Args:
        country (str): the full name of the country (ex: 'France')

    Returns:
        Country: the matching entry from the default client's country table.
    """
    client = _get_default_client()
    return client.get_country_by_name(country)
|
|
114
|
+
|
|
115
|
+
def get_country_by_code(cty_code: str) -> Country:
    """
    Look up a country by its Census Bureau code using the shared default client.

    Args:
        cty_code (str): the Census Bureau code of the country (ex: '4120')

    Returns:
        Country: the matching entry from the default client's country table.
    """
    return _get_default_client().get_country_by_code(cty_code)
|
|
123
|
+
|
|
124
|
+
def get_country_by_iso2(iso2: str) -> Country:
    """
    Look up a country by its ISO2 code using the shared default client.

    Args:
        iso2 (str): the ISO2 code of the country (ex: 'IT')

    Returns:
        Country: the matching entry from the default client's country table.
    """
    return _get_default_client().get_country_by_iso2(iso2)
|
|
132
|
+
|
|
133
|
+
def get_desc_from_code(hs: str) -> str:
    """
    Return the description associated with the given HS code.

    Delegates to ``CensusClient.get_desc_from_code`` on the shared default
    client.

    Args:
        hs (str): the HS code (ex: '73')

    Returns:
        str: the description stored for that code.
    """
    return _get_default_client().get_desc_from_code(hs)
|
|
141
|
+
|
|
142
|
+
def get_children_codes(code: str | HSCode, return_names = True)-> dict | list[str]:
    """
    Return the codes attached directly below ``code`` in the HS hierarchy.

    Delegates to ``CensusClient.get_children_codes`` on the shared default
    client.

    Args:
        code (str | HSCode): either the code as a string or the HSCode object
        return_names (bool): when true, ask for a dict of code -> description;
            when false, ask for a list of the child codes
    """
    client = _get_default_client()
    return client.get_children_codes(code=code, return_names=return_names)
|
|
151
|
+
|
|
152
|
+
def get_product(hs: str) -> HSCode:
    """
    Return the HSCode object holding the information stored for an HS code.

    Delegates to ``CensusClient.get_product`` on the shared default client.

    Args:
        hs (str): the HS code (ex: '1806')
    """
    client = _get_default_client()
    return client.get_product(hs)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Names exported by ``from ustrade import *``.
# HSCode is included because it is imported above and appears in the public
# signatures of get_children_codes() and get_product().
__all__ = [
    "CensusClient",
    "Country",
    "HSCode",
    "get_imports",
    "get_exports",
    "get_imports_on_period",
    "get_exports_on_period",
    "get_country_by_name",
    "get_country_by_code",
    "get_country_by_iso2",
    "get_desc_from_code",
    "get_children_codes",
    "get_product",
]
|
ustrade/client.py
ADDED
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import socket
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from urllib.parse import urlencode
|
|
6
|
+
from . import countries
|
|
7
|
+
from .countries import Country
|
|
8
|
+
from . import codes
|
|
9
|
+
from .codes import HSCode
|
|
10
|
+
from .errors import *
|
|
11
|
+
|
|
12
|
+
class CensusClient:
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def __init__(self, timeout=60, retries = 3):
    """Set up request settings, lookup tables and column metadata.

    Args:
        timeout: stored on the instance; passed as ``timeout`` to the HTTP
            requests and to the connectivity probe.
        retries: stored on the instance as ``self.retries``.
    """
    # --- request settings -------------------------------------------------
    self.timeout = timeout
    self.retries = retries
    self.BASE_URL = "api.census.gov"
    self.BASE_PORT = 443

    # --- country lookup tables (by Census code, lower-cased name, ISO2) ---
    self._country_codes = countries._load_countries()
    self._country_by_code = {c.code: c for c in self._country_codes}
    self._country_by_name = {c.name.lower(): c for c in self._country_codes}
    self._country_by_iso = {c.iso2.upper(): c for c in self._country_codes}

    # --- HS code table and hierarchy --------------------------------------
    self._hs_codes, self._codes_by_hs_codes = codes._load_codes()
    self._code_tree = codes.build_tree_from_codes(self._hs_codes)

    # API column name -> column name used in returned frames.
    self.col_mapping = {
        "CTY_CODE": "country_code",
        "CTY_NAME": "country_name",
        "I_ENDUSE": "product_code",
        "I_COMMODITY": "product_code",
        "E_COMMODITY": "product_code",
        "E_ENDUSE": "product_code",
        "I_ENDUSE_LDESC": "product_name",
        "E_ENDUSE_LDESC": "product_name",
        "I_COMMODITY_SDESC": "product_name",
        "E_COMMODITY_SDESC": "product_name",
        "GEN_VAL_MO": "import_value",
        "ALL_VAL_MO": "export_value",
        "CON_VAL_MO": "consumption_import_value",
        "YEAR": "year",
        "MONTH": "month",
    }

    # Output column name -> conversion keyword understood by _apply_types.
    self.type_map = {
        "import_value": "float",
        "export_value": "float",
        "product_name": "str",
        "product_code": "str",
        "consumption_import_value": "float",
        "country": "str",
        "time": "datetime",
        "date": "datetime",
        "country_code": "str",
    }

    # Columns kept (in this order, when present) in returned frames.
    self._cols_to_return = [
        "date",
        "country_name",
        "country_code",
        "product_name",
        "product_code",
        "import_value",
        "export_value",
        "consumption_import_value",
    ]
|
|
70
|
+
|
|
71
|
+
def _check_connectivity(self) -> bool:
    """
    Report whether a TCP connection to the API host can be opened.

    Returns:
        bool: True when a socket to ``(self.BASE_URL, self.BASE_PORT)`` is
        opened within ``self.timeout``; False when the attempt raises
        ``OSError`` (the error is printed).
    """
    endpoint = (self.BASE_URL, self.BASE_PORT)
    try:
        probe = socket.create_connection(endpoint, timeout=self.timeout)
    except OSError as err:
        print(err)
        return False
    # Leaving the ``with`` block closes the probe socket.
    with probe:
        return True
|
|
84
|
+
|
|
85
|
+
##### DATA RESEARCH FUNCTIONS #######
|
|
86
|
+
|
|
87
|
+
def get_imports(self, country : str| Country | list[str | Country], product : str|list[str], date : str)-> pd.DataFrame:
|
|
88
|
+
"""
|
|
89
|
+
Returns the import value from the US to the specified country of the product for the month
|
|
90
|
+
Args:
|
|
91
|
+
country (str | Country | list[str | Country]) : can be the ISO2 code, the full name, the Census Bureau code for this country, or a Country object
|
|
92
|
+
product (str | list[str]) : HS code
|
|
93
|
+
date (str): the month, in format 'YYYY-MM'
|
|
94
|
+
|
|
95
|
+
Examples:
|
|
96
|
+
>>> ut.get_imports(["France", "GB"], ["12", "13"], "2018-03")
|
|
97
|
+
>>> ut.get_imports("GB", "12", "2018-03")
|
|
98
|
+
"""
|
|
99
|
+
return self._get_flow(country, product, date=date, flux="imports")
|
|
100
|
+
|
|
101
|
+
def get_exports(self, country : str| Country | list[str | Country], product : str|list[str], date : str)-> pd.DataFrame:
|
|
102
|
+
"""
|
|
103
|
+
Returns the export value from the US to the specified country of the product for the month
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
country (str | Country | list[str | Country]) : can be the ISO2 code, the full name, the Census Bureau code for this country, or a Country object
|
|
107
|
+
product (str | list[str]) : HS code
|
|
108
|
+
date (str): the date, in format 'YYYY-MM'
|
|
109
|
+
Examples:
|
|
110
|
+
>>> ut.get_exports(["France", "GB"], ["08", "09"], "2018-03")
|
|
111
|
+
>>> ut.get_exports("GB", "08", "2018-03")
|
|
112
|
+
"""
|
|
113
|
+
return self._get_flow(country, product, date, "exports")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _build_params(self,
|
|
117
|
+
country: str|list,
|
|
118
|
+
product: str|list,
|
|
119
|
+
flux: str,
|
|
120
|
+
date:str = None,
|
|
121
|
+
start:str = None,
|
|
122
|
+
end:str= None)->dict:
|
|
123
|
+
|
|
124
|
+
if isinstance(country, (str, countries.Country)):
|
|
125
|
+
cty = self._normalize_country(country)
|
|
126
|
+
country = [cty]
|
|
127
|
+
if isinstance(country, list):
|
|
128
|
+
cty_list = []
|
|
129
|
+
for c in country:
|
|
130
|
+
cty_list.append(self._normalize_country(c))
|
|
131
|
+
country = cty_list
|
|
132
|
+
|
|
133
|
+
if isinstance(product, str):
|
|
134
|
+
product = [product]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
flux_letter = flux[0].upper()
|
|
138
|
+
|
|
139
|
+
if date:
|
|
140
|
+
dt = datetime.strptime(date, "%Y-%m")
|
|
141
|
+
year = dt.year
|
|
142
|
+
month = f"{dt.month:02d}"
|
|
143
|
+
date_range = False
|
|
144
|
+
|
|
145
|
+
if start and end:
|
|
146
|
+
dt_start = datetime.strptime(start, "%Y-%m")
|
|
147
|
+
year_start = dt_start.year
|
|
148
|
+
month_start = f"{dt_start.month:02d}"
|
|
149
|
+
|
|
150
|
+
dt_end = datetime.strptime(end, "%Y-%m")
|
|
151
|
+
year_end = dt_end.year
|
|
152
|
+
month_end = f"{dt_end.month:02d}"
|
|
153
|
+
time_range = f"from+{year_start}-{month_start}+to+{year_end}-{month_end}"
|
|
154
|
+
date_range=True
|
|
155
|
+
|
|
156
|
+
#Base arguments ####
|
|
157
|
+
if flux == 'imports':
|
|
158
|
+
params = {"get":
|
|
159
|
+
f"CTY_CODE,CTY_NAME,{flux_letter}_COMMODITY,{flux_letter}_COMMODITY_SDESC,GEN_VAL_MO,CON_VAL_MO"}
|
|
160
|
+
|
|
161
|
+
if flux == 'exports':
|
|
162
|
+
params = {'get' :
|
|
163
|
+
f"CTY_CODE,CTY_NAME,{flux_letter}_COMMODITY,{flux_letter}_COMMODITY_SDESC,ALL_VAL_MO"}
|
|
164
|
+
|
|
165
|
+
query = urlencode(params)
|
|
166
|
+
|
|
167
|
+
url = f"https://{self.BASE_URL}/data/timeseries/intltrade/{flux}/hs?{query}"
|
|
168
|
+
|
|
169
|
+
#Adding countries + codes: ####
|
|
170
|
+
for c in country:
|
|
171
|
+
url += f"&CTY_CODE={str(c)}"
|
|
172
|
+
for k in product:
|
|
173
|
+
url += f'&{flux_letter}_COMMODITY={str(k)}'
|
|
174
|
+
|
|
175
|
+
### Adding Time ranges: ###
|
|
176
|
+
|
|
177
|
+
if date_range:
|
|
178
|
+
url += f"&time={time_range}"
|
|
179
|
+
|
|
180
|
+
else:
|
|
181
|
+
url += f'&YEAR={year}&MONTH={month}'
|
|
182
|
+
return url
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _get_flow(self, country, product, date, flux):
    """Run a single-month query and return the prepared frame.

    Builds the URL with ``_build_params``, performs the GET with
    ``self.timeout`` and raises on an HTTP error status.  If the body cannot
    be decoded as JSON an empty DataFrame is returned (note that
    ``_get_flow_on_period`` raises ``EmptyResult`` in the same situation).
    Otherwise the first element of the payload is used as the column header
    and the remaining elements as rows.
    """
    request_url = self._build_params(country, product, flux=flux, date=date)

    reply = requests.get(request_url, timeout=self.timeout)
    reply.raise_for_status()

    try:
        payload = reply.json()
    except requests.exceptions.JSONDecodeError:
        return pd.DataFrame()

    frame = pd.DataFrame(payload[1:], columns=payload[0])
    return self._prepare_results(frame)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def get_imports_on_period(self, country : str| Country | list[str | Country], product : str|list[str], start: str, end: str)->pd.DataFrame:
|
|
206
|
+
"""
|
|
207
|
+
Return the imports on the specified period
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
country (str | Country | list[str | Country]):
|
|
211
|
+
ISO2 code, full name, Census Bureau code, or a Country object.
|
|
212
|
+
product (str | list[str]):
|
|
213
|
+
HS code.
|
|
214
|
+
start (str):
|
|
215
|
+
Starting date in format "YYYY-MM".
|
|
216
|
+
end (str):
|
|
217
|
+
Ending date in format "YYYY-MM".
|
|
218
|
+
|
|
219
|
+
Examples:
|
|
220
|
+
>>> ut.get_imports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
|
|
221
|
+
>>> from ustrade import CensusClient
|
|
222
|
+
>>> c = CensusClient(timeout=120)
|
|
223
|
+
>>> c.get_imports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")
|
|
224
|
+
|
|
225
|
+
Notes:
|
|
226
|
+
- Queries can take time to load.
|
|
227
|
+
- Consider increasing `timeout`.
|
|
228
|
+
- Data is only available from 2010-01.
|
|
229
|
+
"""
|
|
230
|
+
return self._get_flow_on_period(country, product, start=start,end= end,flux= 'imports')
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_exports_on_period(self, country : str| Country | list[str | Country], product : str|list[str], start: str, end: str)->pd.DataFrame:
|
|
234
|
+
"""
|
|
235
|
+
Return the exports on the specified period.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
country (str | Country | list[str | Country]):
|
|
239
|
+
ISO2 code, full name, Census Bureau code, or a Country object.
|
|
240
|
+
product (str | list[str]):
|
|
241
|
+
HS code(s).
|
|
242
|
+
start (str):
|
|
243
|
+
Start date in format "YYYY-MM".
|
|
244
|
+
end (str):
|
|
245
|
+
End date in format "YYYY-MM".
|
|
246
|
+
|
|
247
|
+
Examples:
|
|
248
|
+
>>> ut.get_exports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
|
|
249
|
+
>>> from ustrade import CensusClient
|
|
250
|
+
>>> c = CensusClient(timeout=120)
|
|
251
|
+
>>> c.get_exports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")
|
|
252
|
+
|
|
253
|
+
Notes:
|
|
254
|
+
- Queries can take time to load.
|
|
255
|
+
- Consider increasing `timeout`.
|
|
256
|
+
- Data is only available from 2010-01.
|
|
257
|
+
"""
|
|
258
|
+
return self._get_flow_on_period(country, product, start=start, end=end, flux='exports')
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _get_flow_on_period(self, country, product, start, end, flux):
    """Run a date-range query and return the prepared frame.

    Builds the URL with ``_build_params``, performs the GET with
    ``self.timeout`` and raises on an HTTP error status.  The first element of
    the decoded payload is used as the column header and the remaining
    elements as rows.

    Raises:
        EmptyResult: if the response body cannot be decoded as JSON.
    """
    request_url = self._build_params(country, product, flux=flux, start=start, end=end)

    reply = requests.get(request_url, timeout=self.timeout)
    reply.raise_for_status()

    try:
        payload = reply.json()
    except requests.exceptions.JSONDecodeError:
        raise EmptyResult(
            f"The query '{reply.url}' did not return any results."
        )

    frame = pd.DataFrame(payload[1:], columns=payload[0])
    return self._prepare_results_on_period(frame)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _prepare_results(self, df):
|
|
284
|
+
|
|
285
|
+
df = df.rename(columns=self.col_mapping)
|
|
286
|
+
|
|
287
|
+
df["date"] = (pd.to_datetime(
|
|
288
|
+
df["year"].astype(str) + "-" + df["month"].astype(str).str.zfill(2))
|
|
289
|
+
.dt.to_period('M')
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
existing_cols = [c for c in self._cols_to_return if c in df.columns]
|
|
295
|
+
|
|
296
|
+
df = df[existing_cols]
|
|
297
|
+
df = df.loc[:, ~df.columns.duplicated()]
|
|
298
|
+
|
|
299
|
+
return self._apply_types(df)
|
|
300
|
+
|
|
301
|
+
def _prepare_results_on_period(self, df):
|
|
302
|
+
df = df.rename(columns= self.col_mapping)
|
|
303
|
+
df["date"] = (
|
|
304
|
+
pd.to_datetime(df["time"], format="%Y-%m", errors="coerce")
|
|
305
|
+
.dt.to_period("M")
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
existing_cols = [c for c in self._cols_to_return if c in df.columns]
|
|
309
|
+
df = df[existing_cols]
|
|
310
|
+
df = df.loc[:, ~df.columns.duplicated()]
|
|
311
|
+
|
|
312
|
+
return self._apply_types(df)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _apply_types(self, df):
|
|
317
|
+
for col, t in self.type_map.items():
|
|
318
|
+
if col not in df:
|
|
319
|
+
continue
|
|
320
|
+
|
|
321
|
+
if t == "int":
|
|
322
|
+
df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)
|
|
323
|
+
|
|
324
|
+
elif t == "float":
|
|
325
|
+
df[col] = pd.to_numeric(df[col], errors="coerce").astype(float)
|
|
326
|
+
|
|
327
|
+
elif t == "datetime":
|
|
328
|
+
df[col] = (
|
|
329
|
+
df[col].astype(str).str.strip()
|
|
330
|
+
.str.replace(r"$", "-01", regex=True)
|
|
331
|
+
.pipe(pd.to_datetime, errors="coerce")
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
elif t == "str":
|
|
335
|
+
df[col] = df[col].astype(str)
|
|
336
|
+
|
|
337
|
+
return df.sort_values(by = "date").reset_index(drop=True)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
####### COUNTRIES FUNCTIONS #######
|
|
341
|
+
|
|
342
|
+
def get_country_by_name(self, country: str)-> countries.Country:
|
|
343
|
+
"""
|
|
344
|
+
Search a country with its name
|
|
345
|
+
"""
|
|
346
|
+
return self._country_by_name[country.lower()]
|
|
347
|
+
|
|
348
|
+
def get_country_by_code(self, cty_code: str)-> countries.Country:
|
|
349
|
+
"""
|
|
350
|
+
Search a country with its code
|
|
351
|
+
"""
|
|
352
|
+
return self._country_by_code[cty_code]
|
|
353
|
+
|
|
354
|
+
def get_country_by_iso2(self, iso2: str)-> countries.Country:
|
|
355
|
+
"""
|
|
356
|
+
Search a country with its ISO 2 ID
|
|
357
|
+
"""
|
|
358
|
+
return self._country_by_iso[iso2.upper()]
|
|
359
|
+
|
|
360
|
+
def _normalize_country(self, inp: str, output="code"):
    """Resolve a country identifier and return one of its attributes.

    Args:
        inp: a Country object, or a value whose stripped string form is tried
            in turn as an ISO2 code (upper-cased), a name (lower-cased) and a
            country code (as given).
        output: which attribute to return: "code", "name" or "iso2".

    Raises:
        ValueError: if no table contains the identifier, or if ``output`` is
            not one of the accepted values (checked once a country is found).
    """
    def pick(found):
        # Select the requested attribute of the resolved country.
        if output == "code":
            return found.code
        if output == "name":
            return found.name
        if output == "iso2":
            return found.iso2
        raise ValueError(f"Invalid output type: {output!r}")

    if isinstance(inp, countries.Country):
        return pick(inp)

    text = str(inp).strip()
    # Lookup order matters: ISO2 first, then name, then code.
    attempts = (
        (text.upper(), self._country_by_iso),
        (text.lower(), self._country_by_name),
        (text, self._country_by_code),
    )
    for key, table in attempts:
        if key in table:
            return pick(table[key])

    raise ValueError(f"Unknown country: {inp!r}")
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
####### HS CODES FUNCTIONS #######
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def get_desc_from_code(self, hs: str)->str:
|
|
397
|
+
"""
|
|
398
|
+
Returns the description of the specified HS code
|
|
399
|
+
|
|
400
|
+
## Args:
|
|
401
|
+
hs (str): the HS code (ex: '1806')
|
|
402
|
+
"""
|
|
403
|
+
if isinstance(hs, str):
|
|
404
|
+
if hs in self._codes_by_hs_codes:
|
|
405
|
+
return self._codes_by_hs_codes[hs].description
|
|
406
|
+
else:
|
|
407
|
+
if len(hs) == 1:
|
|
408
|
+
raise CodeNotFoundError(
|
|
409
|
+
f"HS code '{hs}' could not be found in the listed codes. Did you mean '0{hs}'?"
|
|
410
|
+
)
|
|
411
|
+
else:
|
|
412
|
+
raise CodeNotFoundError(
|
|
413
|
+
f"HS code '{hs}' could not be found in the listed codes."
|
|
414
|
+
)
|
|
415
|
+
else:
|
|
416
|
+
raise InvalidCodeError(
|
|
417
|
+
f"Code must be a str instance - received a {type(hs).__name__!r}"
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
def get_product(self, hs: str) -> HSCode:
|
|
421
|
+
"""
|
|
422
|
+
Returns all the informations on a specified HS code through a HSCode object
|
|
423
|
+
|
|
424
|
+
## Args:
|
|
425
|
+
hs (str): the HS code (ex: '1806')
|
|
426
|
+
"""
|
|
427
|
+
if isinstance(hs, str):
|
|
428
|
+
if hs in self._codes_by_hs_codes:
|
|
429
|
+
return self._codes_by_hs_codes[hs]
|
|
430
|
+
else:
|
|
431
|
+
if len(hs) == 1:
|
|
432
|
+
raise CodeNotFoundError(
|
|
433
|
+
f"HS code '{hs}' could not be found in the listed codes. Did you mean '0{hs}'?"
|
|
434
|
+
)
|
|
435
|
+
else:
|
|
436
|
+
raise CodeNotFoundError(
|
|
437
|
+
f"HS code '{hs}' could not be found in the listed codes."
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
else:
|
|
441
|
+
raise InvalidCodeError(
|
|
442
|
+
f"Code must be a str instance - received a {type(hs).__name__!r}"
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
def get_children_codes(self, code: str | HSCode, return_names = True)-> dict | list[str]:
|
|
446
|
+
"""
|
|
447
|
+
Returns a dict of the codes and their desc directly attached to code in the hierarchy
|
|
448
|
+
|
|
449
|
+
## Args:
|
|
450
|
+
code (str | HSCode): either the code as a string or the HSCode object
|
|
451
|
+
return_names (bool): returns a dict with the code and the description if true, a list of the codes if false
|
|
452
|
+
|
|
453
|
+
"""
|
|
454
|
+
if isinstance(code, str):
|
|
455
|
+
if code in self._codes_by_hs_codes:
|
|
456
|
+
if return_names:
|
|
457
|
+
res = {}
|
|
458
|
+
for p in self.get_product(code)._get_children():
|
|
459
|
+
res[p] = self.get_desc_from_code(p)
|
|
460
|
+
return res
|
|
461
|
+
else:
|
|
462
|
+
return self.get_product(code)._get_children()
|
|
463
|
+
|
|
464
|
+
else:
|
|
465
|
+
raise CodeNotFoundError(
|
|
466
|
+
f"HS code '{code}' could not be found in the listed codes"
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
elif isinstance(code, HSCode):
|
|
470
|
+
if code.hscode in self._codes_by_hs_codes:
|
|
471
|
+
return code._get_children()
|
|
472
|
+
else:
|
|
473
|
+
raise CodeNotFoundError(
|
|
474
|
+
f"HS code '{code.hscode}' could not be found in the listed codes"
|
|
475
|
+
)
|
|
476
|
+
else:
|
|
477
|
+
raise InvalidCodeError(
|
|
478
|
+
f"Code must be a str or a HSCode instance - received a {type(code).__name__!r}"
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
|