ustrade 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ustrade/__init__.py ADDED
@@ -0,0 +1,175 @@
1
+ import pandas as pd
2
+ from .countries import Country
3
+ from .client import CensusClient
4
+ from .codes import HSCode
5
+ from .errors import *
6
+
7
+ from importlib import metadata
8
+
9
# Start from the placeholder version and overwrite it with the installed
# distribution's version when package metadata for "ustrade" is available.
__version__ = "0.0.0"
try:
    __version__ = metadata.version("ustrade")
except metadata.PackageNotFoundError:
    pass
13
+
14
+
15
# Shared client used by the module-level convenience functions; created lazily.
_default_client: CensusClient | None = None


def _get_default_client() -> CensusClient:
    """Return the shared module-level client, building it on the first call."""
    global _default_client
    if _default_client is not None:
        return _default_client
    _default_client = CensusClient()
    return _default_client
22
+
23
def get_imports(
    country: str | Country | list[str | Country],
    product: str | list[str],
    date: str,
) -> pd.DataFrame:
    """
    Return the import values for the given country/product selection and month.

    Delegates to ``get_imports`` on the shared default client.

    Args:
        country (str | Country | list[str | Country]): ISO2 code, full name,
            Census Bureau code, or a Country object (or a list of those)
        product (str | list[str]): HS code(s)
        date (str): the month, in format 'YYYY-MM'

    Examples:
        >>> ut.get_imports(["France", "GB"], ["12", "13"], "2018-03")
        >>> ut.get_imports("GB", "12", "2018-03")
    """
    client = _get_default_client()
    return client.get_imports(country=country, product=product, date=date)
36
+
37
def get_exports(
    country: str | Country | list[str | Country],
    product: str | list[str],
    date: str,
) -> pd.DataFrame:
    """
    Return the export values for the given country/product selection and month.

    Delegates to ``get_exports`` on the shared default client.

    Args:
        country (str | Country | list[str | Country]): ISO2 code, full name,
            Census Bureau code, or a Country object (or a list of those)
        product (str | list[str]): HS code(s)
        date (str): the month, in format 'YYYY-MM'

    Examples:
        >>> ut.get_exports(["France", "GB"], ["08", "09"], "2018-03")
        >>> ut.get_exports("GB", "08", "2018-03")
    """
    client = _get_default_client()
    return client.get_exports(country=country, product=product, date=date)
50
+
51
def get_imports_on_period(
    country: str | Country | list[str | Country],
    product: str | list[str],
    start: str,
    end: str,
) -> pd.DataFrame:
    """
    Return the imports over a period, using the shared default client.

    Args:
        country (str | Country | list[str | Country]):
            ISO2 code, full name, Census Bureau code, or a Country object.
        product (str | list[str]):
            HS code(s).
        start (str):
            First month of the period, in format "YYYY-MM".
        end (str):
            Last month of the period, in format "YYYY-MM".

    Examples:
        >>> ut.get_imports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
        >>> from ustrade import CensusClient
        >>> c = CensusClient(timeout=120)
        >>> c.get_imports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")

    Notes:
        - Queries can take time to load.
        - Consider increasing `timeout` (build your own CensusClient to do so).
        - Data is only available from 2010-01.
    """
    client = _get_default_client()
    return client.get_imports_on_period(country, product, start, end)
77
+
78
+
79
def get_exports_on_period(
    country: str | Country | list[str | Country],
    product: str | list[str],
    start: str,
    end: str,
) -> pd.DataFrame:
    """
    Return the exports over a period, using the shared default client.

    Args:
        country (str | Country | list[str | Country]):
            ISO2 code, full name, Census Bureau code, or a Country object.
        product (str | list[str]):
            HS code(s).
        start (str):
            First month of the period, in format "YYYY-MM".
        end (str):
            Last month of the period, in format "YYYY-MM".

    Examples:
        >>> ut.get_exports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
        >>> from ustrade import CensusClient
        >>> c = CensusClient(timeout=120)
        >>> c.get_exports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")

    Notes:
        - Queries can take time to load.
        - Consider increasing `timeout` (build your own CensusClient to do so).
        - Data is only available from 2010-01.
    """
    client = _get_default_client()
    return client.get_exports_on_period(country, product, start, end)
105
+
106
def get_country_by_name(country: str) -> Country:
    """
    Look a country up by its full name, using the shared default client.

    Args:
        country (str): the full name of the country (ex: 'France')
    """
    client = _get_default_client()
    return client.get_country_by_name(country)
114
+
115
def get_country_by_code(cty_code: str) -> Country:
    """
    Look a country up by its Census Bureau code, using the shared default client.

    Args:
        cty_code (str): the Census Bureau code of the country (ex: '4120')

    Returns:
        Country: the matching entry, as returned by
        ``CensusClient.get_country_by_code``.
    """
    return _get_default_client().get_country_by_code(cty_code)
123
+
124
def get_country_by_iso2(iso2: str) -> Country:
    """
    Look a country up by its ISO2 code, using the shared default client.

    Args:
        iso2 (str): the ISO2 code of the country (ex: 'IT')

    Returns:
        Country: the matching entry, as returned by
        ``CensusClient.get_country_by_iso2``.
    """
    return _get_default_client().get_country_by_iso2(iso2)
132
+
133
def get_desc_from_code(hs: str) -> str:
    """
    Return the description associated with the given HS code.

    Delegates to ``get_desc_from_code`` on the shared default client.

    Args:
        hs (str): the HS code (ex: '73')

    Returns:
        str: the description stored for that code.
    """
    return _get_default_client().get_desc_from_code(hs)
141
+
142
def get_children_codes(code: str | HSCode, return_names=True) -> dict | list[str]:
    """
    Return the codes directly attached to ``code`` in the hierarchy.

    Delegates to ``get_children_codes`` on the shared default client.

    Args:
        code (str | HSCode): either the code as a string or the HSCode object
        return_names (bool): ask for a dict of code -> description when true,
            a plain list of codes when false
    """
    client = _get_default_client()
    return client.get_children_codes(code, return_names)
151
+
152
def get_product(hs: str) -> HSCode:
    """
    Return the HSCode object holding the information on the given HS code.

    Delegates to ``get_product`` on the shared default client.

    Args:
        hs (str): the HS code (ex: '1806')
    """
    client = _get_default_client()
    return client.get_product(hs)
160
+
161
+
162
# Public API of the package. HSCode is exported because it appears in the
# signatures of get_product() and get_children_codes().
__all__ = [
    "CensusClient",
    "Country",
    "HSCode",
    "get_imports",
    "get_exports",
    "get_imports_on_period",
    "get_exports_on_period",
    "get_country_by_name",
    "get_country_by_code",
    "get_country_by_iso2",
    "get_desc_from_code",
    "get_children_codes",
    "get_product",
]
ustrade/client.py ADDED
@@ -0,0 +1,490 @@
1
+ import requests
2
+ import socket
3
+ from datetime import datetime
4
+ import pandas as pd
5
+ from urllib.parse import urlencode
6
+ from . import countries
7
+ from .countries import Country
8
+ from . import codes
9
+ from .codes import HSCode
10
+ from .errors import *
11
+
12
+ class CensusClient:
13
+
14
+
15
def __init__(self, timeout=60, retries=3):
    """
    Set up request settings, lookup tables and output-formatting maps.

    Args:
        timeout: timeout handed to the HTTP requests and the connectivity check.
        retries: stored on the instance as ``self.retries``.
    """
    self.timeout = timeout
    self.retries = retries

    # Host and port of the API.
    self.BASE_URL = "api.census.gov"
    self.BASE_PORT = 443

    # Country table plus one index per supported identifier.
    self._country_codes = countries._load_countries()

    def _index_countries(key_of):
        return {key_of(entry): entry for entry in self._country_codes}

    self._country_by_code = _index_countries(lambda c: c.code)
    self._country_by_name = _index_countries(lambda c: c.name.lower())
    self._country_by_iso = _index_countries(lambda c: c.iso2.upper())

    # HS code table, its lookup by code, and the tree built from it.
    self._hs_codes, self._codes_by_hs_codes = codes._load_codes()
    self._code_tree = codes.build_tree_from_codes(self._hs_codes)

    # API column name -> column name used in returned frames.
    self.col_mapping = {
        "CTY_CODE": "country_code",
        "CTY_NAME": "country_name",
        "I_ENDUSE": "product_code",
        "I_COMMODITY": "product_code",
        "E_COMMODITY": "product_code",
        "E_ENDUSE": "product_code",
        "I_ENDUSE_LDESC": "product_name",
        "E_ENDUSE_LDESC": "product_name",
        "I_COMMODITY_SDESC": "product_name",
        "E_COMMODITY_SDESC": "product_name",
        "GEN_VAL_MO": "import_value",
        "ALL_VAL_MO": "export_value",
        "CON_VAL_MO": "consumption_import_value",
        "YEAR": "year",
        "MONTH": "month",
    }

    # Output column -> type tag understood by _apply_types.
    self.type_map = {
        "import_value": "float",
        "export_value": "float",
        "product_name": "str",
        "product_code": "str",
        "consumption_import_value": "float",
        "country": "str",
        "time": "datetime",
        "date": "datetime",
        "country_code": "str",
    }

    # Columns kept (in this order) in the frames returned to the caller.
    self._cols_to_return = [
        "date",
        "country_name",
        "country_code",
        "product_name",
        "product_code",
        "import_value",
        "export_value",
        "consumption_import_value",
    ]
70
+
71
def _check_connectivity(self) -> bool:
    """
    Report whether a TCP connection to the API host can be opened.

    Returns True when the connection succeeds; on OSError the error is
    printed and False is returned.
    """
    endpoint = (self.BASE_URL, self.BASE_PORT)
    try:
        with socket.create_connection(endpoint, timeout=self.timeout):
            reachable = True
    except OSError as err:
        print(err)
        reachable = False
    return reachable
84
+
85
+ ##### DATA RESEARCH FUNCTIONS #######
86
+
87
def get_imports(
    self,
    country: str | Country | list[str | Country],
    product: str | list[str],
    date: str,
) -> pd.DataFrame:
    """
    Return the import values for the given country/product selection and month.

    Runs ``_get_flow`` with the "imports" flow.

    Args:
        country (str | Country | list[str | Country]): ISO2 code, full name,
            Census Bureau code, or a Country object (or a list of those)
        product (str | list[str]): HS code(s)
        date (str): the month, in format 'YYYY-MM'

    Examples:
        >>> ut.get_imports(["France", "GB"], ["12", "13"], "2018-03")
        >>> ut.get_imports("GB", "12", "2018-03")
    """
    flow = "imports"
    return self._get_flow(country, product, date=date, flux=flow)
100
+
101
def get_exports(
    self,
    country: str | Country | list[str | Country],
    product: str | list[str],
    date: str,
) -> pd.DataFrame:
    """
    Return the export values for the given country/product selection and month.

    Runs ``_get_flow`` with the "exports" flow.

    Args:
        country (str | Country | list[str | Country]): ISO2 code, full name,
            Census Bureau code, or a Country object (or a list of those)
        product (str | list[str]): HS code(s)
        date (str): the month, in format 'YYYY-MM'

    Examples:
        >>> ut.get_exports(["France", "GB"], ["08", "09"], "2018-03")
        >>> ut.get_exports("GB", "08", "2018-03")
    """
    flow = "exports"
    return self._get_flow(country, product, date, flow)
114
+
115
+
116
+ def _build_params(self,
117
+ country: str|list,
118
+ product: str|list,
119
+ flux: str,
120
+ date:str = None,
121
+ start:str = None,
122
+ end:str= None)->dict:
123
+
124
+ if isinstance(country, (str, countries.Country)):
125
+ cty = self._normalize_country(country)
126
+ country = [cty]
127
+ if isinstance(country, list):
128
+ cty_list = []
129
+ for c in country:
130
+ cty_list.append(self._normalize_country(c))
131
+ country = cty_list
132
+
133
+ if isinstance(product, str):
134
+ product = [product]
135
+
136
+
137
+ flux_letter = flux[0].upper()
138
+
139
+ if date:
140
+ dt = datetime.strptime(date, "%Y-%m")
141
+ year = dt.year
142
+ month = f"{dt.month:02d}"
143
+ date_range = False
144
+
145
+ if start and end:
146
+ dt_start = datetime.strptime(start, "%Y-%m")
147
+ year_start = dt_start.year
148
+ month_start = f"{dt_start.month:02d}"
149
+
150
+ dt_end = datetime.strptime(end, "%Y-%m")
151
+ year_end = dt_end.year
152
+ month_end = f"{dt_end.month:02d}"
153
+ time_range = f"from+{year_start}-{month_start}+to+{year_end}-{month_end}"
154
+ date_range=True
155
+
156
+ #Base arguments ####
157
+ if flux == 'imports':
158
+ params = {"get":
159
+ f"CTY_CODE,CTY_NAME,{flux_letter}_COMMODITY,{flux_letter}_COMMODITY_SDESC,GEN_VAL_MO,CON_VAL_MO"}
160
+
161
+ if flux == 'exports':
162
+ params = {'get' :
163
+ f"CTY_CODE,CTY_NAME,{flux_letter}_COMMODITY,{flux_letter}_COMMODITY_SDESC,ALL_VAL_MO"}
164
+
165
+ query = urlencode(params)
166
+
167
+ url = f"https://{self.BASE_URL}/data/timeseries/intltrade/{flux}/hs?{query}"
168
+
169
+ #Adding countries + codes: ####
170
+ for c in country:
171
+ url += f"&CTY_CODE={str(c)}"
172
+ for k in product:
173
+ url += f'&{flux_letter}_COMMODITY={str(k)}'
174
+
175
+ ### Adding Time ranges: ###
176
+
177
+ if date_range:
178
+ url += f"&time={time_range}"
179
+
180
+ else:
181
+ url += f'&YEAR={year}&MONTH={month}'
182
+ return url
183
+
184
+
185
+
186
def _get_flow(self, country, product, date, flux):
    """
    Fetch one month of data for the given flow and return it as a DataFrame.

    The URL comes from ``_build_params``. HTTP error statuses raise through
    ``raise_for_status``. A body that cannot be decoded as JSON yields an
    empty DataFrame. Otherwise the first decoded row is used as the header,
    the remaining rows as data, and the frame goes through
    ``_prepare_results``.
    """
    request_url = self._build_params(country, product, date=date, flux=flux)
    response = requests.get(request_url, timeout=self.timeout)
    response.raise_for_status()

    try:
        payload = response.json()
    except requests.exceptions.JSONDecodeError:
        return pd.DataFrame()

    header = payload[0]
    rows = payload[1:]
    frame = pd.DataFrame(rows, columns=header)
    return self._prepare_results(frame)
203
+
204
+
205
def get_imports_on_period(
    self,
    country: str | Country | list[str | Country],
    product: str | list[str],
    start: str,
    end: str,
) -> pd.DataFrame:
    """
    Return the imports over a period.

    Runs ``_get_flow_on_period`` with the 'imports' flow.

    Args:
        country (str | Country | list[str | Country]):
            ISO2 code, full name, Census Bureau code, or a Country object.
        product (str | list[str]):
            HS code(s).
        start (str):
            First month of the period, in format "YYYY-MM".
        end (str):
            Last month of the period, in format "YYYY-MM".

    Examples:
        >>> ut.get_imports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
        >>> from ustrade import CensusClient
        >>> c = CensusClient(timeout=120)
        >>> c.get_imports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")

    Notes:
        - Queries can take time to load.
        - Consider increasing `timeout`.
        - Data is only available from 2010-01.
    """
    flow = 'imports'
    return self._get_flow_on_period(country, product, start=start, end=end, flux=flow)
231
+
232
+
233
def get_exports_on_period(
    self,
    country: str | Country | list[str | Country],
    product: str | list[str],
    start: str,
    end: str,
) -> pd.DataFrame:
    """
    Return the exports over a period.

    Runs ``_get_flow_on_period`` with the 'exports' flow.

    Args:
        country (str | Country | list[str | Country]):
            ISO2 code, full name, Census Bureau code, or a Country object.
        product (str | list[str]):
            HS code(s).
        start (str):
            First month of the period, in format "YYYY-MM".
        end (str):
            Last month of the period, in format "YYYY-MM".

    Examples:
        >>> ut.get_exports_on_period(["France", "DE", "GB"], ["09", "08", "07"], "2016-01", "2018-01")
        >>> from ustrade import CensusClient
        >>> c = CensusClient(timeout=120)
        >>> c.get_exports_on_period(["France", "DE", "GB"], ["08", "07"], "2016-01", "2018-01")

    Notes:
        - Queries can take time to load.
        - Consider increasing `timeout`.
        - Data is only available from 2010-01.
    """
    flow = 'exports'
    return self._get_flow_on_period(country, product, start=start, end=end, flux=flow)
259
+
260
+
261
def _get_flow_on_period(self, country, product, start, end, flux):
    """
    Fetch a period of data for the given flow and return it as a DataFrame.

    The URL comes from ``_build_params``. HTTP error statuses raise through
    ``raise_for_status``. A body that cannot be decoded as JSON raises
    ``EmptyResult``. Otherwise the first decoded row is used as the header,
    the remaining rows as data, and the frame goes through
    ``_prepare_results_on_period``.
    """
    request_url = self._build_params(country, product, start=start, end=end, flux=flux)
    response = requests.get(request_url, timeout=self.timeout)
    response.raise_for_status()

    try:
        payload = response.json()
    except requests.exceptions.JSONDecodeError:
        raise EmptyResult(
            f"The query '{response.url}' did not return any results."
        )

    header = payload[0]
    rows = payload[1:]
    frame = pd.DataFrame(rows, columns=header)
    return self._prepare_results_on_period(frame)
280
+
281
+
282
+
283
+ def _prepare_results(self, df):
284
+
285
+ df = df.rename(columns=self.col_mapping)
286
+
287
+ df["date"] = (pd.to_datetime(
288
+ df["year"].astype(str) + "-" + df["month"].astype(str).str.zfill(2))
289
+ .dt.to_period('M')
290
+ )
291
+
292
+
293
+
294
+ existing_cols = [c for c in self._cols_to_return if c in df.columns]
295
+
296
+ df = df[existing_cols]
297
+ df = df.loc[:, ~df.columns.duplicated()]
298
+
299
+ return self._apply_types(df)
300
+
301
+ def _prepare_results_on_period(self, df):
302
+ df = df.rename(columns= self.col_mapping)
303
+ df["date"] = (
304
+ pd.to_datetime(df["time"], format="%Y-%m", errors="coerce")
305
+ .dt.to_period("M")
306
+ )
307
+
308
+ existing_cols = [c for c in self._cols_to_return if c in df.columns]
309
+ df = df[existing_cols]
310
+ df = df.loc[:, ~df.columns.duplicated()]
311
+
312
+ return self._apply_types(df)
313
+
314
+
315
+
316
+ def _apply_types(self, df):
317
+ for col, t in self.type_map.items():
318
+ if col not in df:
319
+ continue
320
+
321
+ if t == "int":
322
+ df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)
323
+
324
+ elif t == "float":
325
+ df[col] = pd.to_numeric(df[col], errors="coerce").astype(float)
326
+
327
+ elif t == "datetime":
328
+ df[col] = (
329
+ df[col].astype(str).str.strip()
330
+ .str.replace(r"$", "-01", regex=True)
331
+ .pipe(pd.to_datetime, errors="coerce")
332
+ )
333
+
334
+ elif t == "str":
335
+ df[col] = df[col].astype(str)
336
+
337
+ return df.sort_values(by = "date").reset_index(drop=True)
338
+
339
+
340
+ ####### COUNTRIES FUNCTIONS #######
341
+
342
def get_country_by_name(self, country: str) -> countries.Country:
    """
    Look a country up by its name (case-insensitive).

    Raises KeyError when the lower-cased name is not in the name index.
    """
    key = country.lower()
    return self._country_by_name[key]
347
+
348
def get_country_by_code(self, cty_code: str) -> countries.Country:
    """
    Look a country up by its code.

    The code is used as given; raises KeyError when it is not in the code index.
    """
    by_code = self._country_by_code
    return by_code[cty_code]
353
+
354
def get_country_by_iso2(self, iso2: str) -> countries.Country:
    """
    Look a country up by its ISO2 code (case-insensitive).

    Raises KeyError when the upper-cased code is not in the ISO index.
    """
    key = iso2.upper()
    return self._country_by_iso[key]
359
+
360
def _normalize_country(self, inp: str, output="code"):
    """
    Resolve a country identifier to one attribute of the matching Country.

    Args:
        inp: a Country object, or a value whose stripped text form is tried,
            in this order, as an ISO2 code (upper-cased), a name
            (lower-cased), and a code (as is).
        output: which attribute to return - "code", "name" or "iso2".

    Raises:
        ValueError: no country matches ``inp``, or ``output`` is not one of
            the three supported values.
    """
    def pick(country):
        if output == "code":
            return country.code
        if output == "name":
            return country.name
        if output == "iso2":
            return country.iso2
        raise ValueError(f"Invalid output type: {output!r}")

    if isinstance(inp, countries.Country):
        return pick(inp)

    text = str(inp).strip()
    candidates = (
        (self._country_by_iso, text.upper()),
        (self._country_by_name, text.lower()),
        (self._country_by_code, text),
    )
    for table, key in candidates:
        if key in table:
            return pick(table[key])

    raise ValueError(f"Unknown country: {inp!r}")
391
+
392
+
393
+ ####### HS CODES FUNCTIONS #######
394
+
395
+
396
def get_desc_from_code(self, hs: str) -> str:
    """
    Return the description stored for the given HS code.

    ## Args:
        hs (str): the HS code (ex: '1806')

    ## Raises:
        InvalidCodeError: ``hs`` is not a str.
        CodeNotFoundError: ``hs`` is not a listed code; for a one-character
            code the message suggests the zero-padded form.
    """
    if not isinstance(hs, str):
        raise InvalidCodeError(
            f"Code must be a str instance - received a {type(hs).__name__!r}"
        )

    known = self._codes_by_hs_codes
    if hs in known:
        return known[hs].description

    message = f"HS code '{hs}' could not be found in the listed codes."
    if len(hs) == 1:
        message += f" Did you mean '0{hs}'?"
    raise CodeNotFoundError(message)
419
+
420
def get_product(self, hs: str) -> HSCode:
    """
    Return the HSCode object stored for the given HS code.

    ## Args:
        hs (str): the HS code (ex: '1806')

    ## Raises:
        InvalidCodeError: ``hs`` is not a str.
        CodeNotFoundError: ``hs`` is not a listed code; for a one-character
            code the message suggests the zero-padded form.
    """
    if not isinstance(hs, str):
        raise InvalidCodeError(
            f"Code must be a str instance - received a {type(hs).__name__!r}"
        )

    known = self._codes_by_hs_codes
    if hs in known:
        return known[hs]

    message = f"HS code '{hs}' could not be found in the listed codes."
    if len(hs) == 1:
        message += f" Did you mean '0{hs}'?"
    raise CodeNotFoundError(message)
444
+
445
def get_children_codes(self, code: str | HSCode, return_names=True) -> dict | list[str]:
    """
    Return the codes directly attached to ``code`` in the hierarchy.

    ## Args:
        code (str | HSCode): either the code as a string or the HSCode object
        return_names (bool): return a dict of code -> description if true,
            the list of codes if false. This is honoured for both input
            kinds.

    ## Raises:
        InvalidCodeError: ``code`` is neither a str nor an HSCode.
        CodeNotFoundError: the code (or a child code, when descriptions are
            requested) is not a listed code.
    """
    if isinstance(code, str):
        hs = code
    elif isinstance(code, HSCode):
        hs = code.hscode
    else:
        raise InvalidCodeError(
            f"Code must be a str or a HSCode instance - received a {type(code).__name__!r}"
        )

    if hs not in self._codes_by_hs_codes:
        raise CodeNotFoundError(
            f"HS code '{hs}' could not be found in the listed codes"
        )

    # A string is resolved through the registry; an HSCode object is asked
    # for its own children.
    product = self._codes_by_hs_codes[hs] if isinstance(code, str) else code
    children = product._get_children()

    if not return_names:
        return children
    return {child: self.get_desc_from_code(child) for child in children}
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+