tracktolib 0.35.0__tar.gz → 0.37.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tracktolib-0.35.0 → tracktolib-0.37.0}/PKG-INFO +1 -1
- {tracktolib-0.35.0 → tracktolib-0.37.0}/pyproject.toml +2 -2
- {tracktolib-0.35.0 → tracktolib-0.37.0}/setup.py +1 -1
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/pg/query.py +6 -21
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/pg/utils.py +19 -5
- tracktolib-0.37.0/tracktolib/pg_utils.py +58 -0
- tracktolib-0.35.0/tracktolib/pg_utils.py +0 -18
- {tracktolib-0.35.0 → tracktolib-0.37.0}/LICENSE +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/README.md +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/__init__.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/api.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/http.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/logs.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/pg/__init__.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/pg_sync.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/s3/__init__.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/s3/minio.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/s3/s3.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/tests.py +0 -0
- {tracktolib-0.35.0 → tracktolib-0.37.0}/tracktolib/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "tracktolib"
|
|
3
|
-
version = "0.35.0"
|
|
3
|
+
version = "0.37.0"
|
|
4
4
|
description = "Utility library for python"
|
|
5
5
|
authors = ["Julien Brayere <julien.brayere@tracktor.fr>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -70,7 +70,7 @@ pythonPlatform = "Linux"
|
|
|
70
70
|
|
|
71
71
|
[tool.commitizen]
|
|
72
72
|
name = "cz_conventional_commits"
|
|
73
|
-
version = "0.35.0"
|
|
73
|
+
version = "0.37.0"
|
|
74
74
|
tag_format = "$version"
|
|
75
75
|
version_files = [
|
|
76
76
|
"pyproject.toml:version"
|
|
@@ -19,7 +19,7 @@ extras_require = \
|
|
|
19
19
|
|
|
20
20
|
setup_kwargs = {
|
|
21
21
|
'name': 'tracktolib',
|
|
22
|
-
'version': '0.35.0',
|
|
22
|
+
'version': '0.37.0',
|
|
23
23
|
'description': 'Utility library for python',
|
|
24
24
|
'long_description': "# Tracktolib\n\n[](https://pypi.python.org/pypi/tracktolib)\n[](https://pypi.python.org/pypi/tracktolib)\n[](https://app.circleci.com/pipelines/github/Tracktor/tracktolib?branch=master)\n\nUtility library for python\n\n# Installation\n\nYou can choose to not install all the dependencies by specifying\nthe [extra](https://python-poetry.org/docs/cli/#options-4) parameter such as:\n\n```bash\npoetry add tracktolib@latest -E pg-sync -E tests --group dev \n```\n\nHere we only install the utilities using `psycopg` (pg-sync) and `deepdiff` (tests) for the dev environment.\n\n# Utilities\n\n- **log**\n\nUtility functions for logging.\n\n```python\nimport logging\nfrom tracktolib.logs import init_logging\n\nlogger = logging.getLogger()\nformatter, stream_handler = init_logging(logger, 'json', version='0.0.1')\n```\n\n- **pg**\n\nUtility functions for [asyncpg](https://github.com/MagicStack/asyncpg)\n\n- **pg-sync**\n\nUtility functions based on psycopg such as `fetch_one`, `insert_many`, `fetch_count` ...\n\nTo use the functions, create a `Connection` using psycopg: `conn = psycopg2.connect()`\n\n*fetch_one*\n\n```python\nfrom pg.pg_sync import (\n insert_many, fetch_one, fetch_count, fetch_all\n)\n\ndata = [\n {'foo': 'bar', 'value': 1},\n {'foo': 'baz', 'value': 2}\n]\ninsert_many(conn, 'public.test', data) # Will insert the 2 dict\nquery = 'SELECT foo from public.test order by value asc'\nvalue = fetch_one(conn, query, required=True) # Will return {'foo': 'bar'}, raise an error is not found\nassert fetch_count(conn, 'public.test') == 2\nquery = 'SELECT * from public.test order by value asc'\nassert fetch_all(conn, query) == data\n\n```\n\n- **tests**\n\nUtility functions for testing\n\n- **s3-minio**\n\nUtility functions for [minio](https://min.io/docs/minio/linux/developers/python/API.html)\n\n- **s3**\n\nUtility functions for [aiobotocore](https://github.com/aio-libs/aiobotocore)\n\n- **logs**\n\nUtility functions to initialize the logging 
formatting and streams\n\n- **http**\n\nUtility functions using [httpx](https://www.python-httpx.org/)\n\n- **api**\n\nUtility functions using [fastapi](https://fastapi.tiangolo.com/)\n",
|
|
25
25
|
'author': 'Julien Brayere',
|
|
@@ -3,6 +3,7 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import (
|
|
4
4
|
TypeVar, Iterable, Callable, Generic, Iterator, TypeAlias,
|
|
5
5
|
overload, Any, Literal)
|
|
6
|
+
from ..pg_utils import get_conflict_query
|
|
6
7
|
|
|
7
8
|
try:
|
|
8
9
|
import asyncpg
|
|
@@ -29,27 +30,11 @@ def _get_on_conflict_query(query: str,
|
|
|
29
30
|
keys: Iterable[K],
|
|
30
31
|
update_keys: Iterable[K] | None,
|
|
31
32
|
ignore_keys: Iterable[K] | None,
|
|
32
|
-
constraint:
|
|
33
|
-
on_conflict:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
if constraint:
|
|
38
|
-
query = f'{query} ON CONFLICT ON CONSTRAINT {constraint}'
|
|
39
|
-
elif update_keys:
|
|
40
|
-
update_keys_str = ', '.join(sorted(update_keys))
|
|
41
|
-
query = f'{query} ON CONFLICT ({update_keys_str})'
|
|
42
|
-
else:
|
|
43
|
-
raise NotImplementedError('update_keys or constraint must be set')
|
|
44
|
-
|
|
45
|
-
_ignore_keys = [*(update_keys or []), *(ignore_keys or [])]
|
|
46
|
-
fields = ', '.join(f'{x} = COALESCE(EXCLUDED.{x}, t.{x})'
|
|
47
|
-
for x in keys
|
|
48
|
-
if x not in _ignore_keys)
|
|
49
|
-
if not fields:
|
|
50
|
-
raise ValueError('No fields set')
|
|
51
|
-
|
|
52
|
-
return f'{query} DO UPDATE SET {fields}'
|
|
33
|
+
constraint: K | None,
|
|
34
|
+
on_conflict: K | None) -> str:
|
|
35
|
+
_on_conflict = get_conflict_query(keys=keys, update_keys=update_keys, ignore_keys=ignore_keys,
|
|
36
|
+
constraint=constraint, on_conflict=on_conflict)
|
|
37
|
+
return f'{query} {_on_conflict}'
|
|
53
38
|
|
|
54
39
|
|
|
55
40
|
ReturningFn = Callable[[Iterable[K] | None, K | None], None]
|
|
@@ -3,8 +3,9 @@ import datetime as dt
|
|
|
3
3
|
import functools
|
|
4
4
|
import logging
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import AsyncIterator
|
|
6
|
+
from typing import AsyncIterator, Iterable, Sequence
|
|
7
7
|
from typing_extensions import LiteralString
|
|
8
|
+
from ..pg_utils import get_conflict_query
|
|
8
9
|
|
|
9
10
|
try:
|
|
10
11
|
import asyncpg
|
|
@@ -89,14 +90,27 @@ async def upsert_csv(conn: asyncpg.Connection,
|
|
|
89
90
|
*,
|
|
90
91
|
chunk_size: int = 5_000,
|
|
91
92
|
show_progress: bool = False,
|
|
92
|
-
nb_lines: int | None = None
|
|
93
|
+
nb_lines: int | None = None,
|
|
94
|
+
on_conflict_keys: Iterable[LiteralString] | None = None,
|
|
95
|
+
delimiter: str = ',',
|
|
96
|
+
col_names: Sequence[str] | None = None,
|
|
97
|
+
skip_header: bool = False):
|
|
93
98
|
infos = await get_table_infos(conn, schema, table)
|
|
94
99
|
|
|
100
|
+
on_conflict_str = 'ON CONFLICT DO NOTHING'
|
|
101
|
+
if on_conflict_keys is not None:
|
|
102
|
+
on_conflict_str = get_conflict_query(keys=infos.keys(),
|
|
103
|
+
update_keys=on_conflict_keys)
|
|
104
|
+
|
|
95
105
|
with csv_path.open('r') as f:
|
|
96
|
-
reader = csv.DictReader(f)
|
|
97
|
-
|
|
106
|
+
reader = csv.DictReader(f, delimiter=delimiter, fieldnames=col_names)
|
|
107
|
+
if skip_header:
|
|
108
|
+
next(reader)
|
|
109
|
+
_columns = col_names if col_names else [x.lower() for x in (reader.fieldnames or [])]
|
|
98
110
|
async with conn.transaction():
|
|
99
|
-
_tmp_table, _tmp_query, _insert_query = get_tmp_table_query(schema, table)
|
|
111
|
+
_tmp_table, _tmp_query, _insert_query = get_tmp_table_query(schema, table,
|
|
112
|
+
columns=infos.keys(),
|
|
113
|
+
on_conflict=on_conflict_str)
|
|
100
114
|
logger.info(f'Creating tmp table: {_tmp_table!r}')
|
|
101
115
|
await conn.execute(_tmp_query)
|
|
102
116
|
logger.info(f'Inserting data from {csv_path!r} to {_tmp_table!r}')
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from typing_extensions import LiteralString
|
|
2
|
+
from typing import Iterable
|
|
3
|
+
from typing import cast
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_tmp_table_query(schema: LiteralString,
                        table: LiteralString,
                        columns: Iterable[LiteralString] | None = None,
                        on_conflict: LiteralString = 'ON CONFLICT DO NOTHING'):
    """
    Build the statements used to load rows into a table through a temporary
    staging table.

    All arguments are interpolated directly into the SQL text (nothing is
    escaped here), which is why they are typed as ``LiteralString``.

    :param schema: schema of the target table
    :param table: name of the target table
    :param columns: optional target column list; when given, the same columns
        are read back from the staging table so that both sides of the
        ``INSERT ... SELECT`` always line up
    :param on_conflict: conflict clause appended verbatim to the insert
    :return: a ``(tmp_table_name, create_tmp_table_query, insert_query)`` tuple
    """
    tmp_table_name = f'{schema}_{table}_tmp'
    create_tmp_table_query = f"""
    CREATE TEMP TABLE {tmp_table_name}
    (LIKE {schema}.{table} INCLUDING DEFAULTS)
    ON COMMIT DROP;
    """

    # Join first and test the result: a one-shot iterator is truthy even when
    # it yields nothing, which would otherwise produce an invalid `t()`.
    _columns = ','.join(columns) if columns is not None else ''

    # The target is always aliased as `t`, because conflict clauses built by
    # `get_conflict_query` reference `t.<column>` whether or not an explicit
    # column list was supplied.
    target = f'{schema}.{table} as t'
    if _columns:
        target = f'{target}({_columns})'
    selected = _columns if _columns else '*'

    insert_query = f"""
    INSERT INTO {target}
    SELECT {selected}
    FROM {tmp_table_name}
    {on_conflict};
    """
    return tmp_table_name, create_tmp_table_query, insert_query
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_conflict_query(keys: Iterable[str],
                       update_keys: Iterable[str] | None = None,
                       ignore_keys: Iterable[str] | None = None,
                       constraint: str | None = None,
                       on_conflict: str | None = None) -> LiteralString:
    """
    Build an ``ON CONFLICT ... DO UPDATE SET ...`` clause.

    The generated assignments reference the target table through the alias
    ``t``, so the enclosing ``INSERT`` must declare that alias.

    :param keys: all the columns being inserted
    :param update_keys: columns forming the conflict target; they are sorted
        in the clause and never appear in the ``SET`` list
    :param ignore_keys: extra columns to leave out of the ``SET`` list
    :param constraint: name of a constraint to use as conflict target; takes
        precedence over ``update_keys`` for the target
    :param on_conflict: a ready-made clause, returned unchanged when set
    :return: the conflict clause
    :raises NotImplementedError: if neither ``constraint`` nor a non-empty
        ``update_keys`` is given
    :raises ValueError: if no column is left to update
    """
    if on_conflict:
        return cast(LiteralString, on_conflict)

    # Materialise once: the keys are read twice below (conflict target, then
    # exclusion from the SET list) and a one-shot iterator would be exhausted
    # after the first read.
    _update_keys = list(update_keys) if update_keys is not None else []

    if constraint:
        query = f'ON CONFLICT ON CONSTRAINT {constraint}'
    elif _update_keys:
        update_keys_str = ', '.join(sorted(_update_keys))
        query = f'ON CONFLICT ({update_keys_str})'
    else:
        raise NotImplementedError('update_keys or constraint must be set')

    _ignore_keys = {*_update_keys, *(ignore_keys or [])}
    fields = ', '.join(f'{x} = COALESCE(EXCLUDED.{x}, t.{x})'
                       for x in keys
                       if x not in _ignore_keys)
    if not fields:
        raise ValueError('No fields set')

    return cast(LiteralString, f'{query} DO UPDATE SET {fields}')
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from typing_extensions import LiteralString
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def get_tmp_table_query(schema: LiteralString,
                        table: LiteralString):
    """
    Return the staging-table name together with the two statements that use it.

    The first statement creates a temporary table shaped like
    ``schema.table`` that is dropped on commit; the second copies every
    staged row into ``schema.table``, skipping rows that conflict.

    :return: a ``(tmp_table_name, create_query, insert_query)`` tuple
    """
    target = f'{schema}.{table}'
    staging = f'{schema}_{table}_tmp'
    create_stmt = f"""
    CREATE TEMP TABLE {staging}
    (LIKE {target} INCLUDING DEFAULTS)
    ON COMMIT DROP;
    """
    insert_stmt = f"""
    INSERT INTO {target}
    SELECT *
    FROM {staging}
    ON CONFLICT DO NOTHING;
    """
    return staging, create_stmt, insert_stmt
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|