tracktolib 0.35.0__tar.gz → 0.37.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tracktolib
3
- Version: 0.35.0
3
+ Version: 0.37.0
4
4
  Summary: Utility library for python
5
5
  Home-page: https://github.com/tracktor/tracktolib
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tracktolib"
3
- version = "0.35.0"
3
+ version = "0.37.0"
4
4
  description = "Utility library for python"
5
5
  authors = ["Julien Brayere <julien.brayere@tracktor.fr>"]
6
6
  license = "MIT"
@@ -70,7 +70,7 @@ pythonPlatform = "Linux"
70
70
 
71
71
  [tool.commitizen]
72
72
  name = "cz_conventional_commits"
73
- version = "0.35.0"
73
+ version = "0.37.0"
74
74
  tag_format = "$version"
75
75
  version_files = [
76
76
  "pyproject.toml:version"
@@ -19,7 +19,7 @@ extras_require = \
19
19
 
20
20
  setup_kwargs = {
21
21
  'name': 'tracktolib',
22
- 'version': '0.35.0',
22
+ 'version': '0.37.0',
23
23
  'description': 'Utility library for python',
24
24
  'long_description': "# Tracktolib\n\n[![Python versions](https://img.shields.io/pypi/pyversions/tracktolib)](https://pypi.python.org/pypi/tracktolib)\n[![Latest PyPI version](https://img.shields.io/pypi/v/tracktolib?logo=pypi)](https://pypi.python.org/pypi/tracktolib)\n[![CircleCI](https://circleci.com/gh/Tracktor/tracktolib/tree/master.svg?style=shield)](https://app.circleci.com/pipelines/github/Tracktor/tracktolib?branch=master)\n\nUtility library for python\n\n# Installation\n\nYou can choose to not install all the dependencies by specifying\nthe [extra](https://python-poetry.org/docs/cli/#options-4) parameter such as:\n\n```bash\npoetry add tracktolib@latest -E pg-sync -E tests --group dev \n```\n\nHere we only install the utilities using `psycopg` (pg-sync) and `deepdiff` (tests) for the dev environment.\n\n# Utilities\n\n- **log**\n\nUtility functions for logging.\n\n```python\nimport logging\nfrom tracktolib.logs import init_logging\n\nlogger = logging.getLogger()\nformatter, stream_handler = init_logging(logger, 'json', version='0.0.1')\n```\n\n- **pg**\n\nUtility functions for [asyncpg](https://github.com/MagicStack/asyncpg)\n\n- **pg-sync**\n\nUtility functions based on psycopg such as `fetch_one`, `insert_many`, `fetch_count` ...\n\nTo use the functions, create a `Connection` using psycopg: `conn = psycopg2.connect()`\n\n*fetch_one*\n\n```python\nfrom pg.pg_sync import (\n insert_many, fetch_one, fetch_count, fetch_all\n)\n\ndata = [\n {'foo': 'bar', 'value': 1},\n {'foo': 'baz', 'value': 2}\n]\ninsert_many(conn, 'public.test', data) # Will insert the 2 dict\nquery = 'SELECT foo from public.test order by value asc'\nvalue = fetch_one(conn, query, required=True) # Will return {'foo': 'bar'}, raise an error is not found\nassert fetch_count(conn, 'public.test') == 2\nquery = 'SELECT * from public.test order by value asc'\nassert fetch_all(conn, query) == data\n\n```\n\n- **tests**\n\nUtility functions for testing\n\n- **s3-minio**\n\nUtility functions for [minio](https://min.io/docs/minio/linux/developers/python/API.html)\n\n- **s3**\n\nUtility functions for [aiobotocore](https://github.com/aio-libs/aiobotocore)\n\n- **logs**\n\nUtility functions to initialize the logging formatting and streams\n\n- **http**\n\nUtility functions using [httpx](https://www.python-httpx.org/)\n\n- **api**\n\nUtility functions using [fastapi](https://fastapi.tiangolo.com/)\n",
25
25
  'author': 'Julien Brayere',
@@ -3,6 +3,7 @@ from dataclasses import dataclass, field
3
3
  from typing import (
4
4
  TypeVar, Iterable, Callable, Generic, Iterator, TypeAlias,
5
5
  overload, Any, Literal)
6
+ from ..pg_utils import get_conflict_query
6
7
 
7
8
  try:
8
9
  import asyncpg
@@ -29,27 +30,11 @@ def _get_on_conflict_query(query: str,
29
30
  keys: Iterable[K],
30
31
  update_keys: Iterable[K] | None,
31
32
  ignore_keys: Iterable[K] | None,
32
- constraint: str | None,
33
- on_conflict: str | None) -> str:
34
- if on_conflict:
35
- return f'{query} {on_conflict}'
36
-
37
- if constraint:
38
- query = f'{query} ON CONFLICT ON CONSTRAINT {constraint}'
39
- elif update_keys:
40
- update_keys_str = ', '.join(sorted(update_keys))
41
- query = f'{query} ON CONFLICT ({update_keys_str})'
42
- else:
43
- raise NotImplementedError('update_keys or constraint must be set')
44
-
45
- _ignore_keys = [*(update_keys or []), *(ignore_keys or [])]
46
- fields = ', '.join(f'{x} = COALESCE(EXCLUDED.{x}, t.{x})'
47
- for x in keys
48
- if x not in _ignore_keys)
49
- if not fields:
50
- raise ValueError('No fields set')
51
-
52
- return f'{query} DO UPDATE SET {fields}'
33
+ constraint: K | None,
34
+ on_conflict: K | None) -> str:
35
+ _on_conflict = get_conflict_query(keys=keys, update_keys=update_keys, ignore_keys=ignore_keys,
36
+ constraint=constraint, on_conflict=on_conflict)
37
+ return f'{query} {_on_conflict}'
53
38
 
54
39
 
55
40
  ReturningFn = Callable[[Iterable[K] | None, K | None], None]
@@ -3,8 +3,9 @@ import datetime as dt
3
3
  import functools
4
4
  import logging
5
5
  from pathlib import Path
6
- from typing import AsyncIterator
6
+ from typing import AsyncIterator, Iterable, Sequence
7
7
  from typing_extensions import LiteralString
8
+ from ..pg_utils import get_conflict_query
8
9
 
9
10
  try:
10
11
  import asyncpg
@@ -89,14 +90,27 @@ async def upsert_csv(conn: asyncpg.Connection,
89
90
  *,
90
91
  chunk_size: int = 5_000,
91
92
  show_progress: bool = False,
92
- nb_lines: int | None = None):
93
+ nb_lines: int | None = None,
94
+ on_conflict_keys: Iterable[LiteralString] | None = None,
95
+ delimiter: str = ',',
96
+ col_names: Sequence[str] | None = None,
97
+ skip_header: bool = False):
93
98
  infos = await get_table_infos(conn, schema, table)
94
99
 
100
+ on_conflict_str = 'ON CONFLICT DO NOTHING'
101
+ if on_conflict_keys is not None:
102
+ on_conflict_str = get_conflict_query(keys=infos.keys(),
103
+ update_keys=on_conflict_keys)
104
+
95
105
  with csv_path.open('r') as f:
96
- reader = csv.DictReader(f)
97
- _columns = [x.lower() for x in (reader.fieldnames or [])]
106
+ reader = csv.DictReader(f, delimiter=delimiter, fieldnames=col_names)
107
+ if skip_header:
108
+ next(reader)
109
+ _columns = col_names if col_names else [x.lower() for x in (reader.fieldnames or [])]
98
110
  async with conn.transaction():
99
- _tmp_table, _tmp_query, _insert_query = get_tmp_table_query(schema, table)
111
+ _tmp_table, _tmp_query, _insert_query = get_tmp_table_query(schema, table,
112
+ columns=infos.keys(),
113
+ on_conflict=on_conflict_str)
100
114
  logger.info(f'Creating tmp table: {_tmp_table!r}')
101
115
  await conn.execute(_tmp_query)
102
116
  logger.info(f'Inserting data from {csv_path!r} to {_tmp_table!r}')
@@ -0,0 +1,58 @@
1
+ from typing_extensions import LiteralString
2
+ from typing import Iterable
3
+ from typing import cast
4
+
5
+
6
+ def get_tmp_table_query(schema: LiteralString,
7
+ table: LiteralString,
8
+ columns: Iterable[LiteralString] | None = None,
9
+ on_conflict: LiteralString = 'ON CONFLICT DO NOTHING'):
10
+ tmp_table_name = f'{schema}_{table}_tmp'
11
+ create_tmp_table_query = f"""
12
+ CREATE TEMP TABLE {tmp_table_name}
13
+ (LIKE {schema}.{table} INCLUDING DEFAULTS)
14
+ ON COMMIT DROP;
15
+ """
16
+
17
+ if columns:
18
+ _columns = ','.join(columns)
19
+ insert_query = f"""
20
+ INSERT INTO {schema}.{table} as t({_columns})
21
+ SELECT *
22
+ FROM {tmp_table_name}
23
+ {on_conflict};
24
+ """
25
+ else:
26
+ insert_query = f"""
27
+ INSERT INTO {schema}.{table}
28
+ SELECT *
29
+ FROM {tmp_table_name}
30
+ {on_conflict};
31
+ """
32
+ return tmp_table_name, create_tmp_table_query, insert_query
33
+
34
+
35
+ def get_conflict_query(keys: Iterable[str],
36
+ update_keys: Iterable[str] | None = None,
37
+ ignore_keys: Iterable[str] | None = None,
38
+ constraint: str | None = None,
39
+ on_conflict: str | None = None) -> LiteralString:
40
+ if on_conflict:
41
+ return cast(LiteralString, on_conflict)
42
+
43
+ if constraint:
44
+ query = f'ON CONFLICT ON CONSTRAINT {constraint}'
45
+ elif update_keys:
46
+ update_keys_str = ', '.join(sorted(update_keys))
47
+ query = f'ON CONFLICT ({update_keys_str})'
48
+ else:
49
+ raise NotImplementedError('update_keys or constraint must be set')
50
+
51
+ _ignore_keys = [*(update_keys or []), *(ignore_keys or [])]
52
+ fields = ', '.join(f'{x} = COALESCE(EXCLUDED.{x}, t.{x})'
53
+ for x in keys
54
+ if x not in _ignore_keys)
55
+ if not fields:
56
+ raise ValueError('No fields set')
57
+
58
+ return cast(LiteralString, f'{query} DO UPDATE SET {fields}')
@@ -1,18 +0,0 @@
1
- from typing_extensions import LiteralString
2
-
3
-
4
- def get_tmp_table_query(schema: LiteralString,
5
- table: LiteralString):
6
- tmp_table_name = f'{schema}_{table}_tmp'
7
- create_tmp_table_query = f"""
8
- CREATE TEMP TABLE {tmp_table_name}
9
- (LIKE {schema}.{table} INCLUDING DEFAULTS)
10
- ON COMMIT DROP;
11
- """
12
- insert_query = f"""
13
- INSERT INTO {schema}.{table}
14
- SELECT *
15
- FROM {tmp_table_name}
16
- ON CONFLICT DO NOTHING;
17
- """
18
- return tmp_table_name, create_tmp_table_query, insert_query
File without changes
File without changes