udata-hydra-csvapi 0.2.0.dev0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/PKG-INFO +53 -2
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/README.md +52 -1
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/app.py +19 -6
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/config_default.toml +1 -0
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/metrics.py +5 -5
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/utils.py +209 -94
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/pyproject.toml +3 -2
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/__init__.py +0 -0
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/error.py +0 -0
- {udata_hydra_csvapi-0.2.0.dev0 → udata_hydra_csvapi-0.2.2}/api_tabular/query.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: udata-hydra-csvapi
|
|
3
|
-
Version: 0.2.0.dev0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: API for CSV converted by udata-hydra
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: data.gouv.fr
|
|
@@ -130,7 +130,7 @@ curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/da
|
|
|
130
130
|
}
|
|
131
131
|
```
|
|
132
132
|
|
|
133
|
-
This endpoint can be queried with the following operators as query string (replacing `column_name` with the name of an actual column):
|
|
133
|
+
This endpoint can be queried with the following operators as query string (replacing `column_name` with the name of an actual column), if the column type allows it (see the swagger for each column's allowed parameter):
|
|
134
134
|
|
|
135
135
|
```
|
|
136
136
|
# sort by column
|
|
@@ -160,8 +160,30 @@ column_name__strictly_less=value
|
|
|
160
160
|
|
|
161
161
|
# strictly greater
|
|
162
162
|
column_name__strictly_greater=value
|
|
163
|
+
|
|
164
|
+
# group by values
|
|
165
|
+
column_name__groupby
|
|
166
|
+
|
|
167
|
+
# count values
|
|
168
|
+
column_name__count
|
|
169
|
+
|
|
170
|
+
# mean / average
|
|
171
|
+
column_name__avg
|
|
172
|
+
|
|
173
|
+
# minimum
|
|
174
|
+
column_name__min
|
|
175
|
+
|
|
176
|
+
# maximum
|
|
177
|
+
column_name__max
|
|
178
|
+
|
|
179
|
+
# sum
|
|
180
|
+
column_name__sum
|
|
163
181
|
```
|
|
164
182
|
|
|
183
|
+
> /!\ WARNING: aggregation requests are only available for resources that are listed in the `ALLOW_AGGREGATION` list of the config file.
|
|
184
|
+
|
|
185
|
+
> NB : passing an aggregation operator (`count`, `avg`, `min`, `max`, `sum`) returns a column that is named `<column_name>__<operator>` (for instance: `?birth__groupby&score__sum` will return a list of dicts with the keys `birth` and `score__sum`).
|
|
186
|
+
|
|
165
187
|
For instance:
|
|
166
188
|
```shell
|
|
167
189
|
curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/?score__greater=0.9&decompte__exact=13
|
|
@@ -203,6 +225,31 @@ returns
|
|
|
203
225
|
}
|
|
204
226
|
```
|
|
205
227
|
|
|
228
|
+
With filters and aggregators (filtering is always done **before** aggregation, no matter the order in the parameters):
|
|
229
|
+
```shell
|
|
230
|
+
curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/?decompte__groupby&birth__less=1996&score__avg
|
|
231
|
+
```
|
|
232
|
+
i.e. `decompte` and average of `score` for all rows where `birth<="1996"`, grouped by `decompte`, returns
|
|
233
|
+
```json
|
|
234
|
+
{
|
|
235
|
+
"data": [
|
|
236
|
+
{
|
|
237
|
+
"decompte": 55,
|
|
238
|
+
"score__avg": 0.7123333333333334
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
"decompte": 27,
|
|
242
|
+
"score__avg": 0.6068888888888889
|
|
243
|
+
},
|
|
244
|
+
{
|
|
245
|
+
"decompte": 23,
|
|
246
|
+
"score__avg": 0.4603333333333334
|
|
247
|
+
},
|
|
248
|
+
...
|
|
249
|
+
]
|
|
250
|
+
}
|
|
251
|
+
```
|
|
252
|
+
|
|
206
253
|
Pagination is made through queries with `page` and `page_size`:
|
|
207
254
|
```shell
|
|
208
255
|
curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/?page=2&page_size=30
|
|
@@ -227,3 +274,7 @@ You can run the following command to lint and format the code:
|
|
|
227
274
|
poetry run ruff check --fix && poetry run ruff format
|
|
228
275
|
```
|
|
229
276
|
|
|
277
|
+
### Releases
|
|
278
|
+
|
|
279
|
+
The release process uses [bump'X](https://github.com/datagouv/bumpx).
|
|
280
|
+
|
|
@@ -112,7 +112,7 @@ curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/da
|
|
|
112
112
|
}
|
|
113
113
|
```
|
|
114
114
|
|
|
115
|
-
This endpoint can be queried with the following operators as query string (replacing `column_name` with the name of an actual column):
|
|
115
|
+
This endpoint can be queried with the following operators as query string (replacing `column_name` with the name of an actual column), if the column type allows it (see the swagger for each column's allowed parameter):
|
|
116
116
|
|
|
117
117
|
```
|
|
118
118
|
# sort by column
|
|
@@ -142,8 +142,30 @@ column_name__strictly_less=value
|
|
|
142
142
|
|
|
143
143
|
# strictly greater
|
|
144
144
|
column_name__strictly_greater=value
|
|
145
|
+
|
|
146
|
+
# group by values
|
|
147
|
+
column_name__groupby
|
|
148
|
+
|
|
149
|
+
# count values
|
|
150
|
+
column_name__count
|
|
151
|
+
|
|
152
|
+
# mean / average
|
|
153
|
+
column_name__avg
|
|
154
|
+
|
|
155
|
+
# minimum
|
|
156
|
+
column_name__min
|
|
157
|
+
|
|
158
|
+
# maximum
|
|
159
|
+
column_name__max
|
|
160
|
+
|
|
161
|
+
# sum
|
|
162
|
+
column_name__sum
|
|
145
163
|
```
|
|
146
164
|
|
|
165
|
+
> /!\ WARNING: aggregation requests are only available for resources that are listed in the `ALLOW_AGGREGATION` list of the config file.
|
|
166
|
+
|
|
167
|
+
> NB : passing an aggregation operator (`count`, `avg`, `min`, `max`, `sum`) returns a column that is named `<column_name>__<operator>` (for instance: `?birth__groupby&score__sum` will return a list of dicts with the keys `birth` and `score__sum`).
|
|
168
|
+
|
|
147
169
|
For instance:
|
|
148
170
|
```shell
|
|
149
171
|
curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/?score__greater=0.9&decompte__exact=13
|
|
@@ -185,6 +207,31 @@ returns
|
|
|
185
207
|
}
|
|
186
208
|
```
|
|
187
209
|
|
|
210
|
+
With filters and aggregators (filtering is always done **before** aggregation, no matter the order in the parameters):
|
|
211
|
+
```shell
|
|
212
|
+
curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/?decompte__groupby&birth__less=1996&score__avg
|
|
213
|
+
```
|
|
214
|
+
i.e. `decompte` and average of `score` for all rows where `birth<="1996"`, grouped by `decompte`, returns
|
|
215
|
+
```json
|
|
216
|
+
{
|
|
217
|
+
"data": [
|
|
218
|
+
{
|
|
219
|
+
"decompte": 55,
|
|
220
|
+
"score__avg": 0.7123333333333334
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
"decompte": 27,
|
|
224
|
+
"score__avg": 0.6068888888888889
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
"decompte": 23,
|
|
228
|
+
"score__avg": 0.4603333333333334
|
|
229
|
+
},
|
|
230
|
+
...
|
|
231
|
+
]
|
|
232
|
+
}
|
|
233
|
+
```
|
|
234
|
+
|
|
188
235
|
Pagination is made through queries with `page` and `page_size`:
|
|
189
236
|
```shell
|
|
190
237
|
curl http://localhost:8005/api/resources/aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/?page=2&page_size=30
|
|
@@ -208,3 +255,7 @@ You can run the following command to lint and format the code:
|
|
|
208
255
|
```shell
|
|
209
256
|
poetry run ruff check --fix && poetry run ruff format
|
|
210
257
|
```
|
|
258
|
+
|
|
259
|
+
### Releases
|
|
260
|
+
|
|
261
|
+
The release process uses [bump'X](https://github.com/datagouv/bumpx).
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from datetime import datetime, timezone
|
|
2
3
|
|
|
3
4
|
import aiohttp_cors
|
|
4
5
|
import sentry_sdk
|
|
@@ -17,6 +18,7 @@ from api_tabular.utils import (
|
|
|
17
18
|
build_link_with_page,
|
|
18
19
|
build_sql_query_string,
|
|
19
20
|
build_swagger_file,
|
|
21
|
+
get_app_version,
|
|
20
22
|
url_for,
|
|
21
23
|
)
|
|
22
24
|
|
|
@@ -93,10 +95,11 @@ async def resource_data(request):
|
|
|
93
95
|
offset = 0
|
|
94
96
|
|
|
95
97
|
try:
|
|
96
|
-
sql_query = build_sql_query_string(query_string, page_size, offset)
|
|
97
|
-
except ValueError:
|
|
98
|
-
raise QueryException(400, None, "Invalid query string", "Malformed query")
|
|
99
|
-
|
|
98
|
+
sql_query = build_sql_query_string(query_string, resource_id, page_size, offset)
|
|
99
|
+
except ValueError as e:
|
|
100
|
+
raise QueryException(400, None, "Invalid query string", f"Malformed query: {e}")
|
|
101
|
+
except PermissionError as e:
|
|
102
|
+
raise QueryException(403, None, "Unauthorized parameters", str(e))
|
|
100
103
|
resource = await get_resource(request.app["csession"], resource_id, ["parsing_table"])
|
|
101
104
|
response, total = await get_resource_data(request.app["csession"], resource, sql_query)
|
|
102
105
|
|
|
@@ -121,9 +124,11 @@ async def resource_data_csv(request):
|
|
|
121
124
|
query_string = request.query_string.split("&") if request.query_string else []
|
|
122
125
|
|
|
123
126
|
try:
|
|
124
|
-
sql_query = build_sql_query_string(query_string)
|
|
127
|
+
sql_query = build_sql_query_string(query_string, resource_id)
|
|
125
128
|
except ValueError:
|
|
126
129
|
raise QueryException(400, None, "Invalid query string", "Malformed query")
|
|
130
|
+
except PermissionError as e:
|
|
131
|
+
raise QueryException(403, None, "Unauthorized parameters", str(e))
|
|
127
132
|
|
|
128
133
|
resource = await get_resource(request.app["csession"], resource_id, ["parsing_table"])
|
|
129
134
|
|
|
@@ -143,12 +148,20 @@ async def resource_data_csv(request):
|
|
|
143
148
|
|
|
144
149
|
@routes.get(r"/health/")
|
|
145
150
|
async def get_health(request):
|
|
146
|
-
|
|
151
|
+
"""Return health check status"""
|
|
152
|
+
start_time = request.app["start_time"]
|
|
153
|
+
current_time = datetime.now(timezone.utc)
|
|
154
|
+
uptime_seconds = (current_time - start_time).total_seconds()
|
|
155
|
+
return web.json_response(
|
|
156
|
+
{"status": "ok", "version": request.app["app_version"], "uptime_seconds": uptime_seconds}
|
|
157
|
+
)
|
|
147
158
|
|
|
148
159
|
|
|
149
160
|
async def app_factory():
|
|
150
161
|
async def on_startup(app):
|
|
151
162
|
app["csession"] = ClientSession()
|
|
163
|
+
app["start_time"] = datetime.now(timezone.utc)
|
|
164
|
+
app["app_version"] = await get_app_version()
|
|
152
165
|
|
|
153
166
|
async def on_cleanup(app):
|
|
154
167
|
await app["csession"].close()
|
|
@@ -78,9 +78,9 @@ async def metrics_data(request):
|
|
|
78
78
|
else:
|
|
79
79
|
offset = 0
|
|
80
80
|
try:
|
|
81
|
-
sql_query = build_sql_query_string(query_string, page_size, offset)
|
|
82
|
-
except ValueError:
|
|
83
|
-
raise QueryException(400, None, "Invalid query string", "Malformed query")
|
|
81
|
+
sql_query = build_sql_query_string(query_string, page_size=page_size, offset=offset)
|
|
82
|
+
except ValueError as e:
|
|
83
|
+
raise QueryException(400, None, "Invalid query string", f"Malformed query: {e}")
|
|
84
84
|
|
|
85
85
|
response, total = await get_object_data(request.app["csession"], model, sql_query)
|
|
86
86
|
|
|
@@ -104,8 +104,8 @@ async def metrics_data_csv(request):
|
|
|
104
104
|
|
|
105
105
|
try:
|
|
106
106
|
sql_query = build_sql_query_string(query_string)
|
|
107
|
-
except ValueError:
|
|
108
|
-
raise QueryException(400, None, "Invalid query string", "Malformed query")
|
|
107
|
+
except ValueError as e:
|
|
108
|
+
raise QueryException(400, None, "Invalid query string", f"Malformed query: {e}")
|
|
109
109
|
|
|
110
110
|
response_headers = {
|
|
111
111
|
"Content-Disposition": f'attachment; filename="{model}.csv"',
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import tomllib
|
|
1
5
|
import yaml
|
|
2
6
|
from aiohttp.web_request import Request
|
|
3
7
|
from aiohttp.web_response import Response
|
|
@@ -5,13 +9,37 @@ from aiohttp.web_response import Response
|
|
|
5
9
|
from api_tabular import config
|
|
6
10
|
|
|
7
11
|
TYPE_POSSIBILITIES = {
|
|
8
|
-
"string": ["compare", "contains", "differs", "exact", "in", "sort"],
|
|
9
|
-
"float": [
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
"string": ["compare", "contains", "differs", "exact", "in", "sort", "groupby", "count"],
|
|
13
|
+
"float": [
|
|
14
|
+
"compare",
|
|
15
|
+
"differs",
|
|
16
|
+
"exact",
|
|
17
|
+
"in",
|
|
18
|
+
"sort",
|
|
19
|
+
"groupby",
|
|
20
|
+
"count",
|
|
21
|
+
"avg",
|
|
22
|
+
"max",
|
|
23
|
+
"min",
|
|
24
|
+
"sum",
|
|
25
|
+
],
|
|
26
|
+
"int": [
|
|
27
|
+
"compare",
|
|
28
|
+
"differs",
|
|
29
|
+
"exact",
|
|
30
|
+
"in",
|
|
31
|
+
"sort",
|
|
32
|
+
"groupby",
|
|
33
|
+
"count",
|
|
34
|
+
"avg",
|
|
35
|
+
"max",
|
|
36
|
+
"min",
|
|
37
|
+
"sum",
|
|
38
|
+
],
|
|
39
|
+
"bool": ["differs", "exact", "in", "sort", "groupby", "count"],
|
|
40
|
+
"date": ["compare", "contains", "differs", "exact", "in", "sort", "groupby", "count"],
|
|
41
|
+
"datetime": ["compare", "contains", "differs", "exact", "in", "sort", "groupby", "count"],
|
|
42
|
+
"json": ["contains", "differs", "exact", "in", "groupby", "count"],
|
|
15
43
|
}
|
|
16
44
|
|
|
17
45
|
MAP_TYPES = {
|
|
@@ -21,50 +49,168 @@ MAP_TYPES = {
|
|
|
21
49
|
"float": "number",
|
|
22
50
|
}
|
|
23
51
|
|
|
52
|
+
OPERATORS_DESCRIPTIONS = {
|
|
53
|
+
"exact": {
|
|
54
|
+
"name": "{}__exact=value",
|
|
55
|
+
"description": "Exact match in column: {}",
|
|
56
|
+
},
|
|
57
|
+
"differs": {
|
|
58
|
+
"name": "{}__differs=value",
|
|
59
|
+
"description": "Differs from in column: {}",
|
|
60
|
+
},
|
|
61
|
+
"contains": {
|
|
62
|
+
"name": "{}__contains=value",
|
|
63
|
+
"description": "String contains in column: {}",
|
|
64
|
+
},
|
|
65
|
+
"in": {
|
|
66
|
+
"name": "{}__in=value1,value2,...",
|
|
67
|
+
"description": "Value in list in column: {}",
|
|
68
|
+
},
|
|
69
|
+
"groupby": {
|
|
70
|
+
"name": "{}__groupby",
|
|
71
|
+
"description": "Performs `group by values` operation in column: {}",
|
|
72
|
+
"is_aggregator": True,
|
|
73
|
+
},
|
|
74
|
+
"count": {
|
|
75
|
+
"name": "{}__count",
|
|
76
|
+
"description": "Performs `count values` operation in column: {}",
|
|
77
|
+
"is_aggregator": True,
|
|
78
|
+
},
|
|
79
|
+
"avg": {
|
|
80
|
+
"name": "{}__avg",
|
|
81
|
+
"description": "Performs `mean` operation in column: {}",
|
|
82
|
+
"is_aggregator": True,
|
|
83
|
+
},
|
|
84
|
+
"min": {
|
|
85
|
+
"name": "{}__min",
|
|
86
|
+
"description": "Performs `minimum` operation in column: {}",
|
|
87
|
+
"is_aggregator": True,
|
|
88
|
+
},
|
|
89
|
+
"max": {
|
|
90
|
+
"name": "{}__max",
|
|
91
|
+
"description": "Performs `maximum` operation in column: {}",
|
|
92
|
+
"is_aggregator": True,
|
|
93
|
+
},
|
|
94
|
+
"sum": {
|
|
95
|
+
"name": "{}__sum",
|
|
96
|
+
"description": "Performs `sum` operation in column: {}",
|
|
97
|
+
"is_aggregator": True,
|
|
98
|
+
},
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def is_aggregation_allowed(resource_id: str):
|
|
103
|
+
return resource_id in config.ALLOW_AGGREGATION
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def get_app_version() -> str:
|
|
107
|
+
"""Parse pyproject.toml and return the version or an error."""
|
|
108
|
+
try:
|
|
109
|
+
with open("pyproject.toml", "rb") as f:
|
|
110
|
+
pyproject = tomllib.load(f)
|
|
111
|
+
return pyproject.get("tool", {}).get("poetry", {}).get("version", "unknown")
|
|
112
|
+
except FileNotFoundError:
|
|
113
|
+
return "unknown (pyproject.toml not found)"
|
|
114
|
+
except Exception as e:
|
|
115
|
+
return f"unknown ({str(e)})"
|
|
116
|
+
|
|
24
117
|
|
|
25
|
-
def build_sql_query_string(
|
|
118
|
+
def build_sql_query_string(
|
|
119
|
+
request_arg: list,
|
|
120
|
+
resource_id: Optional[str] = None,
|
|
121
|
+
page_size: int = None,
|
|
122
|
+
offset: int = 0,
|
|
123
|
+
) -> str:
|
|
26
124
|
sql_query = []
|
|
125
|
+
aggregators = defaultdict(list)
|
|
27
126
|
sorted = False
|
|
28
127
|
for arg in request_arg:
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
128
|
+
_split = arg.split("=")
|
|
129
|
+
# filters are expected to have the syntax `<column_name>__<operator>=<value>`
|
|
130
|
+
if len(_split) == 2:
|
|
131
|
+
_filter, _sorted = add_filter(*_split)
|
|
132
|
+
if _filter:
|
|
133
|
+
sorted = sorted or _sorted
|
|
134
|
+
sql_query.append(_filter)
|
|
135
|
+
# aggregators are expected to have the syntax `<column_name>__<operator>`
|
|
136
|
+
elif len(_split) == 1:
|
|
137
|
+
column, operator = add_aggregator(_split[0])
|
|
138
|
+
if column:
|
|
139
|
+
aggregators[operator].append(column)
|
|
140
|
+
else:
|
|
141
|
+
raise ValueError(f"argument '{arg}' could not be parsed")
|
|
142
|
+
if aggregators:
|
|
143
|
+
if resource_id and not is_aggregation_allowed(resource_id):
|
|
144
|
+
raise PermissionError(
|
|
145
|
+
f"Aggregation parameters `{'`, `'.join(aggregators.keys())}` "
|
|
146
|
+
f"are not allowed for resource '{resource_id}'"
|
|
147
|
+
)
|
|
148
|
+
agg_query = "select="
|
|
149
|
+
for operator in aggregators:
|
|
150
|
+
if operator == "groupby":
|
|
151
|
+
agg_query += f"{','.join(aggregators[operator])},"
|
|
152
|
+
else:
|
|
153
|
+
for column in aggregators[operator]:
|
|
154
|
+
# aggregated columns are named `<column_name>__<operator>`
|
|
155
|
+
# we pop the heading and trailing " that were added upstream
|
|
156
|
+
# and put them around the new column name
|
|
157
|
+
agg_query += f'"{column[1:-1]}__{operator}":{column}.{operator}(),'
|
|
158
|
+
# we pop the trailing comma (it's always there, by construction)
|
|
159
|
+
sql_query.append(agg_query[:-1])
|
|
59
160
|
if page_size:
|
|
60
161
|
sql_query.append(f"limit={page_size}")
|
|
61
162
|
if offset >= 1:
|
|
62
163
|
sql_query.append(f"offset={offset}")
|
|
63
|
-
if not sorted:
|
|
164
|
+
if not sorted and not aggregators:
|
|
64
165
|
sql_query.append("order=__id.asc")
|
|
65
166
|
return "&".join(sql_query)
|
|
66
167
|
|
|
67
168
|
|
|
169
|
+
def get_column_and_operator(argument: str) -> tuple[str, str]:
|
|
170
|
+
*column_split, comparator = argument.split("__")
|
|
171
|
+
normalized_comparator = comparator.lower()
|
|
172
|
+
# handling headers with "__" and special characters
|
|
173
|
+
# we're escaping the " because they are the encapsulators of the label
|
|
174
|
+
column = '"{}"'.format("__".join(column_split).replace('"', '\\"'))
|
|
175
|
+
return column, normalized_comparator
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def add_filter(argument: str, value: str) -> tuple[Optional[str], bool]:
|
|
179
|
+
if argument in ["page", "page_size"]: # processed differently
|
|
180
|
+
return None, False
|
|
181
|
+
if "__" in argument:
|
|
182
|
+
column, normalized_comparator = get_column_and_operator(argument)
|
|
183
|
+
if normalized_comparator == "sort":
|
|
184
|
+
q = f"order={column}.{value}"
|
|
185
|
+
return q, True
|
|
186
|
+
elif normalized_comparator == "exact":
|
|
187
|
+
return f"{column}=eq.{value}", False
|
|
188
|
+
elif normalized_comparator == "differs":
|
|
189
|
+
return f"{column}=neq.{value}", False
|
|
190
|
+
elif normalized_comparator == "contains":
|
|
191
|
+
return f"{column}=ilike.*{value}*", False
|
|
192
|
+
elif normalized_comparator == "in":
|
|
193
|
+
return f"{column}=in.({value})", False
|
|
194
|
+
elif normalized_comparator == "less":
|
|
195
|
+
return f"{column}=lte.{value}", False
|
|
196
|
+
elif normalized_comparator == "greater":
|
|
197
|
+
return f"{column}=gte.{value}", False
|
|
198
|
+
elif normalized_comparator == "strictly_less":
|
|
199
|
+
return f"{column}=lt.{value}", False
|
|
200
|
+
elif normalized_comparator == "strictly_greater":
|
|
201
|
+
return f"{column}=gt.{value}", False
|
|
202
|
+
raise ValueError(f"argument '{argument}={value}' could not be parsed")
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def add_aggregator(argument: str) -> tuple[str, str]:
|
|
206
|
+
operator = None
|
|
207
|
+
if "__" in argument:
|
|
208
|
+
column, operator = get_column_and_operator(argument)
|
|
209
|
+
if operator in ["avg", "count", "max", "min", "sum", "groupby"]:
|
|
210
|
+
return column, operator
|
|
211
|
+
raise ValueError(f"argument '{argument}' could not be parsed")
|
|
212
|
+
|
|
213
|
+
|
|
68
214
|
def process_total(res: Response) -> int:
|
|
69
215
|
# the Content-Range looks like this: '0-49/21777'
|
|
70
216
|
# see https://docs.postgrest.org/en/stable/references/api/pagination_count.html
|
|
@@ -73,25 +219,25 @@ def process_total(res: Response) -> int:
|
|
|
73
219
|
return int(str_total)
|
|
74
220
|
|
|
75
221
|
|
|
76
|
-
def external_url(url):
|
|
222
|
+
def external_url(url) -> str:
|
|
77
223
|
return f"{config.SCHEME}://{config.SERVER_NAME}{url}"
|
|
78
224
|
|
|
79
225
|
|
|
80
|
-
def build_link_with_page(request: Request, query_string: str, page: int, page_size: int):
|
|
226
|
+
def build_link_with_page(request: Request, query_string: str, page: int, page_size: int) -> str:
|
|
81
227
|
q = [string for string in query_string if not string.startswith("page")]
|
|
82
228
|
q.extend([f"page={page}", f"page_size={page_size}"])
|
|
83
229
|
rebuilt_q = "&".join(q)
|
|
84
230
|
return external_url(f"{request.path}?{rebuilt_q}")
|
|
85
231
|
|
|
86
232
|
|
|
87
|
-
def url_for(request: Request, route: str, *args, **kwargs):
|
|
233
|
+
def url_for(request: Request, route: str, *args, **kwargs) -> str:
|
|
88
234
|
router = request.app.router
|
|
89
235
|
if kwargs.pop("_external", None):
|
|
90
236
|
return external_url(router[route].url_for(**kwargs))
|
|
91
237
|
return router[route].url_for(**kwargs)
|
|
92
238
|
|
|
93
239
|
|
|
94
|
-
def swagger_parameters(resource_columns):
|
|
240
|
+
def swagger_parameters(resource_columns: dict, resource_id: str) -> list:
|
|
95
241
|
parameters_list = [
|
|
96
242
|
{
|
|
97
243
|
"name": "page",
|
|
@@ -112,42 +258,23 @@ def swagger_parameters(resource_columns):
|
|
|
112
258
|
# see metier_to_python here: https://github.com/datagouv/csv-detective/blob/master/csv_detective/explore_csv.py
|
|
113
259
|
# see cast for db here: https://github.com/datagouv/hydra/blob/main/udata_hydra/analysis/csv.py
|
|
114
260
|
for key, value in resource_columns.items():
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
"in": "query",
|
|
133
|
-
"description": f"Differs from in column: {key}",
|
|
134
|
-
"required": False,
|
|
135
|
-
"schema": {"type": "string"},
|
|
136
|
-
},
|
|
137
|
-
]
|
|
138
|
-
)
|
|
139
|
-
if "in" in TYPE_POSSIBILITIES[value["python_type"]]:
|
|
140
|
-
parameters_list.extend(
|
|
141
|
-
[
|
|
142
|
-
{
|
|
143
|
-
"name": f"{key}__in=value1,value2,...",
|
|
144
|
-
"in": "query",
|
|
145
|
-
"description": f"Value in list in column: {key}",
|
|
146
|
-
"required": False,
|
|
147
|
-
"schema": {"type": "string"},
|
|
148
|
-
},
|
|
149
|
-
]
|
|
150
|
-
)
|
|
261
|
+
for op in OPERATORS_DESCRIPTIONS:
|
|
262
|
+
if not is_aggregation_allowed(resource_id) and OPERATORS_DESCRIPTIONS[op].get(
|
|
263
|
+
"is_aggregator"
|
|
264
|
+
):
|
|
265
|
+
continue
|
|
266
|
+
if op in TYPE_POSSIBILITIES[value["python_type"]]:
|
|
267
|
+
parameters_list.extend(
|
|
268
|
+
[
|
|
269
|
+
{
|
|
270
|
+
"name": OPERATORS_DESCRIPTIONS[op]["name"].format(key),
|
|
271
|
+
"in": "query",
|
|
272
|
+
"description": OPERATORS_DESCRIPTIONS[op]["description"].format(key),
|
|
273
|
+
"required": False,
|
|
274
|
+
"schema": {"type": "string"},
|
|
275
|
+
},
|
|
276
|
+
]
|
|
277
|
+
)
|
|
151
278
|
if "sort" in TYPE_POSSIBILITIES[value["python_type"]]:
|
|
152
279
|
parameters_list.extend(
|
|
153
280
|
[
|
|
@@ -167,18 +294,6 @@ def swagger_parameters(resource_columns):
|
|
|
167
294
|
},
|
|
168
295
|
]
|
|
169
296
|
)
|
|
170
|
-
if "contains" in TYPE_POSSIBILITIES[value["python_type"]]:
|
|
171
|
-
parameters_list.extend(
|
|
172
|
-
[
|
|
173
|
-
{
|
|
174
|
-
"name": f"{key}__contains=value",
|
|
175
|
-
"in": "query",
|
|
176
|
-
"description": f"String contains in column: {key}",
|
|
177
|
-
"required": False,
|
|
178
|
-
"schema": {"type": "string"},
|
|
179
|
-
},
|
|
180
|
-
]
|
|
181
|
-
)
|
|
182
297
|
if "compare" in TYPE_POSSIBILITIES[value["python_type"]]:
|
|
183
298
|
parameters_list.extend(
|
|
184
299
|
[
|
|
@@ -215,7 +330,7 @@ def swagger_parameters(resource_columns):
|
|
|
215
330
|
return parameters_list
|
|
216
331
|
|
|
217
332
|
|
|
218
|
-
def swagger_component(resource_columns):
|
|
333
|
+
def swagger_component(resource_columns: dict) -> dict:
|
|
219
334
|
resource_prop_dict = {}
|
|
220
335
|
for key, value in resource_columns.items():
|
|
221
336
|
type = MAP_TYPES.get(value["python_type"], "string")
|
|
@@ -262,8 +377,8 @@ def swagger_component(resource_columns):
|
|
|
262
377
|
return component_dict
|
|
263
378
|
|
|
264
379
|
|
|
265
|
-
def build_swagger_file(resource_columns, rid):
|
|
266
|
-
parameters_list = swagger_parameters(resource_columns)
|
|
380
|
+
def build_swagger_file(resource_columns: dict, rid: str) -> str:
|
|
381
|
+
parameters_list = swagger_parameters(resource_columns, rid)
|
|
267
382
|
component_dict = swagger_component(resource_columns)
|
|
268
383
|
swagger_dict = {
|
|
269
384
|
"openapi": "3.0.3",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "udata-hydra-csvapi"
|
|
3
|
-
version = "0.2.0.dev0"
|
|
3
|
+
version = "0.2.2"
|
|
4
4
|
description = "API for CSV converted by udata-hydra"
|
|
5
5
|
authors = ["data.gouv.fr <opendatateam@data.gouv.fr>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -17,9 +17,10 @@ sentry-sdk = "^2.13.0"
|
|
|
17
17
|
[tool.poetry.group.dev.dependencies]
|
|
18
18
|
aiohttp-devtools = "^1.0.post0"
|
|
19
19
|
aioresponses = "^0.7.4"
|
|
20
|
-
|
|
20
|
+
bumpx = "^0.3.10"
|
|
21
21
|
pytest = "^7.2.1"
|
|
22
22
|
pytest-asyncio = "^0.20.3"
|
|
23
|
+
pytest-mock = "^3.14.0"
|
|
23
24
|
ruff = "^0.6.5"
|
|
24
25
|
|
|
25
26
|
[tool.ruff]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|