welearn-database 1.0.0.dev1__tar.gz → 1.0.0.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/PKG-INFO +1 -1
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/pyproject.toml +1 -1
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/enumeration.py +2 -13
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/corpus_related.py +105 -4
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/document_related.py +36 -113
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/LICENSE +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/README.md +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/__init__.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/README +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/env.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/script.py.mako +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/16ff997426d3_remove_error_retrieval_unique_constraint.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/4c7161819e5a_grafana_views.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/4fcbfb7f3145_added_api_key_management_table.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/5d82613c9aca_context_document.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/821173cf9c5d_initial_migration.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/89920abb7ff8_add_category.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/a50a1db3ca2a_add_used_since_column_for_embeddings.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/b031206324b7_agent_related.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/versions/e354666f951d_inferred_user.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/__init__.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/__init__.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/agent_related.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/grafana.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/user_related.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/database_utils.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/exceptions.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/modules/__init__.py +0 -0
- {welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/modules/text_cleaning.py +0 -0
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/enumeration.py
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from enum import Enum,
|
|
1
|
+
from enum import Enum, auto, StrEnum
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class Step(Enum):
|
|
@@ -13,11 +13,9 @@ class Step(Enum):
|
|
|
13
13
|
KEPT_FOR_TRACE = "kept_for_trace"
|
|
14
14
|
DOCUMENT_IS_IRRETRIEVABLE = "document_is_irretrievable"
|
|
15
15
|
|
|
16
|
-
|
|
17
16
|
class Counter(Enum):
|
|
18
17
|
HIT = auto()
|
|
19
18
|
|
|
20
|
-
|
|
21
19
|
class DbSchemaEnum(StrEnum):
|
|
22
20
|
GRAFANA = auto()
|
|
23
21
|
AGENT_RELATED = auto()
|
|
@@ -25,16 +23,7 @@ class DbSchemaEnum(StrEnum):
|
|
|
25
23
|
DOCUMENT_RELATED = auto()
|
|
26
24
|
USER_RELATED = auto()
|
|
27
25
|
|
|
28
|
-
|
|
29
26
|
class ContextType(StrEnum):
|
|
30
27
|
INTRODUCTION = auto()
|
|
31
28
|
TARGET = auto()
|
|
32
|
-
SUBJECT = auto()
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class ExternalIdType(StrEnum):
|
|
36
|
-
DOI = auto()
|
|
37
|
-
API_ID = auto()
|
|
38
|
-
HANDLE = auto()
|
|
39
|
-
SLUG = auto()
|
|
40
|
-
QID = auto()
|
|
29
|
+
SUBJECT = auto()
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from uuid import UUID
|
|
3
3
|
|
|
4
|
-
from sqlalchemy import
|
|
4
|
+
from sqlalchemy import ForeignKey, UniqueConstraint, func, types
|
|
5
5
|
from sqlalchemy.dialects.postgresql import TIMESTAMP
|
|
6
|
-
from sqlalchemy.orm import mapped_column,
|
|
6
|
+
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
7
7
|
|
|
8
|
-
from . import Base
|
|
9
8
|
from welearn_database.data.enumeration import DbSchemaEnum
|
|
10
9
|
|
|
10
|
+
from . import Base
|
|
11
|
+
|
|
11
12
|
schema_name = DbSchemaEnum.CORPUS_RELATED.value
|
|
12
13
|
|
|
14
|
+
|
|
13
15
|
class Corpus(Base):
|
|
14
16
|
__tablename__ = "corpus"
|
|
15
17
|
__table_args__ = {"schema": schema_name}
|
|
@@ -26,6 +28,7 @@ class Corpus(Base):
|
|
|
26
28
|
ForeignKey(f"{schema_name}.category.id"),
|
|
27
29
|
)
|
|
28
30
|
|
|
31
|
+
|
|
29
32
|
class Category(Base):
|
|
30
33
|
__tablename__ = "category"
|
|
31
34
|
__table_args__ = {"schema": schema_name}
|
|
@@ -97,11 +100,109 @@ class NClassifierModel(Base):
|
|
|
97
100
|
server_default="NOW()",
|
|
98
101
|
)
|
|
99
102
|
|
|
103
|
+
|
|
100
104
|
class CorpusNameEmbeddingModelLang(Base):
|
|
101
105
|
__tablename__ = "corpus_name_embedding_model_lang"
|
|
102
106
|
__table_args__ = {"schema": schema_name}
|
|
103
107
|
__read_only__ = True
|
|
104
|
-
source_name
|
|
108
|
+
source_name: Mapped[str] = mapped_column(primary_key=True)
|
|
105
109
|
title: Mapped[str]
|
|
106
110
|
lang: Mapped[str]
|
|
107
111
|
|
|
112
|
+
|
|
113
|
+
class CorpusEmbeddingModel(Base):
|
|
114
|
+
__tablename__ = "corpus_embedding_model"
|
|
115
|
+
__table_args__ = (
|
|
116
|
+
UniqueConstraint(
|
|
117
|
+
"corpus_id",
|
|
118
|
+
"embedding_model_id",
|
|
119
|
+
name="unique_corpus_embedding_association",
|
|
120
|
+
),
|
|
121
|
+
{"schema": schema_name},
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
corpus_id = mapped_column(
|
|
125
|
+
types.Uuid,
|
|
126
|
+
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.corpus.id"),
|
|
127
|
+
primary_key=True,
|
|
128
|
+
)
|
|
129
|
+
embedding_model_id = mapped_column(
|
|
130
|
+
types.Uuid,
|
|
131
|
+
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.embedding_model.id"),
|
|
132
|
+
primary_key=True,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
used_since: Mapped[datetime] = mapped_column(
|
|
136
|
+
TIMESTAMP(timezone=False),
|
|
137
|
+
nullable=False,
|
|
138
|
+
default=func.localtimestamp(),
|
|
139
|
+
server_default="NOW()",
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
embedding_model: Mapped["EmbeddingModel"] = relationship()
|
|
143
|
+
corpus: Mapped["Corpus"] = relationship()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class CorpusNClassifierModel(Base):
|
|
147
|
+
__tablename__ = "corpus_n_classifier_model"
|
|
148
|
+
__table_args__ = (
|
|
149
|
+
UniqueConstraint(
|
|
150
|
+
"corpus_id",
|
|
151
|
+
"n_classifier_model_id",
|
|
152
|
+
name="unique_corpus_n_classifier_association",
|
|
153
|
+
),
|
|
154
|
+
{"schema": schema_name},
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
corpus_id = mapped_column(
|
|
158
|
+
types.Uuid,
|
|
159
|
+
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.corpus.id"),
|
|
160
|
+
primary_key=True,
|
|
161
|
+
)
|
|
162
|
+
n_classifier_model_id = mapped_column(
|
|
163
|
+
types.Uuid,
|
|
164
|
+
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.n_classifier_model.id"),
|
|
165
|
+
primary_key=True,
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
used_since: Mapped[datetime] = mapped_column(
|
|
169
|
+
TIMESTAMP(timezone=False),
|
|
170
|
+
nullable=False,
|
|
171
|
+
default=func.localtimestamp(),
|
|
172
|
+
server_default="NOW()",
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
n_classifier_model: Mapped["NClassifierModel"] = relationship()
|
|
176
|
+
corpus: Mapped["Corpus"] = relationship()
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class CorpusBiClassifierModel(Base):
|
|
180
|
+
__tablename__ = "corpus_bi_classifier_model"
|
|
181
|
+
__table_args__ = (
|
|
182
|
+
UniqueConstraint(
|
|
183
|
+
"corpus_id",
|
|
184
|
+
"bi_classifier_model_id",
|
|
185
|
+
name="unique_corpus_bi_classifier_association",
|
|
186
|
+
),
|
|
187
|
+
{"schema": schema_name},
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
corpus_id = mapped_column(
|
|
191
|
+
types.Uuid,
|
|
192
|
+
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.corpus.id"),
|
|
193
|
+
primary_key=True,
|
|
194
|
+
)
|
|
195
|
+
bi_classifier_model_id = mapped_column(
|
|
196
|
+
types.Uuid,
|
|
197
|
+
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.bi_classifier_model.id"),
|
|
198
|
+
primary_key=True,
|
|
199
|
+
)
|
|
200
|
+
used_since: Mapped[datetime] = mapped_column(
|
|
201
|
+
TIMESTAMP(timezone=False),
|
|
202
|
+
nullable=False,
|
|
203
|
+
default=func.localtimestamp(),
|
|
204
|
+
server_default="NOW()",
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
bi_classifier_model: Mapped["BiClassifierModel"] = relationship()
|
|
208
|
+
corpus: Mapped["Corpus"] = relationship()
|
|
@@ -8,13 +8,7 @@ from sqlalchemy import ForeignKey, Integer, LargeBinary, UniqueConstraint, func,
|
|
|
8
8
|
from sqlalchemy.dialects.postgresql import ARRAY, ENUM, TIMESTAMP
|
|
9
9
|
from sqlalchemy.orm import Mapped, mapped_column, relationship, validates
|
|
10
10
|
|
|
11
|
-
from welearn_database.data.enumeration import
|
|
12
|
-
ContextType,
|
|
13
|
-
Counter,
|
|
14
|
-
DbSchemaEnum,
|
|
15
|
-
ExternalIdType,
|
|
16
|
-
Step,
|
|
17
|
-
)
|
|
11
|
+
from welearn_database.data.enumeration import ContextType, Counter, DbSchemaEnum, Step
|
|
18
12
|
from welearn_database.data.models import Base
|
|
19
13
|
from welearn_database.data.models.corpus_related import (
|
|
20
14
|
BiClassifierModel,
|
|
@@ -54,14 +48,6 @@ class WeLearnDocument(Base):
|
|
|
54
48
|
id: Mapped[UUID] = mapped_column(
|
|
55
49
|
types.Uuid, primary_key=True, nullable=False, server_default="gen_random_uuid()"
|
|
56
50
|
)
|
|
57
|
-
external_id: Mapped[str | None]
|
|
58
|
-
external_id_type: Mapped[str | None] = mapped_column(
|
|
59
|
-
ENUM(
|
|
60
|
-
*(e.value.lower() for e in ExternalIdType),
|
|
61
|
-
name="external_id_type",
|
|
62
|
-
schema="document_related",
|
|
63
|
-
),
|
|
64
|
-
)
|
|
65
51
|
url: Mapped[str] = mapped_column(nullable=False)
|
|
66
52
|
title: Mapped[str | None]
|
|
67
53
|
lang: Mapped[str | None]
|
|
@@ -264,6 +250,41 @@ class ErrorRetrieval(Base):
|
|
|
264
250
|
document: Mapped["WeLearnDocument"] = relationship()
|
|
265
251
|
|
|
266
252
|
|
|
253
|
+
class ErrorDataQuality(Base):
|
|
254
|
+
__tablename__ = "error_data_quality"
|
|
255
|
+
__table_args__ = ({"schema": schema_name},)
|
|
256
|
+
|
|
257
|
+
id: Mapped[UUID] = mapped_column(
|
|
258
|
+
types.Uuid, primary_key=True, nullable=False, server_default="gen_random_uuid()"
|
|
259
|
+
)
|
|
260
|
+
document_id: Mapped[UUID] = mapped_column(
|
|
261
|
+
types.Uuid,
|
|
262
|
+
ForeignKey(
|
|
263
|
+
f"{DbSchemaEnum.DOCUMENT_RELATED.value}.welearn_document.id",
|
|
264
|
+
name="error_data_quality_document_id_fkey",
|
|
265
|
+
),
|
|
266
|
+
nullable=False,
|
|
267
|
+
)
|
|
268
|
+
slice_id: Mapped[UUID] = mapped_column(
|
|
269
|
+
types.Uuid,
|
|
270
|
+
ForeignKey(
|
|
271
|
+
f"{DbSchemaEnum.DOCUMENT_RELATED.value}.document_slice.id",
|
|
272
|
+
name="error_data_quality_slice_id_fkey",
|
|
273
|
+
),
|
|
274
|
+
nullable=True,
|
|
275
|
+
)
|
|
276
|
+
error_raiser: Mapped[str]
|
|
277
|
+
error_info: Mapped[str]
|
|
278
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
279
|
+
TIMESTAMP(timezone=False),
|
|
280
|
+
nullable=False,
|
|
281
|
+
default=func.localtimestamp(),
|
|
282
|
+
server_default="NOW()",
|
|
283
|
+
)
|
|
284
|
+
document: Mapped["WeLearnDocument"] = relationship()
|
|
285
|
+
slice: Mapped["DocumentSlice"] = relationship()
|
|
286
|
+
|
|
287
|
+
|
|
267
288
|
class DocumentSlice(Base):
|
|
268
289
|
__tablename__ = "document_slice"
|
|
269
290
|
__table_args__ = {"schema": schema_name}
|
|
@@ -327,104 +348,6 @@ class AnalyticCounter(Base):
|
|
|
327
348
|
document: Mapped["WeLearnDocument"] = relationship()
|
|
328
349
|
|
|
329
350
|
|
|
330
|
-
class CorpusEmbeddingModel(Base):
|
|
331
|
-
__tablename__ = "corpus_embedding_model"
|
|
332
|
-
__table_args__ = (
|
|
333
|
-
UniqueConstraint(
|
|
334
|
-
"corpus_id",
|
|
335
|
-
"embedding_model_id",
|
|
336
|
-
name="unique_corpus_embedding_association",
|
|
337
|
-
),
|
|
338
|
-
{"schema": schema_name},
|
|
339
|
-
)
|
|
340
|
-
|
|
341
|
-
corpus_id = mapped_column(
|
|
342
|
-
types.Uuid,
|
|
343
|
-
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.corpus.id"),
|
|
344
|
-
primary_key=True,
|
|
345
|
-
)
|
|
346
|
-
embedding_model_id = mapped_column(
|
|
347
|
-
types.Uuid,
|
|
348
|
-
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.embedding_model.id"),
|
|
349
|
-
primary_key=True,
|
|
350
|
-
)
|
|
351
|
-
|
|
352
|
-
used_since: Mapped[datetime] = mapped_column(
|
|
353
|
-
TIMESTAMP(timezone=False),
|
|
354
|
-
nullable=False,
|
|
355
|
-
default=func.localtimestamp(),
|
|
356
|
-
server_default="NOW()",
|
|
357
|
-
)
|
|
358
|
-
|
|
359
|
-
embedding_model: Mapped["EmbeddingModel"] = relationship()
|
|
360
|
-
corpus: Mapped["Corpus"] = relationship()
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
class CorpusNClassifierModel(Base):
|
|
364
|
-
__tablename__ = "corpus_n_classifier_model"
|
|
365
|
-
__table_args__ = (
|
|
366
|
-
UniqueConstraint(
|
|
367
|
-
"corpus_id",
|
|
368
|
-
"n_classifier_model_id",
|
|
369
|
-
name="unique_corpus_n_classifier_association",
|
|
370
|
-
),
|
|
371
|
-
{"schema": schema_name},
|
|
372
|
-
)
|
|
373
|
-
|
|
374
|
-
corpus_id = mapped_column(
|
|
375
|
-
types.Uuid,
|
|
376
|
-
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.corpus.id"),
|
|
377
|
-
primary_key=True,
|
|
378
|
-
)
|
|
379
|
-
n_classifier_model_id = mapped_column(
|
|
380
|
-
types.Uuid,
|
|
381
|
-
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.n_classifier_model.id"),
|
|
382
|
-
primary_key=True,
|
|
383
|
-
)
|
|
384
|
-
|
|
385
|
-
used_since: Mapped[datetime] = mapped_column(
|
|
386
|
-
TIMESTAMP(timezone=False),
|
|
387
|
-
nullable=False,
|
|
388
|
-
default=func.localtimestamp(),
|
|
389
|
-
server_default="NOW()",
|
|
390
|
-
)
|
|
391
|
-
|
|
392
|
-
n_classifier_model: Mapped["NClassifierModel"] = relationship()
|
|
393
|
-
corpus: Mapped["Corpus"] = relationship()
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
class CorpusBiClassifierModel(Base):
|
|
397
|
-
__tablename__ = "corpus_bi_classifier_model"
|
|
398
|
-
__table_args__ = (
|
|
399
|
-
UniqueConstraint(
|
|
400
|
-
"corpus_id",
|
|
401
|
-
"bi_classifier_model_id",
|
|
402
|
-
name="unique_corpus_bi_classifier_association",
|
|
403
|
-
),
|
|
404
|
-
{"schema": schema_name},
|
|
405
|
-
)
|
|
406
|
-
|
|
407
|
-
corpus_id = mapped_column(
|
|
408
|
-
types.Uuid,
|
|
409
|
-
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.corpus.id"),
|
|
410
|
-
primary_key=True,
|
|
411
|
-
)
|
|
412
|
-
bi_classifier_model_id = mapped_column(
|
|
413
|
-
types.Uuid,
|
|
414
|
-
ForeignKey(f"{DbSchemaEnum.CORPUS_RELATED.value}.bi_classifier_model.id"),
|
|
415
|
-
primary_key=True,
|
|
416
|
-
)
|
|
417
|
-
used_since: Mapped[datetime] = mapped_column(
|
|
418
|
-
TIMESTAMP(timezone=False),
|
|
419
|
-
nullable=False,
|
|
420
|
-
default=func.localtimestamp(),
|
|
421
|
-
server_default="NOW()",
|
|
422
|
-
)
|
|
423
|
-
|
|
424
|
-
bi_classifier_model: Mapped["BiClassifierModel"] = relationship()
|
|
425
|
-
corpus: Mapped["Corpus"] = relationship()
|
|
426
|
-
|
|
427
|
-
|
|
428
351
|
class Sdg(Base):
|
|
429
352
|
__tablename__ = "sdg"
|
|
430
353
|
__table_args__ = {"schema": schema_name}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/alembic/script.py.mako
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/__init__.py
RENAMED
|
File without changes
|
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/data/models/grafana.py
RENAMED
|
File without changes
|
|
File without changes
|
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/database_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{welearn_database-1.0.0.dev1 → welearn_database-1.0.0.dev2}/welearn_database/modules/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|