welearn-database 1.3.0.dev2__tar.gz → 1.4.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/PKG-INFO +1 -1
  2. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/pyproject.toml +1 -1
  3. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/env.py +9 -0
  4. welearn_database-1.4.0.dev0/welearn_database/alembic/versions/2ad4895b2674_data_collection.py +100 -0
  5. welearn_database-1.4.0.dev0/welearn_database/alembic/versions/9b4f1da0c1f2_data_collection_for_focus_group.py +76 -0
  6. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/enumeration.py +5 -0
  7. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/models/user_related.py +53 -2
  8. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/LICENSE +0 -0
  9. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/README.md +0 -0
  10. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/__init__.py +0 -0
  11. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/README +0 -0
  12. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/script.py.mako +0 -0
  13. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/068312e7800c_add_referrer_origin_column_to_user_and_.py +0 -0
  14. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/0e0bc0fca384_doc_qty_per_source.py +0 -0
  15. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/16ff997426d3_remove_error_retrieval_unique_constraint.py +0 -0
  16. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/4c7161819e5a_grafana_views.py +0 -0
  17. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/4f5a188dd614_add_main_url_column.py +0 -0
  18. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/4fcbfb7f3145_added_api_key_management_table.py +0 -0
  19. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/5d82613c9aca_context_document.py +0 -0
  20. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/821173cf9c5d_initial_migration.py +0 -0
  21. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/84c42739c17b_create_table_errordataquality.py +0 -0
  22. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/89920abb7ff8_add_category.py +0 -0
  23. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/96bba9e4842a_merge_external_id_error_quality.py +0 -0
  24. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/a50a1db3ca2a_add_used_since_column_for_embeddings.py +0 -0
  25. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/b031206324b7_agent_related.py +0 -0
  26. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/ccdbd708c997_create_column_external_id.py +0 -0
  27. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/alembic/versions/e354666f951d_inferred_user.py +0 -0
  28. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/__init__.py +0 -0
  29. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/models/__init__.py +0 -0
  30. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/models/agent_related.py +0 -0
  31. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/models/corpus_related.py +0 -0
  32. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/models/document_related.py +0 -0
  33. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/data/models/grafana.py +0 -0
  34. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/database_utils.py +0 -0
  35. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/exceptions.py +0 -0
  36. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/modules/__init__.py +0 -0
  37. {welearn_database-1.3.0.dev2 → welearn_database-1.4.0.dev0}/welearn_database/modules/text_cleaning.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: welearn-database
3
- Version: 1.3.0.dev2
3
+ Version: 1.4.0.dev0
4
4
  Summary: All stuff related to relationnal database from the WeLearn project
5
5
  License: cc-by-sa-nc
6
6
  Author: Théo
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "welearn-database"
3
- version = "1.3.0.dev2"
3
+ version = "1.4.0.dev0"
4
4
  description = "All stuff related to relationnal database from the WeLearn project"
5
5
  authors = [
6
6
  {name = "Théo",email = "theo.nardin@cri-paris.org"}
@@ -1,3 +1,4 @@
1
+ import os
1
2
  from logging.config import fileConfig
2
3
 
3
4
  from alembic import context
@@ -75,6 +76,14 @@ def run_migrations_online() -> None:
75
76
  load_dotenv()
76
77
  connectable = create_sqlalchemy_engine()
77
78
 
79
# Announce which database the migrations are about to run against, based on
# the PG_HOST environment variable. An unset PG_HOST is read as "" so that it
# falls through to the "unknown" branch instead of raising AttributeError on
# None; the original second lookup was also missing the comma between the
# variable name and its default.
pg_host = os.getenv("PG_HOST", "").lower()
if "prod" in pg_host:
    print("Connecting to production database for migrations!")
    # Block until the operator explicitly confirms before touching production.
    input("Press Enter to continue...")
elif "dev" in pg_host:
    print("Connecting to development database for migrations!")
else:
    print("Connecting to unknown database for migrations!")
86
+
78
87
  with connectable.connect() as connection:
79
88
  context.configure(
80
89
  connection=connection,
@@ -0,0 +1,100 @@
1
+ """data collection
2
+
3
+ Revision ID: 2ad4895b2674
4
+ Revises: 068312e7800c
5
+ Create Date: 2026-01-16 15:55:41.447852
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+ from sqlalchemy.dialects import postgresql
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "2ad4895b2674"
17
+ down_revision: Union[str, None] = "068312e7800c"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def upgrade() -> None:
    """Apply revision 2ad4895b2674 ("data collection").

    Working in the ``user_related`` schema, this creates the
    ``data_collection_campaign_management`` table, adds ``role``,
    ``inferred_user_id`` and ``conversation_id`` to ``chat_message``, replaces
    the ``user_id`` foreign key of ``chat_message`` with one on
    ``inferred_user_id``, and adds ``is_clicked`` to ``returned_document``.
    """
    schema = "user_related"
    chat_table = "chat_message"

    campaign_columns = (
        sa.Column(
            "id", sa.Uuid(), server_default=sa.func.gen_random_uuid(), nullable=False
        ),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.Column("end_at", postgresql.TIMESTAMP(), nullable=False),
        sa.Column(
            "created_at", postgresql.TIMESTAMP(), server_default="NOW()", nullable=False
        ),
    )
    op.create_table(
        "data_collection_campaign_management",
        *campaign_columns,
        sa.PrimaryKeyConstraint("id"),
        schema=schema,
    )

    # NOTE(review): these columns are NOT NULL and carry no server default;
    # adding them presumably fails if chat_message already holds rows --
    # confirm the table is empty or backfilled before running this revision.
    new_chat_columns = (
        ("role", sa.String()),
        ("inferred_user_id", sa.Uuid()),
        ("conversation_id", sa.Uuid()),
    )
    for column_name, column_type in new_chat_columns:
        op.add_column(
            chat_table,
            sa.Column(column_name, column_type, nullable=False),
            schema=schema,
        )

    # Re-point chat_message from user_id to inferred_user.id: drop the old
    # foreign key, create the new one, then remove the old column.
    op.drop_constraint(
        op.f("message_user_id_fkey"),
        chat_table,
        schema=schema,
        type_="foreignkey",
    )
    op.create_foreign_key(
        "message_inferred_user_id_fkey",
        chat_table,
        "inferred_user",
        ["inferred_user_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
    )
    op.drop_column(chat_table, "user_id", schema=schema)

    # NOTE(review): same NOT NULL / no server default caveat as above.
    op.add_column(
        "returned_document",
        sa.Column("is_clicked", sa.Boolean(), nullable=False),
        schema=schema,
    )
73
+
74
+
75
def downgrade() -> None:
    """Revert revision 2ad4895b2674 ("data collection").

    Removes ``is_clicked`` from ``returned_document``, restores the
    ``user_id`` column and its foreign key to ``user_profile.id`` on
    ``chat_message``, drops the columns the upgrade added to ``chat_message``
    and finally drops ``data_collection_campaign_management``. Everything
    lives in the ``user_related`` schema.
    """
    schema = "user_related"
    chat_table = "chat_message"

    op.drop_column("returned_document", "is_clicked", schema=schema)

    # NOTE(review): user_id comes back as NOT NULL with no default, so this
    # presumably fails if chat_message holds rows -- confirm before relying
    # on the downgrade path.
    op.add_column(
        chat_table,
        sa.Column("user_id", sa.Uuid(), nullable=False),
        schema=schema,
    )

    # Swap the foreign key back from inferred_user_id to user_id.
    op.drop_constraint(
        "message_inferred_user_id_fkey",
        chat_table,
        schema=schema,
        type_="foreignkey",
    )
    op.create_foreign_key(
        op.f("message_user_id_fkey"),
        chat_table,
        "user_profile",
        ["user_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
    )

    # Drop the columns in the reverse of the order the upgrade added them.
    for column_name in ("conversation_id", "inferred_user_id", "role"):
        op.drop_column(chat_table, column_name, schema=schema)

    op.drop_table("data_collection_campaign_management", schema=schema)
@@ -0,0 +1,76 @@
1
+ """data collection for focus group
2
+
3
+ Revision ID: 9b4f1da0c1f2
4
+ Revises: 2ad4895b2674
5
+ Create Date: 2026-02-23 18:11:55.857517
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+ from sqlalchemy.dialects import postgresql
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "9b4f1da0c1f2"
17
+ down_revision: Union[str, None] = "2ad4895b2674"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def upgrade() -> None:
    """Apply revision 9b4f1da0c1f2 ("data collection for focus group").

    Adds ``is_retrieved_by_user`` (NOT NULL, server default ``False``) and the
    nullable ``original_feature_name`` to ``user_related.chat_message``, then
    creates ``user_related.filter_used_in_query`` with a foreign key to
    ``chat_message.id`` and a ``filter_type`` enum column.
    """
    schema = "user_related"

    retrieved_flag = sa.Column(
        "is_retrieved_by_user",
        sa.Boolean(),
        nullable=False,
        default=False,
        server_default="False",
    )
    op.add_column("chat_message", retrieved_flag, schema=schema)

    feature_name = sa.Column("original_feature_name", sa.String(), nullable=True)
    op.add_column("chat_message", feature_name, schema=schema)

    # NOTE(review): the enum type is presumably created implicitly together
    # with the table; the matching downgrade drops it explicitly.
    filter_type_enum = postgresql.ENUM(
        "sdg",
        "source",
        name="filter_type",
        schema=schema,
    )
    message_fk = sa.ForeignKeyConstraint(
        ["message_id"],
        ["user_related.chat_message.id"],
        name="filter_used_in_query_message_id_fkey",
    )
    op.create_table(
        "filter_used_in_query",
        sa.Column(
            "id", sa.Uuid(), server_default=sa.func.gen_random_uuid(), nullable=False
        ),
        sa.Column("message_id", sa.Uuid(), nullable=False),
        sa.Column("filter_type", filter_type_enum, nullable=False),
        sa.Column("filter_value", sa.String(), nullable=False),
        message_fk,
        sa.PrimaryKeyConstraint("id"),
        schema=schema,
    )
64
+
65
+
66
def downgrade() -> None:
    """Revert revision 9b4f1da0c1f2 ("data collection for focus group").

    Drops the two columns added to ``user_related.chat_message``, removes the
    foreign key and the ``filter_used_in_query`` table, and finally drops the
    ``user_related.filter_type`` enum type if it still exists.
    """
    schema = "user_related"
    filter_table = "filter_used_in_query"

    for column_name in ("is_retrieved_by_user", "original_feature_name"):
        op.drop_column("chat_message", column_name, schema=schema)

    op.drop_constraint(
        "filter_used_in_query_message_id_fkey",
        filter_table,
        schema=schema,
        type_="foreignkey",
    )
    op.drop_table(filter_table, schema=schema)

    # The enum type is dropped with an explicit statement after the table.
    op.execute("DROP TYPE IF EXISTS user_related.filter_type")
@@ -38,3 +38,8 @@ class ExternalIdType(StrEnum):
38
38
  HANDLE = auto()
39
39
  SLUG = auto()
40
40
  QID = auto()
41
+
42
+
43
+ class FilterType(StrEnum):
44
+ SDG = auto()
45
+ SOURCE = auto()
@@ -2,10 +2,10 @@ from datetime import datetime
2
2
  from uuid import UUID
3
3
 
4
4
  from sqlalchemy import ForeignKey, func, types
5
- from sqlalchemy.dialects.postgresql import TIMESTAMP
5
+ from sqlalchemy.dialects.postgresql import ENUM, TIMESTAMP
6
6
  from sqlalchemy.orm import Mapped, mapped_column, relationship
7
7
 
8
- from welearn_database.data.enumeration import DbSchemaEnum
8
+ from welearn_database.data.enumeration import DbSchemaEnum, FilterType
9
9
  from welearn_database.data.models.document_related import WeLearnDocument
10
10
 
11
11
  from . import Base
@@ -90,8 +90,16 @@ class ChatMessage(Base):
90
90
  ForeignKey(f"{DbSchemaEnum.USER_RELATED.value}.inferred_user.id"),
91
91
  nullable=False,
92
92
  )
93
+ conversation_id = mapped_column(
94
+ types.Uuid,
95
+ nullable=False,
96
+ )
93
97
  role: Mapped[str]
94
98
  textual_content: Mapped[str]
99
+ is_retrieved_by_user: Mapped[bool] = mapped_column(
100
+ default=False, server_default="False"
101
+ )
102
+ original_feature_name: Mapped[str | None]
95
103
 
96
104
  created_at: Mapped[datetime] = mapped_column(
97
105
  TIMESTAMP(timezone=False),
@@ -129,6 +137,8 @@ class ReturnedDocument(Base):
129
137
  ),
130
138
  nullable=False,
131
139
  )
140
+ is_clicked: Mapped[bool] = mapped_column(default=False)
141
+
132
142
  welearn_document: Mapped["WeLearnDocument"] = relationship()
133
143
  chat_message: Mapped["ChatMessage"] = relationship()
134
144
 
@@ -159,6 +169,23 @@ class APIKeyManagement(Base):
159
169
  )
160
170
 
161
171
 
172
class DataCollectionCampaignManagement(Base):
    """ORM mapping for the ``data_collection_campaign_management`` table.

    The table lives in the schema named by ``DbSchemaEnum.USER_RELATED``. Each
    row carries an activation flag, an end timestamp and a creation timestamp.
    """

    __tablename__ = "data_collection_campaign_management"
    __table_args__ = {"schema": DbSchemaEnum.USER_RELATED.value}

    # The default is passed as a SQL function expression rather than a plain
    # string: SQLAlchemy renders a plain-string server_default as a quoted
    # literal, whereas the migration creating this table uses
    # sa.func.gen_random_uuid().
    id: Mapped[UUID] = mapped_column(
        types.Uuid,
        primary_key=True,
        nullable=False,
        server_default=func.gen_random_uuid(),
    )
    is_active: Mapped[bool]
    end_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=False), nullable=False)
    # NOTE(review): server_default is kept as the string "NOW()" to match the
    # migration; confirm it yields a per-insert timestamp rather than a
    # constant fixed when the table was created.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        nullable=False,
        default=func.localtimestamp(),
        server_default="NOW()",
    )
187
+
188
+
162
189
  class Session(Base):
163
190
  __tablename__ = "session"
164
191
  __table_args__ = {"schema": "user_related"}
@@ -218,3 +245,27 @@ class EndpointRequest(Base):
218
245
  server_default="NOW()",
219
246
  )
220
247
  session = relationship("Session", foreign_keys=[session_id])
248
+
249
+
250
class FilterUsedInQuery(Base):
    """ORM mapping for the ``filter_used_in_query`` table.

    The table lives in the schema named by ``DbSchemaEnum.USER_RELATED``. Each
    row links one filter (its type and its value) to a ``chat_message`` row
    through ``message_id``.
    """

    __tablename__ = "filter_used_in_query"
    __table_args__ = {"schema": DbSchemaEnum.USER_RELATED.value}

    # The default is passed as a SQL function expression rather than a plain
    # string: SQLAlchemy renders a plain-string server_default as a quoted
    # literal, whereas the migration creating this table uses
    # sa.func.gen_random_uuid().
    id: Mapped[UUID] = mapped_column(
        types.Uuid,
        primary_key=True,
        nullable=False,
        server_default=func.gen_random_uuid(),
    )
    message_id: Mapped[UUID] = mapped_column(
        types.Uuid,
        ForeignKey(f"{DbSchemaEnum.USER_RELATED.value}.chat_message.id"),
        nullable=False,
    )
    # The enum labels are the lowercased FilterType values, under the same
    # type name ("filter_type") that the migration declares.
    filter_type: Mapped[str] = mapped_column(
        ENUM(
            *(e.value.lower() for e in FilterType),
            name="filter_type",
            schema=DbSchemaEnum.USER_RELATED.value,
        ),
    )
    filter_value: Mapped[str]

    chat_message: Mapped["ChatMessage"] = relationship()