welearn-database 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {welearn_database-0.2.4 → welearn_database-0.2.6}/PKG-INFO +1 -1
  2. {welearn_database-0.2.4 → welearn_database-0.2.6}/pyproject.toml +1 -1
  3. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/document_related.py +20 -24
  4. {welearn_database-0.2.4 → welearn_database-0.2.6}/LICENSE +0 -0
  5. {welearn_database-0.2.4 → welearn_database-0.2.6}/README.md +0 -0
  6. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/__init__.py +0 -0
  7. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/README +0 -0
  8. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/env.py +0 -0
  9. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/script.py.mako +0 -0
  10. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/16ff997426d3_remove_error_retrieval_unique_constraint.py +0 -0
  11. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/4c7161819e5a_grafana_views.py +0 -0
  12. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/4fcbfb7f3145_added_api_key_management_table.py +0 -0
  13. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/5d82613c9aca_context_document.py +0 -0
  14. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/821173cf9c5d_initial_migration.py +0 -0
  15. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/89920abb7ff8_add_category.py +0 -0
  16. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/a50a1db3ca2a_add_used_since_column_for_embeddings.py +0 -0
  17. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/b031206324b7_agent_related.py +0 -0
  18. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/e354666f951d_inferred_user.py +0 -0
  19. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/__init__.py +0 -0
  20. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/enumeration.py +0 -0
  21. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/__init__.py +0 -0
  22. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/agent_related.py +0 -0
  23. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/corpus_related.py +0 -0
  24. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/grafana.py +0 -0
  25. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/user_related.py +0 -0
  26. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/database_utils.py +0 -0
  27. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/exceptions.py +0 -0
  28. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/modules/__init__.py +0 -0
  29. {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/modules/text_cleaning.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: welearn-database
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: All stuff related to relationnal database from the WeLearn project
5
5
  License: cc-by-sa-nc
6
6
  Author: Théo
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "welearn-database"
3
- version = "0.2.4"
3
+ version = "0.2.6"
4
4
  description = "All stuff related to relationnal database from the WeLearn project"
5
5
  authors = [
6
6
  {name = "Théo",email = "theo.nardin@cri-paris.org"}
@@ -6,7 +6,6 @@ from zlib import adler32
6
6
 
7
7
  from sqlalchemy import ForeignKey, Integer, LargeBinary, UniqueConstraint, func, types
8
8
  from sqlalchemy.dialects.postgresql import ARRAY, ENUM, TIMESTAMP
9
- from sqlalchemy.ext.hybrid import hybrid_property
10
9
  from sqlalchemy.orm import Mapped, mapped_column, relationship, validates
11
10
 
12
11
  from welearn_database.data.enumeration import ContextType, Counter, DbSchemaEnum, Step
@@ -52,8 +51,8 @@ class WeLearnDocument(Base):
52
51
  url: Mapped[str] = mapped_column(nullable=False)
53
52
  title: Mapped[str | None]
54
53
  lang: Mapped[str | None]
55
- _description: Mapped[str | None]
56
- _full_content: Mapped[str | None]
54
+ description: Mapped[str | None]
55
+ full_content: Mapped[str | None]
57
56
  details: Mapped[dict[str, Any] | None]
58
57
  _trace: Mapped[int | None] = mapped_column(types.BIGINT)
59
58
  corpus_id: Mapped[UUID] = mapped_column(
@@ -92,7 +91,7 @@ class WeLearnDocument(Base):
92
91
  raise InvalidURLScheme("There is an error on the URL form : %s", value)
93
92
  return value
94
93
 
95
- @validates("_full_content")
94
+ @validates("full_content")
96
95
  def validate_full_content(self, key, value):
97
96
  """
98
97
  Validate the full content to ensure it meets the minimum length requirement.
@@ -105,29 +104,26 @@ class WeLearnDocument(Base):
105
104
  return value
106
105
  if len(value) < 25:
107
106
  raise ValueError(f"Content is too short : {len(value)}")
108
- return value
109
-
110
- @hybrid_property
111
- def full_content(self):
112
- return self._full_content
113
-
114
- @full_content.setter
115
- def full_content(self, full_content):
116
- self._full_content = clean_text(full_content)
117
-
118
- @hybrid_property
119
- def description(self):
120
- return self._description
107
+ return clean_text(value)
121
108
 
122
- @description.setter
123
- def description(self, description):
124
- if not description:
125
- self._description = description
126
- self._description = clean_text(description)
109
+ @validates("description")
110
+ def validate_description(self, key, value):
111
+ """
112
+ Validate and clean the description text.
113
+ :param key: The name of the attribute being validated.
114
+ :param value: The value of the description to validate.
115
+ :return: The cleaned description text. If the value is None or empty, it returns the value as is.
116
+ """
117
+ if not value:
118
+ return value
119
+ return clean_text(value)
127
120
 
128
- @hybrid_property
121
+ @property
129
122
  def trace(self):
130
- return adler32(bytes(self.full_content, "utf-8"))
123
+ if self.full_content:
124
+ return adler32(bytes(self.full_content, "utf-8"))
125
+ else:
126
+ return None
131
127
 
132
128
 
133
129
  class ProcessState(Base):