welearn-database 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {welearn_database-0.2.4 → welearn_database-0.2.6}/PKG-INFO +1 -1
- {welearn_database-0.2.4 → welearn_database-0.2.6}/pyproject.toml +1 -1
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/document_related.py +20 -24
- {welearn_database-0.2.4 → welearn_database-0.2.6}/LICENSE +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/README.md +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/__init__.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/README +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/env.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/script.py.mako +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/16ff997426d3_remove_error_retrieval_unique_constraint.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/4c7161819e5a_grafana_views.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/4fcbfb7f3145_added_api_key_management_table.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/5d82613c9aca_context_document.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/821173cf9c5d_initial_migration.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/89920abb7ff8_add_category.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/a50a1db3ca2a_add_used_since_column_for_embeddings.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/b031206324b7_agent_related.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/alembic/versions/e354666f951d_inferred_user.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/__init__.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/enumeration.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/__init__.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/agent_related.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/corpus_related.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/grafana.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/user_related.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/database_utils.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/exceptions.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/modules/__init__.py +0 -0
- {welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/modules/text_cleaning.py +0 -0
{welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/document_related.py
RENAMED
|
@@ -6,7 +6,6 @@ from zlib import adler32
|
|
|
6
6
|
|
|
7
7
|
from sqlalchemy import ForeignKey, Integer, LargeBinary, UniqueConstraint, func, types
|
|
8
8
|
from sqlalchemy.dialects.postgresql import ARRAY, ENUM, TIMESTAMP
|
|
9
|
-
from sqlalchemy.ext.hybrid import hybrid_property
|
|
10
9
|
from sqlalchemy.orm import Mapped, mapped_column, relationship, validates
|
|
11
10
|
|
|
12
11
|
from welearn_database.data.enumeration import ContextType, Counter, DbSchemaEnum, Step
|
|
@@ -52,8 +51,8 @@ class WeLearnDocument(Base):
|
|
|
52
51
|
url: Mapped[str] = mapped_column(nullable=False)
|
|
53
52
|
title: Mapped[str | None]
|
|
54
53
|
lang: Mapped[str | None]
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
description: Mapped[str | None]
|
|
55
|
+
full_content: Mapped[str | None]
|
|
57
56
|
details: Mapped[dict[str, Any] | None]
|
|
58
57
|
_trace: Mapped[int | None] = mapped_column(types.BIGINT)
|
|
59
58
|
corpus_id: Mapped[UUID] = mapped_column(
|
|
@@ -92,7 +91,7 @@ class WeLearnDocument(Base):
|
|
|
92
91
|
raise InvalidURLScheme("There is an error on the URL form : %s", value)
|
|
93
92
|
return value
|
|
94
93
|
|
|
95
|
-
@validates("
|
|
94
|
+
@validates("full_content")
|
|
96
95
|
def validate_full_content(self, key, value):
|
|
97
96
|
"""
|
|
98
97
|
Validate the full content to ensure it meets the minimum length requirement.
|
|
@@ -105,29 +104,26 @@ class WeLearnDocument(Base):
|
|
|
105
104
|
return value
|
|
106
105
|
if len(value) < 25:
|
|
107
106
|
raise ValueError(f"Content is too short : {len(value)}")
|
|
108
|
-
return value
|
|
109
|
-
|
|
110
|
-
@hybrid_property
|
|
111
|
-
def full_content(self):
|
|
112
|
-
return self._full_content
|
|
113
|
-
|
|
114
|
-
@full_content.setter
|
|
115
|
-
def full_content(self, full_content):
|
|
116
|
-
self._full_content = clean_text(full_content)
|
|
117
|
-
|
|
118
|
-
@hybrid_property
|
|
119
|
-
def description(self):
|
|
120
|
-
return self._description
|
|
107
|
+
return clean_text(value)
|
|
121
108
|
|
|
122
|
-
@description
|
|
123
|
-
def
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
109
|
+
@validates("description")
|
|
110
|
+
def validate_description(self, key, value):
|
|
111
|
+
"""
|
|
112
|
+
Validate and clean the description text.
|
|
113
|
+
:param key: The name of the attribute being validated.
|
|
114
|
+
:param value: The value of the description to validate.
|
|
115
|
+
:return: The cleaned description text. If the value is None or empty, it returns the value as is.
|
|
116
|
+
"""
|
|
117
|
+
if not value:
|
|
118
|
+
return value
|
|
119
|
+
return clean_text(value)
|
|
127
120
|
|
|
128
|
-
@
|
|
121
|
+
@property
|
|
129
122
|
def trace(self):
|
|
130
|
-
|
|
123
|
+
if self.full_content:
|
|
124
|
+
return adler32(bytes(self.full_content, "utf-8"))
|
|
125
|
+
else:
|
|
126
|
+
return None
|
|
131
127
|
|
|
132
128
|
|
|
133
129
|
class ProcessState(Base):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/agent_related.py
RENAMED
|
File without changes
|
{welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/corpus_related.py
RENAMED
|
File without changes
|
|
File without changes
|
{welearn_database-0.2.4 → welearn_database-0.2.6}/welearn_database/data/models/user_related.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|