wbnews 1.54.23__tar.gz → 1.59.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wbnews-1.54.23 → wbnews-1.59.5}/PKG-INFO +1 -1
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/filters/news.py +1 -1
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/parsers/emails/utils.py +2 -2
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/models/news.py +4 -2
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tasks.py +2 -1
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/parsers/test_emails.py +6 -6
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/test_models.py +22 -12
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/utils.py +1 -1
- {wbnews-1.54.23 → wbnews-1.59.5}/.gitignore +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/pyproject.toml +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/.coveragerc +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/admin.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/apps.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/factories.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/filters/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/fixtures/wbnews.yaml +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/backends/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/backends/news.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/handlers/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/handlers/news.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/parsers/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/parsers/emails/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/parsers/emails/news.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/parsers/rss/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/import_export/parsers/rss/news.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/de/LC_MESSAGES/django.mo +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/de/LC_MESSAGES/django.po +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/de/LC_MESSAGES/django.po.translated +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/en/LC_MESSAGES/django.mo +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/en/LC_MESSAGES/django.po +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/fr/LC_MESSAGES/django.mo +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/locale/fr/LC_MESSAGES/django.po +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0001_initial_squashed_0005_alter_news_import_source.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0006_alter_news_language.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0007_auto_20240103_0955.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0008_alter_news_language.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0009_newsrelationship_analysis_newsrelationship_sentiment.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0010_newsrelationship_important.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0011_newsrelationship_content_object_repr.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0012_alter_news_unique_together_news_identifier_and_more.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0013_alter_news_datetime.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0014_newsrelationship_unique_news_relationship.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/models/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/models/llm/cleaned_news.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/models/relationships.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/models/sources.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/models/utils.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/serializers.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/signals.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/conftest.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/parsers/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/test_utils.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/tests/tests.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/urls.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/__init__.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/buttons.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/display.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/endpoints.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/menu.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/titles.py +0 -0
- {wbnews-1.54.23 → wbnews-1.59.5}/wbnews/viewsets/views.py +0 -0
|
@@ -4,7 +4,7 @@ from wbnews.models import News, NewsRelationship, NewsSource
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class NewsFilterSet(wb_filters.FilterSet):
|
|
7
|
-
datetime = wb_filters.DateTimeRangeFilter(
|
|
7
|
+
datetime = wb_filters.DateTimeRangeFilter()
|
|
8
8
|
|
|
9
9
|
class Meta:
|
|
10
10
|
model = News
|
|
@@ -22,10 +22,10 @@ class EmlContentParser:
|
|
|
22
22
|
html = self.get_html(self.message)
|
|
23
23
|
return html.decode(self.encoding) if html else None
|
|
24
24
|
|
|
25
|
-
def get_html(
|
|
25
|
+
def get_html(self, parsed: message.Message) -> bytes | None:
|
|
26
26
|
if parsed.is_multipart():
|
|
27
27
|
for item in parsed.get_payload(): # type:message.Message
|
|
28
|
-
if html :=
|
|
28
|
+
if html := self.get_html(item):
|
|
29
29
|
return html
|
|
30
30
|
elif parsed.get_content_type() == "text/html":
|
|
31
31
|
return parsed.get_payload(decode=True)
|
|
@@ -14,6 +14,7 @@ from slugify import slugify
|
|
|
14
14
|
from wbcore.contrib.ai.llm.decorators import llm
|
|
15
15
|
from wbcore.contrib.io.mixins import ImportMixin
|
|
16
16
|
from wbcore.models import WBModel
|
|
17
|
+
from wbcore.workers import Queue
|
|
17
18
|
|
|
18
19
|
from wbnews.import_export.handlers.news import NewsImportHandler
|
|
19
20
|
from wbnews.models.llm.cleaned_news import clean_news_config, summarized_news_config
|
|
@@ -23,7 +24,7 @@ from wbnews.signals import create_news_relationships
|
|
|
23
24
|
from ..utils import detect_near_duplicates
|
|
24
25
|
|
|
25
26
|
|
|
26
|
-
@shared_task
|
|
27
|
+
@shared_task(queue=Queue.DEFAULT.value)
|
|
27
28
|
def create_relationship(chain_results: list[list[dict[str, Any]]], news_id: int):
|
|
28
29
|
objs = []
|
|
29
30
|
for relationships in chain_results:
|
|
@@ -81,7 +82,8 @@ class News(ImportMixin, WBModel):
|
|
|
81
82
|
"""
|
|
82
83
|
tasks = []
|
|
83
84
|
for sender, task_signature in create_news_relationships.send(sender=News, instance=self):
|
|
84
|
-
|
|
85
|
+
if not isinstance(task_signature, Signature):
|
|
86
|
+
raise AssertionError(self.errors["relationship_signal"].format(sender))
|
|
85
87
|
tasks.append(task_signature)
|
|
86
88
|
if tasks:
|
|
87
89
|
res = chord(tasks, create_relationship.s(self.id))
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from datetime import date, timedelta
|
|
2
2
|
|
|
3
3
|
from celery import shared_task
|
|
4
|
+
from wbcore.workers import Queue
|
|
4
5
|
|
|
5
6
|
from wbnews.models import News
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
@shared_task()
|
|
9
|
+
@shared_task(queue=Queue.BACKGROUND.value)
|
|
9
10
|
def handle_daily_news_duplicates(
|
|
10
11
|
task_date: date | None = None,
|
|
11
12
|
day_interval: int = 7,
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
from unittest import
|
|
2
|
-
from unittest.mock import patch, PropertyMock
|
|
1
|
+
from unittest.mock import PropertyMock, patch
|
|
3
2
|
|
|
4
3
|
import pytest
|
|
5
4
|
|
|
@@ -7,19 +6,20 @@ from wbnews.import_export.parsers.emails.utils import EmlContentParser
|
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class TestEmlContentParser:
|
|
10
|
-
|
|
11
9
|
@pytest.fixture
|
|
12
10
|
def content_parser(self):
|
|
13
|
-
parser = EmlContentParser(b
|
|
11
|
+
parser = EmlContentParser(b"")
|
|
14
12
|
parser.message = {"From": "main@acme.com"}
|
|
15
13
|
return parser
|
|
16
14
|
|
|
17
15
|
@patch.object(EmlContentParser, "text", new_callable=PropertyMock)
|
|
18
16
|
def test_source_from_in_text(self, mock_text, content_parser):
|
|
19
|
-
mock_text.return_value =
|
|
17
|
+
mock_text.return_value = (
|
|
18
|
+
"some random email content with a From field From: source name <email@test.com> and the rest of the email"
|
|
19
|
+
)
|
|
20
20
|
assert content_parser.source == {"title": "Source Name", "endpoint": "email@test.com", "type": "EMAIL"}
|
|
21
21
|
|
|
22
22
|
@patch.object(EmlContentParser, "text", new_callable=PropertyMock)
|
|
23
|
-
def
|
|
23
|
+
def test_source_from_in_text_alt(self, mock_text, content_parser):
|
|
24
24
|
mock_text.return_value = "some random email content without a From field"
|
|
25
25
|
assert content_parser.source == {"title": "Acme.Com", "endpoint": "main@acme.com", "type": "EMAIL"}
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
from datetime import timedelta,
|
|
2
|
-
from django.utils import timezone as django_timezone
|
|
1
|
+
from datetime import timedelta, timezone
|
|
3
2
|
from unittest.mock import patch
|
|
4
3
|
|
|
5
4
|
import pytest
|
|
5
|
+
from django.utils import timezone as django_timezone
|
|
6
|
+
from faker import Faker
|
|
6
7
|
|
|
7
8
|
from wbnews.models import News, NewsSource
|
|
8
|
-
from faker import Faker
|
|
9
9
|
|
|
10
10
|
fake = Faker()
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
@pytest.mark.django_db
|
|
13
14
|
class TestSource:
|
|
14
15
|
@pytest.mark.parametrize("news_source__title", ["source1"])
|
|
@@ -19,17 +20,19 @@ class TestSource:
|
|
|
19
20
|
ns1 = news_source_factory.create()
|
|
20
21
|
ns2 = news_source_factory.create()
|
|
21
22
|
|
|
22
|
-
assert NewsSource.source_dict_to_model({"id": ns1.id, "identifier": ns2.identifier}) == ns1
|
|
23
|
-
assert
|
|
24
|
-
|
|
23
|
+
assert NewsSource.source_dict_to_model({"id": ns1.id, "identifier": ns2.identifier}) == ns1 # priority to "id"
|
|
24
|
+
assert (
|
|
25
|
+
NewsSource.source_dict_to_model({"endpoint": ns1.endpoint, "identifier": ns2.identifier}) == ns2
|
|
26
|
+
) # priority to "identifier"
|
|
27
|
+
assert NewsSource.source_dict_to_model({"endpoint": ns2.endpoint}) == ns2 # exact match on endpoint
|
|
25
28
|
|
|
26
29
|
ns1.endpoint = ".*@test.com"
|
|
27
30
|
ns1.save()
|
|
28
|
-
assert NewsSource.source_dict_to_model({"endpoint": "abc@test.com"}) == ns1
|
|
31
|
+
assert NewsSource.source_dict_to_model({"endpoint": "abc@test.com"}) == ns1 # regex match on endpoint
|
|
29
32
|
|
|
30
33
|
new_source = NewsSource.source_dict_to_model({"endpoint": "abc@main_source.com", "title": "New Source"})
|
|
31
34
|
assert new_source not in [ns1, ns2]
|
|
32
|
-
assert new_source.endpoint == ".*@main_source\.com"
|
|
35
|
+
assert new_source.endpoint == r".*@main_source\.com"
|
|
33
36
|
assert new_source.title == "New Source"
|
|
34
37
|
assert new_source.author == "Main Source"
|
|
35
38
|
|
|
@@ -45,24 +48,31 @@ class TestNews:
|
|
|
45
48
|
|
|
46
49
|
def test_get_default_guid(self):
|
|
47
50
|
assert News.get_default_guid("This is a title", None) == "this-is-a-title"
|
|
48
|
-
assert
|
|
51
|
+
assert (
|
|
52
|
+
News.get_default_guid("This is a title", "http://mylink.com") == "http://mylink.com"
|
|
53
|
+
) # link takes precedence
|
|
49
54
|
assert News.get_default_guid("a" * 24, None, max_length=20) == "a" * 20
|
|
50
55
|
|
|
51
56
|
def test_future_news(self, news_factory):
|
|
52
57
|
# ensure a future datetime always defaults to now
|
|
53
58
|
now = django_timezone.now()
|
|
54
59
|
future_news = news_factory.create(datetime=now + timedelta(days=1))
|
|
55
|
-
assert (future_news.datetime - now).seconds < 1
|
|
60
|
+
assert (future_news.datetime - now).seconds < 1 # we do that to account for clock difference
|
|
56
61
|
|
|
57
62
|
@patch("wbnews.models.news.detect_near_duplicates")
|
|
58
63
|
def test_handle_duplicates(self, mock_fct, news_factory):
|
|
59
64
|
val_date = fake.date_time(tzinfo=timezone.utc)
|
|
60
|
-
n0 = news_factory.create(
|
|
65
|
+
n0 = news_factory.create(
|
|
66
|
+
datetime=val_date - timedelta(days=1)
|
|
67
|
+
) # we exclude this news from the duplicate search
|
|
61
68
|
n1 = news_factory.create(datetime=val_date)
|
|
62
69
|
n2 = news_factory.create(datetime=val_date)
|
|
63
70
|
n3 = news_factory.create(datetime=val_date)
|
|
64
71
|
|
|
65
|
-
mock_fct.return_value = [
|
|
72
|
+
mock_fct.return_value = [
|
|
73
|
+
n0.id,
|
|
74
|
+
n3.id,
|
|
75
|
+
] # n0 is considered as duplicate but does not fall within the specified daterange so it will not be marked
|
|
66
76
|
News.handle_duplicates(val_date, val_date)
|
|
67
77
|
|
|
68
78
|
n3.refresh_from_db()
|
|
@@ -11,7 +11,7 @@ logger = logging.getLogger("news")
|
|
|
11
11
|
|
|
12
12
|
def _get_similarity_matrix_df(data: dict[int, str]) -> pd.DataFrame:
|
|
13
13
|
# Convert texts to TF-IDF vectors
|
|
14
|
-
ids, texts = zip(*data.items())
|
|
14
|
+
ids, texts = zip(*data.items(), strict=False)
|
|
15
15
|
vectorizer = TfidfVectorizer()
|
|
16
16
|
tfidf_matrix = vectorizer.fit_transform(texts)
|
|
17
17
|
# Compute pairwise cosine similarity...
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0011_newsrelationship_content_object_repr.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wbnews-1.54.23 → wbnews-1.59.5}/wbnews/migrations/0014_newsrelationship_unique_news_relationship.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|