wbnews 1.46.12__py2.py3-none-any.whl → 1.60.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wbnews/admin.py +4 -1
- wbnews/factories.py +7 -5
- wbnews/filters/__init__.py +1 -1
- wbnews/filters/news.py +39 -2
- wbnews/import_export/backends/news.py +3 -3
- wbnews/import_export/handlers/news.py +35 -3
- wbnews/import_export/parsers/emails/news.py +2 -11
- wbnews/import_export/parsers/emails/utils.py +16 -12
- wbnews/import_export/parsers/rss/news.py +3 -9
- wbnews/locale/de/LC_MESSAGES/django.mo +0 -0
- wbnews/locale/de/LC_MESSAGES/django.po +92 -39
- wbnews/locale/de/LC_MESSAGES/django.po.translated +173 -0
- wbnews/locale/en/LC_MESSAGES/django.mo +0 -0
- wbnews/locale/en/LC_MESSAGES/django.po +159 -0
- wbnews/locale/fr/LC_MESSAGES/django.mo +0 -0
- wbnews/locale/fr/LC_MESSAGES/django.po +161 -0
- wbnews/migrations/0012_alter_news_unique_together_news_identifier_and_more.py +91 -0
- wbnews/migrations/0013_alter_news_datetime.py +19 -0
- wbnews/migrations/0014_newsrelationship_unique_news_relationship.py +27 -0
- wbnews/models/llm/cleaned_news.py +26 -23
- wbnews/models/news.py +37 -22
- wbnews/models/relationships.py +20 -1
- wbnews/models/sources.py +35 -5
- wbnews/models/utils.py +15 -0
- wbnews/serializers.py +16 -7
- wbnews/tasks.py +17 -0
- wbnews/tests/parsers/__init__.py +0 -0
- wbnews/tests/parsers/test_emails.py +25 -0
- wbnews/tests/test_models.py +65 -0
- wbnews/tests/test_utils.py +7 -0
- wbnews/utils.py +57 -0
- wbnews/viewsets/display.py +25 -29
- wbnews/viewsets/endpoints.py +11 -6
- wbnews/viewsets/views.py +5 -4
- {wbnews-1.46.12.dist-info → wbnews-1.60.1.dist-info}/METADATA +1 -2
- wbnews-1.60.1.dist-info/RECORD +65 -0
- {wbnews-1.46.12.dist-info → wbnews-1.60.1.dist-info}/WHEEL +1 -1
- wbnews-1.46.12.dist-info/RECORD +0 -50
wbnews/models/relationships.py
CHANGED
|
@@ -1,9 +1,25 @@
|
|
|
1
1
|
from django.contrib.contenttypes.fields import GenericForeignKey
|
|
2
2
|
from django.contrib.contenttypes.models import ContentType
|
|
3
3
|
from django.db import models
|
|
4
|
+
from django.utils.translation import gettext as _
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class NewsRelationship(models.Model):
|
|
8
|
+
class SentimentChoices(models.IntegerChoices):
    """Four-level sentiment scale, stored as an integer from 1 (negative) to 4 (positive)."""

    # Declaration order is kept from most positive to most negative; anything
    # iterating over the choices sees them in this order.
    POSITIVE = 4, _("Positive")
    SLIGHTLY_POSITIVE = 3, _("Slightly Positive")
    SLIGHTLY_NEGATIVE = 2, _("Slightly Negative")
    NEGATIVE = 1, _("Negative")

    def get_color(self):
        """Return the hex color string associated with this sentiment member."""
        palette = dict(
            POSITIVE="#96DD99",
            SLIGHTLY_POSITIVE="#FFEE8C",
            SLIGHTLY_NEGATIVE="#FF964F",
            NEGATIVE="#FF6961",
        )
        return palette[self.name]
|
|
22
|
+
|
|
7
23
|
news = models.ForeignKey(to="wbnews.News", related_name="relationships", on_delete=models.CASCADE)
|
|
8
24
|
content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
|
|
9
25
|
object_id = models.PositiveIntegerField()
|
|
@@ -11,7 +27,7 @@ class NewsRelationship(models.Model):
|
|
|
11
27
|
content_object_repr = models.CharField(max_length=512, default="")
|
|
12
28
|
|
|
13
29
|
important = models.BooleanField(null=True, blank=True)
|
|
14
|
-
sentiment = models.PositiveIntegerField(null=True, blank=True)
|
|
30
|
+
sentiment = models.PositiveIntegerField(null=True, blank=True, choices=SentimentChoices.choices)
|
|
15
31
|
analysis = models.TextField(null=True, blank=True)
|
|
16
32
|
|
|
17
33
|
def save(self, *args, **kwargs):
|
|
@@ -24,3 +40,6 @@ class NewsRelationship(models.Model):
|
|
|
24
40
|
class Meta:
|
|
25
41
|
verbose_name = "News Relationship"
|
|
26
42
|
indexes = [models.Index(fields=["content_type", "object_id"])]
|
|
43
|
+
constraints = [
|
|
44
|
+
models.UniqueConstraint(name="unique_news_relationship", fields=["content_type", "object_id", "news"])
|
|
45
|
+
]
|
wbnews/models/sources.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
1
3
|
from django.contrib.postgres.fields import ArrayField
|
|
2
4
|
from django.db import models
|
|
3
5
|
from wbcore.models import WBModel
|
|
4
6
|
|
|
7
|
+
from wbnews.models.utils import endpoint_to_author
|
|
8
|
+
|
|
5
9
|
|
|
6
10
|
class NewsSource(WBModel):
|
|
7
11
|
class Type(models.TextChoices):
|
|
@@ -16,16 +20,17 @@ class NewsSource(WBModel):
|
|
|
16
20
|
description = models.TextField(default="", blank=True)
|
|
17
21
|
author = models.CharField(max_length=255, default="")
|
|
18
22
|
clean_content = models.BooleanField(default=False)
|
|
19
|
-
|
|
20
|
-
blank=True,
|
|
21
|
-
null=True,
|
|
22
|
-
unique=True,
|
|
23
|
-
)
|
|
23
|
+
endpoint = models.CharField(max_length=1024, unique=True)
|
|
24
24
|
is_active = models.BooleanField(default=True)
|
|
25
25
|
|
|
26
26
|
def __str__(self):
|
|
27
27
|
return f"{self.title}"
|
|
28
28
|
|
|
29
|
+
def save(self, *args, **kwargs):
    """Derive a missing author from the endpoint, then delegate to the parent save."""
    author_missing = not self.author
    if author_missing and self.endpoint:
        # only fill the author in when none was set explicitly
        self.author = endpoint_to_author(self.endpoint)
    super().save(*args, **kwargs)
|
|
33
|
+
|
|
29
34
|
@classmethod
|
|
30
35
|
def get_representation_endpoint(cls) -> str:
|
|
31
36
|
return "wbnews:sourcerepresentation-list"
|
|
@@ -41,3 +46,28 @@ class NewsSource(WBModel):
|
|
|
41
46
|
@classmethod
|
|
42
47
|
def get_endpoint_basename(cls) -> str:
|
|
43
48
|
return "wbnews:source"
|
|
49
|
+
|
|
50
|
+
@classmethod
def source_dict_to_model(cls, data: dict):
    """Resolve a source description to a NewsSource, creating one if nothing matches.

    Lookup priority:
      1. ``id``: fetched directly (raises ``DoesNotExist`` when unknown).
      2. ``identifier`` (optionally narrowed by ``type``).
      3. ``endpoint``: exact equality with a stored endpoint, or a regex
         search using the stored endpoint as the pattern.
    If exactly one candidate remains after filtering it is returned,
    otherwise a new source is created from the remaining keys of ``data``.

    Parameters:
    - data (dict): source attributes; it is not modified by this method.

    Returns:
    - The matching or newly created NewsSource.
    """
    # Work on a copy so that popping "endpoint" does not mutate the caller's dict.
    data = dict(data)
    endpoint = data.pop("endpoint", None)
    sources = NewsSource.objects.all()
    if "id" in data:
        return sources.get(id=data["id"])
    if source_type := data.get("type"):
        sources = sources.filter(type=source_type)
    if identifier := data.get("identifier"):
        sources = sources.filter(identifier=identifier)
    elif endpoint:
        for source in sources:
            if source.endpoint == endpoint:
                return source
            try:
                matched = re.search(source.endpoint, endpoint)
            except re.error:
                # The stored endpoint is not a valid regex (e.g. a raw URL with
                # unbalanced brackets); it can only match by equality.
                continue
            if matched:
                return source
    # NOTE(review): this also returns the sole remaining candidate when an endpoint
    # was given and did not match it - confirm that this is intended.
    if sources.count() == 1:
        return sources.first()
    if endpoint and "@" in endpoint:
        # Replace the local part of an email with a wildcard regex. This is only
        # done for email-like endpoints: without an "@" the pattern would swallow
        # the whole string and store ".*", which matches every future endpoint.
        endpoint = re.sub(r"^[^@]+", ".*", re.escape(endpoint))
    return NewsSource.objects.create(**data, endpoint=endpoint)
|
wbnews/models/utils.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from contextlib import suppress
|
|
2
|
+
from urllib.parse import urlparse
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def endpoint_to_author(endpoint: str) -> str:
    """Derive a human-readable author name from a source endpoint.

    Parameters:
    - endpoint (str): an email address (possibly a regex-escaped pattern such
      as ``.*@domain\\.com``) or a URL.

    Returns:
    - The domain without its last label for emails, the second-to-last label
      of the network location for URLs, or the endpoint itself when neither
      can be extracted; underscores become spaces and the result is title-cased.
    """
    author = endpoint
    if "@" in endpoint:  # simplest way to check if the endpoint is an email address
        # strip regex escaping backslashes, then keep only the domain labels
        labels = endpoint.replace("\\", "").split("@")[-1].split(".")
        # Drop the last label (TLD) when there is one. A single-label domain is
        # kept as is, so that `author` is always a string below.
        author = ".".join(labels[:-1]) if len(labels) > 1 else labels[0]
    else:  # otherwise we consider it's a valid url and we extract only the domain part
        with suppress(ValueError, IndexError):
            author = urlparse(endpoint).netloc.split(".")[-2]

    return author.replace("_", " ").title()
|
wbnews/serializers.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from django.utils.translation import gettext_lazy as _
|
|
2
2
|
from rest_framework.reverse import reverse
|
|
3
3
|
from wbcore import serializers as wb_serializers
|
|
4
|
-
from wbcore.content_type.serializers import (
|
|
4
|
+
from wbcore.contrib.content_type.serializers import (
|
|
5
5
|
ContentTypeRepresentationSerializer,
|
|
6
6
|
DynamicObjectIDRepresentationSerializer,
|
|
7
7
|
)
|
|
@@ -73,12 +73,12 @@ class NewsModelSerializer(wb_serializers.ModelSerializer):
|
|
|
73
73
|
class NewsRelationshipModelSerializer(wb_serializers.ModelSerializer):
|
|
74
74
|
source = wb_serializers.PrimaryKeyCharField(read_only=True)
|
|
75
75
|
_source = SourceRepresentationSerializer(source="source")
|
|
76
|
-
title = wb_serializers.
|
|
77
|
-
description = wb_serializers.
|
|
78
|
-
summary = wb_serializers.
|
|
76
|
+
title = wb_serializers.TextField(read_only=True, label=_("Title"))
|
|
77
|
+
description = wb_serializers.TextField(read_only=True, label=_("Description"))
|
|
78
|
+
summary = wb_serializers.TextField(read_only=True, label=_("Summary"))
|
|
79
79
|
datetime = wb_serializers.DateTimeField(read_only=True, label=_("Date"))
|
|
80
80
|
_content_type = ContentTypeRepresentationSerializer(source="content_type")
|
|
81
|
-
object_id = wb_serializers.CharField(label="Linked Object")
|
|
81
|
+
object_id = wb_serializers.CharField(label="Linked Object", required=False)
|
|
82
82
|
_object_id = DynamicObjectIDRepresentationSerializer(
|
|
83
83
|
content_type_field_name="content_type",
|
|
84
84
|
source="object_id",
|
|
@@ -88,8 +88,19 @@ class NewsRelationshipModelSerializer(wb_serializers.ModelSerializer):
|
|
|
88
88
|
"is_security": True
|
|
89
89
|
}, # TODO needs to find a way to not create a dependency to the wbfdm module here
|
|
90
90
|
)
|
|
91
|
+
news = wb_serializers.PrimaryKeyRelatedField(
|
|
92
|
+
queryset=News.objects.all(), read_only=lambda view: not view.new_mode, label=_("News")
|
|
93
|
+
)
|
|
91
94
|
_news = NewsRepresentationSerializer(source="news")
|
|
92
95
|
|
|
96
|
+
def validate(self, data):
    """Inject the view's linked object into the validated data.

    When the serializer context carries a view, its ``object_id`` and
    ``content_type`` (if set) overwrite the corresponding keys of ``data``
    before the parent validation runs.
    """
    # .get() so that a serializer built without a view in its context is
    # validated as is instead of raising KeyError.
    view = self.context.get("view")
    if view:
        if view.object_id:
            data["object_id"] = view.object_id
        if view.content_type:
            data["content_type"] = view.content_type
    return super().validate(data)
|
|
103
|
+
|
|
93
104
|
class Meta:
|
|
94
105
|
model = NewsRelationship
|
|
95
106
|
read_only_fields = (
|
|
@@ -98,8 +109,6 @@ class NewsRelationshipModelSerializer(wb_serializers.ModelSerializer):
|
|
|
98
109
|
"title",
|
|
99
110
|
"description",
|
|
100
111
|
"summary",
|
|
101
|
-
"news",
|
|
102
|
-
"_news",
|
|
103
112
|
"content_type",
|
|
104
113
|
"_content_type",
|
|
105
114
|
)
|
wbnews/tasks.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from datetime import date, timedelta
|
|
2
|
+
|
|
3
|
+
from celery import shared_task
|
|
4
|
+
from wbcore.workers import Queue
|
|
5
|
+
|
|
6
|
+
from wbnews.models import News
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@shared_task(queue=Queue.BACKGROUND.value)
def handle_daily_news_duplicates(
    task_date: date | None = None,
    day_interval: int = 7,
):
    """Run the news duplicate handling on a window centered on a date.

    Parameters:
    - task_date (date | None): center of the window; today when not given.
    - day_interval (int): number of days covered on each side of task_date.
    """
    reference_date = task_date or date.today()
    window = timedelta(days=day_interval)
    News.handle_duplicates(reference_date - window, reference_date + window)
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from unittest.mock import PropertyMock, patch
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from wbnews.import_export.parsers.emails.utils import EmlContentParser
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestEmlContentParser:
    """Checks how EmlContentParser.source is derived from the mail text and headers."""

    @pytest.fixture
    def content_parser(self):
        """Parser built from empty bytes whose message only carries a From header."""
        eml_parser = EmlContentParser(b"")
        eml_parser.message = {"From": "main@acme.com"}
        return eml_parser

    @patch.object(EmlContentParser, "text", new_callable=PropertyMock)
    def test_source_from_in_text(self, mock_text, content_parser):
        """A From field present in the text provides the source title and endpoint."""
        mock_text.return_value = (
            "some random email content with a From field From: source name <email@test.com> and the rest of the email"
        )
        expected_source = {"title": "Source Name", "endpoint": "email@test.com", "type": "EMAIL"}
        assert content_parser.source == expected_source

    @patch.object(EmlContentParser, "text", new_callable=PropertyMock)
    def test_source_from_in_text_alt(self, mock_text, content_parser):
        """Without a From field in the text, the message From header is used."""
        mock_text.return_value = "some random email content without a From field"
        expected_source = {"title": "Acme.Com", "endpoint": "main@acme.com", "type": "EMAIL"}
        assert content_parser.source == expected_source
|
wbnews/tests/test_models.py
CHANGED
|
@@ -1,4 +1,13 @@
|
|
|
1
|
+
from datetime import timedelta, timezone
|
|
2
|
+
from unittest.mock import patch
|
|
3
|
+
|
|
1
4
|
import pytest
|
|
5
|
+
from django.utils import timezone as django_timezone
|
|
6
|
+
from faker import Faker
|
|
7
|
+
|
|
8
|
+
from wbnews.models import News, NewsSource
|
|
9
|
+
|
|
10
|
+
fake = Faker()
|
|
2
11
|
|
|
3
12
|
|
|
4
13
|
@pytest.mark.django_db
|
|
@@ -7,9 +16,65 @@ class TestSource:
|
|
|
7
16
|
def test_str(self, news_source):
|
|
8
17
|
assert str(news_source) == f"{news_source.title}"
|
|
9
18
|
|
|
19
|
+
def test_source_dict_to_model(self, news_source_factory):
    """Walk through the lookup priorities of NewsSource.source_dict_to_model."""
    first = news_source_factory.create()
    second = news_source_factory.create()

    # priority to "id"
    by_id = NewsSource.source_dict_to_model({"id": first.id, "identifier": second.identifier})
    assert by_id == first

    # priority to "identifier"
    by_identifier = NewsSource.source_dict_to_model({"endpoint": first.endpoint, "identifier": second.identifier})
    assert by_identifier == second

    # exact match on endpoint
    by_endpoint = NewsSource.source_dict_to_model({"endpoint": second.endpoint})
    assert by_endpoint == second

    # regex match on endpoint
    first.endpoint = ".*@test.com"
    first.save()
    by_pattern = NewsSource.source_dict_to_model({"endpoint": "abc@test.com"})
    assert by_pattern == first

    # nothing matches: a new source is created with a wildcarded local part
    created = NewsSource.source_dict_to_model({"endpoint": "abc@main_source.com", "title": "New Source"})
    assert created not in [first, second]
    assert created.endpoint == r".*@main_source\.com"
    assert created.title == "New Source"
    assert created.author == "Main Source"
|
|
38
|
+
|
|
10
39
|
|
|
11
40
|
@pytest.mark.django_db
|
|
12
41
|
class TestNews:
|
|
13
42
|
@pytest.mark.parametrize("news__title", ["new1"])
|
|
14
43
|
def test_str(self, news):
|
|
15
44
|
assert str(news) == f"{news.title} ({news.source.title})"
|
|
45
|
+
|
|
46
|
+
def test_mark_as_deplicates_not_in_default_queryset(self, news):
    """The default queryset contains exactly the fixture news."""
    # NOTE(review): no news is marked as duplicate here, so the exclusion the
    # test name refers to is not actually exercised - consider adding one.
    default_queryset = News.objects.all()
    assert set(default_queryset) == {news}
|
|
48
|
+
|
|
49
|
+
def test_get_default_guid(self):
    """Check the guid derived from a title, from a link, and with a length cap."""
    title = "This is a title"

    # without a link the guid is built from the title
    assert News.get_default_guid(title, None) == "this-is-a-title"

    # link takes precedence
    link = "http://mylink.com"
    assert News.get_default_guid(title, link) == link

    # the result is capped at max_length characters
    assert News.get_default_guid("a" * 24, None, max_length=20) == "a" * 20
|
|
55
|
+
|
|
56
|
+
def test_future_news(self, news_factory):
    """Ensure a news created with a future datetime always defaults to now."""
    now = django_timezone.now()
    future_news = news_factory.create(datetime=now + timedelta(days=1))
    # total_seconds() rather than .seconds: .seconds ignores the days component,
    # so an unclamped delta of exactly one day would read as 0 and pass vacuously.
    # The one second tolerance accounts for clock difference.
    assert abs((future_news.datetime - now).total_seconds()) < 1
|
|
61
|
+
|
|
62
|
+
@patch("wbnews.models.news.detect_near_duplicates")
def test_handle_duplicates(self, mock_fct, news_factory):
    """Only detected duplicates inside the requested date range get marked."""
    val_date = fake.date_time(tzinfo=timezone.utc)
    # we exclude this news from the duplicate search (one day before the range)
    n0 = news_factory.create(datetime=val_date - timedelta(days=1))
    n1, n2, n3 = (news_factory.create(datetime=val_date) for _ in range(3))

    # n0 is considered as duplicate but does not fall within the specified
    # daterange so it will not be marked
    mock_fct.return_value = [n0.id, n3.id]
    News.handle_duplicates(val_date, val_date)

    n3.refresh_from_db()
    assert n3.mark_as_duplicate is True
    remaining = set(News.objects.all())
    assert remaining == {n0, n1, n2}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
from wbnews.models.utils import endpoint_to_author
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_endpoint_to_author():
    """Check the author derived from an escaped email, a URL and a bare word."""
    expectations = {
        "test@test_test\\.com": "Test Test",
        "http://somesubdomain.domain.com": "Domain",
        "test": "Test",
    }
    for endpoint, expected_author in expectations.items():
        assert endpoint_to_author(endpoint) == expected_author
|
wbnews/utils.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from django.utils.html import strip_tags
|
|
6
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
7
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("news")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _get_similarity_matrix_df(data: dict[int, str]) -> pd.DataFrame:
    """Return the pairwise cosine similarity of the TF-IDF vectors of the texts.

    Parameters:
    - data (dict[int, str]): texts keyed by id.

    Returns:
    - A square dataframe whose index and columns are both the ids of `data`.
    """
    # split the mapping into parallel tuples of ids and texts
    ids, texts = zip(*data.items())
    # vectorize the texts, then compare every pair of vectors
    tfidf_matrix = TfidfVectorizer().fit_transform(texts)
    return pd.DataFrame(cosine_similarity(tfidf_matrix), index=ids, columns=ids)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def detect_near_duplicates(data: dict[int, str], threshold: float = 0.9) -> list[int]:
    """
    Detects near-duplicate articles based on TF-IDF & Cosine Similarity.

    Parameters:
    - data (dict[int, str]): dictionary of news id with their respective content
    - threshold (float): Similarity threshold (default = 0.9).

    Returns:
    - List of duplicated ids (for each similar pair, only the id that comes
      later in `data` is reported). Empty when fewer than two news are given.
    """
    if len(data) < 2:
        return []
    logger.info("Processing %d news", len(data))

    # Cleanup step: compare the text content only, not the HTML markup.
    clean_data = {_id: strip_tags(text) for _id, text in data.items()}

    # get similarity matrix (from the cleaned texts, not the raw ones)
    df = _get_similarity_matrix_df(clean_data)

    # Replace the lower matrix triangle with NaN
    df = df.where(np.triu(np.ones(df.shape)).astype(bool))
    # melt the symmetrical matrix into a key value store
    df = df.stack().reset_index(name="value")
    # remove duplicate pair with same id (expected to be 1.0)
    df = df[df["level_0"] != df["level_1"]]
    # get duplicates candidates
    df = df[df["value"] > threshold]
    # return only one side of the duplicate pair
    duplicate_ids = df["level_1"].unique().tolist()
    logger.info("%d duplicated news found", len(duplicate_ids))

    return duplicate_ids
|
wbnews/viewsets/display.py
CHANGED
|
@@ -2,7 +2,6 @@ from typing import Optional
|
|
|
2
2
|
|
|
3
3
|
from django.utils.translation import gettext as _
|
|
4
4
|
from wbcore.metadata.configs import display as dp
|
|
5
|
-
from wbcore.metadata.configs.display import Layout, Page, default
|
|
6
5
|
from wbcore.metadata.configs.display.instance_display.shortcuts import (
|
|
7
6
|
Display,
|
|
8
7
|
create_simple_display,
|
|
@@ -11,6 +10,8 @@ from wbcore.metadata.configs.display.instance_display.shortcuts import (
|
|
|
11
10
|
from wbcore.metadata.configs.display.instance_display.utils import repeat_field
|
|
12
11
|
from wbcore.metadata.configs.display.view_config import DisplayViewConfig
|
|
13
12
|
|
|
13
|
+
from wbnews.models import NewsRelationship
|
|
14
|
+
|
|
14
15
|
|
|
15
16
|
class SourceDisplayConfig(DisplayViewConfig):
|
|
16
17
|
def get_list_display(self) -> Optional[dp.ListDisplay]:
|
|
@@ -64,28 +65,30 @@ class NewsDisplayConfig(DisplayViewConfig):
|
|
|
64
65
|
|
|
65
66
|
class NewsRelationshipDisplayConfig(DisplayViewConfig):
|
|
66
67
|
def get_instance_display(self) -> Display:
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
68
|
+
if self.new_mode:
|
|
69
|
+
return create_simple_display(
|
|
70
|
+
[
|
|
71
|
+
["news", "news"],
|
|
72
|
+
["important", "sentiment"],
|
|
73
|
+
["analysis", "analysis"],
|
|
74
|
+
]
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
return create_simple_display(
|
|
78
|
+
[
|
|
79
|
+
["news", "news"],
|
|
80
|
+
["content_type", "object_id"],
|
|
81
|
+
[
|
|
82
|
+
"important",
|
|
83
|
+
"sentiment",
|
|
84
|
+
],
|
|
85
|
+
["analysis", "analysis"],
|
|
86
|
+
["summary", "summary"],
|
|
87
|
+
["description", "description"],
|
|
80
88
|
]
|
|
81
89
|
)
|
|
82
90
|
|
|
83
91
|
def get_list_display(self) -> Optional[dp.ListDisplay]:
|
|
84
|
-
POSITIVE = "#96DD99"
|
|
85
|
-
SLIGHTLY_POSITIVE = "#FFEE8C"
|
|
86
|
-
SLIGHTLY_NEGATIVE = "#FF964F"
|
|
87
|
-
NEGATIVE = "#FF6961"
|
|
88
|
-
|
|
89
92
|
fields = (
|
|
90
93
|
[dp.Field(key="content_object_repr", label=_("Linked Object"))]
|
|
91
94
|
if self.view.object_id is None and self.view.content_type is None
|
|
@@ -109,21 +112,14 @@ class NewsRelationshipDisplayConfig(DisplayViewConfig):
|
|
|
109
112
|
dp.Formatting(
|
|
110
113
|
column="sentiment",
|
|
111
114
|
formatting_rules=[
|
|
112
|
-
dp.FormattingRule(condition=("==",
|
|
113
|
-
|
|
114
|
-
dp.FormattingRule(condition=("==", 2), style={"backgroundColor": SLIGHTLY_NEGATIVE}),
|
|
115
|
-
dp.FormattingRule(condition=("==", 1), style={"backgroundColor": NEGATIVE}),
|
|
115
|
+
dp.FormattingRule(condition=("==", s.value), style={"backgroundColor": s.get_color()})
|
|
116
|
+
for s in NewsRelationship.SentimentChoices
|
|
116
117
|
],
|
|
117
118
|
)
|
|
118
119
|
],
|
|
119
120
|
legends=[
|
|
120
121
|
dp.Legend(
|
|
121
|
-
items=[
|
|
122
|
-
dp.LegendItem(icon=POSITIVE, label=_("Positive")),
|
|
123
|
-
dp.LegendItem(icon=SLIGHTLY_POSITIVE, label=_("Slightly Positive")),
|
|
124
|
-
dp.LegendItem(icon=SLIGHTLY_NEGATIVE, label=_("Slightly Negative")),
|
|
125
|
-
dp.LegendItem(icon=NEGATIVE, label=_("Negative")),
|
|
126
|
-
]
|
|
122
|
+
items=[dp.LegendItem(icon=s.get_color(), label=s.label) for s in NewsRelationship.SentimentChoices]
|
|
127
123
|
)
|
|
128
124
|
],
|
|
129
125
|
)
|
wbnews/viewsets/endpoints.py
CHANGED
|
@@ -1,27 +1,32 @@
|
|
|
1
1
|
from rest_framework.reverse import reverse
|
|
2
2
|
from wbcore.metadata.configs.endpoints import EndpointViewConfig
|
|
3
|
+
from wbcore.utils.urls import get_urlencode_endpoint
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
class NewsEndpointConfig(EndpointViewConfig):
|
|
6
7
|
def get_endpoint(self, **kwargs):
|
|
7
8
|
return None
|
|
8
9
|
|
|
9
|
-
def get_list_endpoint(self, **kwargs):
|
|
10
|
-
return reverse("wbnews:news-list", request=self.request)
|
|
11
|
-
|
|
12
10
|
def get_instance_endpoint(self, **kwargs):
|
|
13
|
-
return self.
|
|
11
|
+
return reverse("wbnews:news-list", request=self.request)
|
|
14
12
|
|
|
15
13
|
|
|
16
14
|
class NewsSourceEndpointConfig(NewsEndpointConfig):
|
|
17
|
-
|
|
18
|
-
return reverse("wbnews:source-news-list", args=[self.view.kwargs["source_id"]], request=self.request)
|
|
15
|
+
pass
|
|
19
16
|
|
|
20
17
|
|
|
21
18
|
class NewsRelationshipEndpointConfig(EndpointViewConfig):
|
|
22
19
|
def get_endpoint(self, **kwargs):
|
|
23
20
|
return reverse("wbnews:newsrelationship-list", args=[], request=self.request)
|
|
24
21
|
|
|
22
|
+
def get_create_endpoint(self, **kwargs):
    """Return the base endpoint with the view's linked object as query parameters.

    The view's content type id and object id are only included when set.
    """
    params = {}
    content_type = self.view.content_type
    if content_type:
        params["content_type"] = content_type.id
    object_id = self.view.object_id
    if object_id:
        params["object_id"] = object_id
    base_endpoint = self.get_endpoint(**kwargs)
    return get_urlencode_endpoint(base_endpoint, params)
|
|
29
|
+
|
|
25
30
|
# def get_instance_endpoint(self, **kwargs):
|
|
26
31
|
# return reverse("wbnews:news-list", args=[], request=self.request)
|
|
27
32
|
#
|
wbnews/viewsets/views.py
CHANGED
|
@@ -9,7 +9,7 @@ from rest_framework.decorators import action
|
|
|
9
9
|
from rest_framework.permissions import IsAdminUser
|
|
10
10
|
from rest_framework.response import Response
|
|
11
11
|
from wbcore import viewsets
|
|
12
|
-
from wbcore.content_type.utils import get_ancestors_content_type
|
|
12
|
+
from wbcore.contrib.content_type.utils import get_ancestors_content_type
|
|
13
13
|
|
|
14
14
|
from wbnews.models import News, NewsRelationship, NewsSource
|
|
15
15
|
from wbnews.serializers import (
|
|
@@ -20,7 +20,7 @@ from wbnews.serializers import (
|
|
|
20
20
|
SourceRepresentationSerializer,
|
|
21
21
|
)
|
|
22
22
|
|
|
23
|
-
from ..filters import NewsFilterSet
|
|
23
|
+
from ..filters import NewsFilterSet, NewsRelationshipFilterSet
|
|
24
24
|
from .buttons import NewsButtonConfig, NewsRelationshipButtonConfig
|
|
25
25
|
from .display import (
|
|
26
26
|
NewsDisplayConfig,
|
|
@@ -122,15 +122,16 @@ class NewsRelationshipModelViewSet(viewsets.ModelViewSet):
|
|
|
122
122
|
button_config_class = NewsRelationshipButtonConfig
|
|
123
123
|
endpoint_config_class = NewsRelationshipEndpointConfig
|
|
124
124
|
ordering = ["-datetime"]
|
|
125
|
+
filterset_class = NewsRelationshipFilterSet
|
|
125
126
|
|
|
126
127
|
@cached_property
|
|
127
128
|
def content_type(self) -> ContentType:
|
|
128
|
-
if content_type_id := self.request.GET.get("content_type"):
|
|
129
|
+
if content_type_id := self.request.GET.get("content_type", self.request.POST.get("content_type")):
|
|
129
130
|
return ContentType.objects.get_for_id(content_type_id)
|
|
130
131
|
|
|
131
132
|
@cached_property
|
|
132
133
|
def object_id(self) -> int:
|
|
133
|
-
return self.request.GET.get("object_id")
|
|
134
|
+
return self.request.GET.get("object_id", self.request.POST.get("object_id"))
|
|
134
135
|
|
|
135
136
|
@cached_property
|
|
136
137
|
def content_object(self):
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wbnews
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.60.1
|
|
4
4
|
Summary: A workbench module for managing news.
|
|
5
5
|
Author-email: Christopher Wittlinger <c.wittlinger@stainly.com>
|
|
6
6
|
Requires-Dist: feedparser==6.*
|
|
7
|
-
Requires-Dist: langdetect==1.*
|
|
8
7
|
Requires-Dist: wbcore
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
wbnews/.coveragerc,sha256=OOkT651L0NuoSVSXKZiLAbnQmHe9DRoLOzo1_S_buQc,383
|
|
2
|
+
wbnews/__init__.py,sha256=J-j-u0itpEFT6irdmWmixQqYMadNl1X91TxUmoiLHMI,22
|
|
3
|
+
wbnews/admin.py,sha256=cn-PF0LclyQ0FFNfw0mdVoW7v3g0b9uotTiAFKmLVPQ,832
|
|
4
|
+
wbnews/apps.py,sha256=l4kfE3Pux84Fb34xNgKDxcxRHuPCp6odCGFE9Sa3Wzw,212
|
|
5
|
+
wbnews/factories.py,sha256=_EBIeMafeiyAjnSAG-xXxdqw7vVwAu8u-DfdLn8mtHk,1172
|
|
6
|
+
wbnews/serializers.py,sha256=cKfU14poNe8AECwT7KBJeX2k7H4rz-fi4EQXXKDvV4U,4740
|
|
7
|
+
wbnews/signals.py,sha256=eqipwffwJnDQWUZ9VTKr5Jp-OMXLmVSNwoIsawnCKvM,192
|
|
8
|
+
wbnews/tasks.py,sha256=ARkMZbhS3soilHFl9HJh4mXgm6kl5ZlJomVDF_IZ9lY,445
|
|
9
|
+
wbnews/urls.py,sha256=2Gs9RGU8x6tNOLJiuG17n_ik82QVb8jlw7bU07Lk_S4,1008
|
|
10
|
+
wbnews/utils.py,sha256=h4TdMbUL0qQ2h3o3jEpGuGwT90TWiryVRtBXc7avLW0,1955
|
|
11
|
+
wbnews/filters/__init__.py,sha256=FXJcPsLQePCU-fzjpIu44Dsdu9vCckLHr0Kr-8Z_-C4,59
|
|
12
|
+
wbnews/filters/news.py,sha256=glG45wx_Jxee57H43oqCCJIjQumrNUh8pVXiD0hBM6E,1662
|
|
13
|
+
wbnews/fixtures/wbnews.yaml,sha256=cDu1UWYwIFxz-hdivW7rxLYsNOweBm4GdN1GDsycN90,173164
|
|
14
|
+
wbnews/import_export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
wbnews/import_export/backends/__init__.py,sha256=_2HnB9uCuGhQDNg-Z0V2uIvKn26LtRBXAxUBoNetBIo,30
|
|
16
|
+
wbnews/import_export/backends/news.py,sha256=QqMeAWYh4U3W4Sng0HlHx11RoR1LV_WZmRUr58dYrVg,1461
|
|
17
|
+
wbnews/import_export/handlers/__init__.py,sha256=zOeENt9cpEcSLG9ZaVb4otmbTnLAa0XdTPsUjD11dXs,36
|
|
18
|
+
wbnews/import_export/handlers/news.py,sha256=cdrtWIP3XOU8NMYZKF8VLYH5qHmPLboUm_GwNAvO2ew,2181
|
|
19
|
+
wbnews/import_export/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
wbnews/import_export/parsers/emails/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
wbnews/import_export/parsers/emails/news.py,sha256=vXK1Mui3v7Qk3Sjj4HgZA0JSTxymD38iqlU-f82GFXU,1221
|
|
22
|
+
wbnews/import_export/parsers/emails/utils.py,sha256=k7R1HZx18FKXRq10COARFgHjjBadsWe1DY3AHCrwHRs,2207
|
|
23
|
+
wbnews/import_export/parsers/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
wbnews/import_export/parsers/rss/news.py,sha256=H89avqOU_AUAyHiIXd_tfoMbcqy67GgDQ2-ucihDpks,2076
|
|
25
|
+
wbnews/locale/de/LC_MESSAGES/django.mo,sha256=M70N6el-I7EymxHLw9n773Bimbztf8s8equWa9SydBs,1283
|
|
26
|
+
wbnews/locale/de/LC_MESSAGES/django.po,sha256=Jv1u9mJ8lmWtwrz_Yfz_Q5Hpnu1uYyy90JqUUX8PGuY,3485
|
|
27
|
+
wbnews/locale/de/LC_MESSAGES/django.po.translated,sha256=ILRIDL_vON91Do5QhZr5N85Y4vsOAIHsaU8QX0FiEA4,3962
|
|
28
|
+
wbnews/locale/en/LC_MESSAGES/django.mo,sha256=UXCQbz2AxBvh-IQ7bGgjoBnijo8h9DfE9107A-2Mgkk,337
|
|
29
|
+
wbnews/locale/en/LC_MESSAGES/django.po,sha256=E1oHhFhczwVO598h2ql7UPWmppzwTh80vxba8jtv-U8,3125
|
|
30
|
+
wbnews/locale/fr/LC_MESSAGES/django.mo,sha256=t4lh3zX7kshbDAFzXa5HU_YGPXkPzKqODNXL2MeZ5KQ,429
|
|
31
|
+
wbnews/locale/fr/LC_MESSAGES/django.po,sha256=dRxnXJW76KlcwwcAqK-quI5ZQR6HiRNIcMWPDc2rhuE,3223
|
|
32
|
+
wbnews/migrations/0001_initial_squashed_0005_alter_news_import_source.py,sha256=4qxqfpAYVeU16GsWaj7kUbtOk0ZLzuECTfzhUfmni2A,14596
|
|
33
|
+
wbnews/migrations/0006_alter_news_language.py,sha256=necqSWKi20sHLok-RO9He3vnmMlmmdx07AiN3lnZPBM,4638
|
|
34
|
+
wbnews/migrations/0007_auto_20240103_0955.py,sha256=YzkH_LSWH_8qdw_BrKaTN5vqLNCPnjMlJZxs03Xbbvw,1478
|
|
35
|
+
wbnews/migrations/0008_alter_news_language.py,sha256=4tbpcVSey9PJOdf8xWndl1R8GmhmflpVQtI8YTwcevw,4648
|
|
36
|
+
wbnews/migrations/0009_newsrelationship_analysis_newsrelationship_sentiment.py,sha256=ud_yO6skjVx2s32snxzai6Z4Uao1s-40v6UZaq4k64I,3432
|
|
37
|
+
wbnews/migrations/0010_newsrelationship_important.py,sha256=seK-bFIzXa6uzja0gPnRhwS4vNgY_sR3sMm_OmP8sZE,440
|
|
38
|
+
wbnews/migrations/0011_newsrelationship_content_object_repr.py,sha256=oJUhx__5WI6TjPGPqqEqBgb3yDluc02hhh05nXTjbHw,428
|
|
39
|
+
wbnews/migrations/0012_alter_news_unique_together_news_identifier_and_more.py,sha256=SLtQiREJYMwOytGGLqaCYr_9pSeD_XD5ax7lXSETT3w,3111
|
|
40
|
+
wbnews/migrations/0013_alter_news_datetime.py,sha256=q4Gbxo25jClsv9b1kfTqBLmpPNHAq7LxOgEZeLYYRk4,497
|
|
41
|
+
wbnews/migrations/0014_newsrelationship_unique_news_relationship.py,sha256=aZUHLMKRMaXHJrrbnV9O2ZJRDVjbpkb9NjVPB7VKVW0,1073
|
|
42
|
+
wbnews/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
+
wbnews/models/__init__.py,sha256=KKIcQbpCPCVNJUPJs2MtpN9Q2Wb34Qdk-C7w6AAOy7w,99
|
|
44
|
+
wbnews/models/news.py,sha256=wkBXx9rMVBWelkWeCiaGk5frKjwp2mb1RFqsoGit1Sk,5263
|
|
45
|
+
wbnews/models/relationships.py,sha256=bcQbRa-ObS9h67pIuIrodD63RnkzbaOaRT4L6ctBoI0,1841
|
|
46
|
+
wbnews/models/sources.py,sha256=N3-rBg4myeGG_stHvKiNnSoH0yS-LS_Zr_UeKPMBr5s,2629
|
|
47
|
+
wbnews/models/utils.py,sha256=1JQxV4WxmmFGl7MdVtJoEeao3vnqqVwCi7mhSToaUvM,600
|
|
48
|
+
wbnews/models/llm/cleaned_news.py,sha256=PA0McsgMmR0TOZUNWs55sBnsCXGbaC61VtVFbJ8vx28,2226
|
|
49
|
+
wbnews/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
|
+
wbnews/tests/conftest.py,sha256=UEZOYH5Av-Cqqq5t8WQCR7QJdNnajhthBPpF8AnwQjY,186
|
|
51
|
+
wbnews/tests/test_models.py,sha256=op79cYyz9f5dX1uRBztw-E0J4HYAY8K-7cz4kBCN0qk,3278
|
|
52
|
+
wbnews/tests/test_utils.py,sha256=PsyHH_F_y-Uy3mdVgDINH19bPEd1bmZT_UQ7yu87BNU,278
|
|
53
|
+
wbnews/tests/tests.py,sha256=OADY-vbKZBe0bjjVEO1KNzRYAP2JA9mWkO9pZuh1TSs,280
|
|
54
|
+
wbnews/tests/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
+
wbnews/tests/parsers/test_emails.py,sha256=dStdTPI7Ns1jSw5PDbDEc84FJaoNPnBHC4BskWDDMxY,1073
|
|
56
|
+
wbnews/viewsets/__init__.py,sha256=2OhRiJy0MVK9TdMpFmS9x9jl_gM-CjBxUoUHlZB3_8U,531
|
|
57
|
+
wbnews/viewsets/buttons.py,sha256=6m_IdRYCQgTH4mD51DUoOa8Vx1y1-2zPQydUdvL_nF4,1788
|
|
58
|
+
wbnews/viewsets/display.py,sha256=fwMZLlwxHcNHM4AppwYpYTOicLD_pXaQ4QeFeIl4whk,5315
|
|
59
|
+
wbnews/viewsets/endpoints.py,sha256=2slGDJin_SA2heWsIaMdNTUN46FqZJvusbY1jhAaeZM,1165
|
|
60
|
+
wbnews/viewsets/menu.py,sha256=XTShfTIykN9t7oclosPfFVb1r6o46QyglEEe1C7QCMk,979
|
|
61
|
+
wbnews/viewsets/titles.py,sha256=iMyiGBMBpzng8s2ySVLqEOwueHRnAoEuc75dt9nCPjc,1367
|
|
62
|
+
wbnews/viewsets/views.py,sha256=mdb-Bz3SRFlXI7MDyA2VYLH_oosq1ZRP87YKfrEwITs,6803
|
|
63
|
+
wbnews-1.60.1.dist-info/METADATA,sha256=DlrFUb_zUu81rYqbydoX4ZtGsbPHjW7PAhp6TJ-sJp4,215
|
|
64
|
+
wbnews-1.60.1.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
|
|
65
|
+
wbnews-1.60.1.dist-info/RECORD,,
|