wbnews 1.46.12__py2.py3-none-any.whl → 1.60.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wbnews/admin.py +4 -1
- wbnews/factories.py +7 -5
- wbnews/filters/__init__.py +1 -1
- wbnews/filters/news.py +39 -2
- wbnews/import_export/backends/news.py +3 -3
- wbnews/import_export/handlers/news.py +35 -3
- wbnews/import_export/parsers/emails/news.py +2 -11
- wbnews/import_export/parsers/emails/utils.py +16 -12
- wbnews/import_export/parsers/rss/news.py +3 -9
- wbnews/locale/de/LC_MESSAGES/django.mo +0 -0
- wbnews/locale/de/LC_MESSAGES/django.po +92 -39
- wbnews/locale/de/LC_MESSAGES/django.po.translated +173 -0
- wbnews/locale/en/LC_MESSAGES/django.mo +0 -0
- wbnews/locale/en/LC_MESSAGES/django.po +159 -0
- wbnews/locale/fr/LC_MESSAGES/django.mo +0 -0
- wbnews/locale/fr/LC_MESSAGES/django.po +161 -0
- wbnews/migrations/0012_alter_news_unique_together_news_identifier_and_more.py +91 -0
- wbnews/migrations/0013_alter_news_datetime.py +19 -0
- wbnews/migrations/0014_newsrelationship_unique_news_relationship.py +27 -0
- wbnews/models/llm/cleaned_news.py +26 -23
- wbnews/models/news.py +37 -22
- wbnews/models/relationships.py +20 -1
- wbnews/models/sources.py +35 -5
- wbnews/models/utils.py +15 -0
- wbnews/serializers.py +16 -7
- wbnews/tasks.py +17 -0
- wbnews/tests/parsers/__init__.py +0 -0
- wbnews/tests/parsers/test_emails.py +25 -0
- wbnews/tests/test_models.py +65 -0
- wbnews/tests/test_utils.py +7 -0
- wbnews/utils.py +57 -0
- wbnews/viewsets/display.py +25 -29
- wbnews/viewsets/endpoints.py +11 -6
- wbnews/viewsets/views.py +5 -4
- {wbnews-1.46.12.dist-info → wbnews-1.60.1.dist-info}/METADATA +1 -2
- wbnews-1.60.1.dist-info/RECORD +65 -0
- {wbnews-1.46.12.dist-info → wbnews-1.60.1.dist-info}/WHEEL +1 -1
- wbnews-1.46.12.dist-info/RECORD +0 -50
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# SOME DESCRIPTIVE TITLE.
|
|
2
|
+
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
|
|
3
|
+
# This file is distributed under the same license as the PACKAGE package.
|
|
4
|
+
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
|
|
5
|
+
#
|
|
6
|
+
msgid ""
|
|
7
|
+
msgstr ""
|
|
8
|
+
"Project-Id-Version: PACKAGE VERSION\n"
|
|
9
|
+
"Report-Msgid-Bugs-To: \n"
|
|
10
|
+
"POT-Creation-Date: 2026-01-16 14:04+0100\n"
|
|
11
|
+
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
|
12
|
+
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
|
13
|
+
"Language-Team: LANGUAGE <LL@li.org>\n"
|
|
14
|
+
"Language: \n"
|
|
15
|
+
"MIME-Version: 1.0\n"
|
|
16
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
|
17
|
+
"Content-Transfer-Encoding: 8bit\n"
|
|
18
|
+
|
|
19
|
+
#: models/news.py:52 viewsets/display.py:54 viewsets/display.py:100
|
|
20
|
+
#: viewsets/display.py:141
|
|
21
|
+
msgid "Datetime"
|
|
22
|
+
msgstr ""
|
|
23
|
+
|
|
24
|
+
#: models/news.py:53 serializers.py:21 serializers.py:76 viewsets/display.py:20
|
|
25
|
+
#: viewsets/display.py:55 viewsets/display.py:102 viewsets/display.py:142
|
|
26
|
+
msgid "Title"
|
|
27
|
+
msgstr ""
|
|
28
|
+
|
|
29
|
+
#: models/news.py:55 serializers.py:24 serializers.py:77 viewsets/display.py:23
|
|
30
|
+
#: viewsets/display.py:57 viewsets/display.py:104 viewsets/display.py:143
|
|
31
|
+
msgid "Description"
|
|
32
|
+
msgstr ""
|
|
33
|
+
|
|
34
|
+
#: models/news.py:56 serializers.py:78 viewsets/display.py:56
|
|
35
|
+
#: viewsets/display.py:103
|
|
36
|
+
msgid "Summary"
|
|
37
|
+
msgstr ""
|
|
38
|
+
|
|
39
|
+
#: models/news.py:57 viewsets/display.py:60 viewsets/display.py:144
|
|
40
|
+
msgid "Language"
|
|
41
|
+
msgstr ""
|
|
42
|
+
|
|
43
|
+
#: models/news.py:58
|
|
44
|
+
msgid "Link"
|
|
45
|
+
msgstr ""
|
|
46
|
+
|
|
47
|
+
#: models/news.py:62 viewsets/display.py:59 viewsets/display.py:106
|
|
48
|
+
msgid "Source"
|
|
49
|
+
msgstr ""
|
|
50
|
+
|
|
51
|
+
#: models/news.py:65
|
|
52
|
+
msgid "Mark as duplicate"
|
|
53
|
+
msgstr ""
|
|
54
|
+
|
|
55
|
+
#: models/relationships.py:9
|
|
56
|
+
msgid "Positive"
|
|
57
|
+
msgstr ""
|
|
58
|
+
|
|
59
|
+
#: models/relationships.py:10
|
|
60
|
+
msgid "Slightly Positive"
|
|
61
|
+
msgstr ""
|
|
62
|
+
|
|
63
|
+
#: models/relationships.py:11
|
|
64
|
+
msgid "Slightly Negative"
|
|
65
|
+
msgstr ""
|
|
66
|
+
|
|
67
|
+
#: models/relationships.py:12
|
|
68
|
+
msgid "Negative"
|
|
69
|
+
msgstr ""
|
|
70
|
+
|
|
71
|
+
#: serializers.py:22
|
|
72
|
+
msgid "Identifier"
|
|
73
|
+
msgstr ""
|
|
74
|
+
|
|
75
|
+
#: serializers.py:25 viewsets/display.py:22
|
|
76
|
+
msgid "Author"
|
|
77
|
+
msgstr ""
|
|
78
|
+
|
|
79
|
+
#: serializers.py:26
|
|
80
|
+
msgid "Updated"
|
|
81
|
+
msgstr ""
|
|
82
|
+
|
|
83
|
+
#: serializers.py:79
|
|
84
|
+
msgid "Date"
|
|
85
|
+
msgstr ""
|
|
86
|
+
|
|
87
|
+
#: serializers.py:92 viewsets/menu.py:6 viewsets/titles.py:24
|
|
88
|
+
msgid "News"
|
|
89
|
+
msgstr ""
|
|
90
|
+
|
|
91
|
+
#: viewsets/buttons.py:14
|
|
92
|
+
msgid "Open News"
|
|
93
|
+
msgstr ""
|
|
94
|
+
|
|
95
|
+
#: viewsets/buttons.py:21 viewsets/buttons.py:22 viewsets/buttons.py:24
|
|
96
|
+
msgid "Reset relationships"
|
|
97
|
+
msgstr ""
|
|
98
|
+
|
|
99
|
+
#: viewsets/display.py:21
|
|
100
|
+
msgid "RSS feed"
|
|
101
|
+
msgstr ""
|
|
102
|
+
|
|
103
|
+
#: viewsets/display.py:24
|
|
104
|
+
msgid "Last Update"
|
|
105
|
+
msgstr ""
|
|
106
|
+
|
|
107
|
+
#: viewsets/display.py:61
|
|
108
|
+
msgid "Image"
|
|
109
|
+
msgstr ""
|
|
110
|
+
|
|
111
|
+
#: viewsets/display.py:93
|
|
112
|
+
msgid "Linked Object"
|
|
113
|
+
msgstr ""
|
|
114
|
+
|
|
115
|
+
#: viewsets/display.py:101
|
|
116
|
+
msgid "Analysis"
|
|
117
|
+
msgstr ""
|
|
118
|
+
|
|
119
|
+
#: viewsets/display.py:105
|
|
120
|
+
msgid "Important"
|
|
121
|
+
msgstr ""
|
|
122
|
+
|
|
123
|
+
#: viewsets/menu.py:11
|
|
124
|
+
msgid "News Relationships"
|
|
125
|
+
msgstr ""
|
|
126
|
+
|
|
127
|
+
#: viewsets/menu.py:17 viewsets/titles.py:9
|
|
128
|
+
msgid "Sources"
|
|
129
|
+
msgstr ""
|
|
130
|
+
|
|
131
|
+
#: viewsets/menu.py:23
|
|
132
|
+
msgid "Create Source"
|
|
133
|
+
msgstr ""
|
|
134
|
+
|
|
135
|
+
#: viewsets/titles.py:13
|
|
136
|
+
#, python-brace-format
|
|
137
|
+
msgid "Source: {source}"
|
|
138
|
+
msgstr ""
|
|
139
|
+
|
|
140
|
+
#: viewsets/titles.py:14
|
|
141
|
+
msgid "News Source"
|
|
142
|
+
msgstr ""
|
|
143
|
+
|
|
144
|
+
#: viewsets/titles.py:19
|
|
145
|
+
msgid "News Flow"
|
|
146
|
+
msgstr ""
|
|
147
|
+
|
|
148
|
+
#: viewsets/titles.py:30
|
|
149
|
+
#, python-brace-format
|
|
150
|
+
msgid "News from {source}"
|
|
151
|
+
msgstr ""
|
|
152
|
+
|
|
153
|
+
#: viewsets/titles.py:36
|
|
154
|
+
msgid "News Article for {}"
|
|
155
|
+
msgstr ""
|
|
156
|
+
|
|
157
|
+
#: viewsets/titles.py:37 viewsets/titles.py:44
|
|
158
|
+
msgid "News Article"
|
|
159
|
+
msgstr ""
|
|
Binary file
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# SOME DESCRIPTIVE TITLE.
|
|
2
|
+
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
|
|
3
|
+
# This file is distributed under the same license as the PACKAGE package.
|
|
4
|
+
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
|
|
5
|
+
#
|
|
6
|
+
msgid ""
|
|
7
|
+
msgstr ""
|
|
8
|
+
"Project-Id-Version: PACKAGE VERSION\n"
|
|
9
|
+
"Report-Msgid-Bugs-To: \n"
|
|
10
|
+
"POT-Creation-Date: 2026-01-16 14:04+0100\n"
|
|
11
|
+
"PO-Revision-Date: 2025-05-30 09:40+0000\n"
|
|
12
|
+
"Language-Team: French (https://app.transifex.com/stainly/teams/171242/fr/)\n"
|
|
13
|
+
"MIME-Version: 1.0\n"
|
|
14
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
|
15
|
+
"Content-Transfer-Encoding: 8bit\n"
|
|
16
|
+
"Language: fr\n"
|
|
17
|
+
"Plural-Forms: nplurals=3; plural=(n == 0 || n == 1) ? 0 : n != 0 && n % 1000000 == 0 ? 1 : 2;\n"
|
|
18
|
+
|
|
19
|
+
#: models/news.py:52 viewsets/display.py:54 viewsets/display.py:100
|
|
20
|
+
#: viewsets/display.py:141
|
|
21
|
+
msgid "Datetime"
|
|
22
|
+
msgstr ""
|
|
23
|
+
|
|
24
|
+
#: models/news.py:53 serializers.py:21 serializers.py:76
|
|
25
|
+
#: viewsets/display.py:20 viewsets/display.py:55 viewsets/display.py:102
|
|
26
|
+
#: viewsets/display.py:142
|
|
27
|
+
msgid "Title"
|
|
28
|
+
msgstr ""
|
|
29
|
+
|
|
30
|
+
#: models/news.py:55 serializers.py:24 serializers.py:77
|
|
31
|
+
#: viewsets/display.py:23 viewsets/display.py:57 viewsets/display.py:104
|
|
32
|
+
#: viewsets/display.py:143
|
|
33
|
+
msgid "Description"
|
|
34
|
+
msgstr ""
|
|
35
|
+
|
|
36
|
+
#: models/news.py:56 serializers.py:78 viewsets/display.py:56
|
|
37
|
+
#: viewsets/display.py:103
|
|
38
|
+
msgid "Summary"
|
|
39
|
+
msgstr ""
|
|
40
|
+
|
|
41
|
+
#: models/news.py:57 viewsets/display.py:60 viewsets/display.py:144
|
|
42
|
+
msgid "Language"
|
|
43
|
+
msgstr ""
|
|
44
|
+
|
|
45
|
+
#: models/news.py:58
|
|
46
|
+
msgid "Link"
|
|
47
|
+
msgstr ""
|
|
48
|
+
|
|
49
|
+
#: models/news.py:62 viewsets/display.py:59 viewsets/display.py:106
|
|
50
|
+
msgid "Source"
|
|
51
|
+
msgstr ""
|
|
52
|
+
|
|
53
|
+
#: models/news.py:65
|
|
54
|
+
msgid "Mark as duplicate"
|
|
55
|
+
msgstr ""
|
|
56
|
+
|
|
57
|
+
#: models/relationships.py:9
|
|
58
|
+
msgid "Positive"
|
|
59
|
+
msgstr ""
|
|
60
|
+
|
|
61
|
+
#: models/relationships.py:10
|
|
62
|
+
msgid "Slightly Positive"
|
|
63
|
+
msgstr ""
|
|
64
|
+
|
|
65
|
+
#: models/relationships.py:11
|
|
66
|
+
msgid "Slightly Negative"
|
|
67
|
+
msgstr ""
|
|
68
|
+
|
|
69
|
+
#: models/relationships.py:12
|
|
70
|
+
msgid "Negative"
|
|
71
|
+
msgstr ""
|
|
72
|
+
|
|
73
|
+
#: serializers.py:22
|
|
74
|
+
msgid "Identifier"
|
|
75
|
+
msgstr ""
|
|
76
|
+
|
|
77
|
+
#: serializers.py:25 viewsets/display.py:22
|
|
78
|
+
msgid "Author"
|
|
79
|
+
msgstr ""
|
|
80
|
+
|
|
81
|
+
#: serializers.py:26
|
|
82
|
+
msgid "Updated"
|
|
83
|
+
msgstr ""
|
|
84
|
+
|
|
85
|
+
#: serializers.py:79
|
|
86
|
+
msgid "Date"
|
|
87
|
+
msgstr ""
|
|
88
|
+
|
|
89
|
+
#: serializers.py:92 viewsets/menu.py:6 viewsets/titles.py:24
|
|
90
|
+
msgid "News"
|
|
91
|
+
msgstr ""
|
|
92
|
+
|
|
93
|
+
#: viewsets/buttons.py:14
|
|
94
|
+
msgid "Open News"
|
|
95
|
+
msgstr ""
|
|
96
|
+
|
|
97
|
+
#: viewsets/buttons.py:21 viewsets/buttons.py:22 viewsets/buttons.py:24
|
|
98
|
+
msgid "Reset relationships"
|
|
99
|
+
msgstr ""
|
|
100
|
+
|
|
101
|
+
#: viewsets/display.py:21
|
|
102
|
+
msgid "RSS feed"
|
|
103
|
+
msgstr ""
|
|
104
|
+
|
|
105
|
+
#: viewsets/display.py:24
|
|
106
|
+
msgid "Last Update"
|
|
107
|
+
msgstr ""
|
|
108
|
+
|
|
109
|
+
#: viewsets/display.py:61
|
|
110
|
+
msgid "Image"
|
|
111
|
+
msgstr ""
|
|
112
|
+
|
|
113
|
+
#: viewsets/display.py:93
|
|
114
|
+
msgid "Linked Object"
|
|
115
|
+
msgstr ""
|
|
116
|
+
|
|
117
|
+
#: viewsets/display.py:101
|
|
118
|
+
msgid "Analysis"
|
|
119
|
+
msgstr ""
|
|
120
|
+
|
|
121
|
+
#: viewsets/display.py:105
|
|
122
|
+
msgid "Important"
|
|
123
|
+
msgstr ""
|
|
124
|
+
|
|
125
|
+
#: viewsets/menu.py:11
|
|
126
|
+
msgid "News Relationships"
|
|
127
|
+
msgstr ""
|
|
128
|
+
|
|
129
|
+
#: viewsets/menu.py:17 viewsets/titles.py:9
|
|
130
|
+
msgid "Sources"
|
|
131
|
+
msgstr ""
|
|
132
|
+
|
|
133
|
+
#: viewsets/menu.py:23
|
|
134
|
+
msgid "Create Source"
|
|
135
|
+
msgstr ""
|
|
136
|
+
|
|
137
|
+
#: viewsets/titles.py:13
|
|
138
|
+
#, python-brace-format
|
|
139
|
+
msgid "Source: {source}"
|
|
140
|
+
msgstr ""
|
|
141
|
+
|
|
142
|
+
#: viewsets/titles.py:14
|
|
143
|
+
msgid "News Source"
|
|
144
|
+
msgstr ""
|
|
145
|
+
|
|
146
|
+
#: viewsets/titles.py:19
|
|
147
|
+
msgid "News Flow"
|
|
148
|
+
msgstr ""
|
|
149
|
+
|
|
150
|
+
#: viewsets/titles.py:30
|
|
151
|
+
#, python-brace-format
|
|
152
|
+
msgid "News from {source}"
|
|
153
|
+
msgstr ""
|
|
154
|
+
|
|
155
|
+
#: viewsets/titles.py:36
|
|
156
|
+
msgid "News Article for {}"
|
|
157
|
+
msgstr ""
|
|
158
|
+
|
|
159
|
+
#: viewsets/titles.py:37 viewsets/titles.py:44
|
|
160
|
+
msgid "News Article"
|
|
161
|
+
msgstr ""
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Generated by Django 5.0.12 on 2025-02-24 13:45
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
|
|
4
|
+
from django.core.exceptions import ValidationError
|
|
5
|
+
from django.core.validators import URLValidator
|
|
6
|
+
from django.db import migrations, models
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def migrate_identifier(apps, schema_editor):
    """Backfill ``News.guid`` and clear links that are not valid URLs.

    Runs in two passes over the historical ``News`` model:

    1. Compute a guid for every row with ``News.get_default_guid(title, link)``.
       Only the first row seen for a given guid is updated; rows left with a
       NULL guid afterwards are deleted.
    2. Validate every remaining ``link`` with Django's ``URLValidator`` and set
       it to ``None`` when validation fails.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor supplied by ``RunPython`` (unused).
    """
    # The guid rule is read from the concrete model class rather than from the
    # historical model obtained below.
    from wbnews.models.news import News as NewsClass

    News = apps.get_model('wbnews', 'News')

    # Pass 1: assign guids, keeping only the first row per guid value.
    seen_guids = set()
    guid_updates = []
    qs = News.objects.all()
    for new in tqdm(qs, total=qs.count()):
        new.guid = NewsClass.get_default_guid(new.title, new.link)
        if new.guid not in seen_guids:
            seen_guids.add(new.guid)
            guid_updates.append(new)
    print(len(guid_updates))
    News.objects.bulk_update(guid_updates, ['guid'], batch_size=10000)

    # Rows that did not receive a guid above collide with an earlier row.
    leftovers = News.objects.filter(guid__isnull=True)
    print(leftovers.count())
    leftovers.delete()

    # Pass 2: reset invalid links. A dedicated list is used so this update only
    # touches rows whose link was actually reset, and never re-writes the stale
    # link values still held by the instances collected during pass 1.
    validate_url = URLValidator()
    link_updates = []
    qs = News.objects.all()
    for new in tqdm(qs, total=qs.count()):
        try:
            validate_url(new.link)
        except ValidationError:
            new.link = None
            link_updates.append(new)

    News.objects.bulk_update(link_updates, ['link'], batch_size=10000)
|
|
37
|
+
|
|
38
|
+
class Migration(migrations.Migration):
    """Introduce ``News.guid``, relax ``News.link`` and rename ``NewsSource.url``."""

    dependencies = [
        ("io", "0008_importsource_resource_kwargs"),
        ("wbnews", "0011_newsrelationship_content_object_repr"),
    ]

    operations = [
        # Drop every existing unique_together constraint on News.
        migrations.AlterUniqueTogether(
            name="news",
            unique_together=set(),
        ),
        # Add guid as nullable first so it can be backfilled below.
        migrations.AddField(
            model_name="news",
            name="guid",
            field=models.CharField(
                default=None,
                blank=True,
                null=True,
                max_length=1024,
            ),
            preserve_default=False,
        ),
        # The data migration is bracketed by explicit constraint-mode switches.
        migrations.RunSQL(sql="SET CONSTRAINTS ALL IMMEDIATE;"),
        migrations.RunPython(migrate_identifier),
        migrations.RunSQL(sql="SET CONSTRAINTS ALL DEFERRED;"),
        # Once backfilled, guid becomes mandatory and unique.
        migrations.AlterField(
            model_name="news",
            name="guid",
            field=models.CharField(
                max_length=1024,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name="news",
            name="link",
            field=models.URLField(
                max_length=1024,
                blank=True,
                null=True,
                verbose_name="Link",
            ),
        ),
        migrations.AddField(
            model_name="news",
            name="mark_as_duplicate",
            field=models.BooleanField(
                default=False,
                verbose_name="Mark as duplicate",
            ),
        ),
        migrations.AlterField(
            model_name="newsrelationship",
            name="sentiment",
            field=models.PositiveIntegerField(
                blank=True,
                choices=[
                    (4, "Positive"),
                    (3, "Slightly Positive"),
                    (2, "Slightly Negative"),
                    (1, "Negative"),
                ],
                null=True,
            ),
        ),
        # NewsSource.url is renamed to endpoint and then retyped.
        migrations.RenameField(
            model_name="newssource",
            old_name="url",
            new_name="endpoint",
        ),
        migrations.AlterField(
            model_name="newssource",
            name="endpoint",
            field=models.CharField(
                max_length=1024,
                unique=True,
            ),
        ),
    ]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Generated by Django 5.0.12 on 2025-04-03 08:08
|
|
2
|
+
|
|
3
|
+
import django.utils.timezone
|
|
4
|
+
from django.db import migrations, models
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Migration(migrations.Migration):
    """Give ``News.datetime`` a default of ``django.utils.timezone.now``."""

    dependencies = [
        ("wbnews", "0012_alter_news_unique_together_news_identifier_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="news",
            name="datetime",
            field=models.DateTimeField(
                default=django.utils.timezone.now,
                verbose_name="Datetime",
            ),
        ),
    ]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Generated by Django 5.0.14 on 2025-05-06 14:44
|
|
2
|
+
|
|
3
|
+
from django.db import migrations, models
|
|
4
|
+
from django.db.models import Count
|
|
5
|
+
from tqdm import tqdm
|
|
6
|
+
|
|
7
|
+
def handle_duplicated_relationship(apps, schema_editor):
    """Delete duplicated ``NewsRelationship`` rows, keeping one per group.

    Rows are grouped by ``(content_type, object_id, news)``. For every group
    holding more than one row, all rows except the one with the lowest primary
    key are deleted.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor supplied by ``RunPython`` (unused).
    """
    NewsRelationship = apps.get_model("wbnews", "NewsRelationship")
    duplicated_groups = (
        NewsRelationship.objects.values('content_type', 'object_id', 'news')
        .annotate(c=Count('*'))
        .filter(c__gt=1)
    )
    for row in tqdm(duplicated_groups, total=duplicated_groups.count()):
        group = NewsRelationship.objects.filter(
            content_type=row['content_type'], news=row['news'], object_id=row["object_id"]
        )
        # Explicit ordering makes the surviving row deterministic; slicing an
        # unordered queryset leaves the choice to the database.
        for rel in group.order_by('pk')[1:]:
            rel.delete()
|
|
13
|
+
|
|
14
|
+
class Migration(migrations.Migration):
    """Deduplicate news relationships, then enforce their uniqueness."""

    dependencies = [
        ("contenttypes", "0002_remove_content_type_name"),
        ("wbnews", "0013_alter_news_datetime"),
    ]

    operations = [
        # Duplicates are removed before the constraint is added.
        migrations.RunPython(handle_duplicated_relationship),
        migrations.AddConstraint(
            model_name="newsrelationship",
            constraint=models.UniqueConstraint(
                fields=("content_type", "object_id", "news"),
                name="unique_news_relationship",
            ),
        ),
    ]
|
|
@@ -5,27 +5,7 @@ from pydantic import BaseModel, Field
|
|
|
5
5
|
from wbcore.contrib.ai.llm.config import LLMConfig
|
|
6
6
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
|
-
from wbnews.models import News
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def clean_news_prompt(news: "News"):
|
|
12
|
-
return [
|
|
13
|
-
SystemMessage(
|
|
14
|
-
content="I have an HTML email title and body, and I want to extract only the meaningful content in plain text format, removing all metadata, subscription links, and non-essential parts. The output should be HTML- and Markdown-free and should exclude any text related to links, subscription information, or common phrases like 'Unsubscribe' or 'View online'. Only retain the main email content but do not remove any information related to news."
|
|
15
|
-
),
|
|
16
|
-
HumanMessage(
|
|
17
|
-
content=f"Title: {news.title}\n\nDescription: {news.description}",
|
|
18
|
-
),
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def summarize_news_prompt(news: "News"):
|
|
23
|
-
return [
|
|
24
|
-
SystemMessage(content="Given this news description, please extract a short summary"),
|
|
25
|
-
HumanMessage(
|
|
26
|
-
content=f"Description: {news.description}",
|
|
27
|
-
),
|
|
28
|
-
]
|
|
8
|
+
from wbnews.models import News # noqa
|
|
29
9
|
|
|
30
10
|
|
|
31
11
|
class CleanNewsModel(BaseModel):
|
|
@@ -46,18 +26,41 @@ class SummarizedNewsModel(BaseModel):
|
|
|
46
26
|
)
|
|
47
27
|
|
|
48
28
|
|
|
29
|
+
def get_clean_news_config_query(instance):
    """Return a dict with the ``description`` and ``title`` of ``instance``."""
    query = {}
    query["description"] = instance.description
    query["title"] = instance.title
    return query
|
|
31
|
+
|
|
32
|
+
|
|
49
33
|
clean_news_config = LLMConfig["News"](
|
|
50
34
|
key="clean",
|
|
51
35
|
output_model=CleanNewsModel,
|
|
52
|
-
prompt=
|
|
36
|
+
prompt=[
|
|
37
|
+
SystemMessage(
|
|
38
|
+
content="I have an HTML email title and body, and I want to extract only the meaningful content in plain text format, removing all metadata, subscription links, and non-essential parts. The output should be HTML- and Markdown-free and should exclude any text related to links, subscription information, or common phrases like 'Unsubscribe' or 'View online'. Only retain the main email content but do not remove any information related to news."
|
|
39
|
+
),
|
|
40
|
+
HumanMessage(
|
|
41
|
+
content="Title: {title}\n\nDescription: {description}",
|
|
42
|
+
),
|
|
43
|
+
],
|
|
53
44
|
on_save=True,
|
|
54
45
|
on_condition=lambda n: n.source.clean_content,
|
|
46
|
+
query=get_clean_news_config_query,
|
|
55
47
|
)
|
|
56
48
|
|
|
49
|
+
|
|
50
|
+
def get_summarized_news_config_query(instance):
    """Return a dict holding only the ``description`` of ``instance``."""
    description = instance.description
    return dict(description=description)
|
|
52
|
+
|
|
53
|
+
|
|
57
54
|
summarized_news_config = LLMConfig["News"](
|
|
58
55
|
key="summarize",
|
|
59
56
|
output_model=SummarizedNewsModel,
|
|
60
|
-
prompt=
|
|
57
|
+
prompt=[
|
|
58
|
+
SystemMessage(content="Given this news description, please extract a short summary"),
|
|
59
|
+
HumanMessage(
|
|
60
|
+
content="Description: {description}",
|
|
61
|
+
),
|
|
62
|
+
],
|
|
61
63
|
on_save=True,
|
|
62
64
|
on_condition=lambda n: not n.summary,
|
|
65
|
+
query=get_summarized_news_config_query,
|
|
63
66
|
)
|
wbnews/models/news.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from datetime import date
|
|
1
2
|
from typing import Any
|
|
2
3
|
|
|
3
4
|
from celery import chord, shared_task
|
|
@@ -7,19 +8,23 @@ from django.contrib.postgres.fields import ArrayField
|
|
|
7
8
|
from django.db import models
|
|
8
9
|
from django.db.models.signals import post_save
|
|
9
10
|
from django.dispatch import receiver
|
|
11
|
+
from django.utils import timezone
|
|
10
12
|
from django.utils.translation import gettext_lazy as _
|
|
13
|
+
from slugify import slugify
|
|
11
14
|
from wbcore.contrib.ai.llm.decorators import llm
|
|
12
15
|
from wbcore.contrib.io.mixins import ImportMixin
|
|
13
16
|
from wbcore.models import WBModel
|
|
17
|
+
from wbcore.workers import Queue
|
|
14
18
|
|
|
15
19
|
from wbnews.import_export.handlers.news import NewsImportHandler
|
|
16
20
|
from wbnews.models.llm.cleaned_news import clean_news_config, summarized_news_config
|
|
17
21
|
from wbnews.models.relationships import NewsRelationship
|
|
18
|
-
from wbnews.models.sources import NewsSource
|
|
19
22
|
from wbnews.signals import create_news_relationships
|
|
20
23
|
|
|
24
|
+
from ..utils import detect_near_duplicates
|
|
21
25
|
|
|
22
|
-
|
|
26
|
+
|
|
27
|
+
@shared_task(queue=Queue.DEFAULT.value)
|
|
23
28
|
def create_relationship(chain_results: list[list[dict[str, Any]]], news_id: int):
|
|
24
29
|
objs = []
|
|
25
30
|
for relationships in chain_results:
|
|
@@ -32,6 +37,11 @@ def create_relationship(chain_results: list[list[dict[str, Any]]], news_id: int)
|
|
|
32
37
|
)
|
|
33
38
|
|
|
34
39
|
|
|
40
|
+
class DefaultObjectManager(models.Manager):
    """Manager whose querysets exclude rows flagged with ``mark_as_duplicate``."""

    def get_queryset(self):
        """Return the parent queryset filtered on ``mark_as_duplicate=False``."""
        unfiltered = super().get_queryset()
        return unfiltered.filter(mark_as_duplicate=False)
|
|
43
|
+
|
|
44
|
+
|
|
35
45
|
@llm([clean_news_config, summarized_news_config])
|
|
36
46
|
class News(ImportMixin, WBModel):
|
|
37
47
|
errors = {
|
|
@@ -39,21 +49,29 @@ class News(ImportMixin, WBModel):
|
|
|
39
49
|
}
|
|
40
50
|
import_export_handler_class = NewsImportHandler
|
|
41
51
|
|
|
42
|
-
datetime = models.DateTimeField(verbose_name=_("Datetime"))
|
|
52
|
+
datetime = models.DateTimeField(verbose_name=_("Datetime"), default=timezone.now)
|
|
43
53
|
title = models.CharField(max_length=500, verbose_name=_("Title"))
|
|
54
|
+
guid = models.CharField(max_length=1024, unique=True)
|
|
44
55
|
description = models.TextField(blank=True, verbose_name=_("Description"))
|
|
45
56
|
summary = models.TextField(blank=True, verbose_name=_("Summary"))
|
|
46
57
|
language = models.CharField(max_length=16, choices=LANGUAGES, blank=True, null=True, verbose_name=_("Language"))
|
|
47
|
-
link = models.
|
|
58
|
+
link = models.URLField(max_length=1024, blank=True, null=True, verbose_name=_("Link"))
|
|
48
59
|
tags = ArrayField(models.CharField(max_length=16), default=list)
|
|
49
60
|
enclosures = ArrayField(models.URLField(), default=list)
|
|
50
61
|
source = models.ForeignKey(
|
|
51
62
|
"wbnews.NewsSource", on_delete=models.CASCADE, related_name="news", verbose_name=_("Source")
|
|
52
63
|
)
|
|
53
64
|
image_url = models.URLField(blank=True, null=True)
|
|
65
|
+
mark_as_duplicate = models.BooleanField(default=False, verbose_name=_("Mark as duplicate"))
|
|
54
66
|
|
|
55
|
-
|
|
56
|
-
|
|
67
|
+
objects = DefaultObjectManager()
|
|
68
|
+
all_objects = models.Manager()
|
|
69
|
+
|
|
70
|
+
def save(self, *args, **kwargs):
    """Normalize ``datetime`` and ``guid`` and then persist the instance.

    * ``datetime`` is capped at the current time so a news item is never
      dated in the future; a missing value is replaced by the current time.
    * ``guid`` falls back to ``get_default_guid(title, link)`` when unset.

    Args:
        *args: Forwarded unchanged to the parent ``save``.
        **kwargs: Forwarded unchanged to the parent ``save``.
    """
    now = timezone.now()
    # Guard against None explicitly: comparing None with a datetime raises TypeError.
    if self.datetime is None or self.datetime > now:
        self.datetime = now
    # A non-null CharField without a default is "" on a fresh instance, not
    # None, so test for any empty value before deriving the default guid.
    if not self.guid:
        self.guid = self.get_default_guid(self.title, self.link)
    super().save(*args, **kwargs)
|
|
57
75
|
|
|
58
76
|
def __str__(self) -> str:
|
|
59
77
|
return f"{self.title} ({self.source.title})"
|
|
@@ -64,7 +82,8 @@ class News(ImportMixin, WBModel):
|
|
|
64
82
|
"""
|
|
65
83
|
tasks = []
|
|
66
84
|
for sender, task_signature in create_news_relationships.send(sender=News, instance=self):
|
|
67
|
-
|
|
85
|
+
if not isinstance(task_signature, Signature):
|
|
86
|
+
raise AssertionError(self.errors["relationship_signal"].format(sender))
|
|
68
87
|
tasks.append(task_signature)
|
|
69
88
|
if tasks:
|
|
70
89
|
res = chord(tasks, create_relationship.s(self.id))
|
|
@@ -73,22 +92,11 @@ class News(ImportMixin, WBModel):
|
|
|
73
92
|
else:
|
|
74
93
|
res.apply_async()
|
|
75
94
|
|
|
76
|
-
# TODO: Consider moving this into a get_or_create queryset method on NewsSource?
|
|
77
95
|
@classmethod
|
|
78
|
-
def
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if identifier := data.get("identifier"):
|
|
83
|
-
sources = sources.filter(identifier=identifier)
|
|
84
|
-
elif url := data.get("url"):
|
|
85
|
-
sources = sources.filter(url=url)
|
|
86
|
-
elif title := data.get("title"):
|
|
87
|
-
sources = sources.filter(title=title)
|
|
88
|
-
if sources.count() == 1:
|
|
89
|
-
return sources.first()
|
|
90
|
-
else:
|
|
91
|
-
return NewsSource.objects.create(**data)
|
|
96
|
+
def get_default_guid(cls, title: str, link: str | None, max_length: int = 1024) -> str:
    """Derive the default guid for a news item.

    Args:
        title: News title, slugified when no link is available.
        link: News link; returned as-is when truthy.
        max_length: Maximum length applied to the slugified title.

    Returns:
        ``link`` when it is truthy, otherwise the slugified ``title``
        truncated to ``max_length`` characters.
    """
    return link if link else slugify(title)[0:max_length]
|
|
92
100
|
|
|
93
101
|
@classmethod
|
|
94
102
|
def get_representation_endpoint(cls) -> str:
|
|
@@ -106,6 +114,13 @@ class News(ImportMixin, WBModel):
|
|
|
106
114
|
def get_endpoint_basename(cls) -> str:
|
|
107
115
|
return "wbnews:news"
|
|
108
116
|
|
|
117
|
+
@classmethod
def handle_duplicates(cls, start: date, end: date, content_label: str = "description", threshold: float = 0.9):
    """Flag near-duplicate news whose ``datetime`` lies between ``start`` and ``end``.

    Args:
        start: Lower bound (inclusive) applied to ``datetime``.
        end: Upper bound (inclusive) applied to ``datetime``.
        content_label: Name of the field whose value is compared across news.
        threshold: Passed through to ``detect_near_duplicates``.
    """
    in_window = News.objects.filter(datetime__gte=start, datetime__lte=end)
    # Map each news id to the content that is compared.
    content_by_id = dict(in_window.values_list("id", content_label))
    # NOTE(review): presumably returns the ids to flag — confirm against wbnews.utils.
    flagged_ids = detect_near_duplicates(content_by_id, threshold=threshold)
    in_window.filter(id__in=flagged_ids).update(mark_as_duplicate=True)
|
|
123
|
+
|
|
109
124
|
|
|
110
125
|
@receiver(post_save, sender="wbnews.News")
|
|
111
126
|
def post_save_create_news_relationships(sender: type, instance: "News", raw: bool, created: bool, **kwargs):
|