wbnews 2.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. wbnews/.coveragerc +23 -0
  2. wbnews/__init__.py +1 -0
  3. wbnews/admin.py +27 -0
  4. wbnews/apps.py +9 -0
  5. wbnews/factories.py +33 -0
  6. wbnews/import_export/__init__.py +0 -0
  7. wbnews/import_export/backends/__init__.py +1 -0
  8. wbnews/import_export/backends/news.py +35 -0
  9. wbnews/import_export/handlers/__init__.py +1 -0
  10. wbnews/import_export/handlers/news.py +25 -0
  11. wbnews/import_export/parsers/__init__.py +0 -0
  12. wbnews/import_export/parsers/emails/__init__.py +0 -0
  13. wbnews/import_export/parsers/emails/news.py +48 -0
  14. wbnews/import_export/parsers/emails/utils.py +61 -0
  15. wbnews/import_export/parsers/rss/__init__.py +0 -0
  16. wbnews/import_export/parsers/rss/news.py +63 -0
  17. wbnews/migrations/0001_initial_squashed_0005_alter_news_import_source.py +349 -0
  18. wbnews/migrations/0006_alter_news_language.py +122 -0
  19. wbnews/migrations/0007_auto_20240103_0955.py +43 -0
  20. wbnews/migrations/0008_alter_news_language.py +123 -0
  21. wbnews/migrations/0009_newsrelationship_analysis_newsrelationship_sentiment.py +94 -0
  22. wbnews/migrations/__init__.py +0 -0
  23. wbnews/models/__init__.py +3 -0
  24. wbnews/models/news.py +116 -0
  25. wbnews/models/relationships.py +20 -0
  26. wbnews/models/sources.py +43 -0
  27. wbnews/serializers.py +83 -0
  28. wbnews/signals.py +4 -0
  29. wbnews/tests/__init__.py +0 -0
  30. wbnews/tests/conftest.py +6 -0
  31. wbnews/tests/test_models.py +15 -0
  32. wbnews/tests/tests.py +12 -0
  33. wbnews/urls.py +29 -0
  34. wbnews/viewsets/__init__.py +12 -0
  35. wbnews/viewsets/buttons.py +23 -0
  36. wbnews/viewsets/display.py +133 -0
  37. wbnews/viewsets/endpoints.py +18 -0
  38. wbnews/viewsets/menu.py +23 -0
  39. wbnews/viewsets/titles.py +39 -0
  40. wbnews/viewsets/views.py +140 -0
  41. wbnews-2.2.1.dist-info/METADATA +8 -0
  42. wbnews-2.2.1.dist-info/RECORD +43 -0
  43. wbnews-2.2.1.dist-info/WHEEL +5 -0
wbnews/.coveragerc ADDED
@@ -0,0 +1,23 @@
1
+ [report]
2
+ exclude_lines =
3
+ print()
4
+ def api_endpoints_root
5
+ def get_or_create_model_sql
6
+ def profile_check
7
+ if hasattr
8
+ raise Exception
9
+ raise Http404
10
+ except:
11
+
12
+
13
+ [run]
14
+ omit = */migrations/*
15
+ manage.py
16
+ */tests/*
17
+ */wbnews_config/*
18
+ */apps.py
19
+ */docs/*
20
+ */dynamic_preferences_registry.py
21
+ */permissions.py
22
+ */preferences/*
23
+ */.venv/*
wbnews/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
wbnews/admin.py ADDED
@@ -0,0 +1,27 @@
1
+ from django.contrib import admin
2
+
3
+ from .models import News, NewsRelationship, NewsSource
4
+
5
+
6
@admin.register(NewsRelationship)
class NewsRelationshipAdmin(admin.ModelAdmin):
    """Admin options registered for the ``NewsRelationship`` model."""

    # ``news`` is edited through an autocomplete widget instead of a plain select.
    autocomplete_fields = ["news"]
    # Columns displayed in the change list.
    list_display = ["news", "content_object"]
10
+
11
+
12
@admin.register(News)
class NewsAdmin(admin.ModelAdmin):
    """Admin options registered for the ``News`` model."""

    # Change-list columns, sidebar filter and searchable fields.
    list_display = ["title", "language", "tags", "source", "datetime"]
    list_filter = ("source",)
    search_fields = ("title", "description")

    # Relation widgets: autocomplete for the news source, raw id input for
    # the import source.
    autocomplete_fields = ["source"]
    raw_id_fields = ["import_source"]
22
+
23
+
24
@admin.register(NewsSource)
class NewsSourceAdmin(admin.ModelAdmin):
    """Admin options registered for the ``NewsSource`` model."""

    # Sources can be filtered by their type in the sidebar.
    list_filter = ("type",)
    # Fields looked up by the admin search box (also used by autocomplete
    # widgets that point at this model).
    search_fields = ("type", "title", "identifier", "description", "author", "url")
wbnews/apps.py ADDED
@@ -0,0 +1,9 @@
1
+ from django.apps import AppConfig
2
+ from django.utils.module_loading import autodiscover_modules
3
+
4
+
5
class NewConfig(AppConfig):
    """Django application configuration for the ``wbnews`` package."""

    name = "wbnews"

    def ready(self) -> None:
        """Import the ``news`` module of every installed app, where one exists."""
        autodiscover_modules("news")
wbnews/factories.py ADDED
@@ -0,0 +1,33 @@
1
+ import factory
2
+ from django.conf.global_settings import LANGUAGES
3
+ from django.utils import timezone
4
+ from faker import Factory
5
+ from wbnews.models import News, NewsSource
6
+
7
+ langs = [n for (n, v) in LANGUAGES]
8
+ faker = Factory.create()
9
+
10
+
11
class NewsSourceFactory(factory.django.DjangoModelFactory):
    """Factory producing ``NewsSource`` instances with generated field values.

    ``image`` and ``author`` use lazy ``factory.Faker`` declarations. Calling
    the module-level ``faker`` instance in the class body would evaluate it
    once at import time, so every instance built by the factory would share
    the exact same value.
    """

    title = factory.Sequence(lambda n: f"source_{n}")
    identifier = factory.Sequence(lambda n: f"http://myurl_{n}.com")
    image = factory.Faker("url")
    description = factory.Faker("sentence", nb_words=32)
    author = factory.Faker("name")
    url = factory.Faker("url")

    class Meta:
        model = NewsSource
21
+
22
+
23
class NewsFactory(factory.django.DjangoModelFactory):
    """Factory producing ``News`` instances attached to a generated source.

    ``link`` uses a lazy ``factory.Faker`` declaration so that each instance
    gets its own URL. A direct ``faker.url()`` call in the class body is
    evaluated a single time at import and then shared by every instance.
    """

    # Evaluated at build time, not at import time.
    datetime = factory.LazyFunction(timezone.now)
    title = factory.Sequence(lambda n: f"news_{n}")
    description = factory.Faker("sentence", nb_words=32)
    summary = factory.Faker("sentence", nb_words=32)
    # Cycles through the language codes collected in the module-level ``langs``.
    language = factory.Iterator(langs)
    link = factory.Faker("url")
    source = factory.SubFactory(NewsSourceFactory)

    class Meta:
        model = News
File without changes
@@ -0,0 +1 @@
1
+ from .news import DataBackend
@@ -0,0 +1,35 @@
1
+ import json
2
+ from datetime import datetime
3
+ from io import BytesIO
4
+ from typing import Generator
5
+
6
+ import feedparser
7
+ from django.db.models import QuerySet
8
+ from slugify import slugify
9
+ from wbcore.contrib.io.backends.abstract import AbstractDataBackend
10
+ from wbcore.contrib.io.backends.utils import register
11
+ from wbnews.models import NewsSource
12
+
13
+
14
@register("News RSS Backend", save_data_in_import_source=True)
class DataBackend(AbstractDataBackend):
    """Import backend that fetches the RSS feed of news sources as JSON files."""

    def is_object_valid(self, obj: "NewsSource") -> bool:
        """Return ``True`` when ``obj`` is an active RSS source with a URL."""
        # bool() honours the declared return type: the bare ``and`` chain
        # would otherwise hand back the URL string (or ``None``) itself.
        return bool(obj.type == NewsSource.Type.RSS and obj.is_active and obj.url)

    def get_default_queryset(self) -> QuerySet["NewsSource"]:
        """Return the active RSS sources whose URL is not null."""
        return NewsSource.objects.filter(type=NewsSource.Type.RSS, is_active=True, url__isnull=False)

    def get_files(
        self, execution_time: datetime, queryset=None, **kwargs
    ) -> Generator[tuple[str, BytesIO], None, None] | None:
        """Yield one ``(file_name, content)`` pair per successfully parsed feed.

        Args:
            execution_time: Moment of the import run; its timestamp is
                embedded in every generated file name.
            queryset: Sources to fetch. Nothing is yielded when it is ``None``.
            **kwargs: Accepted for interface compatibility and ignored.

        Yields:
            The file name and an in-memory JSON dump of the parsed feed, with
            the id of the originating source stored under ``"news_source"``.
        """
        if queryset is None:
            return

        # Identical for every source, so computed once.
        timestamp = datetime.timestamp(execution_time)
        for source in queryset:
            data = feedparser.parse(source.url)
            # Feeds for which feedparser recorded an exception are skipped.
            if data.get("bozo_exception"):
                continue
            data["news_source"] = source.id
            # Building the buffer from the payload leaves the stream at
            # offset 0, so consumers can call ``read()`` as well as
            # ``getvalue()``.
            content_file = BytesIO(json.dumps(data).encode())
            file_name = f"{slugify(source.title, separator='_')}_rss_file_{timestamp}.json"
            yield file_name, content_file
@@ -0,0 +1 @@
1
+ from .news import NewsImportHandler
@@ -0,0 +1,25 @@
1
+ from datetime import datetime
2
+ from typing import Any, Dict, Optional
3
+
4
+ import pytz
5
+ from django.db import models
6
+ from django.utils import timezone
7
+ from wbcore.contrib.io.imports import ImportExportHandler
8
+
9
+
10
class NewsImportHandler(ImportExportHandler):
    """Import handler that turns parsed news dictionaries into ``News`` rows."""

    MODEL_APP_LABEL = "wbnews.News"

    def _deserialize(self, data: Dict[str, Any]):
        """Convert the raw ``source`` and ``datetime`` entries of ``data`` in place.

        The source dictionary is resolved through the model's
        ``source_dict_to_model``. A ``datetime`` string in
        ``%Y-%m-%dT%H:%M:%S`` form is parsed and localized as UTC; when it is
        missing or empty the current time is used instead.
        """
        data["source"] = self.model.source_dict_to_model(data["source"])
        raw_datetime = data.get("datetime", None)
        if raw_datetime:
            naive_datetime = datetime.strptime(raw_datetime, "%Y-%m-%dT%H:%M:%S")
            data["datetime"] = pytz.utc.localize(naive_datetime)
        else:
            data["datetime"] = timezone.now()

    def _get_instance(self, data: Dict[str, Any], history: Optional[models.QuerySet] = None, **kwargs) -> models.Model:
        """Return the first news matching source, datetime and title, if any."""
        matches = self.model.objects.filter(
            source=data["source"],
            datetime=data["datetime"],
            title=data["title"],
        )
        return matches.first()

    def _create_instance(self, data: Dict[str, Any], **kwargs) -> models.Model:
        """Create a news row from ``data`` and record the creation in the import log."""
        self.import_source.log += "\nCreate News."
        return self.model.objects.create(**data, import_source=self.import_source)
File without changes
File without changes
@@ -0,0 +1,48 @@
1
+ import re
2
+ from contextlib import suppress
3
+ from datetime import datetime
4
+
5
+ from django.conf.global_settings import LANGUAGES
6
+ from langdetect import detect, lang_detect_exception
7
+ from wbcore.utils.importlib import import_from_dotted_path
8
+
9
+ from .utils import EmlContentParser
10
+
11
+ languages_dict = dict(LANGUAGES)
12
+
13
+
14
def clean_string_with_paragraphs(string):
    """Trim ``string``, join broken lines and collapse repeated spaces.

    Every literal backslash-``n`` sequence (two characters) that is not
    preceded by a period is replaced by a space; the ones following a period
    are kept. Runs of spaces are then squeezed into a single space.
    """
    # NOTE(review): the pattern matches a literal backslash followed by "n",
    # not a real newline character -- confirm this is the intended input.
    trimmed = string.strip()
    joined = re.sub(r"(?<!\.)\\n", " ", trimmed)
    return re.sub(r" +", " ", joined)
16
+
17
+
18
def parse(import_source):
    """Parse an ``.eml`` import source into a single news dictionary.

    Args:
        import_source: Object exposing the raw email through ``file`` and the
            parsing options through ``source.import_parameters``
            (``email_encoding``, optional ``html_parser`` dotted path) as
            well as ``source.uuid``.

    Returns:
        ``{"data": [news]}`` where ``news`` holds ``datetime``, ``title``,
        ``description``, ``source`` and, when detected and known to Django,
        ``language``.
    """
    # Local import: only ``datetime`` itself is imported at module level.
    from datetime import timezone

    import_parameters = import_source.source.import_parameters
    parser = EmlContentParser(
        import_source.file.read(), encoding=import_parameters.get("email_encoding", "latin-1")
    )

    # The timestamp is serialized below without any UTC offset, so it is
    # normalized to UTC first; formatting an aware date as-is would silently
    # emit the sender's local wall-clock time.
    email_date = parser.date
    if email_date is None:
        email_date = datetime.now(timezone.utc)
    elif email_date.tzinfo is not None:
        email_date = email_date.astimezone(timezone.utc)

    # Source
    html = parser.html

    # If the source defines a custom html parser, import it and convert the html.
    if html_parser_path := import_parameters.get("html_parser", None):
        with suppress(ModuleNotFoundError):
            html_parser = import_from_dotted_path(html_parser_path)
            html = html_parser(html)

    data = {
        "datetime": email_date.strftime("%Y-%m-%dT%H:%M:%S"),
        "title": parser.subject.replace(f"[{import_source.source.uuid}]", ""),
        "description": html,
        "source": parser.source,
    }

    # Language: detection is skipped when there is no HTML body, because
    # ``detect`` cannot handle ``None``.
    if html:
        try:
            language = detect(html)
        except lang_detect_exception.LangDetectException:
            pass
        else:
            if language in languages_dict:
                data["language"] = language

    return {"data": [data]}
@@ -0,0 +1,61 @@
1
+ from email import message, parser
2
+ from email.utils import parseaddr, parsedate_to_datetime
3
+
4
+
5
+ class EmlContentParser:
6
+ def __init__(self, email: bytes, encoding: str = "latin-1"):
7
+ self.message = parser.BytesParser().parsebytes(email)
8
+ self.encoding = encoding
9
+
10
+ @property
11
+ def date(self):
12
+ if date_str := self.message.get("date"):
13
+ return parsedate_to_datetime(date_str)
14
+
15
+ @property
16
+ def subject(self) -> str:
17
+ return self.message.get("subject", "")
18
+
19
+ @property
20
+ def html(self):
21
+ html = self.get_html(self.message)
22
+ return html.decode(self.encoding) if html else None
23
+
24
+ def get_html(cls, parsed: message.Message) -> bytes | None:
25
+ if parsed.is_multipart():
26
+ for item in parsed.get_payload(): # type:message.Message
27
+ if html := cls.get_html(item):
28
+ return html
29
+ elif parsed.get_content_type() == "text/html":
30
+ return parsed.get_payload(decode=True)
31
+ return None
32
+
33
+ @property
34
+ def text(self):
35
+ text = self.get_text(self.message)
36
+ return text.decode(self.encoding) if text else None
37
+
38
+ @classmethod
39
+ def get_text(cls, parsed: message.Message) -> bytes | None:
40
+ if parsed.is_multipart():
41
+ for item in parsed.get_payload():
42
+ if text := cls.get_text(item):
43
+ return text
44
+ elif parsed.get_content_type() == "text/plain":
45
+ return parsed.get_payload(decode=True)
46
+ return None
47
+
48
+ @property
49
+ def source(self) -> dict[str, any]:
50
+ name, email = parseaddr(self.message["From"])
51
+ if not name:
52
+ name = "Generic"
53
+ if not email:
54
+ email = "generic"
55
+ source = {
56
+ "title": f"{name} Research Email",
57
+ "identifier": "research-email-" + email.lower(),
58
+ "author": name,
59
+ "url": email,
60
+ }
61
+ return source
File without changes
@@ -0,0 +1,63 @@
1
+ import json
2
+ from datetime import datetime
3
+ from time import mktime
4
+
5
+ from django.conf.global_settings import LANGUAGES
6
+ from langdetect import detect, lang_detect_exception
7
+
8
+ languages_dict = dict(LANGUAGES)
9
+
10
+
11
+ def _get_source(d):
12
+ source = {}
13
+ if source_id := d.get("news_source"):
14
+ source["id"] = source_id
15
+ else:
16
+ if "title" in d["feed"]:
17
+ source["title"] = d["feed"]["title"]
18
+ if "author" in d["feed"]:
19
+ source["author"] = d["feed"]["author"]
20
+ if "image" in d["feed"]:
21
+ source["image"] = d["feed"]["image"]["href"]
22
+ if "href" in d["feed"]:
23
+ source["identifier"] = d["feed"]["href"]
24
+ if "link" in d["feed"]:
25
+ source["url"] = d["feed"]["link"]
26
+ return source
27
+
28
+
29
def parse(import_source):
    """Convert a JSON-dumped feed into a list of news dictionaries.

    Args:
        import_source: Object whose ``file`` attribute is a JSON document
            containing the parsed feed (``entries`` plus feed metadata).

    Returns:
        ``{"data": [...]}`` with one dictionary per entry that has a summary.
        Each holds ``description``, ``summary``, ``source``, ``title`` and
        ``link``, plus ``language``, ``datetime``, ``enclosures`` and
        ``image_url`` when they are available.
    """
    # Local imports: the module only imports ``datetime`` and ``mktime``.
    from calendar import timegm
    from datetime import timezone

    content = json.load(import_source.file)
    source = _get_source(content)
    data = []

    for entry in content["entries"]:
        summary = entry.get("summary", None)
        # Entries without a summary are ignored.
        if not summary:
            continue

        res = {
            "description": entry.get("description", summary),
            "summary": summary,
            "source": source,
            "title": entry.get("title", ""),
            "link": entry.get("link", None),
        }

        try:
            language = detect(summary)
        except lang_detect_exception.LangDetectException:
            pass
        else:
            if language in languages_dict:
                res["language"] = language

        if published_parsed := entry.get("published_parsed", None):
            # NOTE(review): assumes the 9-item time tuple is expressed in UTC,
            # as feedparser documents. ``timegm`` reads it as UTC, whereas a
            # ``mktime``/``fromtimestamp`` round trip goes through the host's
            # local time zone and shifts the value by the DST offset.
            published = datetime.fromtimestamp(timegm(tuple(published_parsed)), tz=timezone.utc)
            res["datetime"] = published.strftime("%Y-%m-%dT%H:%M:%S")

        if enclosures := entry.get("enclosures", None):
            res["enclosures"] = [e.get("href", "") for e in enclosures]

        # Only the first media item is considered for the image.
        media_content = entry.get("media_content", [])
        if isinstance(media_content, list) and media_content:
            if image_url := media_content[0].get("url", None):
                res["image_url"] = image_url

        data.append(res)
    return {"data": data}