Source code for eucrim.article.models

# SPDX-FileCopyrightText: 2024 Thomas Breitner <t.breitner@csl.mpg.de>
#
# SPDX-License-Identifier: EUPL-1.2

from pathlib import Path
from datetime import timedelta, datetime

from django import forms
from django.db import models
from django.db.models import Prefetch
from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
from django.core.validators import FileExtensionValidator
from django.shortcuts import render
from django.conf import settings
from django.http import HttpResponse, HttpResponseRedirect

from wagtail.models import Orderable, Page, PageManager
from wagtail.admin.panels import (
    FieldPanel,
    InlinePanel,
    MultiFieldPanel,
    TabbedInterface,
    ObjectList,
    FieldRowPanel,
    HelpPanel,  # added
)
from wagtail.contrib.routable_page.models import RoutablePageMixin, route
from wagtail.search import index

from modelcluster.fields import ParentalKey

import requests

from eucrim.core.abstracts import RelatedLink, AbstractPublicationPage
from eucrim.core.models.mixins import HideShowInMenusField
from eucrim.core.storage import OverwriteStorage
from .export_pdf import set_article_pdf
from .forms import ArticleForm
from .utils import export_bibtex



[docs]
def article_authors_prefetch():
    """
    Build the ``Prefetch`` object used to load article authors with articles.

    The ``ArticlePageAuthor`` rows are ordered by ``sort_order``, joined with
    ``author__avatar`` and stored on each article under the attribute
    ``prefetched_article_authors``.
    """
    ordered_authorships = ArticlePageAuthor.objects.select_related(
        "author__avatar"
    ).order_by("sort_order")
    return Prefetch(
        "article_authors",
        queryset=ordered_authorships,
        to_attr="prefetched_article_authors",
    )




[docs]
class ArticleIndexPage(HideShowInMenusField, RoutablePageMixin, Page):
    """
    Index page for ``ArticlePage`` children.

    Its default route (``filterarticles``) renders a filterable, paginated
    article listing.
    """

    # Page-tree constraints: only allowed below the home page, only
    # ``ArticlePage`` children, and at most one instance.
    parent_page_types = ["core.HomePage"]
    subpage_types = ["ArticlePage"]
    max_count = 1
    show_in_menus = True

    # Page size for the paginated listing. The column is nullable, so
    # consumers have to cope with ``None``.
    paginate_articles_at = models.PositiveSmallIntegerField(
        blank=True,
        null=True,
        default=5,
        help_text="Show X articles on paginated view (default: 5)",
    )

    # Expose the page size on the editor's "Settings" tab.
    settings_panels = Page.settings_panels + [
        FieldPanel("paginate_articles_at"),
    ]


[docs]
    def articles(self):
        """
        Return the live ``ArticlePage`` descendants of this index page.

        Authors are prefetched via ``article_authors_prefetch()`` and the
        result is ordered by ``latest_revision_created_at``, newest first.
        """
        return (
            ArticlePage.objects.descendant_of(self)
            .live()
            .prefetch_related(article_authors_prefetch())
            .order_by("-latest_revision_created_at")
        )



[docs]
    def get_context(self, request, *args, **kwargs):
        """
        Extend the parent template context with ``articles`` and
        ``article_index_page``.

        Note that ``articles`` holds the bound method itself, not its result.
        """
        context = super().get_context(request, *args, **kwargs)
        context.update(
            {
                "articles": self.articles,
                "article_index_page": self,
            }
        )
        return context


    class Meta:
        # Human-readable names for this page type.
        # NOTE(review): these are the same labels ``ArticlePage`` uses --
        # confirm that the index page is not meant to have its own name.
        verbose_name = "Article"
        verbose_name_plural = "Articles"


[docs]
    @route(r"^$")
    def filterarticles(self, request, *args, **kwargs):
        """
        Default route: render the filterable, paginated article listing.

        Args:
            request: The current HTTP request. Its GET parameters feed the
                ``ArticleFilter`` and select the ``page`` to show.

        Returns:
            HttpResponse: ``self.template`` rendered with the filter, the
            current page of results, the latest editorials and counters.
        """
        # Imported inside the view, as in the original code (presumably to
        # avoid a circular import between models and filters -- TODO confirm).
        from .filters import ArticleFilter

        all_articles = (
            ArticlePage.objects.live()
            .prefetch_related(article_authors_prefetch())
            .order_by("-publication_date")
            .distinct()
        )

        latest_editorials = all_articles.filter(article_type=ArticlePage.EDITORIAL)[:4]

        # Query string of the current request without ``page`` (presumably
        # used by the template to build pagination links that keep the
        # active filters).
        query_params = request.GET.copy()
        query_params.pop("page", None)
        parameters = query_params.urlencode()

        # Build the filter once and reuse it for both the template and the
        # paginator; the name avoids shadowing the ``filter`` builtin.
        article_filter = ArticleFilter(request.GET, queryset=all_articles)
        filtered_qs = article_filter.qs

        # ``paginate_articles_at`` is nullable. ``Paginator`` cannot work with
        # ``None`` or ``0``, so fall back to the field's documented default.
        per_page = self.paginate_articles_at or 5
        paginator = Paginator(filtered_qs, per_page)

        # ``get_page`` returns page 1 for non-integer input and the last page
        # for out-of-range numbers, like the manual try/except it replaces.
        items = paginator.get_page(request.GET.get("page", 1))

        context = super().get_context(request)
        context.update(
            {
                "parameters": parameters,
                "filter": article_filter,
                "items": items,
                "all_articles": all_articles,
                "all_articles_count": all_articles.count(),
                # Already computed (and cached) by the paginator above.
                "filter_qs_count": paginator.count,
                "article_index_page": self,
                "latest_editorials": latest_editorials,
            }
        )

        return render(request, self.template, context, *args, **kwargs)





[docs]
class ArticlePageRelatedLink(Orderable, RelatedLink):
    """Orderable related link attached to an ``ArticlePage`` (``related_links``)."""

    page = ParentalKey("article.ArticlePage", related_name="related_links")



# class ArticlePageRelatedAuthor(Orderable, AuthorPage):
#     page = ParentalKey('article.ArticlePage', related_name='authors', null=True, on_delete=models.SET_NULL)



[docs]
class ArticlePageAuthor(Orderable):
    """
    Orderable link between an ``ArticlePage`` and an author profile page.

    Rows are reachable from the article as ``article_authors``.
    """

    page = ParentalKey(
        to="article.ArticlePage",
        related_name="article_authors",
    )
    # Only profile pages flagged ``is_author`` are offered as choices;
    # deleting the profile page deletes the authorship row as well.
    author = models.ForeignKey(
        "profile.ProfilePage",
        limit_choices_to={"is_author": True},
        on_delete=models.CASCADE,
    )

    panels = [
        FieldPanel("author"),
    ]

    class Meta:
        # The same profile may be linked to a given article only once.
        constraints = [
            models.UniqueConstraint(
                fields=["author", "page"], name="unique_authorship_per_article"
            )
        ]




[docs]
class ArticlePageManager(PageManager):

[docs]
    def get_queryset(self):
        """Return the parent manager's queryset unchanged."""
        # Eager-loading options kept from the original code, currently disabled:
        # .select_related('issue')
        # .prefetch_related('authors')
        return super().get_queryset()



[docs]
    def with_prefetched_authors(self):
        """Return the manager's queryset with article authors prefetched."""
        base_queryset = self.get_queryset()
        return base_queryset.prefetch_related(article_authors_prefetch())





[docs]
class ArticlePage(HideShowInMenusField, RoutablePageMixin, AbstractPublicationPage):
    """
    A single article or editorial.

    Besides the regular page view it offers sub-routes for BibTeX, PDF and
    JATS XML exports (see the ``@route`` methods).
    """

    # Two-letter codes stored in the ``language`` field.
    GERMAN = "GE"
    ENGLISH = "EN"
    FRENCH = "FR"
    LANGUAGE_CHOICES = (
        (GERMAN, "German"),
        (ENGLISH, "English"),
        (FRENCH, "French"),
    )

    # Two-letter codes stored in the ``article_type`` field.
    ARTICLE = "AR"
    EDITORIAL = "ED"
    ARTICLE_TYPE_CHOICES = (
        (ARTICLE, "Article"),
        (EDITORIAL, "Editorial"),
    )

    # Page-tree constraints: lives below the article index and has no children.
    parent_page_types = ["article.ArticleIndexPage"]
    subpage_types = []
    show_in_menus = False

    subtitle = models.CharField(max_length=255, blank=True)

    # We moved authors to a new orderable model and expose these via a property
    # authors = ParentalManyToManyField(
    #     "profile.ProfilePage",
    #     blank=True,
    #     limit_choices_to={"is_author": True},
    #     related_name="articles",
    # )

    @property
    def authors(self):
        """
        Return this article's author profile pages as a list, in author order.

        Data that was already loaded is preferred over a new database query:
        first the ``prefetched_article_authors`` attribute, then Django's
        prefetch cache for ``article_authors``, and only then the database.
        """
        # 1) Rows stored by a prefetch with to_attr="prefetched_article_authors".
        prefetched = getattr(self, "prefetched_article_authors", None)
        if prefetched is not None:
            return [link.author for link in prefetched]

        # 2) Rows from the regular prefetch cache; sort them in Python because
        #    their order is not known here.
        prefetch_cache = getattr(self, "_prefetched_objects_cache", {})
        if "article_authors" in prefetch_cache:
            ordered_links = list(prefetch_cache["article_authors"])
            ordered_links.sort(key=lambda link: link.sort_order)
            return [link.author for link in ordered_links]

        # 3) Nothing loaded yet: query the relation, ordered by sort_order.
        links = self.article_authors.select_related("author__avatar").order_by(
            "sort_order"
        )
        return [link.author for link in links]

    # Optional publication date; ``save()`` fills it from
    # ``first_published_at`` when it is empty.
    publication_date = models.DateField(
        blank=True,
        null=True,
        help_text="If blank, the date of the first publication process is used.",
    )
    # The help text is built by implicit string concatenation; the trailing
    # space after "hints" keeps the two halves from running together
    # ("hintsabout"). Django records help_text in migrations, so this text
    # change needs a (schema-neutral) migration.
    notify_authors = models.BooleanField(
        default=False,
        verbose_name="Send notification email to authors",
        help_text="Send notification email to all article authors with hints "
        "about this article and their profile and account.",
    )
    # Free-text log of the moments "notify_authors" was triggered (see help_text).
    notify_authors_triggered_at = models.TextField(
        blank=True,
        help_text='Stores datetime objects when "notify_authors" was triggered',
    )
    # DOI suffix only; max_length=8 matches the "YYYY-NNN" format. The full
    # DOI is assembled by the ``get_doi`` property.
    doi_id = models.CharField(
        max_length=8,
        blank=True,
        verbose_name="eucrim DOI suffix",
        help_text='Format: YYYY-NNN. Only the part after "https://doi.org/10.30709/eucrim-"',
    )
    article_type = models.CharField(
        max_length=2,
        choices=ARTICLE_TYPE_CHOICES,
        default=ARTICLE,
        help_text='Type of article: "Article" (default) or "Editorial"',
    )
    # Nullable: an article may have no issue, and deleting an issue keeps the
    # article (SET_NULL). Code reading ``self.issue`` must handle ``None``.
    issue = models.ForeignKey(
        "issue.IssuePage",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="articles",
        help_text="Printed issue, in which this article will be/was published.",
    )
    issue_page_from = models.PositiveIntegerField(
        blank=True,
        null=True,
        help_text="On which page this article starts in the issue pdf file",
    )
    issue_page_to = models.PositiveIntegerField(
        blank=True,
        null=True,
        help_text="On which page this article ends in the issue pdf file",
    )
    # Required upload (blank=False).
    # NOTE(review): the help text mentions docx only, while the validator also
    # accepts "odt" -- confirm which of the two is intended.
    word_file = models.FileField(
        upload_to="articles_raw/",
        blank=False,
        help_text="Only docx file types are supported.",
        validators=[FileExtensionValidator(allowed_extensions=["docx", "odt"])],
    )
    language = models.CharField(
        max_length=2,
        choices=LANGUAGE_CHOICES,
        default=ENGLISH,
        help_text="Language of this article.",
    )
    # header_image = models.ForeignKey(
    #     'wagtailimages.Image',
    #     null=True,
    #     blank=True,
    #     on_delete=models.SET_NULL,
    #     related_name='+',
    # )
    # Optional manual abstract.
    # NOTE(review): per the Django docs, ``max_length`` on a TextField only
    # affects the form widget and is not enforced by the database -- confirm
    # that this is sufficient here.
    excerpt = models.TextField(
        max_length=2000,
        verbose_name="Excerpt/Abstract",
        blank=True,
        help_text="Entry excerpt to be displayed on entries list. If this "
        "field is not filled, a truncated version of body text "
        "will be used.",
    )
    # Optional full-text PDF; only the ".pdf" extension is accepted.
    pdf_file = models.FileField(
        upload_to="articles_pdf/",
        blank=True,
        verbose_name="PDF file",
        validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
    )
    # Optional custom link text for ``pdf_file``. The short label goes into
    # ``verbose_name`` and the explanatory sentence into ``help_text``; the
    # two texts were previously swapped. Django records both options in
    # migrations, so this change needs a (schema-neutral) migration.
    pdf_file_linktext = models.CharField(
        max_length=255,
        blank=True,
        verbose_name="Linktext for PDF file.",
        help_text="If given, this text will be used as the clickable text for the PDF file.",
    )
    # Optional cover image; related_name="+" disables the reverse relation.
    pdf_file_cover_image = models.ForeignKey(
        "wagtailimages.Image",
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="+",
    )
    author_statement = models.TextField(
        blank=True, help_text="Hint: `Sternchenfussnote` for an article."
    )
    # as_html5 and auto_excerpt get filled in forms.py:
    as_html5 = models.TextField(
        blank=True, help_text="Generated from docx. Do not edit!"
    )
    auto_excerpt = models.TextField(
        blank=True, help_text="Generated from docx. Do not edit!"
    )
    # Generated PDF served by the ``article_pdf`` route. It uses the project's
    # ``OverwriteStorage`` (presumably replacing an existing file of the same
    # name instead of creating a renamed copy -- TODO confirm).
    article_pdf_generated = models.FileField(
        upload_to="articles/pdf/",
        blank=True,
        storage=OverwriteStorage(),
        verbose_name="Generated article PDF, including cover page",
    )

    # We are providing our own, extended form for this section
    base_form_class = ArticleForm

    # Custom manager; adds ``with_prefetched_authors()``.
    objects = ArticlePageManager()

    class Meta:
        verbose_name = "Article"
        verbose_name_plural = "Articles"
        # Default ordering: newest publication date first.
        ordering = ["-publication_date"]

    @property
    def article_index_page(self):
        """Return the first ``ArticleIndexPage`` among this page's ancestors."""
        index_ancestors = self.get_ancestors().type(ArticleIndexPage)
        return index_ancestors.first()

    # @property
    # def get_publication_date(self):
    #     return self.publication_date if self.publication_date == datetime(1970, 1, 1) else self.first_published_at

    @property
    def has_been_modified(self):
        """
        Tell whether the page was re-published more than 60 minutes after its
        first publication.

        Returns:
            bool: ``True`` if both publication timestamps are set and lie more
            than 60 minutes apart, otherwise ``False`` (previously an implicit
            ``None``, which is equally falsy).
        """
        first_published = self.first_published_at
        last_published = self.last_published_at
        if not (first_published and last_published):
            return False
        return (last_published - first_published) > timedelta(minutes=60)

    @property
    def get_doi(self):
        """Return ``settings.DOI_PREFIX`` followed by ``doi_id``, or ``None`` without a DOI suffix."""
        if not self.doi_id:
            return None
        return f"{settings.DOI_PREFIX}{self.doi_id}"

    @property
    def get_obj_og_description(self):
        """Return the manual excerpt (judging by the name, used as Open Graph description -- confirm in templates)."""
        return self.excerpt

    @property
    def get_obj_og_image(self):
        """
        Return the cover of the article's issue, or ``None`` without an issue.

        ``issue`` is a nullable foreign key, so it has to be checked before
        ``cover`` is accessed; otherwise articles without an issue raise
        ``AttributeError``.
        """
        issue = self.issue
        if issue is None:
            return None
        return issue.cover

    @property
    def get_pdf_filename(self):
        """Return the bare file name of ``pdf_file``, or ``None`` if no PDF is set."""
        if not self.pdf_file:
            return None
        stored_path = Path(self.pdf_file.name)
        return stored_path.name

    @property
    def get_issue_title(self):
        """
        Return "<official notation with volume> - <English focus>" of the issue.

        Used for the article export in the custom ExportableArticleIndexView.
        Articles without an issue yield an empty string.
        """
        if not self.issue:
            return ""
        notation = self.issue.get_official_notation_w_volume
        focus = self.issue.focus_en
        return f"{notation} - {focus}"

    @property
    def get_authors_list(self):
        """
        Return the authors as one string, e.g. ``"J. Doe, A. Smith"``.

        Each author is rendered as first-name initial plus last name. An
        author without a first name is rendered by last name only instead of
        raising ``IndexError`` on the empty string.
        """
        names = []
        for author in self.authors:
            initial = f"{author.first_name[0]}. " if author.first_name else ""
            names.append(f"{initial}{author.last_name}")
        return ", ".join(names)

    @property
    def get_article_pdf_filename(self):
        """Return the download name of the generated PDF: ``eucrim-article-<doi_id>.pdf``."""
        # The name is not truncated to the column's max_length: that would
        # also have to account for the "upload_to" path, which was considered
        # too confusing.
        doi_suffix = self.doi_id or ""
        return f"eucrim-article-{doi_suffix}.pdf"

    # @property
    # def get_bibtex(self):
    #     filename = 'media/bibtex/{}.bib'.format(self.slug)
    #     path = Path(settings.BASE_DIR) / filename
    #     if path.exists():
    #         return filename


[docs]
    def get_citation(self, from_doi=False, style="apa"):
        """
        Get a formatted citation for this article.

        Args:
            from_doi (bool): If True, fetch the citation from the DOI service
                (``Accept: text/x-bibliography``). If False, build the
                internal eucrim citation locally.
            style (str): Citation style (apa, mla, chicago, harvard, etc.).
                Only used when ``from_doi`` is True.

        Returns:
            str | None: The formatted citation. ``None`` is returned only for
            ``from_doi=True``: when the article has no DOI, the request fails
            or times out, or the service does not answer with HTTP 200.
        """
        if from_doi:
            doi = self.get_doi
            if not doi:
                return None
            if not doi.startswith("https://doi.org/"):
                doi = f"https://doi.org/{doi}"

            headers = {"Accept": f"text/x-bibliography; style={style}"}
            try:
                # requests has no default timeout; without one a stalled DOI
                # service would block the calling worker indefinitely.
                response = requests.get(doi, headers=headers, timeout=10)
            except requests.RequestException:
                # Best effort: network problems simply yield "no citation".
                return None

            if response.status_code != 200:
                return None

            # Explicitly set encoding to UTF-8
            response.encoding = "utf-8"
            return response.text.strip()

        # Internal eucrim citation style
        author_names = []
        for author in self.authors:
            # Skip the initial for authors without a first name instead of
            # raising IndexError on the empty string.
            initial = f"{author.first_name[0]}. " if author.first_name else ""
            author_names.append(f"{initial}{author.last_name}")
        authors = ", ".join(author_names)

        title = f"“{self.title}”"
        journal = "eucrim"
        # ``issue`` is nullable; leave the volume part out when it is missing.
        volume_issue = self.issue.get_official_notation_w_volume if self.issue else ""
        page_ref_obj = self.get_pdf_page_reference
        page_references = page_ref_obj.text.replace(" ", "") if page_ref_obj else ""

        # Join the non-empty parts only.
        parts = [
            part
            for part in (authors, title, volume_issue, journal, page_references)
            if part
        ]
        snippet = ", ".join(parts) + "."

        doi = self.get_doi
        if doi:
            snippet += f" DOI: {doi}"
        return snippet



[docs]
    @route(r"^bibtex/$")
    def bibtex(self, request):
        """
        Serve the BibTeX export of this article as a file download.
        """
        export = export_bibtex(self)
        response = HttpResponse(export.data, content_type="application/x-bibtex")
        response["Content-Disposition"] = f'attachment; filename="{export.filename}"'
        return response



[docs]
    @route(r"^pdf/$")
    def article_pdf(self, request):
        """
        Redirect to the generated PDF of this article.

        The PDF is (re)generated first when none exists yet or when the
        request carries ``?force-render=true``. The redirect response is sent
        with headers that disable caching.
        """
        regenerate = request.GET.get("force-render", "false") == "true"
        if regenerate or not self.article_pdf_generated:
            set_article_pdf(article_page=self, save=True)

        redirect_response = HttpResponseRedirect(self.article_pdf_generated.url)
        response_headers = {
            "Content-Disposition": f"inline; filename={self.get_article_pdf_filename}",
            "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
            "Pragma": "no-cache",
            "Expires": "0",
        }
        for header_name, header_value in response_headers.items():
            redirect_response[header_name] = header_value
        return redirect_response



[docs]
    @route(r"^jats/$")
    def article_jats(self, request):
        """
        Serve this article as a downloadable JATS XML document.

        A ``ValueError`` raised during the export (a known error, e.g. a
        missing issue) is rendered with the project's 404 template; any other
        exception is rendered with the 500 template. In both cases the
        exception text is passed to the template as ``message``.
        """
        from .export_jats import export_article_to_jats

        try:
            xml_payload = export_article_to_jats(self)
            download_header = f'attachment; filename="{self.slug}-jats.xml"'

            response = HttpResponse(xml_payload, content_type="application/xml")
            response["Content-Disposition"] = download_header
            return response
        except ValueError as exc:
            return render(request, "404.html", {"message": str(exc)}, status=404)
        except Exception as exc:
            return render(request, "500.html", {"message": str(exc)}, status=500)


    # "Content" tab of the page editor.
    content_panels = Page.content_panels + [
        FieldPanel("subtitle"),
        FieldPanel("excerpt"),
        FieldPanel("word_file"),
        InlinePanel(
            "related_links",
            label="Related links",
            classname="collapsible collapsed",
        ),
        MultiFieldPanel(
            [
                FieldPanel("pdf_file"),
                FieldPanel("pdf_file_linktext"),
                # pdf_file_cover_image is auto generated from pdf_file
                # (see forms.py)
                # FieldPanel('pdf_file_cover_image'),
            ],
            heading="Fulltext PDF",
            classname="collapsible collapsed",
        ),
        # Generated values are shown read-only so editors cannot change them.
        MultiFieldPanel(
            [
                FieldPanel("as_html5", read_only=True, classname="full"),
                FieldPanel("auto_excerpt", read_only=True, classname="full"),
            ],
            heading="Auto-populated fields (Generated from docx file - do not edit!)",
            classname="collapsible collapsed",
        ),
    ]

    # "Settings" tab: article metadata and the link to the printed issue.
    settings_panels = Page.settings_panels + [
        MultiFieldPanel(
            [
                FieldRowPanel(
                    [
                        FieldPanel("publication_date"),
                        FieldPanel("doi_id"),
                    ]
                ),
                FieldRowPanel(
                    [
                        FieldPanel("article_type"),
                        FieldPanel("language"),
                    ]
                ),
            ],
            heading="Article settings",
            classname="collapsible",
        ),
        MultiFieldPanel(
            [
                # The issue is chosen with a plain <select> widget.
                FieldPanel("issue", widget=forms.Select, classname="col12"),
                FieldPanel("issue_page_from", classname="col6"),
                FieldPanel("issue_page_to", classname="col6"),
            ],
            heading="Issue",
            classname="collapsible",
        ),
    ]

    # "Promote" tab: the mixin's panels plus a help panel rendered from a
    # template (per its heading, about the generated article PDF).
    promote_panels = HideShowInMenusField.promote_panels + [
        HelpPanel(
            heading="Generated article PDF, including cover page",
            template="article/panels/article_pdf_link.html",
        ),
        # FieldPanel('header_image'),
        # FieldPanel('tags'),
        # FieldPanel('categories', widget=forms.CheckboxSelectMultiple),
    ]

    # "Authors" tab: ordered author rows, author statement and notification.
    authors_panels = [
        InlinePanel("article_authors", label="Authors"),
        FieldPanel(
            "author_statement",
        ),
        MultiFieldPanel(
            [
                FieldPanel("notify_authors"),
                FieldPanel(
                    "notify_authors_triggered_at",
                    read_only=True,
                ),
            ],
            heading="Notify authors",
            classname="collapsible",
        ),
    ]

    # Fields added to the search index on top of the base page fields.
    search_fields = Page.search_fields + [
        index.SearchField("subtitle"),
        index.SearchField("doi_id"),
        index.SearchField("excerpt"),
        index.SearchField("auto_excerpt"),
        index.SearchField("as_html5"),
    ]

    # Explicit tab layout; needed because a custom "Authors" tab is added.
    edit_handler = TabbedInterface(
        [
            ObjectList(content_panels, heading="Content"),
            ObjectList(authors_panels, heading="Authors"),
            ObjectList(promote_panels, heading="Promote"),
            ObjectList(settings_panels, heading="Settings", classname="settings"),
        ]
    )


[docs]
    def arcana_markdown(self):
        """Return this article as exported by ``export_article_to_markdown``."""
        # Imported on call rather than at module level.
        from .export_markdown import export_article_to_markdown

        return export_article_to_markdown(self)


    @property
    def as_markdown(self):
        """Property shortcut for ``arcana_markdown()``."""
        return self.arcana_markdown()


[docs]
    def save(self, *args, **kwargs):
        """
        In case ``publication_date`` is not set or is 1970-01-01 (our import default value),
        set ``publication_date`` to the ``first_published_at`` date. This ensures we could
        use ``publication_date`` for all orderings etc.

        If the page has never been published (``first_published_at`` is
        ``None``), ``publication_date`` stays empty.
        """
        import_default = datetime(1970, 1, 1).date()
        if not self.publication_date or self.publication_date == import_default:
            # ``first_published_at`` is a datetime (it is subtracted as one in
            # ``has_been_modified``), while ``publication_date`` is a DateField.
            # Run it through the field's own ``to_python`` so the instance
            # holds the same ``date`` that ends up in the database, rather
            # than a datetime that cannot be compared with other dates.
            date_field = self._meta.get_field("publication_date")
            self.publication_date = date_field.to_python(self.first_published_at)

        super().save(*args, **kwargs)