Skip to content

Commit

Permalink
Merge pull request #48 from unicef/feature/cards
Browse files Browse the repository at this point in the history
Feature/cards
  • Loading branch information
srugano authored Jun 21, 2024
2 parents 7466e3e + 631486b commit fde1258
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 106 deletions.
Binary file not shown.
133 changes: 42 additions & 91 deletions src/hope_country_report/apps/power_query/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
from io import BytesIO
from pathlib import Path

from django.conf import settings
from django.core.files.temp import NamedTemporaryFile
from django.template import Context, Template
from django.utils.functional import classproperty

import fitz
import pdfkit
from PIL import Image, ImageDraw, ImageFont
from PIL import Image
from pypdf import PdfReader, PdfWriter
from pypdf.constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes, FieldFlag
from pypdf.generic import ArrayObject
Expand All @@ -25,7 +24,12 @@

from hope_country_report.apps.power_query.storage import HopeStorage

from .utils import to_dataset
from hope_country_report.apps.power_query.utils import (
get_field_rect,
to_dataset,
convert_pdf_to_image_pdf,
insert_special_image,
)

logger = logging.getLogger(__name__)
if TYPE_CHECKING:
Expand Down Expand Up @@ -194,27 +198,30 @@ def process(self, context: Dict[str, Any]) -> bytes:
tpl = self.formatter.template
reader = PdfReader(tpl.doc)

font_size = context.get("context", {}).get("font_size", 10)
font_color = context.get("context", {}).get("font_color", "black")
ds = to_dataset(context["dataset"].data).dict
output_pdf = PdfWriter()
for index, entry in enumerate(ds, start=1):
with NamedTemporaryFile(suffix=".pdf", delete=True) as temp_pdf_file:
writer = PdfWriter()
text_values = {}
arabic_values = {}
special_values = {}
images = {}
try: # Load, insert, and save
try:
for page in reader.pages:
for annot in page.annotations:
annot = annot.get_object()
field_name = annot[FieldDictionaryAttributes.T]
if field_name in entry:
value = entry[field_name]
language = self.is_special_language_field(field_name)
if self.is_image_field(annot):
rect = annot[AnnotationDictionaryAttributes.Rect]
text_values[field_name] = None
images[field_name] = [rect, value]
elif self.is_arabic_field(value):
arabic_values[field_name] = value
elif language:
special_values[field_name] = {"value": value, "language": language}
else:
text_values[field_name] = value
except IndexError as exc:
Expand All @@ -229,14 +236,14 @@ def process(self, context: Dict[str, Any]) -> bytes:
temp_pdf_file.write(output_stream.read())

document = fitz.open(stream=output_stream.getvalue(), filetype="pdf")
for field_name, text in arabic_values.items():
self.insert_arabic_image(document, field_name, text)
for field_name, text in special_values.items():
insert_special_image(document, field_name, text, font_size, font_color)
for field_name, (rect, image_path) in images.items():
if image_path:
self.insert_external_image(document, field_name, image_path)
else:
logger.warning(f"Image not found for field: {field_name}")
document.ez_save(temp_pdf_file.name, deflate_fonts=1, deflate_images=1, deflate=1)
document.ez_save(temp_pdf_file.name, deflate_fonts=True, deflate_images=1, deflate=1)
output_stream.seek(0)
output_pdf.append_pages_from_reader(PdfReader(temp_pdf_file.name))
output_stream = io.BytesIO()
Expand All @@ -245,52 +252,25 @@ def process(self, context: Dict[str, Any]) -> bytes:
fitz_pdf_document = fitz.open(stream=output_stream, filetype="pdf")

# Convert the PDF to an image-based PDF
image_pdf_bytes = self.convert_pdf_to_image_pdf(fitz_pdf_document, dpi=300)
image_pdf_bytes = convert_pdf_to_image_pdf(fitz_pdf_document, dpi=300)

return image_pdf_bytes

def convert_pdf_to_image_pdf(self, pdf_document: fitz.Document, dpi: int = 300) -> bytes:
    """
    Rasterise every page of ``pdf_document`` and return a new image-only PDF.

    Each source page is rendered to a pixmap at ``dpi``. The matching output
    page is created with the pixmap's pixel dimensions and the pixmap is drawn
    over the whole page.

    Args:
        pdf_document: Document to rasterise; it is left open for the caller.
        dpi: Resolution used when rendering each page.

    Returns:
        The serialised bytes of the newly built PDF.
    """
    # The context manager closes the scratch document even if rendering fails.
    with fitz.open() as image_document:
        for source_page in pdf_document:
            pix = source_page.get_pixmap(dpi=dpi)
            # new_page() appends and returns the page, so no re-indexing is needed.
            image_page = image_document.new_page(width=pix.width, height=pix.height)
            image_page.insert_image(fitz.Rect(0, 0, pix.width, pix.height), pixmap=pix)
        buffer = io.BytesIO()
        image_document.save(buffer, deflate_fonts=1, deflate_images=1, deflate=1)
    return buffer.getvalue()

def insert_arabic_image(self, document: fitz.Document, field_name: str, text: str):
    """
    Render ``text`` as an image and draw it over the form field ``field_name``.

    Nothing is inserted when the field lookup yields a falsy rectangle.
    """
    rect, page_index = self.get_field_rect(document, field_name)
    if not rect:
        return
    rendered_text = self.generate_arabic_image(text, rect)
    target_rect = fitz.Rect(*rect)
    # keep_proportion=False stretches the image to the field's exact bounds.
    document[page_index].insert_image(target_rect, stream=rendered_text, keep_proportion=False)

def insert_external_image(self, document: fitz.Document, field_name: str, image_path: str):
def insert_external_image(self, document: fitz.Document, field_name: str, image_path: str, font_size: int = 10):
"""
Loads, resizes, and inserts an external image into the specified field.
"""
rect, page_index = self.get_field_rect(document, field_name)
rect, page_index = get_field_rect(document, field_name)
if rect is None or page_index is None:
logger.error(f"No valid rectangle or page index found for field {field_name}. Cannot insert image.")
return
page = document[page_index]
try:
image_stream = self.load_image_from_blob_storage(image_path)
image = Image.open(image_stream)
image = Image.open(image_stream).rotate(-90, expand=True)
image.thumbnail((800, 600), Image.LANCZOS)
output_stream = io.BytesIO()
image.save(output_stream, format="JPEG", quality=75)
image.save(output_stream, format="PNG")
output_stream.seek(0)
for widget in page.widgets():
if widget.field_name == field_name:
Expand All @@ -304,60 +284,31 @@ def insert_external_image(self, document: fitz.Document, field_name: str, image_
logger.exception(e)
capture_exception(e)
page.insert_textbox(
rect, "Image unreadable", color=(1, 0, 0), fontsize=11, fontname="helv", align=fitz.TEXT_ALIGN_CENTER
rect,
"Image unreadable",
color=(1, 0, 0),
fontsize=font_size,
fontname="helv",
align=fitz.TEXT_ALIGN_CENTER,
)

def is_image_field(self, annot: ArrayObject) -> bool:
"""
Checks if a given PDF annotation represents an image field.
"""
return annot.get(FieldDictionaryAttributes.FT) == "/Btn" and AnnotationDictionaryAttributes.AP in annot

def is_arabic_field(self, value: str) -> bool:
    """
    Return True when ``value`` is a string with a character in U+0600-U+06FF.

    Non-string values (``None``, numbers, ...) are never Arabic. The result is
    always a real ``bool``, never a match object or ``None``.
    """
    if not isinstance(value, str):
        return False
    return re.search("[\u0600-\u06FF]", value) is not None

def get_field_rect(
    self, document: fitz.Document, field_name: str
) -> tuple[Optional[fitz.Rect], Optional[int]]:
    """
    Locate the widget named ``field_name`` and return ``(rect, page_index)``.

    Pages are scanned in order and the first matching widget wins. A matching
    text widget is flagged read-only, and updated, before its rectangle is
    returned.

    Returns:
        ``(widget.rect, page_index)`` for the first match, or ``(None, None)``
        when no widget matches. The result is always a 2-tuple so callers can
        unpack it unconditionally.
    """
    for page_index, page in enumerate(document):
        for widget in page.widgets():
            if widget.field_name != field_name:
                continue
            if widget.field_type == fitz.PDF_WIDGET_TYPE_TEXT:
                widget.field_flags |= fitz.PDF_FIELD_IS_READ_ONLY
                widget.update()
            return widget.rect, page_index
    return None, None

def generate_arabic_image(self, text: str, rect: fitz.Rect, dpi: int = 300, font_size: int = 10) -> bytes:
    """
    Render ``text`` right-to-left on a transparent canvas sized to ``rect``.

    Args:
        text: String to draw.
        rect: Target rectangle; its extent is divided by 72 and scaled by
            ``dpi`` to obtain the canvas size in pixels.
        dpi: Resolution of the generated image.
        font_size: Font size before scaling by ``dpi / 72`` (default 10, the
            value that used to be hard-coded).

    Returns:
        The PNG-encoded image as ``bytes``.
    """
    rect_width_in_inches = (rect.x1 - rect.x0) / 72
    rect_height_in_inches = (rect.y1 - rect.y0) / 72
    width = int(rect_width_in_inches * dpi)
    height = int(rect_height_in_inches * dpi)

    # Alpha 0 makes the canvas fully transparent; only the glyphs are visible.
    image = Image.new("RGBA", (width, height), (28, 171, 231, 0))
    draw = ImageDraw.Draw(image)
    font_size_scaled = int(font_size * dpi / 72)
    font_file_path = Path(settings.PACKAGE_DIR / "web" / "static" / "fonts" / "NotoNaskhArabic-Bold.ttf")
    font = ImageFont.truetype(font_file_path, font_size_scaled)

    # Centre the measured text box inside the canvas.
    text_bbox = draw.textbbox((0, 0), text, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    x = (width - text_width) / 2
    y = (height - text_height) / 2
    draw.text((x, y), text, font=font, fill="black", direction="rtl", align="right")

    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="PNG", optimize=True)
    return img_byte_arr.getvalue()
return (
annot.get(FieldDictionaryAttributes.FT) == "/Btn"
and AnnotationDictionaryAttributes.P in annot
and AnnotationDictionaryAttributes.AP in annot
)

def is_special_language_field(self, field_name: str) -> Optional[str]:
    """
    Map a field name's language suffix to a language label.

    Returns the label of the first suffix that ``field_name`` ends with, or
    ``None`` when it ends with none of the known suffixes.
    """
    suffix_labels = (
        ("_ar", "arabic"),
        ("_bn", "bengali"),
        ("_ru", "cyrillic"),
        ("_bur", "burmese"),
    )
    matching_labels = (label for suffix, label in suffix_labels if field_name.endswith(suffix))
    return next(matching_labels, None)

def load_image_from_blob_storage(self, image_path: str) -> BytesIO:
with HopeStorage().open(image_path, "rb") as img_file:
Expand Down
106 changes: 101 additions & 5 deletions src/hope_country_report/apps/power_query/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any, Dict, TYPE_CHECKING

from typing import Any, Dict, Optional, TYPE_CHECKING, Union
from pathlib import Path
from io import BytesIO
from django.conf import settings
import base64
import binascii
import datetime
Expand All @@ -8,14 +10,13 @@
import logging
from collections.abc import Callable, Iterable
from functools import wraps
from pathlib import Path

from django.conf import settings
from PIL import Image, ImageDraw, ImageFont
from django.contrib.auth import authenticate
from django.db.models import QuerySet
from django.http import HttpRequest, HttpResponse
from django.utils.safestring import mark_safe

import fitz
import tablib
from constance import config
from sentry_sdk import configure_scope
Expand Down Expand Up @@ -145,3 +146,98 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
return func(*args, **kwargs)

return wrapper


def load_font_for_language(language: str, font_size: int = 12):
    """
    Load the bundled TrueType font used for ``language`` at ``font_size``.

    Fonts are read from ``<PACKAGE_DIR>/web/static/fonts``. A language without
    a dedicated entry falls back to ``FreeSansBold.ttf``.
    """
    fonts_dir = Path(settings.PACKAGE_DIR) / "web" / "static" / "fonts"
    font_file_by_language = {
        "arabic": "NotoNaskhArabic-Bold.ttf",
        "cyrillic": "FreeSansBold.ttf",
        "bengali": "NotoSansBengali-Bold.ttf",
        "burmese": "NotoSerifMyanmar-Bold.ttf",
    }
    font_file = font_file_by_language.get(language, "FreeSansBold.ttf")
    return ImageFont.truetype(str(fonts_dir / font_file), size=font_size)


def get_field_rect(document: fitz.Document, field_name: str) -> tuple[Optional[fitz.Rect], Optional[int]]:
    """
    Locate the widget named ``field_name`` and return ``(rect, page_index)``.

    Pages are scanned in order and the first matching widget wins. A matching
    text widget is flagged read-only, and updated, before its rectangle is
    returned.

    Returns:
        ``(widget.rect, page_index)`` for the first match, or ``(None, None)``
        when no widget matches. The result is always a 2-tuple so callers can
        unpack it unconditionally.
    """
    for page_index, page in enumerate(document):
        for widget in page.widgets():
            if widget.field_name != field_name:
                continue
            if widget.field_type == fitz.PDF_WIDGET_TYPE_TEXT:
                widget.field_flags |= fitz.PDF_FIELD_IS_READ_ONLY
                widget.update()
            return widget.rect, page_index
    return None, None


def insert_special_language_image(
    text: str, rect: fitz.Rect, language: str, dpi: int = 300, font_size: int = 10, font_color: str = "black"
) -> bytes:
    """
    Render ``text`` centred on a transparent canvas sized to ``rect``.

    Args:
        text: String to draw.
        rect: Target rectangle; its extent is divided by 72 and scaled by
            ``dpi`` to obtain the canvas size in pixels.
        language: Key used to pick the font via ``load_font_for_language``.
        dpi: Resolution of the generated image.
        font_size: Font size before scaling by ``dpi / 72``.
        font_color: Fill colour passed to the text drawing call.

    Returns:
        The PNG-encoded image as ``bytes``.
    """
    rect_width_in_inches = (rect.x1 - rect.x0) / 72
    rect_height_in_inches = (rect.y1 - rect.y0) / 72
    width = int(rect_width_in_inches * dpi)
    height = int(rect_height_in_inches * dpi)

    # Alpha 0 makes the canvas fully transparent; only the glyphs are visible.
    image = Image.new("RGBA", (width, height), (28, 171, 231, 0))
    draw = ImageDraw.Draw(image)
    font_size_scaled = int(font_size * dpi / 72)
    font = load_font_for_language(language, font_size_scaled)

    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top
    # The box was measured for text drawn at (0, 0) and usually starts at a
    # non-zero (left, top); subtract that origin so the inked area, not the
    # drawing anchor, ends up centred.
    x = (width - text_width) / 2 - left
    y = (height - text_height) / 2 - top
    draw.text((x, y), text, font=font, fill=font_color)

    img_byte_arr = BytesIO()
    image.save(img_byte_arr, format="PNG", optimize=True)
    return img_byte_arr.getvalue()


def convert_pdf_to_image_pdf(pdf_document: fitz.Document, dpi: int = 300) -> bytes:
    """
    Rasterise every page of ``pdf_document`` and return a new image-only PDF.

    Each source page is rendered to a pixmap at ``dpi``. The matching output
    page is created with the pixmap's pixel dimensions and the pixmap is drawn
    over the whole page.

    Args:
        pdf_document: Document to rasterise; it is left open for the caller.
        dpi: Resolution used when rendering each page.

    Returns:
        The serialised bytes of the newly built PDF.
    """
    # The context manager closes the scratch document even if rendering fails.
    with fitz.open() as image_document:
        for source_page in pdf_document:
            pix = source_page.get_pixmap(dpi=dpi)
            # new_page() appends and returns the page, so no re-indexing is needed.
            image_page = image_document.new_page(width=pix.width, height=pix.height)
            image_page.insert_image(fitz.Rect(0, 0, pix.width, pix.height), pixmap=pix)
        buffer = BytesIO()
        image_document.save(buffer, deflate_fonts=1, deflate_images=1, deflate=1)
    return buffer.getvalue()


def insert_special_image(
    document: fitz.Document, field_name: str, text_info: dict, font_size: int = 10, font_color: str = "black"
):
    """
    Render non-Latin text as an image and draw it over the field ``field_name``.

    ``text_info`` must provide the keys ``"value"`` (the text) and
    ``"language"`` (the font selector). When the field lookup yields a falsy
    rectangle, nothing is drawn and an info message is logged instead.
    """
    # Read both keys before the lookup so a malformed text_info fails first.
    text, language = text_info["value"], text_info["language"]
    rect, page_index = get_field_rect(document, field_name)
    if not rect:
        logger.info(f"Field {field_name} not found")
        return
    rendered_text = insert_special_language_image(
        text, rect, language, font_size=font_size, font_color=font_color
    )
    target_rect = fitz.Rect(*rect)
    # keep_proportion=False stretches the image to the field's exact bounds.
    document[page_index].insert_image(target_rect, stream=rendered_text, keep_proportion=False)
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/admin/test_admin_rt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

@pytest.fixture()
def report_template():
from testutils.factories import ReportTemplate
from testutils.factories import ReportTemplateFactory

return ReportTemplate.objects.first()
return ReportTemplateFactory()


@pytest.fixture()
Expand Down
10 changes: 8 additions & 2 deletions tests/extras/testutils/factories/power_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

from django.apps import apps
from django.core.files.base import ContentFile

from django.core.files.uploadedfile import SimpleUploadedFile
import factory
from strategy_field.utils import fqn

from hope_country_report.apps.power_query.models import (
ChartPage,
Dataset,
Expand Down Expand Up @@ -49,6 +48,13 @@ class Meta:
model = ReportTemplate
django_get_or_create = ("name",)

country_office = factory.SubFactory(CountryOfficeFactory)
name = factory.Faker("word")
file_suffix = ".pdf"
doc = factory.LazyAttribute(
lambda _: SimpleUploadedFile("test_template.pdf", b"Test file content", content_type="application/pdf")
)


class FormatterFactory(AutoRegisterModelFactory):
name = "Queryset To HTML"
Expand Down
Loading

0 comments on commit fde1258

Please sign in to comment.