From 84754fefd8ccfff11137b016a2c80d3c2f5ea217 Mon Sep 17 00:00:00 2001
From: David Kaufmann
Date: Thu, 6 Feb 2020 23:08:03 +0100
Subject: Initial commit

---
NOTE(review): line structure restored from a whitespace-collapsed copy of
this patch. The triple-quoted `blocks` literals in annotation-summary.py
contain only whitespace here, and the author e-mail addresses are missing;
both appear to have been stripped during extraction -- confirm against the
upstream commit before applying.

 annotation-summary.py   | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
 annotation-summary.spec | 42 +++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100755 annotation-summary.py
 create mode 100644 annotation-summary.spec

diff --git a/annotation-summary.py b/annotation-summary.py
new file mode 100755
index 0000000..0b0fb5e
--- /dev/null
+++ b/annotation-summary.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+import argparse
+import popplerqt5
+import pdfkit
+
+def extract(fn, show=False):
+    doc = popplerqt5.Poppler.Document.load(fn)
+    annotations = {}
+    for i in range(doc.numPages()):
+        annotations[i] = []
+        for annot in doc.page(i).annotations():
+            annotations[i].append(annot)
+    return annotations
+
+def generate_pdf(annotations, destination="annotations.pdf", pagesize="A4"):
+    options = {
+        'page-size': pagesize,
+        'encoding': "utf-8",
+        'margin-top': '0.75in',
+        'margin-right': '0.75in',
+        'margin-bottom': '0.75in',
+        'margin-left': '0.75in',
+    }
+
+    blocks = """
+
+
+
+
+
+
+    """
+
+    pages = list(annotations.keys())
+    pages.sort()
+    for page in pages:
+        for annotation in annotations[page]:
+            content = annotation.contents()
+            if not content:
+                continue
+            author = annotation.author()
+            if author != "":
+                header = f"Page: {page + 1} | Author: {author}"
+            else:
+                header = f"Page: {page + 1}"
+            blocks += f"""
+                {header}
+
+
+                {content}
+
+
+            """
+
+    blocks += """
+
+
+    """
+    pdfkit.from_string(blocks, destination, options=options)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source')
+    args = parser.parse_args()
+    annotations = extract(args.source, show=True)
+    generate_pdf(annotations)
diff --git a/annotation-summary.spec b/annotation-summary.spec
new file mode 100644
index 0000000..88aa683
--- /dev/null
+++ b/annotation-summary.spec
@@ -0,0 +1,42 @@
+%global commit 0123456789abcdef0123456789abcdef01234567
+%global shortcommit %(c=%{commit}; echo ${c:0:7})
+%global snapinfo 20200206git%{shortcommit}
+
+Name:           annotation-summary
+Version:        0.1
+Release:        1.%{snapinfo}%{?dist}
+Summary:        Extract annotations/notes from a pdf file
+
+License:
+URL:            https://git.ionic.at/pub/astra/%{name}
+#Source0:       %{url}/snapshot/%{name}-%{commit}.tar.gz
+
+Requires:       python3-poppler-qt5
+Requires:       python3-pdfkit
+Requires:       wkhtmltopdf
+
+%description
+Extract annotations/notes from a pdf file
+
+
+%prep
+%autosetup
+
+
+%build
+%py3_build
+
+
+%install
+%py3_install
+
+
+%files
+%license add-license-file-here
+%doc add-docs-here
+
+
+
+%changelog
+* Thu Feb 6 2020 David Kaufmann - 0.1-1
+- Initial commit
-- 
cgit v1.2.3