#!/usr/bin/env python3
# Provenance: annotation-summary.py (mode 100755) as introduced by commit
# 84754fefd8ccfff11137b016a2c80d3c2f5ea217, "Initial commit",
# David Kaufmann, Thu, 6 Feb 2020 23:08:03 +0100 (recovered from a cgit patch).
"""Collect the annotations of a PDF and render them into a summary PDF."""

import argparse
import html

import popplerqt5
import pdfkit

# NOTE(review): the HTML tags of the original template were not recoverable
# from the patch text this file was restored from. The markup below is a
# minimal well-formed reconstruction -- confirm against upstream if the exact
# layout/styling matters.
_HTML_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
</head>
<body>
"""

_HTML_TAIL = """</body>
</html>
"""


def extract(fn, show=False):
    """Return every annotation of the PDF *fn*, grouped by page.

    Args:
        fn: Path of the PDF file to load.
        show: Unused; kept so existing callers that pass it keep working.

    Returns:
        A dict mapping each zero-based page index to the list of annotation
        objects on that page. Pages without annotations map to an empty list.

    Raises:
        ValueError: If the document could not be loaded.
    """
    doc = popplerqt5.Poppler.Document.load(fn)
    if doc is None:
        # Without this guard a bad path surfaces as an AttributeError on
        # ``numPages`` a line later, which hides the real cause.
        raise ValueError(f"could not load PDF document: {fn!r}")
    return {
        index: list(doc.page(index).annotations())
        for index in range(doc.numPages())
    }


def _render_html(annotations):
    """Build the HTML summary document for *annotations*.

    Annotations whose ``contents()`` is empty are skipped. Author and content
    come from the PDF and are HTML-escaped so that characters such as ``<`` or
    ``&`` in a note are shown literally instead of being parsed as markup.
    """
    parts = [_HTML_HEAD]
    for page in sorted(annotations):
        for annotation in annotations[page]:
            content = annotation.contents()
            if not content:
                continue
            # Page indices are zero-based; show them one-based to the reader.
            header = f"Page: {page + 1}"
            author = annotation.author()
            if author:
                header += f" | Author: {author}"
            parts.append(
                "<div>\n"
                f"<p><b>{html.escape(header)}</b></p>\n"
                f"<p>{html.escape(content)}</p>\n"
                "</div>\n"
            )
    parts.append(_HTML_TAIL)
    return "".join(parts)


def generate_pdf(annotations, destination="annotations.pdf", pagesize="A4"):
    """Write a PDF that lists the text of all non-empty annotations.

    Args:
        annotations: Mapping of zero-based page index to a list of annotation
            objects exposing ``contents()`` and ``author()``, as returned by
            ``extract``.
        destination: Path of the PDF file to write.
        pagesize: Value passed to pdfkit as the ``page-size`` option.
    """
    options = {
        'page-size': pagesize,
        'encoding': "utf-8",
        'margin-top': '0.75in',
        'margin-right': '0.75in',
        'margin-bottom': '0.75in',
        'margin-left': '0.75in',
    }
    pdfkit.from_string(_render_html(annotations), destination, options=options)


def main():
    """Parse the command line and summarize the annotations of one PDF."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('source')
    args = parser.parse_args()
    annotations = extract(args.source, show=True)
    generate_pdf(annotations)


if __name__ == '__main__':
    main()