aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xannotation-summary.py67
-rw-r--r--annotation-summary.spec42
2 files changed, 109 insertions, 0 deletions
diff --git a/annotation-summary.py b/annotation-summary.py
new file mode 100755
index 0000000..0b0fb5e
--- /dev/null
+++ b/annotation-summary.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+import argparse
+import popplerqt5
+import pdfkit
+
def extract(fn, show=False):
    """Collect the annotations of every page of a PDF file.

    Args:
        fn: Path of the PDF file to open with poppler.
        show: Accepted for backward compatibility; this function does not
            use it.

    Returns:
        A dict mapping each zero-based page index to the list of annotation
        objects poppler reports for that page. Pages without annotations
        map to an empty list.

    Raises:
        ValueError: If poppler could not load a document from ``fn``.
    """
    doc = popplerqt5.Poppler.Document.load(fn)
    if doc is None:
        # Fail with a clear message here instead of an AttributeError on
        # the first method call below.
        raise ValueError(f"could not open PDF document: {fn!r}")
    return {
        index: list(doc.page(index).annotations())
        for index in range(doc.numPages())
    }
+
def generate_pdf(annotations, destination="annotations.pdf", pagesize="A4"):
    """Render the text of all annotations into a summary PDF.

    One bordered box is emitted per annotation with non-empty contents, in
    ascending page order. Each box shows the one-based page number, the
    author when one is set, and the annotation text with its whitespace
    preserved.

    Args:
        annotations: Mapping of zero-based page index to a list of
            annotation objects exposing ``contents()`` and ``author()``.
        destination: Output path passed to ``pdfkit.from_string``.
        pagesize: Value for the ``page-size`` option passed to pdfkit.
    """
    # Function-scope import so the block does not depend on a change to the
    # file-level import list.
    from html import escape

    options = {
        'page-size': pagesize,
        'encoding': "utf-8",
        'margin-top': '0.75in',
        'margin-right': '0.75in',
        'margin-bottom': '0.75in',
        'margin-left': '0.75in',
    }

    parts = ["""
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="utf-8">
    </head>
    <body>
    """]

    for page in sorted(annotations):
        for annotation in annotations[page]:
            content = annotation.contents()
            if not content:
                continue
            header = f"<span>Page: {page + 1}</span>"
            author = annotation.author()
            if author:
                header += f" | <span>Author: {escape(author, quote=False)}</span>"
            # Annotation text is free-form user input: escape it so that
            # characters such as '<' or '&' show up literally instead of
            # being parsed as markup.
            body = escape(content, quote=False)
            parts.append(f"""
    <div style="border: 1px solid #300;">
    {header}<br/>
    <hr/>
    <div style="white-space: pre-wrap;">{body}</div>
    </div>
    <br/>
    """)

    parts.append("""
    </body>
    </html>
    """)
    pdfkit.from_string("".join(parts), destination, options=options)
+
if __name__ == '__main__':
    # Command-line entry point: read the annotations of the given PDF and
    # write them to a summary PDF.
    parser = argparse.ArgumentParser(
        description="Extract annotations/notes from a pdf file",
    )
    parser.add_argument('source', help="PDF file to read annotations from")
    parser.add_argument(
        '-o', '--output',
        default="annotations.pdf",
        help="path of the summary PDF to write (default: %(default)s)",
    )
    args = parser.parse_args()
    annotations = extract(args.source, show=True)
    generate_pdf(annotations, destination=args.output)
diff --git a/annotation-summary.spec b/annotation-summary.spec
new file mode 100644
index 0000000..88aa683
--- /dev/null
+++ b/annotation-summary.spec
@@ -0,0 +1,42 @@
# Snapshot packaging: the release tag embeds the date and the short hash of
# the upstream git commit.
# NOTE(review): the commit hash below looks like a placeholder - replace it
# with the real upstream commit.
%global commit 0123456789abcdef0123456789abcdef01234567
%global shortcommit %(c=%{commit}; echo ${c:0:7})
%global snapinfo 20200206git%{shortcommit}

Name: annotation-summary
Version: 0.1
Release: 1.%{snapinfo}%{?dist}
Summary: Extract annotations/notes from a pdf file

# TODO: fill in the license identifier; rpmbuild rejects an empty License tag.
License:
URL: https://git.ionic.at/pub/astra/%{name}
# rpm expands macros even inside comments, so they are escaped with %% here.
# TODO: enable this line once the snapshot tarball exists; %%autosetup in the
# prep section needs a source archive to unpack.
#Source0: %%{url}/snapshot/%%{name}-%%{commit}.tar.gz

# The package ships a pure-Python script, so it is architecture independent.
BuildArch: noarch
# Provides the %%py3_build and %%py3_install macros used below.
BuildRequires: python3-devel

Requires: python3-poppler-qt5
Requires: python3-pdfkit
Requires: wkhtmltopdf

%description
Extract annotations/notes from a pdf file


%prep
%autosetup


# NOTE(review): %%py3_build and %%py3_install expect a setup.py in the source
# tree - confirm that upstream ships one.
%build
%py3_build


%install
%py3_install


# TODO: the two entries below are placeholders; list the real license file,
# the documentation and the installed script here.
%files
%license add-license-file-here
%doc add-docs-here



%changelog
* Thu Feb 6 2020 David Kaufmann <astra@ionic.at> - 0.1-1
- Initial commit