diff options
| -rwxr-xr-x | annotation-summary.py | 67 | ||||
| -rw-r--r-- | annotation-summary.spec | 42 |
2 files changed, 109 insertions, 0 deletions
diff --git a/annotation-summary.py b/annotation-summary.py
new file mode 100755
index 0000000..0b0fb5e
--- /dev/null
+++ b/annotation-summary.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+import argparse
+import popplerqt5
+import pdfkit
+
+def extract(fn, show=False):
+    doc = popplerqt5.Poppler.Document.load(fn)
+    annotations = {}
+    for i in range(doc.numPages()):
+        annotations[i] = []
+        for annot in doc.page(i).annotations():
+            annotations[i].append(annot)
+    return annotations
+
+def generate_pdf(annotations, destination="annotations.pdf", pagesize="A4"):
+    options = {
+        'page-size': pagesize,
+        'encoding': "utf-8",
+        'margin-top': '0.75in',
+        'margin-right': '0.75in',
+        'margin-bottom': '0.75in',
+        'margin-left': '0.75in',
+    }
+
+    blocks = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+    <meta charset="utf-8">
+    </head>
+    <body>
+    """
+
+    pages = list(annotations.keys())
+    pages.sort()
+    for page in pages:
+        for annotation in annotations[page]:
+            content = annotation.contents()
+            if not content:
+                continue
+            author = annotation.author()
+            if author != "":
+                header = f"<span>Page: {page + 1}</span> | <span>Author: {author}</span>"
+            else:
+                header = f"<span>Page: {page + 1}</span>"
+            blocks += f"""
+            <div style="border: 1px solid #300;">
+            {header}<br/>
+            <hr/>
+            <div style="white-space: pre-wrap;">{content}</div>
+            </div>
+            <br/>
+            """
+
+    blocks += """
+    </body>
+    </html>
+    """
+    pdfkit.from_string(blocks, destination, options=options)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source')
+    args = parser.parse_args()
+    annotations = extract(args.source, show=True)
+    generate_pdf(annotations)
diff --git a/annotation-summary.spec b/annotation-summary.spec
new file mode 100644
index 0000000..88aa683
--- /dev/null
+++ b/annotation-summary.spec
@@ -0,0 +1,42 @@
+%global commit 0123456789abcdef0123456789abcdef01234567
+%global shortcommit %(c=%{commit}; echo ${c:0:7})
+%global snapinfo 20200206git%{shortcommit}
+
+Name: annotation-summary
+Version: 0.1
+Release: 1.%{snapinfo}%{?dist}
+Summary: Extract annotations/notes from a pdf file
+
+License:
+URL: https://git.ionic.at/pub/astra/%{name}
+#Source0: %{url}/snapshot/%{name}-%{commit}.tar.gz
+
+Requires: python3-poppler-qt5
+Requires: python3-pdfkit
+Requires: wkhtmltopdf
+
+%description
+Extract annotations/notes from a pdf file
+
+
+%prep
+%autosetup
+
+
+%build
+%py3_build
+
+
+%install
+%py3_install
+
+
+%files
+%license add-license-file-here
+%doc add-docs-here
+
+
+
+%changelog
+* Thu Feb 6 2020 David Kaufmann <astra@ionic.at> - 0.1-1
+- Initial commit
