diff options
Diffstat (limited to 'annotation-summary.py')
| -rwxr-xr-x | annotation-summary.py | 67 |
1 files changed, 67 insertions, 0 deletions
#!/usr/bin/env python3
"""Summarise the annotations of a PDF file in a standalone PDF report.

Usage: annotation-summary.py SOURCE [-o OUTPUT]

Every annotation of SOURCE that carries text is rendered as a small boxed
entry (page number, optional author, annotation text) and the result is
written as a PDF through pdfkit.
"""

import argparse
import html

# Static HTML wrapped around the per-annotation entries.
_HTML_HEAD = """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
</head>
<body>
"""

_HTML_TAIL = """\
</body>
</html>
"""


def extract(fn, show=False):
    """Collect the annotations of every page of the PDF file *fn*.

    Args:
        fn: Path of the PDF document to read.
        show: Accepted for backward compatibility; currently unused.

    Returns:
        A dict mapping each zero-based page index to the list of annotation
        objects found on that page (an empty list for pages without any).

    Raises:
        OSError: If the document cannot be loaded.
    """
    # Imported lazily so that render_html() stays usable (and testable) on
    # machines that lack the Qt/poppler bindings.
    import popplerqt5

    doc = popplerqt5.Poppler.Document.load(fn)
    if doc is None:
        # The binding hands back None instead of raising when the file is
        # missing or not a readable PDF; fail with a clear message rather
        # than an AttributeError on the next line.
        raise OSError(f"could not load PDF document: {fn!r}")

    return {
        index: list(doc.page(index).annotations())
        for index in range(doc.numPages())
    }


def render_html(annotations):
    """Build the HTML report for *annotations*.

    Args:
        annotations: Mapping of zero-based page index to a list of objects
            exposing ``contents()`` and ``author()`` (as returned by
            :func:`extract`).

    Returns:
        The complete HTML document as a string. Pages are emitted in
        ascending order, annotations without text are skipped, and page
        numbers are shown one-based.
    """
    parts = [_HTML_HEAD]
    for page in sorted(annotations):
        for annotation in annotations[page]:
            content = annotation.contents()
            if not content:
                continue

            header = f"<span>Page: {page + 1}</span>"
            author = annotation.author()
            if author:
                header += f" | <span>Author: {html.escape(author)}</span>"

            # Annotation text is user-supplied plain text: escape it so that
            # '<', '>' and '&' show up literally instead of being parsed as
            # markup (which would drop or mangle parts of the note).
            parts.append(
                '<div style="border: 1px solid #300;">\n'
                f"{header}<br/>\n"
                "<hr/>\n"
                '<div style="white-space: pre-wrap;">'
                f"{html.escape(content)}</div>\n"
                "</div>\n"
                "<br/>\n"
            )
    parts.append(_HTML_TAIL)
    return "".join(parts)


def generate_pdf(annotations, destination="annotations.pdf", pagesize="A4"):
    """Write a PDF summary of *annotations* to *destination*.

    Args:
        annotations: Mapping of zero-based page index to annotation objects,
            as returned by :func:`extract`.
        destination: Path of the PDF file to create.
        pagesize: Page size name forwarded to pdfkit as ``page-size``.
    """
    # Lazy import, see extract().
    import pdfkit

    options = {
        'page-size': pagesize,
        'encoding': "utf-8",
        'margin-top': '0.75in',
        'margin-right': '0.75in',
        'margin-bottom': '0.75in',
        'margin-left': '0.75in',
    }
    pdfkit.from_string(render_html(annotations), destination, options=options)


def main(argv=None):
    """Command-line entry point: summarise the annotations of one PDF."""
    parser = argparse.ArgumentParser(
        description="Create a PDF summarising the annotations of a PDF file."
    )
    parser.add_argument('source', help="annotated PDF file to read")
    parser.add_argument(
        '-o', '--output',
        default="annotations.pdf",
        help="PDF file to write (default: %(default)s)",
    )
    args = parser.parse_args(argv)
    annotations = extract(args.source, show=True)
    generate_pdf(annotations, destination=args.output)


if __name__ == '__main__':
    main()
