Category: "Python"

Python3 - metadata extraction utilities
Dec 11th
EXIF
pip install exifread
#!/usr/bin/python3
"""Print the EXIF tags of an image file, one ``key -> value`` pair per line."""
import sys

import exifread


def main(argv):
    """Print the tags exifread reports for the image named by ``argv[0]``.

    Tags whose key contains ``'Thumbnail'`` are skipped (presumably to keep
    embedded thumbnail data out of the listing -- confirm against exifread).

    Args:
        argv: Command-line arguments without the program name; the first
            element is the path of the image to inspect.

    Exits with status 1 after printing a usage line when no path is given.
    """
    if not argv:
        print('exif.py <imagefile>')
        sys.exit(1)

    image_path = argv[0]
    # Keep the path and the open handle under separate names so the path is
    # not shadowed by the file object.
    with open(image_path, 'rb') as handle:
        tags = exifread.process_file(handle)

    for key, value in tags.items():
        if 'Thumbnail' not in key:
            print(key, '->', value)


if __name__ == '__main__':
    main(sys.argv[1:])
.docx
pip install python-docx
#!/usr/bin/python3
"""Print the core properties of a .docx file, one ``name : value`` per line."""
import sys

import docx


def main(argv):
    """Print every public, non-callable core property of a .docx document.

    Args:
        argv: Command-line arguments without the program name; the first
            element is the path of the .docx file to inspect.

    Exits with status 1 after printing a usage line when no path is given.
    """
    if not argv:
        print('dcx.py <docxfile>')
        sys.exit(1)

    document = docx.Document(argv[0])
    props = document.core_properties

    for name in dir(props):
        # Filter on the name first so private attributes are never fetched.
        if name.startswith('_'):
            continue
        value = getattr(props, name)
        if not callable(value):
            print(name, ':', value)


if __name__ == '__main__':
    main(sys.argv[1:])
pip install pdfminer
#!/usr/bin/python3
"""Print the entries of a PDF's first info dictionary, one per line."""
import sys

from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument


def main(argv):
    """Print the ``key : value`` entries of the first info dictionary.

    Keys starting with ``'_'`` and callable values are skipped.

    Args:
        argv: Command-line arguments without the program name; the first
            element is the path of the PDF file to inspect.

    Exits with status 1 after printing a usage line when no path is given.
    When the document exposes no info dictionary, a notice is written to
    stderr and nothing else is printed.
    """
    if not argv:
        print('pdf.py <pdffile>')
        sys.exit(1)

    # The context manager closes the file even if parsing raises; the
    # original left the handle open.
    with open(argv[0], 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)

        # doc.info is indexed as a sequence; guard against it being empty
        # instead of failing with IndexError on doc.info[0].
        if not doc.info:
            print('no document information found', file=sys.stderr)
            return

        props = doc.info[0]
        for name, value in props.items():
            if not name.startswith('_') and not callable(value):
                print(name, ':', value)


if __name__ == '__main__':
    main(sys.argv[1:])