Category: "Python"

Python3 - metadata extraction utilities

EXIF

pip install exifread

#!/usr/bin/python3 

import sys
import exifread

def main(argv):
	"""Print the EXIF tags of an image file, leaving out thumbnail tags.

	Args:
		argv: Command-line arguments without the program name; argv[0]
			is the path of the image file to read.

	Prints a usage line and exits with status 1 when no path is given.
	"""
	# Honour the argument list passed by the caller instead of re-reading
	# sys.argv; for the call main(sys.argv[1:]) the behaviour is the same.
	if not argv:
		print('exif.py <imagefile>')
		sys.exit(1)

	imagepath = argv[0]

	# Keep the path and the open handle under separate names (the handle
	# used to overwrite the path variable).
	with open(imagepath, 'rb') as handle:
		tags = exifread.process_file(handle)
		for key, value in tags.items():
			# Tags whose name mentions 'Thumbnail' are not listed.
			if 'Thumbnail' not in key:
				print(key, '->', value)

# Run only when executed as a script, so importing the module has no side effects.
if __name__ == '__main__':
	main(sys.argv[1:])

.docx

pip install python-docx

#!/usr/bin/python3 

import sys
import docx

def main(argv):
	"""Print the public, non-callable core properties of a .docx file.

	Args:
		argv: Command-line arguments without the program name; argv[0]
			is the path of the .docx file to read.

	Prints a usage line and exits with status 1 when no path is given.
	"""
	# Honour the argument list passed by the caller instead of re-reading
	# sys.argv; for the call main(sys.argv[1:]) the behaviour is the same.
	if not argv:
		print('dcx.py <docxfile>')
		sys.exit(1)

	doc = docx.Document(argv[0])
	props = doc.core_properties

	for name in dir(props):
		# Filter private/dunder names first so they are never evaluated.
		if name.startswith('_'):
			continue
		value = getattr(props, name)
		# Methods are not metadata; only plain values are printed.
		if not callable(value):
			print(name, ':', value)

# Run only when executed as a script, so importing the module has no side effects.
if __name__ == '__main__':
	main(sys.argv[1:])

PDF

pip install pdfminer

#!/usr/bin/python3 

import sys
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument

def main(argv):
	"""Print the entries of the first document-information dictionary of a PDF.

	Args:
		argv: Command-line arguments without the program name; argv[0]
			is the path of the PDF file to read.

	Prints a usage line and exits with status 1 when no path is given.
	Prints nothing when the document exposes no information dictionary.
	"""
	# Honour the argument list passed by the caller instead of re-reading
	# sys.argv; for the call main(sys.argv[1:]) the behaviour is the same.
	if not argv:
		print('pdf.py <pdffile>')
		sys.exit(1)

	# The with-block closes the file even when parsing raises; it was
	# previously left open.
	with open(argv[0], 'rb') as fp:
		parser = PDFParser(fp)
		doc = PDFDocument(parser)

		# doc.info is indexed as a list below; guard against it being empty
		# (the Info dictionary is optional in a PDF) instead of raising
		# IndexError.
		if not doc.info:
			return

		props = doc.info[0]
		for key, value in props.items():
			if not key.startswith('_') and not callable(value):
				print(key, ':', value)

# Run only when executed as a script, so importing the module has no side effects.
if __name__ == '__main__':
	main(sys.argv[1:])