The following Python script is meant to be run in the Terminal with one or more Word (.docx only) documents provided on the command line. For each document, it prints every footnote's index together with its associated footnote text. If a footnote's text exceeds a predetermined line length (70 characters), the text is wrapped and aligned under its first line so that the output formatting is preserved.
This is a read-only script, and does not change the Word document. It focuses all of its energy on finding footnotes, and nothing else. There is no warranty expressed or implied.
Copy and paste the following Python code into a programmer's editor, or TextEdit in plain text mode. It is syntax clean as posted. Keep it that way by not pasting into a word processor. 😉 Save the file out as fnote.py, or whatever name you please.
#!/usr/bin/python
# coding: utf-8
"""
fnote.py
Usage: fnote.py doc1.docx doc2.docx ~/Desktop/Files/*.docx ... docn.docx
For each Word (.docx only) document provided on the command line, print out
the footnotes found in the document. Long footnote text is wrapped and aligned.
Sample output:
[doc1.docx]
1: This is the first and only footnote text
[doc2.docx]
No footnotes found in document
Tested: OS X 10.11.6/Python 2.7.10, 2.7.12, OS X 10.12/Python 2.7.10
VikingOSX, Oct. 29, 2016, Apple Support Communities
"""
import zipfile
import re
import os
import sys
from itertools import izip
import textwrap
# Archive member inside a .docx container that holds the footnote definitions.
FNXML = 'word/footnotes.xml'
# Filled in by get_footnotes(): footnote ids and their matching texts.
findex, fnotes = [], []
# Indentation printed in front of every footnote line.
space4 = 4 * ' '
# Text longer than this is wrapped; also used as the wrap width.
line_length = 70
def get_footnotes(ifile):
    """Load the footnote ids and texts of one .docx file into module globals.

    Reads the FNXML part from the zip container ``ifile`` and stores the
    result in the module-level lists ``findex`` (the ``w:id`` values, as
    strings) and ``fnotes`` (one text string per footnote, same order).
    Both lists are reset on every call.

    Unlike a per-run regex scan, the text of a footnote is assembled from
    *all* of its ``w:t`` runs, so a footnote split across several runs, or
    using ``<w:t xml:space="preserve">``, still yields exactly one entry and
    the ids stay aligned with their texts. XML entities are decoded by the
    parser.

    Args:
        ifile: path to the .docx file.

    Returns:
        True when the footnotes part was read and parsed (the lists may
        still be empty). False when the part is missing, the file cannot
        be opened as a zip archive, or the XML cannot be parsed; in that
        case a bracketed file-name header and a one-line reason are printed.
    """
    global findex, fnotes
    # Function-scope import keeps this block independent of the file's
    # top-level import list.
    import xml.etree.ElementTree as ET

    # Start clean so a failed read never leaves a previous document's data.
    findex, fnotes = [], []
    header = '[{}]'.format(os.path.basename(ifile))

    try:
        with zipfile.ZipFile(ifile, 'r') as docx:
            xmldata = docx.read(FNXML)
    except KeyError:
        # ZipFile.read raises KeyError when the archive has no such member.
        print(header)
        print('{}No footnotes found in document'.format(space4))
        return False
    except (zipfile.BadZipfile, IOError) as err:
        # Missing/unreadable file or not a zip container: report and let the
        # caller move on to the next document instead of aborting the run.
        print(header)
        print('{}Cannot read document: {}'.format(space4, err))
        return False

    try:
        root = ET.fromstring(xmldata)
    except ET.ParseError as err:
        print(header)
        print('{}Cannot parse {}: {}'.format(space4, FNXML, err))
        return False

    # ElementTree spells qualified names as '{namespace}local'. Reuse the
    # namespace of the root element rather than hard-coding one.
    if root.tag.startswith('{'):
        ns = root.tag[:root.tag.index('}') + 1]
    else:
        ns = ''
    tag_footnote, tag_para, tag_text = ns + 'footnote', ns + 'p', ns + 't'
    attr_id, attr_type = ns + 'id', ns + 'type'

    for note in root.iter(tag_footnote):
        # Entries carrying a non-normal w:type (separators etc.) hold no
        # footnote text and are not real footnotes.
        if note.get(attr_type, 'normal') != 'normal':
            continue
        note_id = note.get(attr_id)
        if note_id is None:
            continue

        pieces = []
        for node in note.iter():
            if node.tag == tag_para and pieces:
                # Keep text of consecutive paragraphs from running together.
                pieces.append(' ')
            elif node.tag == tag_text and node.text:
                pieces.append(node.text)
        text = ''.join(pieces).strip()

        # Under Python 2 the parser hands back ``unicode`` for non-ASCII
        # text; convert to the native ``str`` so callers can format and
        # print it exactly as before. A no-op under Python 3.
        if not isinstance(text, str):
            text = text.encode('utf-8')

        findex.append(note_id)
        fnotes.append(text)
    return True
def main():
    """Print the footnotes of every .docx file named on the command line.

    Exits with a usage message when no arguments are given. Arguments whose
    names do not end in ``.docx`` are skipped silently. For each remaining
    file, get_footnotes() fills the module-level ``findex``/``fnotes``
    lists; the footnotes are then printed under a ``[file name]`` header in
    numeric id order, with text longer than ``line_length`` wrapped and
    aligned under its first line.

    The number shown is the stored id minus one, following the original
    author's observation that the stored ids run one higher than the
    visible footnote numbers (not verified here against other Word
    versions).
    """
    if len(sys.argv) == 1:
        sys.exit('Usage: {} file1.docx, file2.docx ... filen.docx'.format(sys.argv[0]))

    for adocx in sys.argv[1:]:
        # Require the real ".docx" suffix (any letter case); a bare
        # endswith('docx') would also accept names such as "foodocx".
        if not adocx.lower().endswith('.docx'):
            continue

        # get_footnotes() prints its own header and reason when it fails.
        if not get_footnotes(os.path.expanduser(adocx)):
            continue

        print("[{}]".format(os.path.basename(adocx)))
        if not (findex and fnotes):
            # The footnotes part exists but yielded no entries: say so, as
            # the sample output in the module docstring shows.
            print('{}No footnotes found in document'.format(space4))
            continue

        # Sort on the numeric id; sorting the id strings would put
        # "10" ahead of "2".
        notes = sorted(zip(findex, fnotes), key=lambda pair: int(pair[0]))
        for note_id, text in notes:
            # Shift footnote numbers downward to match the content.
            prefix = '{}{}: '.format(space4, int(note_id) - 1)
            if len(text) > line_length:
                # Wrap and align really long footnote text.
                wrapper = textwrap.TextWrapper(
                    initial_indent=prefix,
                    width=line_length,
                    subsequent_indent=' ' * len(prefix))
                print(wrapper.fill(text))
            else:
                # One print call: a trailing-comma print only joins the two
                # halves on one line under Python 2.
                print(prefix + text)
# Run main() only when executed as a script; its return value becomes the
# process exit status (sys.exit(x) is exactly ``raise SystemExit(x)``).
if __name__ == '__main__':
    raise SystemExit(main())