Hello
There's no pre-compiled build of rubycocoa 1.2.0 for OS X 10.11 as of 2016-03. You'd need to build and install rubycocoa 1.2.0 from source code by yourself.
Meanwhile, you may try the following pyobjc version of the original rubycocoa script. It should work without any additional installation, although I have only tested it with pyobjc 2.2b3 and python 2.6.1 under OS X 10.6.8.
#!/usr/bin/python
# coding: utf-8
#
# file:
# split_pdf.py
#
# usage:
# split_pdf.py pdf [pdf ...] [output_directory]
# argv[1..] : source pdf file(s)
# argv[-1] : output directory
#
# * If output directory is specified and present, pages of every pdf are saved in the directory.
# Otherwise, pages of each pdf are saved in directory named after pdf followed by "'s pages" in the same directory as pdf.
# * Each page is named after text in the first non-blank line of the page without leading and trailing white spaces.
#
import sys, os, re
from Foundation import NSURL
from Quartz.PDFKit import PDFDocument, PDFPage
def usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write('Usage: %s pdf [pdf ...] [output_directory]\n' % prog)
    sys.exit(1)
def main():
    """Split every pdf named on the command line into one-page pdf files.

    Arguments are read from sys.argv: one or more source pdf paths,
    optionally followed by an existing directory.  When that directory is
    given, the pages of every pdf are saved there; otherwise the pages of
    each pdf are saved in a directory named after the pdf followed by
    "'s pages".  Each page file is named after the first non-blank line of
    the page's text; pages without such a line are reported on stderr and
    skipped.  Problems with a single pdf or page are reported on stderr and
    processing continues with the next one.

    Written for Python 2 (byte-string argv) with pyobjc.

    NOTE(review): pages whose first lines are identical map to the same
    file name, so a later page presumably replaces an earlier one.
    """
    # Decode once at the boundary so that every path is unicode; mixing a
    # non-ASCII byte-string directory with a unicode page name would raise
    # UnicodeDecodeError when the output path is built.
    args = [a.decode('utf-8') for a in sys.argv[1:]]
    if not args:
        usage()
    # A trailing argument that is an existing directory is the output
    # directory.  sys.argv itself is left untouched.
    outdir = args.pop() if os.path.isdir(args[-1]) else None
    if not args:
        usage()

    # First non-blank line without leading and trailing white space.
    # [1] '!' is present before tab in string returned by PDFPage -string.
    first_line = re.compile(r'^[!\s]*(\S.*?)[!\s]*(\r\n|\r|\n|\Z)', re.M)

    for f in args:
        doc = PDFDocument.alloc().initWithURL_(NSURL.fileURLWithPath_(f))
        if not doc:
            sys.stderr.write('%s: not a pdf file\n' % f.encode('utf-8'))
            continue
        # "is not None" (rather than truthiness) together with os.path.join
        # below keeps an output directory such as "/" usable.
        odir = outdir if outdir is not None else f + "'s pages"
        if not os.path.isdir(odir):
            os.mkdir(odir)
        path = doc.documentURL().path().encode('utf-8')
        for i in range(doc.pageCount()):
            page = doc.pageAtIndex_(i)
            # -string may be nil for a page without text; treat as empty.
            m = first_line.search(page.string() or u'')
            if not m:
                sys.stderr.write('No matching string in page %d of %s\n' % (i + 1, path))
                continue  # ignore this page
            n = m.group(1)
            n = n.replace(':', ';')  # replace : with ; (: in POSIX name is changed to / in HFS+ name)
            n = n.replace('/', ':')  # replace / with : (/ is reserved as node separator in POSIX path)
            doc1 = PDFDocument.alloc().initWithData_(page.dataRepresentation())
            if not doc1 or not doc1.writeToFile_(os.path.join(odir, n + '.pdf')):
                sys.stderr.write('Failed to save page %d of %s\n' % (i + 1, path))


if __name__ == '__main__':
    main()
#
# Notes
# [1] ! is present before tab in string returned by PDFPage -string method
#
And just in case, here's an AppleScript wrapper for it, which will return any errors in the result pane/window of (Apple)Script Editor.
--APPLESCRIPT
_main()
on _main()
    -- Ask for the source pdf file(s) and the destination folder.
    set ff to (choose file of type {"com.adobe.pdf"} with prompt "Choose source pdf file(s)." with multiple selections allowed)
    set d to (choose folder with prompt "Choose destination folder.")
    -- Build the argument string: every chosen pdf followed by the destination folder,
    -- each as a shell-quoted POSIX path.
    set args to ""
    repeat with a in ff & d
        set args to args & space & a's POSIX path's quoted form
    end repeat
    -- The python script is passed on stdin through a here-document ('EOF' is quoted, so the
    -- shell does not expand anything inside it); stderr is merged into stdout (2>&1) so that
    -- messages written by the script are part of the returned text.
    -- Inside the AppleScript string, \\ stands for one backslash and \" for a double quote.
    do shell script "/usr/bin/python <<'EOF' - " & args & " 2>&1
# coding: utf-8
#
# file:
# split_pdf.py
#
# usage:
# split_pdf.py pdf [pdf ...] [output_directory]
# argv[1..] : source pdf file(s)
# argv[-1] : output directory
#
# * If output directory is specified and present, pages of every pdf are saved in the directory.
# Otherwise, pages of each pdf are saved in directory named after pdf followed by \"'s pages\" in the same directory as pdf.
# * Each page is named after text in the first non-blank line of the page without leading and trailing white spaces.
#
import sys, os, re
from Foundation import NSURL
from Quartz.PDFKit import PDFDocument, PDFPage

def usage():
    sys.stderr.write('Usage: %s pdf [pdf ...] [output_directory]\\n' % os.path.basename(sys.argv[0]))
    sys.exit(1)

def main():
    # decode once so that every path is unicode (a non-ASCII byte-string directory
    # mixed with a unicode page name would raise UnicodeDecodeError)
    args = [ a.decode('utf-8') for a in sys.argv[1:] ]
    if not args: usage()
    outdir = args.pop() if os.path.isdir(args[-1]) else None
    if not args: usage()
    first_line = re.compile(r'^[!\\s]*(\\S.*?)[!\\s]*(\\r\\n|\\r|\\n|\\Z)', re.M) # [1]
    for f in args:
        doc = PDFDocument.alloc().initWithURL_(NSURL.fileURLWithPath_(f))
        if not doc:
            sys.stderr.write('%s: not a pdf file\\n' % f.encode('utf-8'))
            continue
        odir = outdir if outdir is not None else (f + \"'s pages\")
        if not os.path.isdir(odir): os.mkdir(odir)
        path = doc.documentURL().path().encode('utf-8')
        for i in range(doc.pageCount()):
            page = doc.pageAtIndex_(i)
            m = first_line.search(page.string() or u'') # -string may be nil for a page without text
            if not m:
                sys.stderr.write('No matching string in page %d of %s\\n' % (i + 1, path))
                continue # ignore this page
            n = m.group(1)
            n = n.replace(':', ';') # replace : with ; (: in POSIX name is changed to / in HFS+ name)
            n = n.replace('/', ':') # replace / with : (/ is reserved as node separator in POSIX path)
            doc1 = PDFDocument.alloc().initWithData_(page.dataRepresentation())
            if not doc1 or not doc1.writeToFile_(os.path.join(odir, n + '.pdf')):
                sys.stderr.write('Failed to save page %d of %s\\n' % (i + 1, path))

main()
#
# Notes
# [1] ! is present before tab in string returned by PDFPage -string method
#
EOF"
end _main
--END OF APPLESCRIPT
Regards,
H