Here is a Python 2.7 program that reads a US-ASCII CSV file and writes it out as a UTF-8, Excel-compatible CSV with every field quoted. The output opens and formats nicely in Numbers v3.6.1 and LibreOffice Calc v5.1.2.2.
Usage: ucsv.py input.csv output.csv
Copy and paste the following Python code into a programmer's editor. If it is Sublime Text 3, then use Paste and Indent. Otherwise, paste into a TextEdit plain text file, and save as ucsv.py. Make the Python script executable in the Terminal (chmod +x ucsv.py).
Test this on a small CSV and open it in a spreadsheet application to verify it works ok for you.
Code:
#!/usr/bin/env python
# coding: utf-8
'''
ucsv.py
Read in a US-ASCII CSV document and write out a quoted field Excel CSV.
Output CSV read correctly by Numbers v3.6.1, LibreOffice Calc 5.1.2.2.
Usage: ucsv.py us-ascii-input.csv utf8_output.csv
Derived from : https://docs.python.org/2.7/library/csv.html#examples
http://stackoverflow.com/questions/17245415/read-and-write-csv-files-including-unicode-with-python-2-7
'''
import csv
import codecs
import cStringIO
import os
import sys
class UTF8Recoder(object):
    '''Iterator that re-encodes a byte stream to UTF-8, one line at a time.

    The wrapped stream is decoded from ``encoding`` and every line is handed
    back as UTF-8 encoded bytes, so a byte-oriented consumer (csv.reader in
    this script) always sees UTF-8 regardless of the source encoding.
    '''

    def __init__(self, f, encoding):
        '''Wrap *f* in a decoding stream reader.

        f        -- binary file-like object to read lines from
        encoding -- name of the codec the bytes in *f* are encoded with
        '''
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        '''Return self; the recoder is its own iterator.'''
        return self

    def next(self):
        '''Return the next input line as UTF-8 encoded bytes.

        StopIteration from the underlying reader propagates unchanged when
        the input is exhausted.
        '''
        # The next() builtin dispatches to whichever iterator method the
        # wrapped reader provides, instead of hard-coding ``.next()``.
        return next(self.reader).encode("utf-8")

    # Alias so the class also satisfies the Python 3 iterator protocol;
    # it has no effect on Python 2, which calls ``next``.
    __next__ = next
class UnicodeReader(object):
    '''CSV reader that yields each row as a list of unicode strings.

    Python 2 only: it relies on the ``unicode`` builtin. The input stream is
    decoded from ``encoding``, re-encoded to UTF-8 by UTF8Recoder, parsed by
    csv.reader, and each parsed field is decoded back to unicode.
    '''

    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        '''Set up the CSV parser on top of a UTF-8 recoded view of *f*.

        f        -- binary file-like object containing the CSV data
        dialect  -- csv dialect passed to csv.reader
        encoding -- codec used to decode the bytes read from *f*
        kwds     -- extra formatting parameters forwarded to csv.reader
        '''
        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def next(self):
        '''next() -> list of unicode

        Read the next CSV row and return its fields as unicode strings.
        StopIteration from the underlying csv reader propagates at end of
        input.
        '''
        row = next(self.reader)
        # csv.reader was fed UTF-8 bytes, so every field decodes as UTF-8.
        return [unicode(s, "utf-8") for s in row]

    def __iter__(self):
        '''Return self; the reader is its own iterator.'''
        return self
class UnicodeWriter:
def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
'''writerow(unicode) -> None
This function takes a Unicode string and encodes it to the output.
'''
self.writer.writerow([s.encode("utf-8") for s in row])
data = self.queue.getvalue()
data = data.decode("utf-8")
data = self.encoder.encode(data)
self.stream.write(data)
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
def main(argv=None):
    '''Convert the CSV named by argv[1] into a fully quoted CSV at argv[2].

    argv -- argument list in sys.argv layout (program name first);
            defaults to sys.argv when omitted.

    Exits through sys.exit() with a message when fewer than two file
    arguments are given, or when the input path does not exist or does not
    end in ".csv".
    '''
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        sys.exit("{} <ascii-csv> <utf-csv>\n".format(argv[0]))

    # Expand "~" before validating, so the check is made against the path
    # that will actually be opened.
    ascii_csv = os.path.expanduser(argv[1])
    utf8_csv = os.path.expanduser(argv[2])

    if not (os.path.exists(ascii_csv) and ascii_csv.endswith('.csv')):
        sys.exit(
            "Input file does not exist or is not a .csv file: {}".format(
                ascii_csv))

    # Binary mode on both sides: decoding and encoding are done by
    # UnicodeReader and UnicodeWriter, not by the file objects.
    with open(ascii_csv, 'rb') as fin, open(utf8_csv, 'wb') as fout:
        reader = UnicodeReader(fin)
        writer = UnicodeWriter(fout, quoting=csv.QUOTE_ALL)
        writer.writerows(reader)


if __name__ == "__main__":
    main()