-- CSV.Parser
-- @Version 1.0.0
--
-- @NOTES
-- This script treats any CR, LF or CRLF as a record separator. This means that
-- it does not allow for embedded line breaks (which the CSV format allows for).
--
-- Fields are quoted if they contain double-quotes, commas or leading/trailing
-- white space. White space is preserved if quoted - but note that Excel will
-- try to be 'helpful' and trim whitespace even if it is quoted.
--
-- Regex fragments that need a literal double-quote are built as "\" & QUOTE
-- (a backslash followed by a quote character), because Lingo string literals
-- cannot contain a double-quote.
--
--@ DEPENDENCIES
-- PregEx Xtra

-- Characters that force a field to be quoted when they appear at its start or end.
property whitespace

-- Initialises the shared whitespace set on the parent script and returns the
-- parent script itself (not a fresh instance), so that all handlers are
-- called with the script object as "me".
on new (me)
  me.script.whitespace = " " & TAB & RETURN
  return me.script
end

-- Reads a CSV file and returns it as a list of rows, each row being a list of
-- field strings with enclosing quotes removed and doubled quotes unescaped.
-- Returns an empty list when pregex_readentireFile does not return a list
-- (presumably a read failure - see GetLastError).
on readFile (me, afile)
  strBuffer = pregex_readentireFile(afile)
  out = []
  if listP(strBuffer) then
    strL = [strBuffer[1]]
    -- standardise the line breaks (not really kosher)
    PRegEx_Replace(strL, "(?:\x0D\x0A)|[\x0D\x0A]", "g", "\n")
    -- now split into rows
    rows = PRegEx_Split(strL, "\n", "g")
    -- now split the rows into columns:
    -- match a comma only when it is followed by an even number of quotes,
    -- i.e. when the comma is not inside a quoted field
    q = "\" & QUOTE
    re = ",(?=(?:[^" & q & "]*" & q & "[^" & q & "]*" & q & ")*(?![^" & q & "]*" & q & "))"
    repeat with aRow in rows
      rowL = PRegEx_Split([aRow], re, "g")
      repeat with i = 1 to rowL.count
        rowL[i] = me._PostProcessEntry( rowL[i] )
      end repeat
      out.append(rowL)
    end repeat
  end if
  return out
end

-- Writes a list of rows (each a list of cells) to afile as CSV.
-- Returns true when pregex_writeentireFile reports a non-negative result,
-- and 0 when aList is not a list.
-- eg
--   CSV = script("CSV.Parser").new()
--   OK = CSV.writeFile(the moviePath & "test.csv", someList)
--   if NOT OK then alert (CSV.GetLastError())
on writeFile (me, afile, aList)
  if not listP(aList) then return 0 -- #ParamError
  out = ""
  numRows = count(aList)
  repeat with y = 1 to numRows
    thisRow = aList[y]
    numCols = thisRow.count
    repeat with x = 1 to numCols
      -- coerce to a string so numeric (or other non-string) cells can be written
      anEntryL = [string(thisRow[x])]
      changed = me._QuoteForWriting(anEntryL)
      anEntry = anEntryL[1]
      if changed or (anEntry contains ",") then
        put QUOTE & anEntry & QUOTE after out
      else
        put anEntry after out
      end if
      if x <> numCols then put "," after out
    end repeat
    if y <> numRows then put RETURN after out
  end repeat
  OK = pregex_writeentireFile(afile, [out])
  return (OK>=0)
end

-- Returns the PRegEx Xtra's description of its last error.
on GetLastError (me)
  return PRegEx_DescribeError()
end

-- Converts one raw field from a CSV row into its value: strips one pair of
-- enclosing quotes, then turns each doubled quote into a single quote.
on _PostProcessEntry (me, str)
  -- trim enclosing quotes
  mx = str.length
  if mx > 1 then
    if str.char[1] = QUOTE AND str.char[mx] = QUOTE then
      if mx = 2 then
        -- an empty quoted field; handled explicitly rather than relying on
        -- how a reversed chunk range (char[2..1]) is evaluated
        str = ""
      else
        str = str.char[2..mx-1]
      end if
    end if
  end if
  strL = [str]
  -- process double quotes: exactly ONE global pass. Repeating the replace
  -- until nothing matches would collapse consecutive escaped quotes too far
  -- (four quotes must become two, not one).
  pregex_replace(strL, "\" & QUOTE & "\" & QUOTE, "g", QUOTE)
  return strL[1]
end

-- Escapes the single string held in anEntryL (modified in place) for writing:
-- every double-quote is doubled. Returns a true value when the field must be
-- wrapped in quotes - because it contained a quote, or because it starts or
-- ends with a whitespace character - and 0 for an empty field.
-- Commas are checked by the caller.
on _QuoteForWriting (me, anEntryL)
  mx = anEntryL[1].length
  if mx > 0 then
    -- the result is used as a boolean: non-zero when at least one quote was replaced
    changed = pregex_replace(anEntryL, "\" & QUOTE, "g", QUOTE & QUOTE)
    if not changed then
      -- no replacement happened, so mx is still the current length
      firstCh = anEntryL[1].char[1]
      lastCh = anEntryL[1].char[mx]
      changed = (me.whitespace contains firstCh) OR (me.whitespace contains lastCh)
    end if
    return changed
  else
    return 0
  end if
end