module Text.CSV.Lazy.ByteString
(
CSVTable
, CSVRow
, CSVField(..)
, CSVError(..)
, CSVResult
, csvErrors
, csvTable
, csvTableFull
, csvTableHeader
, parseCSV
, parseDSV
, ppCSVError
, ppCSVField
, ppCSVTable
, ppDSVTable
, fromCSVTable
, toCSVTable
, selectFields
, expectFields
, mkEmptyColumn
, joinCSV
, mkCSVField
) where
import Data.List (groupBy, partition, elemIndex, intercalate, takeWhile
,deleteFirstsBy, nub)
import Data.Function (on)
import Data.Maybe (fromJust)
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.ByteString.Lazy.Char8 (ByteString)
type CSVTable = [CSVRow]
type CSVRow = [CSVField]
data CSVField = CSVField { CSVField -> Int
csvRowNum :: !Int
, CSVField -> Int
csvColNum :: !Int
, CSVField -> (Int, Int)
csvTextStart :: !(Int,Int)
, CSVField -> (Int, Int)
csvTextEnd :: !(Int,Int)
, CSVField -> ByteString
csvFieldContent :: !ByteString
, CSVField -> Bool
csvFieldQuoted :: !Bool }
| CSVFieldError { csvRowNum :: !Int
, csvColNum :: !Int
, csvTextStart :: !(Int,Int)
, csvTextEnd :: !(Int,Int)
, CSVField -> String
csvFieldError :: !String }
deriving (CSVField -> CSVField -> Bool
(CSVField -> CSVField -> Bool)
-> (CSVField -> CSVField -> Bool) -> Eq CSVField
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CSVField -> CSVField -> Bool
$c/= :: CSVField -> CSVField -> Bool
== :: CSVField -> CSVField -> Bool
$c== :: CSVField -> CSVField -> Bool
Eq,Int -> CSVField -> ShowS
[CSVField] -> ShowS
CSVField -> String
(Int -> CSVField -> ShowS)
-> (CSVField -> String) -> ([CSVField] -> ShowS) -> Show CSVField
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CSVField] -> ShowS
$cshowList :: [CSVField] -> ShowS
show :: CSVField -> String
$cshow :: CSVField -> String
showsPrec :: Int -> CSVField -> ShowS
$cshowsPrec :: Int -> CSVField -> ShowS
Show)
data CSVError = IncorrectRow { CSVError -> Int
csvRow :: Int
, CSVError -> Int
csvColsExpected :: Int
, CSVError -> Int
csvColsActual :: Int
, CSVError -> [CSVField]
csvFields :: [CSVField] }
| BlankLine { csvRow :: !Int
, csvColsExpected :: !Int
, csvColsActual :: !Int
, CSVError -> CSVField
csvField :: CSVField }
| FieldError { csvField :: CSVField }
| { csvColsExpected :: !Int
, :: !Int
, CSVError -> String
csvDuplicate :: !String }
| NoData
deriving (CSVError -> CSVError -> Bool
(CSVError -> CSVError -> Bool)
-> (CSVError -> CSVError -> Bool) -> Eq CSVError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CSVError -> CSVError -> Bool
$c/= :: CSVError -> CSVError -> Bool
== :: CSVError -> CSVError -> Bool
$c== :: CSVError -> CSVError -> Bool
Eq,Int -> CSVError -> ShowS
[CSVError] -> ShowS
CSVError -> String
(Int -> CSVError -> ShowS)
-> (CSVError -> String) -> ([CSVError] -> ShowS) -> Show CSVError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CSVError] -> ShowS
$cshowList :: [CSVError] -> ShowS
show :: CSVError -> String
$cshow :: CSVError -> String
showsPrec :: Int -> CSVError -> ShowS
$cshowsPrec :: Int -> CSVError -> ShowS
Show)
type CSVResult = [ Either [CSVError] [CSVField] ]
csvTable :: CSVResult -> CSVTable
csvTable :: CSVResult -> CSVTable
csvTable r :: CSVResult
r = [ [CSVField]
row | Right row :: [CSVField]
row <- CSVResult
r ]
csvErrors :: CSVResult -> [CSVError]
csvErrors :: CSVResult -> [CSVError]
csvErrors r :: CSVResult
r = [[CSVError]] -> [CSVError]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [ [CSVError]
err | Left err :: [CSVError]
err <- CSVResult
r ]
csvTableFull:: CSVResult -> CSVTable
csvTableFull :: CSVResult -> CSVTable
csvTableFull = (Either [CSVError] [CSVField] -> [CSVField])
-> CSVResult -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map Either [CSVError] [CSVField] -> [CSVField]
beCareful (CSVResult -> CSVTable)
-> (CSVResult -> CSVResult) -> CSVResult -> CSVTable
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVResult -> CSVResult
deduplicate
where beCareful :: Either [CSVError] [CSVField] -> [CSVField]
beCareful (Right row :: [CSVField]
row) = [CSVField]
row
beCareful (Left (r :: CSVError
r@IncorrectRow{}:_)) =
CSVError -> [CSVField]
csvFields CSVError
r [CSVField] -> [CSVField] -> [CSVField]
forall a. [a] -> [a] -> [a]
++
Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- CSVError -> Int
csvColsActual CSVError
r)
(Int -> Int -> ByteString -> CSVField
mkCSVField (CSVError -> Int
csvRow CSVError
r) 0 ByteString
BS.empty)
beCareful (Left (r :: CSVError
r@BlankLine{}:_)) =
Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r)
(Int -> Int -> ByteString -> CSVField
mkCSVField (CSVError -> Int
csvRow CSVError
r) 0 ByteString
BS.empty)
beCareful (Left (r :: CSVError
r@DuplicateHeader{}:_)) =
Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r)
(Int -> Int -> ByteString -> CSVField
mkCSVField 0 0 ByteString
BS.empty)
beCareful (Left (FieldError{}:r :: [CSVError]
r)) = Either [CSVError] [CSVField] -> [CSVField]
beCareful ([CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVError]
r)
beCareful (Left (NoData:_)) = []
beCareful (Left []) = []
deduplicate :: CSVResult -> CSVResult
deduplicate (Left (errs :: [CSVError]
errs@(DuplicateHeader{}:_)):Right heads :: [CSVField]
heads:rows :: CSVResult
rows) =
[CSVField] -> Either [CSVError] [CSVField]
forall a b. b -> Either a b
Right ([CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [CSVError]
errs ([CSVField] -> [Int] -> [(CSVField, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [CSVField]
heads [0..]))
Either [CSVError] [CSVField] -> CSVResult -> CSVResult
forall a. a -> [a] -> [a]
: CSVResult
rows
deduplicate rows :: CSVResult
rows = CSVResult
rows
replaceInOrder :: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [] headers :: [(CSVField, Int)]
headers = ((CSVField, Int) -> CSVField) -> [(CSVField, Int)] -> [CSVField]
forall a b. (a -> b) -> [a] -> [b]
map (CSVField, Int) -> CSVField
forall a b. (a, b) -> a
fst [(CSVField, Int)]
headers
replaceInOrder _ [] = []
replaceInOrder (d :: CSVError
d:dups :: [CSVError]
dups) ((h :: CSVField
h,n :: Int
n):headers :: [(CSVField, Int)]
headers)
| CSVError -> Int
csvHeaderSerial CSVError
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n = CSVField
h{ csvFieldContent :: ByteString
csvFieldContent = String -> ByteString
BS.pack
(CSVError -> String
csvDuplicate CSVError
dString -> ShowS
forall a. [a] -> [a] -> [a]
++"_"String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show Int
n) }
CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [CSVError]
dups [(CSVField, Int)]
headers
| Bool
otherwise = CSVField
hCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder (CSVError
dCSVError -> [CSVError] -> [CSVError]
forall a. a -> [a] -> [a]
:[CSVError]
dups) [(CSVField, Int)]
headers
csvTableHeader :: CSVResult -> [String]
= (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> String
BS.unpack (ByteString -> String)
-> (CSVField -> ByteString) -> CSVField -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVField -> ByteString
csvFieldContent) ([CSVField] -> [String])
-> (CSVResult -> [CSVField]) -> CSVResult -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVResult -> [CSVField]
forall a p. [Either a p] -> p
firstRow
where firstRow :: [Either a p] -> p
firstRow (Left _: rest :: [Either a p]
rest) = [Either a p] -> p
firstRow [Either a p]
rest
firstRow (Right x :: p
x: _) = p
x
parseCSV :: ByteString -> CSVResult
parseCSV :: ByteString -> CSVResult
parseCSV = Bool -> Char -> ByteString -> CSVResult
parseDSV Bool
True ','
parseDSV :: Bool -> Char -> ByteString -> CSVResult
parseDSV :: Bool -> Char -> ByteString -> CSVResult
parseDSV qn :: Bool
qn delim :: Char
delim = CSVTable -> CSVResult
validate
(CSVTable -> CSVResult)
-> (ByteString -> CSVTable) -> ByteString -> CSVResult
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> CSVField -> Bool) -> [CSVField] -> CSVTable
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
(==)(Int -> Int -> Bool)
-> (CSVField -> Int) -> CSVField -> CSVField -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on`CSVField -> Int
csvRowNum)
([CSVField] -> CSVTable)
-> (ByteString -> [CSVField]) -> ByteString -> CSVTable
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Char -> ByteString -> [CSVField]
lexCSV Bool
qn Char
delim
validate :: [CSVRow] -> CSVResult
validate :: CSVTable -> CSVResult
validate [] = [[CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVError
NoData]]
validate xs :: CSVTable
xs@(x :: [CSVField]
x:_) = [CSVField] -> CSVResult -> CSVResult
checkDuplicateHeaders [CSVField]
x (CSVResult -> CSVResult) -> CSVResult -> CSVResult
forall a b. (a -> b) -> a -> b
$ ([CSVField] -> Either [CSVError] [CSVField])
-> CSVTable -> CSVResult
forall a b. (a -> b) -> [a] -> [b]
map (Int -> [CSVField] -> Either [CSVError] [CSVField]
extractErrs ([CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
x)) CSVTable
xs
extractErrs :: Int -> CSVRow -> Either [CSVError] CSVRow
size :: Int
size row :: [CSVField]
row
| [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
row0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
size Bool -> Bool -> Bool
&& [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
errs0 = [CSVField] -> Either [CSVError] [CSVField]
forall a b. b -> Either a b
Right [CSVField]
row0
| [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
row0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1 Bool -> Bool -> Bool
&& CSVField -> Bool
empty CSVField
field0 = [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVField -> CSVError
blankLine CSVField
field0]
| Bool
otherwise = [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left ((CSVField -> CSVError) -> [CSVField] -> [CSVError]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> CSVError
convert [CSVField]
errs0
[CSVError] -> [CSVError] -> [CSVError]
forall a. [a] -> [a] -> [a]
++ [CSVField] -> [CSVError]
validateColumns [CSVField]
row0)
where
(row0 :: [CSVField]
row0,errs0 :: [CSVField]
errs0) = (CSVField -> Bool) -> [CSVField] -> ([CSVField], [CSVField])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition CSVField -> Bool
isField [CSVField]
row
(field0 :: CSVField
field0:_) = [CSVField]
row0
isField :: CSVField -> Bool
isField (CSVField{}) = Bool
True
isField (CSVFieldError{}) = Bool
False
empty :: CSVField -> Bool
empty f :: CSVField
f@(CSVField{}) = ByteString -> Bool
BS.null (CSVField -> ByteString
csvFieldContent CSVField
f)
empty _ = Bool
False
convert :: CSVField -> CSVError
convert err :: CSVField
err = FieldError :: CSVField -> CSVError
FieldError {csvField :: CSVField
csvField = CSVField
err}
validateColumns :: [CSVField] -> [CSVError]
validateColumns r :: [CSVField]
r =
if [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
size then []
else [ IncorrectRow :: Int -> Int -> Int -> [CSVField] -> CSVError
IncorrectRow{ csvRow :: Int
csvRow = if [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
r then 0 else CSVField -> Int
csvRowNum ([CSVField] -> CSVField
forall a. [a] -> a
head [CSVField]
r)
, csvColsExpected :: Int
csvColsExpected = Int
size
, csvColsActual :: Int
csvColsActual = [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
r
, csvFields :: [CSVField]
csvFields = [CSVField]
r } ]
blankLine :: CSVField -> CSVError
blankLine f :: CSVField
f = $WBlankLine :: Int -> Int -> Int -> CSVField -> CSVError
BlankLine{ csvRow :: Int
csvRow = CSVField -> Int
csvRowNum CSVField
f
, csvColsExpected :: Int
csvColsExpected = Int
size
, csvColsActual :: Int
csvColsActual = 1
, csvField :: CSVField
csvField = CSVField
f }
checkDuplicateHeaders :: CSVRow -> CSVResult -> CSVResult
row :: [CSVField]
row result :: CSVResult
result =
let headers :: [CSVField]
headers = [ CSVField
f | f :: CSVField
f@(CSVField{}) <- [CSVField]
row ]
dups :: [CSVField]
dups = (CSVField -> CSVField -> Bool)
-> [CSVField] -> [CSVField] -> [CSVField]
forall a. (a -> a -> Bool) -> [a] -> [a] -> [a]
deleteFirstsBy (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
(==)(ByteString -> ByteString -> Bool)
-> (CSVField -> ByteString) -> CSVField -> CSVField -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on`CSVField -> ByteString
csvFieldContent)
[CSVField]
headers ([CSVField] -> [CSVField]
forall a. Eq a => [a] -> [a]
nub [CSVField]
headers)
n :: Int
n = [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
headers
in if [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
dups then CSVResult
result
else [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left ((CSVField -> CSVError) -> [CSVField] -> [CSVError]
forall a b. (a -> b) -> [a] -> [b]
map (\d :: CSVField
d-> $WDuplicateHeader :: Int -> Int -> String -> CSVError
DuplicateHeader
{ csvColsExpected :: Int
csvColsExpected = Int
n
, csvHeaderSerial :: Int
csvHeaderSerial = CSVField -> Int
csvColNum CSVField
d
, csvDuplicate :: String
csvDuplicate = ByteString -> String
BS.unpack (CSVField -> ByteString
csvFieldContent CSVField
d)})
[CSVField]
dups)
Either [CSVError] [CSVField] -> CSVResult -> CSVResult
forall a. a -> [a] -> [a]
: CSVResult
result
data CSVState = CSVState { CSVState -> Int
tableRow, CSVState -> Int
tableCol :: !Int
, CSVState -> Int
textRow, CSVState -> Int
textCol :: !Int }
deriving Int -> CSVState -> ShowS
[CSVState] -> ShowS
CSVState -> String
(Int -> CSVState -> ShowS)
-> (CSVState -> String) -> ([CSVState] -> ShowS) -> Show CSVState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CSVState] -> ShowS
$cshowList :: [CSVState] -> ShowS
show :: CSVState -> String
$cshow :: CSVState -> String
showsPrec :: Int -> CSVState -> ShowS
$cshowsPrec :: Int -> CSVState -> ShowS
Show
incTableRow, incTableCol, incTextRow :: CSVState -> CSVState
incTableRow :: CSVState -> CSVState
incTableRow st :: CSVState
st = CSVState
st { tableRow :: Int
tableRow = CSVState -> Int
tableRow CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 , tableCol :: Int
tableCol = 1 }
incTableCol :: CSVState -> CSVState
incTableCol st :: CSVState
st = CSVState
st { tableCol :: Int
tableCol = CSVState -> Int
tableCol CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 }
incTextRow :: CSVState -> CSVState
incTextRow st :: CSVState
st = CSVState
st { textRow :: Int
textRow = CSVState -> Int
textRow CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 , textCol :: Int
textCol = 1 }
incTextCol :: Int -> CSVState -> CSVState
incTextCol :: Int -> CSVState -> CSVState
incTextCol n :: Int
n st :: CSVState
st = CSVState
st { textCol :: Int
textCol = CSVState -> Int
textCol CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n }
here :: CSVState -> (Int,Int)
here :: CSVState -> (Int, Int)
here st :: CSVState
st = (CSVState -> Int
textRow CSVState
st, CSVState -> Int
textCol CSVState
st)
lexCSV :: Bool -> Char -> ByteString -> [CSVField]
lexCSV :: Bool -> Char -> ByteString -> [CSVField]
lexCSV qn :: Bool
qn delim :: Char
delim =
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
qn Char
delim
($WCSVState :: Int -> Int -> Int -> Int -> CSVState
CSVState{tableRow :: Int
tableRow=1,tableCol :: Int
tableCol=1,textRow :: Int
textRow=1,textCol :: Int
textCol=1}) (1,1)
getFields :: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields :: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields q :: Bool
q d :: Char
d state :: CSVState
state begin :: (Int, Int)
begin bs0 :: ByteString
bs0
= case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs0 of
Nothing -> []
Just ('"', bs1 :: ByteString
bs1) -> Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (Int -> CSVState -> CSVState
incTextCol 1 CSVState
state) (Int, Int)
begin
ByteString
BS.empty ByteString
bs1
_ ->
case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break Char -> Bool
interestingChar ByteString
bs0 of
(fieldBs :: ByteString
fieldBs, bs1 :: ByteString
bs1) ->
let field :: CSVField
field = CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField CSVState
end (Int, Int)
begin ByteString
fieldBs Bool
False
end :: CSVState
end = Int -> CSVState -> CSVState
incTextCol (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ CSVState
state
state' :: CSVState
state' = CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ Int -> CSVState -> CSVState
incTextCol 2 CSVState
end
stateNL :: CSVState
stateNL = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ CSVState
state
len :: Int
len = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int) -> Int64 -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int64
BS.length ByteString
fieldBs
in case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs1 of
Just (c :: Char
c,bs2 :: ByteString
bs2)
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
d -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state' (CSVState -> (Int, Int)
here CSVState
state') ByteString
bs2
Just ('\r',bs2 :: ByteString
bs2) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs2 of
Just ('\n',bs3 :: ByteString
bs3)
-> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs3
_ -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs2
Just ('\n',bs2 :: ByteString
bs2) -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs2
Just ('"', _) -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
state' (Int, Int)
begin
"unexpected quote, resync at EOL"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL)
((Char -> Bool) -> ByteString -> ByteString
BS.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\n') ByteString
bs1)
Just _ -> [CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
state' (Int, Int)
begin "XXX Can't happen"]
Nothing -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs1
where interestingChar :: Char -> Bool
interestingChar '\r' = Bool
True
interestingChar '\n' = Bool
True
interestingChar '"' = Bool
True
interestingChar c :: Char
c | Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
d = Bool
True
interestingChar _ = Bool
False
doStringFieldContent :: Bool -> Char -> CSVState -> (Int, Int) -> ByteString
-> ByteString -> [CSVField]
doStringFieldContent :: Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent q :: Bool
q d :: Char
d state :: CSVState
state begin :: (Int, Int)
begin acc :: ByteString
acc bs1 :: ByteString
bs1
= case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break Char -> Bool
interestingCharInsideString ByteString
bs1 of
(newBs :: ByteString
newBs, bs2 :: ByteString
bs2) ->
let fieldBs :: ByteString
fieldBs = ByteString
acc ByteString -> ByteString -> ByteString
`BS.append` ByteString
newBs
field :: CSVField
field = CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField CSVState
end (Int, Int)
begin ByteString
fieldBs Bool
True
end :: CSVState
end = Int -> CSVState -> CSVState
incTextCol (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) CSVState
state
state' :: CSVState
state' = CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ Int -> CSVState -> CSVState
incTextCol 3 CSVState
end
stateNL :: CSVState
stateNL = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ CSVState
state
len :: Int
len = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int) -> Int64 -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int64
BS.length ByteString
newBs
in case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs2 of
Just ('\r',bs3 :: ByteString
bs3) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs3 of
Just ('\n',bs4 :: ByteString
bs4) | Bool
q ->
Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (CSVState -> CSVState
incTextRow CSVState
end) (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton '\n') ByteString
bs4
_ -> Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d CSVState
end (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton '\r') ByteString
bs3
Just ('\n',bs3 :: ByteString
bs3) | Bool
q ->
Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (CSVState -> CSVState
incTextRow CSVState
end) (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton '\n') ByteString
bs3
Just ('\n',bs3 :: ByteString
bs3) ->
CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
end (Int, Int)
begin "Found newline within quoted field"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs3
Just ('"', bs3 :: ByteString
bs3) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs3 of
Just (c :: Char
c,bs4 :: ByteString
bs4)
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
d -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state' (CSVState -> (Int, Int)
here CSVState
state') ByteString
bs4
Just ('\r',bs4 :: ByteString
bs4) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs4 of
Just ('\n',bs5 :: ByteString
bs5) ->
CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs5
_ -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs4
Just ('\n',bs4 :: ByteString
bs4) -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs4
Just ('"',bs4 :: ByteString
bs4) ->
Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (Int -> CSVState -> CSVState
incTextCol 3 CSVState
end) (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton '"') ByteString
bs4
Just _ -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
state' (Int, Int)
begin "End-quote not followed by comma"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state' (CSVState -> (Int, Int)
here CSVState
state') ByteString
bs3
Nothing -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs3
Just _ -> [CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
state' (Int, Int)
begin "XXX Can't happen (string field)"]
Nothing -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
state' (Int, Int)
begin "CSV data ends within a quoted string"
CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:[]
where interestingCharInsideString :: Char -> Bool
interestingCharInsideString '\r' = Bool
True
interestingCharInsideString '\n' = Bool
True
interestingCharInsideString '"' = Bool
True
interestingCharInsideString _ = Bool
False
mkField :: CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField :: CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField st :: CSVState
st begin :: (Int, Int)
begin bs :: ByteString
bs q :: Bool
q = $WCSVField :: Int
-> Int
-> (Int, Int)
-> (Int, Int)
-> ByteString
-> Bool
-> CSVField
CSVField { csvRowNum :: Int
csvRowNum = CSVState -> Int
tableRow CSVState
st
, csvColNum :: Int
csvColNum = CSVState -> Int
tableCol CSVState
st
, csvTextStart :: (Int, Int)
csvTextStart = (Int, Int)
begin
, csvTextEnd :: (Int, Int)
csvTextEnd = (CSVState -> Int
textRow CSVState
st,CSVState -> Int
textCol CSVState
st)
, csvFieldContent :: ByteString
csvFieldContent = ByteString
bs
, csvFieldQuoted :: Bool
csvFieldQuoted = Bool
q }
mkError :: CSVState -> (Int, Int) -> String -> CSVField
mkError :: CSVState -> (Int, Int) -> String -> CSVField
mkError st :: CSVState
st begin :: (Int, Int)
begin e :: String
e = $WCSVFieldError :: Int -> Int -> (Int, Int) -> (Int, Int) -> String -> CSVField
CSVFieldError { csvRowNum :: Int
csvRowNum = CSVState -> Int
tableRow CSVState
st
, csvColNum :: Int
csvColNum = CSVState -> Int
tableCol CSVState
st
, csvTextStart :: (Int, Int)
csvTextStart = (Int, Int)
begin
, csvTextEnd :: (Int, Int)
csvTextEnd = (CSVState -> Int
textRow CSVState
st,CSVState -> Int
textCol CSVState
st)
, csvFieldError :: String
csvFieldError = String
e }
ppCSVError :: CSVError -> String
ppCSVError :: CSVError -> String
ppCSVError (err :: CSVError
err@IncorrectRow{}) =
"\nRow "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvRow CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++" has wrong number of fields."String -> ShowS
forall a. [a] -> [a] -> [a]
++
"\n Expected "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvColsExpected CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++" but got "String -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvColsActual CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++"."String -> ShowS
forall a. [a] -> [a] -> [a]
++
"\n The fields are:"String -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> ShowS
indent 8 ((CSVField -> String) -> [CSVField] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap CSVField -> String
ppCSVField (CSVError -> [CSVField]
csvFields CSVError
err))
ppCSVError (err :: CSVError
err@BlankLine{}) =
"\nRow "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvRow CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++" is blank."String -> ShowS
forall a. [a] -> [a] -> [a]
++
"\n Expected "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvColsExpected CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++" fields."
ppCSVError (err :: CSVError
err@FieldError{}) = CSVField -> String
ppCSVField (CSVError -> CSVField
csvField CSVError
err)
ppCSVError (err :: CSVError
err@DuplicateHeader{}) =
"\nThere are two (or more) identical column headers: "String -> ShowS
forall a. [a] -> [a] -> [a]
++
ShowS
forall a. Show a => a -> String
show (CSVError -> String
csvDuplicate CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++"."String -> ShowS
forall a. [a] -> [a] -> [a]
++
"\n Column number "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvHeaderSerial CSVError
err)
ppCSVError (NoData{}) =
"\nNo usable data (after accounting for any other errors)."
ppCSVField :: CSVField -> String
ppCSVField :: CSVField -> String
ppCSVField (f :: CSVField
f@CSVField{}) =
"\n"String -> ShowS
forall a. [a] -> [a] -> [a]
++ByteString -> String
BS.unpack (Bool -> ByteString -> ByteString
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (CSVField -> ByteString
csvFieldContent CSVField
f))String -> ShowS
forall a. [a] -> [a] -> [a]
++
"\nin row "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvRowNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++" at column "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvColNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++
" (textually from "String -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextStart CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++" to "String -> ShowS
forall a. [a] -> [a] -> [a]
++
(Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextEnd CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++")"
ppCSVField (f :: CSVField
f@CSVFieldError{}) =
"\n"String -> ShowS
forall a. [a] -> [a] -> [a]
++CSVField -> String
csvFieldError CSVField
fString -> ShowS
forall a. [a] -> [a] -> [a]
++
"\nin row "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvRowNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++" at column "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvColNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++
" (textually from "String -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextStart CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++" to "String -> ShowS
forall a. [a] -> [a] -> [a]
++
(Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextEnd CSVField
f)
ppCSVTable :: CSVTable -> ByteString
ppCSVTable :: CSVTable -> ByteString
ppCSVTable = [ByteString] -> ByteString
BS.unlines ([ByteString] -> ByteString)
-> (CSVTable -> [ByteString]) -> CSVTable -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([CSVField] -> ByteString) -> CSVTable -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [ByteString] -> ByteString
BS.intercalate (String -> ByteString
BS.pack ",") ([ByteString] -> ByteString)
-> ([CSVField] -> [ByteString]) -> [CSVField] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> ByteString) -> [CSVField] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> ByteString
ppField)
where ppField :: CSVField -> ByteString
ppField f :: CSVField
f = Bool -> ByteString -> ByteString
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (CSVField -> ByteString
csvFieldContent CSVField
f)
ppDSVTable :: Bool -> Char -> CSVTable -> ByteString
ppDSVTable :: Bool -> Char -> CSVTable -> ByteString
ppDSVTable nl :: Bool
nl d :: Char
d = [ByteString] -> ByteString
BS.unlines ([ByteString] -> ByteString)
-> (CSVTable -> [ByteString]) -> CSVTable -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([CSVField] -> ByteString) -> CSVTable -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [ByteString] -> ByteString
BS.intercalate (String -> ByteString
BS.pack [Char
d]) ([ByteString] -> ByteString)
-> ([CSVField] -> [ByteString]) -> [CSVField] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> ByteString) -> [CSVField] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> ByteString
ppField)
where ppField :: CSVField -> ByteString
ppField f :: CSVField
f = Bool -> ByteString -> ByteString
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (ByteString -> ByteString
doNL (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ CSVField -> ByteString
csvFieldContent CSVField
f)
doNL :: ByteString -> ByteString
doNL | Bool
nl = ByteString -> ByteString
replaceNL
| Bool
otherwise = ByteString -> ByteString
forall a. a -> a
id
indent :: Int -> String -> String
indent :: Int -> ShowS
indent n :: Int
n = [String] -> String
unlines ([String] -> String) -> (String -> [String]) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Char -> String
forall a. Int -> a -> [a]
replicate Int
n ' ' String -> ShowS
forall a. [a] -> [a] -> [a]
++) ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines
quoted :: Bool -> ByteString -> ByteString
quoted :: Bool -> ByteString -> ByteString
quoted False s :: ByteString
s = ByteString
s
quoted True s :: ByteString
s = [ByteString] -> ByteString
BS.concat [String -> ByteString
BS.pack "\"", ByteString -> ByteString
escape ByteString
s, String -> ByteString
BS.pack"\""]
where escape :: ByteString -> ByteString
escape s :: ByteString
s = let (good :: ByteString
good,next :: ByteString
next) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='"') ByteString
s
in if ByteString -> Bool
BS.null ByteString
next then ByteString
good
else [ByteString] -> ByteString
BS.concat [ ByteString
good, String -> ByteString
BS.pack "\"\"", ByteString -> ByteString
escape (ByteString -> ByteString
BS.tail ByteString
next) ]
replaceNL :: ByteString -> ByteString
replaceNL :: ByteString -> ByteString
replaceNL s :: ByteString
s = let (good :: ByteString
good,next :: ByteString
next) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\n') ByteString
s
in if ByteString -> Bool
BS.null ByteString
next then ByteString
good
else if ByteString -> Bool
BS.null ByteString
good then ByteString -> ByteString
replaceNL (ByteString -> ByteString
BS.tail ByteString
next)
else [ByteString] -> ByteString
BS.concat [ ByteString
good, String -> ByteString
BS.pack " ", ByteString -> ByteString
replaceNL ByteString
next ]
fromCSVTable :: CSVTable -> [[ByteString]]
fromCSVTable :: CSVTable -> [[ByteString]]
fromCSVTable = ([CSVField] -> [ByteString]) -> CSVTable -> [[ByteString]]
forall a b. (a -> b) -> [a] -> [b]
map ((CSVField -> ByteString) -> [CSVField] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> ByteString
csvFieldContent)
toCSVTable :: [[ByteString]] -> ([CSVError], CSVTable)
toCSVTable :: [[ByteString]] -> ([CSVError], CSVTable)
toCSVTable [] = ([CSVError
NoData], [])
toCSVTable rows :: [[ByteString]]
rows@(r :: [ByteString]
r:_) = (\ (a :: [[CSVError]]
a,b :: CSVTable
b)-> ([[CSVError]] -> [CSVError]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[CSVError]]
a, CSVTable
b)) (([[CSVError]], CSVTable) -> ([CSVError], CSVTable))
-> ([[CSVError]], CSVTable) -> ([CSVError], CSVTable)
forall a b. (a -> b) -> a -> b
$
[([CSVError], [CSVField])] -> ([[CSVError]], CSVTable)
forall a b. [(a, b)] -> ([a], [b])
unzip ((Int -> [ByteString] -> ([CSVError], [CSVField]))
-> [Int] -> [[ByteString]] -> [([CSVError], [CSVField])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> [ByteString] -> ([CSVError], [CSVField])
walk [1..] [[ByteString]]
rows)
where
n :: Int
n = [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
r
walk :: Int -> [ByteString] -> ([CSVError], CSVRow)
walk :: Int -> [ByteString] -> ([CSVError], [CSVField])
walk rnum :: Int
rnum [] = ( [Int -> CSVError
blank Int
rnum]
, (Int -> CSVField) -> [Int] -> [CSVField]
forall a b. (a -> b) -> [a] -> [b]
map (\c :: Int
c-> Int -> Int -> ByteString -> CSVField
mkCSVField Int
rnum Int
c (ByteString
BS.empty)) [1..Int
n])
walk rnum :: Int
rnum cs :: [ByteString]
cs = ( if [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
cs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
n then [Int -> [ByteString] -> CSVError
bad Int
rnum [ByteString]
cs] else []
, (Int -> ByteString -> CSVField)
-> [Int] -> [ByteString] -> [CSVField]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> ByteString -> CSVField
mkCSVField Int
rnum) [1..Int
n] [ByteString]
cs )
blank :: Int -> CSVError
blank rnum :: Int
rnum = $WBlankLine :: Int -> Int -> Int -> CSVField -> CSVError
BlankLine{ csvRow :: Int
csvRow = Int
rnum
, csvColsExpected :: Int
csvColsExpected = Int
n
, csvColsActual :: Int
csvColsActual = 0
, csvField :: CSVField
csvField = Int -> Int -> ByteString -> CSVField
mkCSVField Int
rnum 0 ByteString
BS.empty
}
bad :: Int -> [ByteString] -> CSVError
bad r :: Int
r cs :: [ByteString]
cs = IncorrectRow :: Int -> Int -> Int -> [CSVField] -> CSVError
IncorrectRow{ csvRow :: Int
csvRow = Int
r
, csvColsExpected :: Int
csvColsExpected = Int
n
, csvColsActual :: Int
csvColsActual = [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
cs
, csvFields :: [CSVField]
csvFields = (Int -> ByteString -> CSVField)
-> [Int] -> [ByteString] -> [CSVField]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> ByteString -> CSVField
mkCSVField Int
r) [1..] [ByteString]
cs
}
selectFields :: [String] -> CSVTable -> Either [String] CSVTable
selectFields :: [String] -> CSVTable -> Either [String] CSVTable
selectFields names :: [String]
names table :: CSVTable
table
| CSVTable -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null CSVTable
table = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left [String]
names
| Bool -> Bool
not ([String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
missing) = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left [String]
missing
| Bool
otherwise = CSVTable -> Either [String] CSVTable
forall a b. b -> Either a b
Right (([CSVField] -> [CSVField]) -> CSVTable -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map [CSVField] -> [CSVField]
forall b. [b] -> [b]
select CSVTable
table)
where
header :: [String]
header = (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> String
BS.unpack (ByteString -> String)
-> (CSVField -> ByteString) -> CSVField -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVField -> ByteString
csvFieldContent) (CSVTable -> [CSVField]
forall a. [a] -> a
head CSVTable
table)
missing :: [String]
missing = (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
header) [String]
names
reordering :: [Int]
reordering = (String -> Int) -> [String] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Int -> Int) -> (String -> Maybe Int) -> String -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\n :: String
n-> String -> [String] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
elemIndex String
n [String]
header)) [String]
names
select :: [b] -> [b]
select fields :: [b]
fields = (Int -> b) -> [Int] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ([b]
fields[b] -> Int -> b
forall a. [a] -> Int -> a
!!) [Int]
reordering
expectFields :: [String] -> CSVTable -> Either [String] CSVTable
expectFields :: [String] -> CSVTable -> Either [String] CSVTable
expectFields names :: [String]
names table :: CSVTable
table
| CSVTable -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null CSVTable
table = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left ["CSV table is empty"]
| Bool -> Bool
not ([String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
missing) = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left (ShowS -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map ("CSV table is missing field: "String -> ShowS
forall a. [a] -> [a] -> [a]
++)
[String]
missing)
| [String]
header [String] -> [String] -> Bool
forall a. Eq a => a -> a -> Bool
/= [String]
names = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left ["CSV columns are in the wrong order"
,"Expected: "String -> ShowS
forall a. [a] -> [a] -> [a]
++String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate ", " [String]
names
,"Found: "String -> ShowS
forall a. [a] -> [a] -> [a]
++String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate ", " [String]
header]
| Bool
otherwise = CSVTable -> Either [String] CSVTable
forall a b. b -> Either a b
Right CSVTable
table
where
header :: [String]
header = (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> String
BS.unpack (ByteString -> String)
-> (CSVField -> ByteString) -> CSVField -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVField -> ByteString
csvFieldContent) (CSVTable -> [CSVField]
forall a. [a] -> a
head CSVTable
table)
missing :: [String]
missing = (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
header) [String]
names
joinCSV :: CSVTable -> CSVTable -> CSVTable
joinCSV :: CSVTable -> CSVTable -> CSVTable
joinCSV = ([CSVField] -> [CSVField] -> [CSVField])
-> CSVTable -> CSVTable -> CSVTable
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith [CSVField] -> [CSVField] -> [CSVField]
forall a. [a] -> [a] -> [a]
(++)
mkEmptyColumn :: String -> CSVTable
mkEmptyColumn :: String -> CSVTable
mkEmptyColumn header :: String
header = [CSVField
headField] [CSVField] -> CSVTable -> CSVTable
forall a. a -> [a] -> [a]
: (Int -> [CSVField]) -> [Int] -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map ((CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:[])(CSVField -> [CSVField]) -> (Int -> CSVField) -> Int -> [CSVField]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Int -> CSVField
emptyField) [2..]
where
headField :: CSVField
headField = (Int -> CSVField
emptyField 1) { csvFieldContent :: ByteString
csvFieldContent = String -> ByteString
BS.pack String
header
, csvFieldQuoted :: Bool
csvFieldQuoted = Bool
True }
emptyField :: Int -> CSVField
emptyField n :: Int
n = $WCSVField :: Int
-> Int
-> (Int, Int)
-> (Int, Int)
-> ByteString
-> Bool
-> CSVField
CSVField { csvRowNum :: Int
csvRowNum = Int
n
, csvColNum :: Int
csvColNum = 0
, csvTextStart :: (Int, Int)
csvTextStart = (0,0)
, csvTextEnd :: (Int, Int)
csvTextEnd = (0,0)
, csvFieldContent :: ByteString
csvFieldContent = ByteString
BS.empty
, csvFieldQuoted :: Bool
csvFieldQuoted = Bool
False
}
mkCSVField :: Int -> Int -> ByteString -> CSVField
mkCSVField :: Int -> Int -> ByteString -> CSVField
mkCSVField n :: Int
n c :: Int
c text :: ByteString
text =
$WCSVField :: Int
-> Int
-> (Int, Int)
-> (Int, Int)
-> ByteString
-> Bool
-> CSVField
CSVField { csvRowNum :: Int
csvRowNum = Int
n
, csvColNum :: Int
csvColNum = Int
c
, csvTextStart :: (Int, Int)
csvTextStart = (0,0)
, csvTextEnd :: (Int, Int)
csvTextEnd = ( Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
(Int64 -> Int) -> (ByteString -> Int64) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int64
BS.length
(ByteString -> Int64)
-> (ByteString -> ByteString) -> ByteString -> Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='\n')
(ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ ByteString
text
, Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
(Int64 -> Int) -> (ByteString -> Int64) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int64
BS.length
(ByteString -> Int64)
-> (ByteString -> ByteString) -> ByteString -> Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\n')
(ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BS.reverse (ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ ByteString
text )
, csvFieldContent :: ByteString
csvFieldContent = ByteString
text
, csvFieldQuoted :: Bool
csvFieldQuoted = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`"\",\n\r") (ByteString -> String
BS.unpack ByteString
text)
}