module Unicode (
       word16to8le, word32to8le,
       bom, bomle, bomle',
       utf8to16le, utf8to16'le,
       utf8to16'
) where

import qualified Data.ByteString as L
import Data.Bits
import Data.Word

-- import Debug.Trace

{- Assistent Functions -}
word32to8le :: Word32 -> [Word8]
word32to8le x = [l0, l8, l16, l24]
        where lowByte x = x .&. 0xFF
              l0  = trans $ lowByte x
              l8  = trans . lowByte $ shiftR x 8
              l16 = trans . lowByte $ shiftR x 16
              l24 = trans . lowByte $ shiftR x 24
              trans = toEnum . fromEnum

word16to8le :: Word16 -> [Word8]
word16to8le x = map (\f->f x) [lowByte16, highByte16]

trans8to16 :: Word8 -> Word16
trans8to16 = toEnum . fromEnum

trans8to32 :: Word8 -> Word32
trans8to32 = toEnum . fromEnum

trans16to32 :: Word16 -> Word32
trans16to32 = toEnum . fromEnum

trans32to16 :: Word32 -> Word16
trans32to16 = toEnum . fromEnum

trans :: (Enum a) => a -> a
trans = toEnum . fromEnum

lowByte16 :: Word16 -> Word8
lowByte16 = toEnum . fromEnum . (0x00FF .&.)

highByte16 :: Word16 -> Word8
highByte16 = lowByte16 . flip shiftR 8


{- variant BOM -}
bomle :: L.ByteString
bomle = L.pack bomle'

bomle' :: [Word8]
bomle' = [0xFF, 0xFE]

bom :: Word16
bom = 0xFEFF


{- Encode Translation -}

utf8to16le :: L.ByteString -> L.ByteString
utf8to16le = L.pack . utf8to16'le . L.unpack



utf8to16'le :: [Word8] -> [Word8]
{- utf8to16'le = concat . map word16to8le . utf8to16' -}
{- or equivalent, but with better performance : -}
utf8to16'le [] = []
utf8to16'le (x:xs)
          | x .&. 0x80 == 0
              = x : 0 : utf8to16'le xs
          | (x `xor` 0xC0) .&. 0xE0 == 0
              = case utf8Tail xs of
                  Nothing -> utf8to16'le xs
                  Just v  -> let (h,l) = combine2 x v
                             in l:h:(utf8to16'le txs)
          | (x `xor` 0xE0) .&. 0xF0 == 0
              = case utf8Tail xs of
                  Nothing -> utf8to16'le xs
                  Just v1 -> case utf8Tail txs of
                                Nothing -> utf8to16'le txs
                                Just v2 -> let (h,l) = combine3 x v1 v2
                                           in l:h:(utf8to16'le ttxs)
          | (x `xor` 0xF0) .&. 0xF8 == 0
              = case utf8Tail xs of
                  Nothing -> utf8to16'le xs
                  Just v1 ->
                       case utf8Tail txs of
                         Nothing -> utf8to16'le txs
                         Just v2 ->
                             case utf8Tail ttxs of
                               Nothing -> utf8to16'le tttxs;
                               Just v3 ->
                                    let (u1, u2, u3, u4) = combine4 x v1 v2 v3
                                    in u1 : u2 : u3 : u4 : utf8to16'le tttxs
          | otherwise = utf8to16'le xs
          where txs  = tail xs
                ttxs = tail txs
                tttxs = tail ttxs
                {- combine2 :: Word8 -> Word8 -> Word16 -}
                combine2 a b =
                    let low  = shiftL (a .&. 0x03) 6 .|. b
                        high = shiftR a 2 .&. 0x07
                    in (high, low)
                {- combine3 :: Word8 -> Word8 -> Word8 -> Word16 -}
                combine3 a b c =
                    let bR2   = (shiftR b 2) .&. 0x0F
                        low6  = c -- already .&. 0x3F
                        low8  = (shiftL (b .&. 0x03) 6) .|. low6
                        high8 = (shiftL a 4) .|. bR2
                    in  (high8, low8)
                {- combine4 :: Word8 -> Word8 -> Word8 -> Word8
                               -> (Word8, Word8, Word8, Word8)-}
                combine4 a b c d = let ull' = c .&. 0x03
                                       ull  = (shiftL ull' 6) .|. d
                                       ul'  = (shiftR c 2) .&. 0x03
                                       ul   = ul' .|. 0xDC
                                       a'   = a .&. 0x07
                                       uh16'= (trans8to16 $ shiftL a' 8)
                                              .|. (trans8to16 $ shiftL b 2)
                                              .|. (trans8to16 $ shiftR c 4)
                                       uh16 = (uh16' - 0x0040) .|. 0xD800
                                       [ua, ub] = word16to8le uh16
                                    in (ua, ub, ull, ul)



{- skip, when invalid utf8 char appears -}
utf8to16' :: [Word8] -> [Word16]
utf8to16' [] = []
utf8to16' (x:xs)
          | x .&. 0x80 == 0
              = trans8to16 x : utf8to16' xs
          | (x `xor` 0xC0) .&. 0xE0 == 0
              = case utf8Tail xs of
                  Nothing -> utf8to16' xs
                  Just v  -> combine2 x v : utf8to16' txs
          | (x `xor` 0xE0) .&. 0xF0 == 0
              = case utf8Tail xs of
                  Nothing -> utf8to16' xs
                  Just v1 -> case utf8Tail txs of
                                Nothing -> utf8to16' txs
                                Just v2 -> combine3 x v1 v2 : utf8to16' ttxs
          | (x `xor` 0xF0) .&. 0xF8 == 0
              = case utf8Tail xs of
                  Nothing -> utf8to16' xs
                  Just v1 ->
                       case utf8Tail txs of
                         Nothing -> utf8to16' txs
                         Just v2 ->
                             case utf8Tail ttxs of
                               Nothing -> utf8to16' tttxs;
                               Just v3 -> let (u1, u2) = combine4 x v1 v2 v3
                                          in u1 : u2 : utf8to16' tttxs
          | otherwise = utf8to16' xs
          where txs  = tail xs
                ttxs = tail txs
                tttxs = tail ttxs
                {- combine2 :: Word8 -> Word8 -> Word16 -}
                combine2 a b =
                    let low = trans8to16 $ shiftL (a .&. 0x03) 6 .|. b
                        w16 = shiftL (trans8to16 a) 6 .&. 0x0700 .|. low
                    in w16 -- + 0x80
                {- combine3 :: Word8 -> Word8 -> Word8 -> Word16 -}
                combine3 a b c =
                    let bR2   = (shiftR b 2) .&. 0x0F
                        low6  = c -- already .&. 0x3F
                        low8  = trans8to16 $ (shiftL (b .&. 0x03) 6) .|. low6
                        high8 = trans8to16 $ (shiftL a 4) .|. bR2
                        w16   = (shiftL high8 8) .|. low8
                    in  w16 -- + 0x800
                {- combine4 :: Word8 -> Word8 -> Word8 -> Word8
                               -> (Word16, Word16)-}
                combine4 a b c d = let u1 = (trans8to32 a) .&. 0x00000003
                                       u2 = (trans8to32 b) .|. (shiftL u1 6)
                                       u3 = (trans8to32 c) .|. (shiftL u2 6)
                                       u  = (trans8to32 d) .|. (shiftL u3 6)
                                       u' = u - 0x00010000
                                       u'h = trans32to16 $ (shiftR u' 10)
                                                                -- .&. 0x000003FF
                                       u'l = trans32to16 $ u' .&. 0x000003FF
                                   in (u'h .|. 0xD800 , u'l .|. 0xDC00)



-- look forward 1 Byte, if 10xxxxxx  return the lower 6 bits , otherwise fail
utf8Tail :: [Word8] -> Maybe Word8
utf8Tail [] = Nothing
utf8Tail (x:_)
         | (x `xor` 0x80) .&. 0xC0 == 0 = Just $ x .&. 0x3F
         | otherwise = Nothing
