module Data.Text.Internal.Lazy.Search
(
indices
) where
import Data.Bits (unsafeShiftL)
import qualified Data.Text.Array as A
import Data.Int (Int64)
import Data.Word (Word8, Word64)
import qualified Data.Text.Internal as T
import qualified Data.Text as T (concat, isPrefixOf)
import Data.Text.Internal.Fusion.Types (PairS(..))
import Data.Text.Internal.Lazy (Text(..), foldrChunks)
import Data.Bits ((.|.), (.&.))
import Foreign.C.Types
import GHC.Exts (ByteArray#)
import System.Posix.Types (CSsize(..))
indices :: Text
-> Text
-> [Int64]
indices needle
| nlen <= 0 = const []
| nlen == 1 = indicesOne (A.unsafeIndex narr noff) 0
| otherwise = advance 0 0
where
T.Text narr noff nlen = T.concat (foldrChunks (:) [] needle)
advance !_ !_ Empty = []
advance !(g :: Int64) !(i :: Int) xxs@(Chunk x@(T.Text xarr@(A.ByteArray xarr#) xoff l) xs)
| i >= l = advance g (i l) xs
| lackingHay (i + nlen) x xs = []
| c == z && candidateMatch = g : advance (g + intToInt64 nlen) (i + nlen) xxs
| otherwise = advance (g + intToInt64 delta) (i + delta) xxs
where
c = index xxs (i + nlast)
delta | nextInPattern = nlen + 1
| c == z = skip + 1
| l >= i + nlen = case
memchr xarr# (intToCSize (xoff + i + nlen)) (intToCSize (l i nlen)) z of
1 -> max 1 (l i nlen)
s -> cSsizeToInt s + 1
| otherwise = 1
nextInPattern = mask .&. swizzle (index xxs (i + nlen)) == 0
candidateMatch
| i + nlen <= l = A.equal narr noff xarr (xoff + i) nlen
| otherwise = A.equal narr noff xarr (xoff + i) (l i) &&
T.Text narr (noff + l i) (nlen l + i) `isPrefixOf` xs
nlast = nlen 1
z = A.unsafeIndex narr (noff + nlen 1)
(mask :: Word64) :*: skip = buildTable 0 0 0 (nlen2)
swizzle :: Word8 -> Word64
swizzle w = 1 `unsafeShiftL` (word8ToInt w .&. 0x3f)
buildTable !g !i !msk !skp
| i >= nlast = (msk .|. swizzle z) :*: skp
| otherwise = buildTable (g+1) (i+1) msk' skp'
where c = A.unsafeIndex narr (noff+i)
msk' = msk .|. swizzle c
skp' | c == z = nlen g 2
| otherwise = skp
lackingHay :: Int -> T.Text -> Text -> Bool
lackingHay q (T.Text _ _ l) ps = l < q && case ps of
Empty -> True
Chunk r rs -> lackingHay (q l) r rs
index :: Text -> Int -> Word8
index Empty !_ = 0
index (Chunk (T.Text arr off len) xs) !i
| i < len = A.unsafeIndex arr (off + i)
| otherwise = index xs (i len)
indicesOne :: Word8 -> Int64 -> Text -> [Int64]
indicesOne c = chunk
where
chunk :: Int64 -> Text -> [Int64]
chunk !_ Empty = []
chunk !i (Chunk (T.Text oarr ooff olen) os) = go 0
where
go h | h >= olen = chunk (i+intToInt64 olen) os
| on == c = i + intToInt64 h : go (h+1)
| otherwise = go (h+1)
where on = A.unsafeIndex oarr (ooff+h)
isPrefixOf :: T.Text -> Text -> Bool
isPrefixOf (T.Text _ _ xlen) Empty = xlen == 0
isPrefixOf x@(T.Text xarr xoff xlen) (Chunk y@(T.Text _ _ ylen) ys)
| xlen <= ylen = x `T.isPrefixOf` y
| otherwise = y `T.isPrefixOf` x && T.Text xarr (xoff + ylen) (xlen ylen) `isPrefixOf` ys
intToInt64 :: Int -> Int64
intToInt64 = fromIntegral
word8ToInt :: Word8 -> Int
word8ToInt = fromIntegral
intToCSize :: Int -> CSize
intToCSize = fromIntegral
cSsizeToInt :: CSsize -> Int
cSsizeToInt = fromIntegral
foreign import ccall unsafe "_hs_text_memchr" memchr
:: ByteArray# -> CSize -> CSize -> Word8 -> CSsize