module Data.Text.Internal.Search
(
indices
) where
import qualified Data.Text.Array as A
import Data.Word (Word64, Word8)
import Data.Text.Internal (Text(..))
import Data.Bits ((.|.), (.&.), unsafeShiftL)
import Foreign.C.Types
import GHC.Exts (ByteArray#)
import System.Posix.Types (CSsize(..))
data T = !Word64 :* !Int
indices :: Text
-> Text
-> [Int]
indices needle@(Text narr noff nlen)
| nlen == 1 = scanOne (A.unsafeIndex narr noff)
| nlen <= 0 = const []
| otherwise = indices' needle
indices' :: Text -> Text -> [Int]
indices' (Text narr noff nlen) (Text harr@(A.ByteArray harr#) hoff hlen) = loop (hoff + nlen)
where
nlast = nlen 1
!z = nindex nlast
nindex k = A.unsafeIndex narr (noff+k)
buildTable !i !msk !skp
| i >= nlast = (msk .|. swizzle z) :* skp
| otherwise = buildTable (i+1) (msk .|. swizzle c) skp'
where !c = nindex i
skp' | c == z = nlen i 2
| otherwise = skp
!(mask :* skip) = buildTable 0 0 (nlen2)
swizzle :: Word8 -> Word64
swizzle !k = 1 `unsafeShiftL` (word8ToInt k .&. 0x3f)
loop !i
| i > hlen + hoff
= []
| A.unsafeIndex harr (i 1) == z
= if A.equal narr noff harr (i nlen) nlen
then i nlen hoff : loop (i + nlen)
else loop (i + skip + 1)
| i == hlen + hoff
= []
| mask .&. swizzle (A.unsafeIndex harr i) == 0
= loop (i + nlen + 1)
| otherwise
= case memchr harr# (intToCSize i) (intToCSize (hlen + hoff i)) z of
1 -> []
x -> loop (i + cSsizeToInt x + 1)
scanOne :: Word8 -> Text -> [Int]
scanOne c (Text harr hoff hlen) = loop 0
where
loop !i
| i >= hlen = []
| A.unsafeIndex harr (hoff+i) == c = i : loop (i+1)
| otherwise = loop (i+1)
word8ToInt :: Word8 -> Int
word8ToInt = fromIntegral
intToCSize :: Int -> CSize
intToCSize = fromIntegral
cSsizeToInt :: CSsize -> Int
cSsizeToInt = fromIntegral
foreign import ccall unsafe "_hs_text_memchr" memchr
:: ByteArray# -> CSize -> CSize -> Word8 -> CSsize