------------------------------------------------------------------------------
-- |
-- Module       : Data.Hash.Rolling
-- License      : BSD-style
--
-- Maintainer   : jcpetruzza@gmail.com
-- Stability    : experimental
-- Portability  : portable
--
-- Efficient implementation of a rolling hash, i.e., the computation of a hash through
-- a moving window of a fixed size @n@ over some stream. All operations are O(1) (in
-- particular, they do not depend on the size of the window).
--
-- Some laws that this type satisfies:
--
-- * @currentHash rh == foldl1 combine (lastHashes rh)@
--
-- * @length (lastHashes rh) <= windowSize rh@
--
-- * @length (lastHashes $ addAndRoll rh a) == windowSize rh -- whenever length (lastHashes rh) == windowSize rh@
--
-- * @last (lastHashes $ addAndRoll rh x) == hash a@
--
-- * @init (lastHashes $ addAndRoll rh a) `isSuffixOf` (lastHashes rh)@
-------------------------------------------------------------------------------
module Data.Hash.Rolling (
    -- * The @RollingHash@ type
    RollingHash,
    -- ** Construction and manipulation
    rollingHash, addAndRoll,
    -- ** Querying
    currentHash, lastHashes, windowSize
)

where

import Data.Hash.Base
import Data.Hash.Instances
import Data.Bits
import qualified Data.Sequence as S
import Data.Foldable
import Text.Show.Functions ()


data RollingHash a = RH {
     forall a. RollingHash a -> Hash
currentHash :: Hash
    ,forall a. RollingHash a -> Int
windowSize  :: Int
    ,forall a. RollingHash a -> Seq Hash
hseq        :: S.Seq Hash
    ,forall a. RollingHash a -> RollingHash a -> Hash -> RollingHash a
addHashImpl :: RollingHash a -> Hash -> RollingHash a
    } deriving Int -> RollingHash a -> ShowS
[RollingHash a] -> ShowS
RollingHash a -> String
(Int -> RollingHash a -> ShowS)
-> (RollingHash a -> String)
-> ([RollingHash a] -> ShowS)
-> Show (RollingHash a)
forall a. Int -> RollingHash a -> ShowS
forall a. [RollingHash a] -> ShowS
forall a. RollingHash a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Int -> RollingHash a -> ShowS
showsPrec :: Int -> RollingHash a -> ShowS
$cshow :: forall a. RollingHash a -> String
show :: RollingHash a -> String
$cshowList :: forall a. [RollingHash a] -> ShowS
showList :: [RollingHash a] -> ShowS
Show

-- | @rollingHash n@ creates a @RollingHash@ of window
--   size @n@ (for @n > 0@)
rollingHash :: Int -> RollingHash a
rollingHash :: forall a. Int -> RollingHash a
rollingHash Int
n
  | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    = String -> RollingHash a
forall a. HasCallStack => String -> a
error (String -> RollingHash a) -> String -> RollingHash a
forall a b. (a -> b) -> a -> b
$ String
"rollingHash: invalid window size " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
n
  | Bool
otherwise = RH {
       currentHash :: Hash
currentHash = Hash
initial_hash
      ,windowSize :: Int
windowSize  = Int
n
      ,hseq :: Seq Hash
hseq        = Hash -> Seq Hash
forall a. a -> Seq a
S.singleton Hash
initial_hash
      ,addHashImpl :: RollingHash a -> Hash -> RollingHash a
addHashImpl = Int -> RollingHash a -> Hash -> RollingHash a
forall a. Int -> RollingHash a -> Hash -> RollingHash a
accumulateNext (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    }
    where initial_hash :: Hash
initial_hash = () -> Hash
forall a. Hashable a => a -> Hash
hash () Hash -> Hash -> Hash
`combine` Int -> Hash
forall a. Hashable a => a -> Hash
hash Int
n

defaultAddHash :: RollingHash a -> Hash -> RollingHash a
defaultAddHash :: forall a. RollingHash a -> Hash -> RollingHash a
defaultAddHash RollingHash a
rh Hash
hv = RollingHash a
rh { currentHash = (currentHash rh) `combine` (Hash $ rotate c1 k `xor` ck)
                          ,        hseq = (S.drop 1 $ hseq rh) S.|> hv
                          }
    where ck :: Word64
ck = Hash -> Word64
asWord64 Hash
hv
          c1 :: Word64
c1 = Hash -> Word64
asWord64 (Hash -> Word64) -> Hash -> Word64
forall a b. (a -> b) -> a -> b
$ Seq Hash -> Int -> Hash
forall a. Seq a -> Int -> a
S.index (RollingHash a -> Seq Hash
forall a. RollingHash a -> Seq Hash
hseq RollingHash a
rh) Int
0
          k :: Int
k = Seq Hash -> Int
forall a. Seq a -> Int
S.length (Seq Hash -> Int) -> Seq Hash -> Int
forall a b. (a -> b) -> a -> b
$ RollingHash a -> Seq Hash
forall a. RollingHash a -> Seq Hash
hseq RollingHash a
rh


-- | @addAndRoll x rh@ adds a new input element and rolls the window
--   one place through the input (if at least @n@ elements were
--   already consumed).
addAndRoll ::  Hashable a => RollingHash a -> a -> RollingHash a
addAndRoll :: forall a. Hashable a => RollingHash a -> a -> RollingHash a
addAndRoll RollingHash a
r a
a = (RollingHash a -> RollingHash a -> Hash -> RollingHash a
forall a. RollingHash a -> RollingHash a -> Hash -> RollingHash a
addHashImpl RollingHash a
r) RollingHash a
r (a -> Hash
forall a. Hashable a => a -> Hash
hash a
a)

accumulateNext :: Int -> RollingHash a -> Hash -> RollingHash a
accumulateNext :: forall a. Int -> RollingHash a -> Hash -> RollingHash a
accumulateNext Int
n | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = \RollingHash a
rh Hash
h -> RollingHash a
rh {
                            currentHash = currentHash rh `combine` h,
                            hseq = (hseq rh) S.|> h,
                            addHashImpl = accumulateNext (n - 1)
                        }
             | Bool
otherwise = RollingHash a -> Hash -> RollingHash a
forall a. RollingHash a -> Hash -> RollingHash a
defaultAddHash

-- | @lastHashes rh@ returns the last @n@ hashes.
lastHashes :: RollingHash a -> [Hash]
lastHashes :: forall a. RollingHash a -> [Hash]
lastHashes = Seq Hash -> [Hash]
forall a. Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList (Seq Hash -> [Hash])
-> (RollingHash a -> Seq Hash) -> RollingHash a -> [Hash]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RollingHash a -> Seq Hash
forall a. RollingHash a -> Seq Hash
hseq