Commit 259e6289 authored by Mike Ledger's avatar Mike Ledger

separate tree for compressed radix for even more space saving goodness

parent e6aa84b8
......@@ -37,9 +37,11 @@ tests0 = V.fromList $! T.lines
\long thing that doesn't parse\n\
\z"
sourceRadixCompressed :: R.RadixTree
sourceRadixCompressed :: R.CompressedRadixTree
sourceRadixCompressed =
R.compressBy (T.concat source) sourceRadix
case R.compressBy (T.concat source) sourceRadix of
Just ct -> ct
Nothing -> error "could not compress source"
sourceRadix :: R.RadixTree
sourceRadix = R.fromFoldable source
......
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE RankNTypes #-}
......@@ -11,6 +12,7 @@
module Data.RadixTree
( RadixTree (..)
, RadixNode (..)
, CompressedRadixTree
-- * Construction
, fromFoldable
, compressBy
......@@ -24,7 +26,6 @@ import Data.Data (Data, Typeable)
import Data.Foldable (asum, foldr', toList)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Maybe
import Data.Monoid
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
......@@ -32,6 +33,7 @@ import Data.Store ()
import Data.Store.TH (makeStore)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Array as TI (Array)
import qualified Data.Text.Internal as TI (Text (..), text)
import Data.Vector (Vector)
import qualified Data.Vector as V
......@@ -147,25 +149,60 @@ fromTrie = go . compress
Accept l m -> RadixAccept l . V.map (uncurry radixNode) . mapToVector $! m
Skip m -> RadixSkip . V.map (uncurry radixNode) . mapToVector $! m
data TextSlice = TextSlice
{-# UNPACK #-} !Int -- ^ offset (in units of Word16)
{-# UNPACK #-} !Int -- ^ length (in units of Word16)
-- | Probably dangerous magic
--
-- When the second argument is found to be within the first, we re-use the
-- 'Text' array of the first. This should allow the second argument to be
-- garbage collected. This is to improve locality and memory use.
magicallySaveSpaceSometimes :: Text -> Text -> Maybe Text
magicallySaveSpaceSometimes full@(TI.Text arr _ _) s@(TI.Text _ _ slen) =
magicallySaveSpaceSometimes :: Text -> Text -> Maybe TextSlice
magicallySaveSpaceSometimes full s@(TI.Text _ _ slen) =
case T.breakOn s full of
(TI.Text{}, r@(TI.Text _ remoffs _))
| T.null r -> Nothing
| otherwise -> Just (TI.text arr remoffs slen)
| otherwise -> Just (TextSlice remoffs slen)
data CompressedRadixTree
= CompressedRadixTree {-# UNPACK #-} !TI.Array !CompressedRadixTree1
data CompressedRadixTree1
= CompressedRadixAccept
{-# UNPACK #-} !TextSlice
{-# UNPACK #-} !(Vector CompressedRadixNode)
| CompressedRadixSkip {-# UNPACK #-} !(Vector CompressedRadixNode)
data CompressedRadixNode
= CompressedRadixNode {-# UNPACK #-} !TextSlice !CompressedRadixTree1
instance NFData CompressedRadixNode where
{-# INLINE rnf #-}
rnf (CompressedRadixNode ts t) = ts `seq` rnf t
instance NFData CompressedRadixTree where
{-# INLINE rnf #-}
rnf (CompressedRadixTree arr v) = arr `seq` rnf v
instance NFData CompressedRadixTree1 where
{-# INLINE rnf #-}
rnf (CompressedRadixAccept ts v) = ts `seq` rnf v
rnf (CompressedRadixSkip v) = rnf v
compressBy :: Text -> RadixTree -> Maybe CompressedRadixTree
compressBy full@(TI.Text arr _ _) rt =
CompressedRadixTree arr <$> recompressT rt
compressBy :: Text -> RadixTree -> RadixTree
compressBy full = recompressT
where
magic t = fromMaybe t (magicallySaveSpaceSometimes full t)
recompressN (RadixNode t tree) = RadixNode (magic t) (recompressT tree)
recompressT (RadixSkip v) = RadixSkip (V.map recompressN v)
recompressT (RadixAccept t v) = RadixAccept (magic t) (V.map recompressN v)
magic = magicallySaveSpaceSometimes full
recompressN :: RadixNode -> Maybe CompressedRadixNode
recompressN (RadixNode t tree) = CompressedRadixNode <$> magic t <*> recompressT tree
recompressT :: RadixTree -> Maybe CompressedRadixTree1
recompressT (RadixSkip v) = CompressedRadixSkip <$> V.mapM recompressN v
recompressT (RadixAccept t v) = CompressedRadixAccept <$> magic t <*> V.mapM recompressN v
fromFoldable :: Foldable f => f Text -> RadixTree
fromFoldable =
......@@ -177,10 +214,15 @@ makeStore ''RadixTree
--------------------------------------------------------------------------------
-- Parsers from 'RadixTree's
class RadixParsing radixtree where
parse :: CharParsing m => radixtree -> m Text
{-# INLINE search #-}
-- | Find all occurences of the terms in a 'RadixTree' from this point on. This
-- will consume the entire remaining input. Can lazily produce results.
search :: (Monad m, CharParsing m) => RadixTree -> m [Text]
-- will consume the entire remaining input. Can lazily produce results (but this
-- depends on your parser).
search :: (Monad m, CharParsing m, RadixParsing radixtree)
=> radixtree -> m [Text]
search r = go
where
go =
......@@ -188,19 +230,39 @@ search r = go
(anyChar >> go) <|>
return []
{-# INLINE parse #-}
-- | Parse from a 'RadixTree'
parse :: CharParsing m => RadixTree -> m Text
parse = go
where
go r = case r of
RadixAccept l nodes
| T.null l -> empty
| otherwise -> asum (V.map parseRadixNode nodes) <|> pure l
RadixSkip nodes -> asum (V.map parseRadixNode nodes)
{-# INLINE parseRadixNode #-}
parseRadixNode (RadixNode prefix tree)
| T.null prefix = go tree
| otherwise = try (text prefix *> go tree)
instance RadixParsing RadixTree where
{-# INLINE parse #-}
-- | Parse from a 'RadixTree'
parse :: CharParsing m => RadixTree -> m Text
parse = go
where
go r = case r of
RadixAccept l nodes
| T.null l -> empty
| otherwise -> asum (V.map parseRadixNode nodes) <|> pure l
RadixSkip nodes -> asum (V.map parseRadixNode nodes)
{-# INLINE parseRadixNode #-}
parseRadixNode (RadixNode prefix tree)
| T.null prefix = go tree
| otherwise = try (text prefix *> go tree)
instance RadixParsing CompressedRadixTree where
{-# INLINE parse #-}
-- | Parse from a 'RadixTree'
parse :: CharParsing m => CompressedRadixTree -> m Text
parse (CompressedRadixTree arr crt) = go crt
where
fromSlice (TextSlice offs len) = TI.text arr offs len
go r = case r of
CompressedRadixAccept ts nodes -> case fromSlice ts of
l | T.null l -> empty
| otherwise -> asum (V.map parseRadixNode nodes) <|> pure l
CompressedRadixSkip nodes -> asum (V.map parseRadixNode nodes)
{-# INLINE parseRadixNode #-}
parseRadixNode (CompressedRadixNode ts tree) = case fromSlice ts of
prefix | T.null prefix -> go tree
| otherwise -> try (text prefix *> go tree)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment