Commit b59bb928 authored by Mike Ledger's avatar Mike Ledger

allow for radixtree to be compressed according to the original input

parent 124c7fbb
Pipeline #14998854 passed with stage
in 17 minutes and 54 seconds
name: radixtree
version: 0.2.0.1
homepage: https://gitlab.com/transportengineering/radixtree
homepage: https://gitlab.com/transportengineering/rnd/radixtree
license: BSD3
license-file: LICENSE
author: Mike Ledger
......@@ -21,6 +21,7 @@ library
, microlens
, parsers >= 0.12
, store >= 0.4
, mtl >= 2.0
default-language: Haskell2010
benchmark radixtree-parsing
......
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
......@@ -10,6 +12,7 @@ module Data.RadixTree
, RadixNode (..)
-- * Construction
, fromFoldable
, compressBy
-- * Parsing with radix trees
, parse
) where
......@@ -19,19 +22,35 @@ import Data.Data (Data, Typeable)
import Data.Foldable (asum, foldr', toList)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Maybe
import Data.Monoid
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Store () -- has Text instance
import Data.Store ()
import Data.Store.TH (makeStore)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Internal as TI (Text (..), text)
import Data.Vector (Vector)
import qualified Data.Vector as V
import Lens.Micro
import Text.Parser.Char (CharParsing (text))
import Text.Parser.Combinators (Parsing (try))
--------------------------------------------------------------------------------
-- Stuff to help construct RadixTrees
--
-- I'm not clever enough to write a function to go directly from a 'Foldable' to
-- a fully-optimised RadixTree. Instead, I generate a prefix-tree using a 'Map'
-- directly ('Trie'), and then gradually compress that ('CompressedTrie') before
-- packing the final result into an efficient structure using 'Text' nodes.
--
-- TODO:
-- - Generate RadixTree directly
-- - Use offsets into original 'Text' values. Would be much better for memory
-- locality
-- - Or 'just' use compact regions?
data PrefixNode a = Accept !Text !a | Skip !a
deriving (Show, Eq)
......@@ -47,17 +66,11 @@ node = lens
(\x -> case x of { Accept _ t -> t; Skip t -> t })
(\x a -> case x of { Accept l _ -> Accept l a; Skip _ -> Skip a })
skip :: Trie
skip = Trie (Skip M.empty)
accept :: Text -> Trie
accept t = Trie (Accept t M.empty)
leaf :: Text -> Text -> Trie
leaf ft t = go (T.unpack t)
where
go (x:xs) = Trie (Skip (M.singleton x (go xs)))
go [] = accept ft
go [] = Trie (Accept ft M.empty)
insert :: Text -> Text -> Trie -> Trie
insert ft text' (Trie n) = case T.uncons text' of
......@@ -114,6 +127,8 @@ instance Monoid RadixTree where
mempty = RadixSkip V.empty
mappend _ _ = mempty
-- | Compress a totally-unoptimised 'Trie' into a nice and easily-parsable
-- 'RadixTree'
fromTrie :: Trie -> RadixTree
fromTrie = go . compress
where
......@@ -128,8 +143,29 @@ fromTrie = go . compress
Accept l m -> RadixAccept l . V.map (uncurry radixNode) . mapToVector $! m
Skip m -> RadixSkip . V.map (uncurry radixNode) . mapToVector $! m
-- | Probably dangerous magic
--
-- When the second argument is found to be within the first, we re-use the
-- 'Text' array of the first. This should allow the second argument to be
-- garbage collected. This is to improve locality and memory use.
magicallySaveSpaceSometimes :: Text -> Text -> Maybe Text
magicallySaveSpaceSometimes full@(TI.Text arr _ _) s@(TI.Text _ _ slen) =
case T.breakOn s full of
(TI.Text{}, r@(TI.Text _ remoffs _))
| T.null r -> Nothing
| otherwise -> Just (TI.text arr remoffs slen)
compressBy :: Text -> RadixTree -> RadixTree
compressBy full = recompressT
where
magic t = fromMaybe t (magicallySaveSpaceSometimes full t)
recompressN (RadixNode t tree) = RadixNode (magic t) (recompressT tree)
recompressT (RadixSkip v) = RadixSkip (V.map recompressN v)
recompressT (RadixAccept t v) = RadixAccept (magic t) (V.map recompressN v)
fromFoldable :: Foldable f => f Text -> RadixTree
fromFoldable = fromTrie . foldr' (\t -> insert t t) skip
fromFoldable =
fromTrie . foldr' (\t -> insert t t) (Trie (Skip M.empty))
makeStore ''RadixNode
makeStore ''RadixTree
......@@ -139,7 +175,7 @@ makeStore ''RadixTree
{-# INLINE parse #-}
parse :: CharParsing m => RadixTree -> m Text
parse r0 = go r0
parse = go
where
go r = case r of
RadixAccept l nodes
......@@ -148,6 +184,7 @@ parse r0 = go r0
RadixSkip nodes -> asum (V.map parseRadixNode nodes)
{-# INLINE parseRadixNode #-}
parseRadixNode (RadixNode prefix tree) =
try (text prefix *> go tree)
parseRadixNode (RadixNode prefix tree)
| T.null prefix = go tree
| otherwise = try (text prefix *> go tree)
......@@ -13,17 +13,24 @@ import Test.Tasty.SmallCheck
sortOnLengthDesc :: [T.Text] -> [T.Text]
sortOnLengthDesc = sortOn (negate . T.length)
naiiveSameAsRadix :: Monad m => Property m
naiiveSameAsRadix =
naiiveSameAsRadix :: Monad m => Bool -> Property m
naiiveSameAsRadix doCompress =
forAll $ \(alternativesS :: NonEmpty (NonEmpty Char))
(textS :: NonEmpty Char) ->
let
alternatives = sortOnLengthDesc (map T.pack (coerce alternativesS))
rtree =
if doCompress
then R.compressBy (T.concat alternatives) (R.fromFoldable alternatives)
else R.fromFoldable alternatives
text = T.pack (coerce textS)
in
parseOnly (choice (map string alternatives)) text ==
parseOnly (R.parse (R.fromFoldable alternatives)) text
parseOnly (R.parse rtree) text
main :: IO ()
main = defaultMain
(testProperty "naiive parsing has the same result as radix tree parsing" naiiveSameAsRadix)
main = defaultMain $ testGroup "radixtree"
[ testProperty "naiive parsing has the same result as radix tree parsing" (naiiveSameAsRadix False)
, testProperty "naiive parsing has the same result as radix tree parsing when compressed"
(naiiveSameAsRadix True)
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment