From d2d8bb59828ce1617b5b64adda78e50ae4019472 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 10:11:12 +1100 Subject: [PATCH 01/10] Travis: test multiple LTS releases --- .ghci | 1 + .stylish-haskell.yaml | 2 +- .travis.yml | 48 ++++++++++++++++++++++++++++++++++++++++++- stack.yaml | 7 +++---- 4 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 .ghci diff --git a/.ghci b/.ghci new file mode 100644 index 0000000..b9f2a7d --- /dev/null +++ b/.ghci @@ -0,0 +1 @@ +:set -ilib diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml index 56c1a1c..e48c5d5 100644 --- a/.stylish-haskell.yaml +++ b/.stylish-haskell.yaml @@ -28,7 +28,7 @@ steps: # - none: Do not perform any alignment. # # Default: global. - align: none + align: group # Language pragmas - language_pragmas: diff --git a/.travis.yml b/.travis.yml index 370915f..5d3061e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,54 @@ +# Include Haskell tools language: haskell ghc: 7.8 + +# Use the docker infrastructure +sudo: false + +# Cache the installed Haskell stuff +cache: + directories: + - $HOME/.stack + notifications: email: on_success: change on_failure: change + +# The following enables several GHC versions to be tested; often it's enough to +# test only against the last release in a major GHC version. Feel free to omit +# lines listings versions you don't need/want testing for. +env: + global: + - PKG_CONFIG_PATH=$HOME/.stack/local/lib/pkgconfig + - LD_LIBRARY_PATH=$HOME/.stack/local/lib + - LD_RUN_PATH=$HOME/.stack/local/lib + - CFLAGS=-I$HOME/.stack/local/include + - LDFLAGS=-L$HOME/.stack/local/lib + matrix: + - STACK_RESOLVER=lts-3 + - STACK_RESOLVER=lts-5 + - STACK_RESOLVER=lts-6 + +# Download the latest stack command. before_install: - - cabal sandbox init + - travis_retry wget https://www.stackage.org/stack/linux-x86_64 + - tar -xvf linux-x86_64 + - mv stack-*-linux-x86_64/stack stack + +install: + - echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]" + +# Here starts the actual work to be performed for the package under test; any +# command which exits with a non-zero exit code causes the build to fail. +script: + - cabal check + - cabal sdist + - export SRC=$(cabal info . | awk '{print $2;exit}') + - tar -xzf "dist/$SRC.tar.gz" + - cd "$SRC" + - travis_retry ../stack --no-terminal --resolver $STACK_RESOLVER setup + - travis_retry ../stack --no-terminal --resolver $STACK_RESOLVER install --only-snapshot -j4 --verbosity info + - ../stack --no-terminal --resolver $STACK_RESOLVER build + - ../stack --no-terminal --resolver $STACK_RESOLVER haddock --no-haddock-deps + - ../stack --no-terminal --resolver $STACK_RESOLVER test diff --git a/stack.yaml b/stack.yaml index 02519c1..517b659 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,5 +1,4 @@ -flags: {} -packages: -- '.' +resolver: lts-6.7 extra-deps: [] -resolver: lts-2.18 +flags: {} +extra-package-dbs: [] From cec316d10497b8462f6871f39ee29e79818166b0 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 12:10:47 +1100 Subject: [PATCH 02/10] Progress on operations on packed vectors and chunks --- lib/Data/BitMap/Roaring.hs | 142 ++++++++------- lib/Data/BitMap/Roaring/Chunk.hs | 242 +++++++++++++++++--------- lib/Data/BitMap/Roaring/Chunk/High.hs | 94 ++++++++++ lib/Data/BitMap/Roaring/Chunk/Low.hs | 100 +++++++++++ lib/Data/BitMap/Roaring/Utility.hs | 62 +++++-- rawr.cabal | 23 +-- test/properties.hs | 18 +- 7 files changed, 496 insertions(+), 185 deletions(-) create mode 100644 lib/Data/BitMap/Roaring/Chunk/High.hs create mode 100644 lib/Data/BitMap/Roaring/Chunk/Low.hs diff --git a/lib/Data/BitMap/Roaring.hs b/lib/Data/BitMap/Roaring.hs index 022ed71..7bb26d8 100644 --- a/lib/Data/BitMap/Roaring.hs +++ b/lib/Data/BitMap/Roaring.hs @@ -1,23 +1,23 @@ -- | --- Module: Data.BitMap.Roaring +-- Module: Data.BitMap.Roaring -- Description: Compressed bitmap data structure with good performance. --- Copyright: (c) Thomas Sutton 2015 --- License: BSD3 --- Maintainer: me@thomas-sutton.id.au --- Stability: experimental +-- Copyright: (c) Thomas Sutton 2015 +-- License: BSD3 +-- Maintainer: me@thomas-sutton.id.au +-- Stability: experimental -- -- A compressed bitmaps with good space and time performance. -- -- These modules are intended to be imported qualified, to avoid name clashes -- with Prelude functions, e.g. -- --- > import Data.BitMap.Roaring (BitMap) +-- > import Data.BitMap.Roaring (BitMap) -- > import qualified Data.BitMap.Roaring as Roaring -- --- The implementation paritions values into chunks based on their high 16 bits. --- Chunks are represented differently based on their density: low-density --- chunks are stored as packed arrays of the low-order bits while high-density --- chunks are stored as bit vectors. +-- The implementation partitions values into chunks based on their +-- high 16 bits. Chunks are represented according to their density: +-- low-density chunks are stored as packed arrays of the low-order +-- bits while high-density chunks are stored as bit vectors. -- -- * Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, -- \"/Better bitmap performance with Roaring bitmaps/\", Software: Practice @@ -25,20 +25,47 @@ -- module Data.BitMap.Roaring where -import Data.Monoid -import Data.Vector (Vector) +import Data.Bits +import Data.Monoid +import Data.Vector (Vector) import qualified Data.Vector as V -import Data.Word +import Data.Word -import Data.BitMap.Roaring.Chunk -import Data.BitMap.Roaring.Utility +import Data.BitMap.Roaring.Chunk (Chunk) +import qualified Data.BitMap.Roaring.Chunk as C +import Data.BitMap.Roaring.Utility -- | A set of bits. data BitMap = BitMap (Vector Chunk) - deriving (Show) + deriving (Show, Eq) type Key = Word32 +instance Bits BitMap where + bitSize _ = 2^32 + bitSizeMaybe _ = Just (2^32) + isSigned _ = False + + (.&.) = intersection + (.|.) = union + xor = const -- TODO + complement a = a + + shift x i = x + rotate x i = x + + zeroBits = BitMap V.empty + bit i = singleton (fromIntegral i) + + popCount x = 0 + testBit x i = False + setBit x i = x + clearBit x i = x + complementBit x i = x + +instance FiniteBits BitMap where + finiteBitSize _ = 2^32 + -- * Query -- | /O(1)./ Is the set empty? @@ -47,15 +74,15 @@ null (BitMap v) = V.null v -- | Cardinality of the set. size :: BitMap -> Int -size (BitMap cs) = V.sum $ V.map chunkCardinality cs +size (BitMap cs) = V.foldl' (\s c-> s + popCount c) 0 cs -- | Is the value a member of the set? member :: Key -> BitMap -> Bool member k (BitMap cs) = let (i,b) = splitWord k - in case vLookup (\c -> i == chunkIndex c) cs of - Nothing -> False - Just (_,c) -> chunkGet b c + in case vLookup (\c -> i == C.chunkIndex c) cs of + Nothing -> False + Just (_, c) -> C.chunkCheck b c -- | Is this a subset? -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2. @@ -88,8 +115,8 @@ singleton k = insert k empty insert :: Key -> BitMap -> BitMap insert k (BitMap v) = let (i,b) = splitWord k - f = Just . maybe (chunkNew i b) (chunkSet b) - v' = vAlter f (\c -> i == chunkIndex c) v + f = Just . maybe (C.chunkNew i b) (C.chunkSet b) + v' = vAlter f (\c -> i == C.chunkIndex c) v in BitMap v' -- | Delete a value in the set. @@ -98,13 +125,13 @@ insert k (BitMap v) = delete :: Key -> BitMap -> BitMap delete k (BitMap v) = let (i,b) = splitWord k - v' = vAlter (f b) (\c -> i == chunkIndex c) v + v' = vAlter (f b) (\c -> i == C.chunkIndex c) v in BitMap v' where f _ Nothing = Nothing f b (Just c) = - let c' = chunkClear b c - in if 0 == chunkCardinality c' + let c' = C.chunkClear b c + in if 0 == popCount c' then Nothing else Just c' @@ -112,20 +139,33 @@ delete k (BitMap v) = -- | The union of two sets. union :: BitMap -> BitMap -> BitMap -union (BitMap cs) (BitMap ds) = BitMap $ mergeWith f cs ds +union (BitMap cs) (BitMap ds) = + BitMap (vMergeWith merge cs ds) where - f :: Maybe Chunk -> Maybe Chunk -> Maybe Chunk - f Nothing b = b - f a Nothing = a - f (Just a) (Just b) = Just $ mergeChunks a b - --- | The difference between two sets. -difference :: BitMap -> BitMap -> BitMap -difference _ _ = empty + merge :: Maybe Chunk -> Maybe Chunk -> Maybe Chunk + merge (Just a) (Just b) = + let c = a `C.union` b + in if C.null c then Nothing else Just c + merge (Just a) Nothing = Just a + merge Nothing (Just b) = Just b + merge Nothing Nothing = Nothing -- | The intersection of two sets. intersection :: BitMap -> BitMap -> BitMap -intersection _ _ = empty +intersection (BitMap as) (BitMap bs) = + BitMap (vMergeWith merge as bs) + where + merge (Just a) (Just b) = + let c = a `C.intersection` b + in if C.null c then Nothing else Just c + merge _ _ = Nothing + +-- | The difference between two sets. +difference :: BitMap -> BitMap -> BitMap +difference (BitMap as) (BitMap bs) = + BitMap (vMergeWith merge as bs) + where + merge _ _ = Nothing -- * Conversion @@ -147,7 +187,7 @@ toAscList :: BitMap -> [Key] toAscList (BitMap cs) = work cs [] where work cs' l | V.null cs' = l - | otherwise = let c = chunkToBits $ V.head cs' + | otherwise = let c = C.toList $ V.head cs' cs'' = V.tail cs' in work cs'' (l <> c) @@ -163,33 +203,3 @@ toDescList = reverse . toAscList -- TODO(thsutton) Implement fromAscList :: [Key] -> BitMap fromAscList _ = empty - --- * Utility - - --- | Merge two 'Vector's of 'Chunk's. --- --- Precondition: Both vectors are sorted by 'chunkIndex'. --- Postcondition: Output vector sorted by 'chunkIndex'. --- Postcondition: length(output) >= max(length(a),length(b)) -mergeWith - :: (Maybe Chunk -> Maybe Chunk -> Maybe Chunk) - -- ^ Merge two chunks with the same index. - -> Vector Chunk - -> Vector Chunk - -> Vector Chunk -mergeWith f v1 v2 - | V.null v1 = v2 - | V.null v2 = v1 - | otherwise = - let a = V.head v1 - b = V.head v2 - in work a v1 b v2 - where - -- Note: we take the head and the *entirety* of each vector; NOT the head - -- and the tail! - work :: Chunk -> Vector Chunk -> Chunk -> Vector Chunk -> Vector Chunk - work a as b bs = case a `compare` b of - LT -> a `V.cons` mergeWith f (V.tail as) bs - EQ -> mergeChunks a b `V.cons` mergeWith f (V.tail as) (V.tail bs) - GT -> b `V.cons` mergeWith f as (V.tail bs) diff --git a/lib/Data/BitMap/Roaring/Chunk.hs b/lib/Data/BitMap/Roaring/Chunk.hs index 43d50b6..ac4a57c 100644 --- a/lib/Data/BitMap/Roaring/Chunk.hs +++ b/lib/Data/BitMap/Roaring/Chunk.hs @@ -1,113 +1,189 @@ module Data.BitMap.Roaring.Chunk where -import Control.Applicative -import Data.Bits -import Data.Monoid -import qualified Data.Vector.Unboxed as U -import Data.Word +import Control.Applicative +import Control.Monad +import Control.Monad.Trans.State +import Data.Bits +import Data.Function +import Data.Monoid +import qualified Data.Vector.Algorithms.Heap as S +import qualified Data.Vector.Unboxed as U +import qualified Data.Vector.Unboxed.Mutable as M +import Data.Word -import Data.BitMap.Roaring.Utility +import Data.BitMap.Roaring.Chunk.High (HDVector (..)) +import qualified Data.BitMap.Roaring.Chunk.High as H +import Data.BitMap.Roaring.Chunk.Low (LDVector (..)) +import qualified Data.BitMap.Roaring.Chunk.Low as L +import Data.BitMap.Roaring.Utility + +-- | Chunks are identified by a 'Word16' index. +type Index = Word16 -- | A chunk representing the keys which share particular 16 high-order bits. -- -- Chunk with low density (i.e. no more than 4096 members) are represented as a -- sorted array of their low 16 bits. Chunks with high density (i.e. more than -- 4096 members) are represented by a bit vector. --- --- Both high and low density chunks include the high order bits shared by all --- entries in the chunk, and the cardinality of the chunk. data Chunk = LowDensity - { chunkIndex :: Word16 - , chunkCardinality :: Int - , chunkArray :: U.Vector Word16 + { chunkIndex :: Index + , chunkArray :: LDVector } | HighDensity - { chunkIndex :: Word16 - , chunkCardinality :: Int - , chunkBits :: U.Vector Word64 + { chunkIndex :: Index + , chunkBits :: HDVector } - deriving (Eq,Show) + deriving (Eq, Show) -- | 'Chunk's are ordered by their index. instance Ord Chunk where - compare c1 c2 = compare (chunkIndex c1) (chunkIndex c2) + compare = compare `on` chunkIndex + +instance Bits Chunk where + bitSize _ = 2^16 + bitSizeMaybe _ = Just (2^16) + isSigned _ = False + + (.|.) = union + (.&.) = intersection + + testBit (LowDensity ix a) i = L.testBit a (fromIntegral i) + testBit (HighDensity ix a) i = H.testBit a (fromIntegral i) + + bit i = singleton (fromIntegral i) + + popCount (LowDensity ix a) = L.popCount a + popCount (HighDensity ix a) = H.popCount a + +singleton :: Word32 -> Chunk +singleton i = + let (ix, b) = splitWord i + in chunkNew ix b -- | Create a new chunk. -chunkNew :: Word16 -> Word16 -> Chunk -chunkNew i v = LowDensity i 1 (U.singleton v) +chunkNew :: Index -> Word16 -> Chunk +chunkNew i v = LowDensity i (L.singleton v) + +-- | Add a word into a chunk. +set :: Word16 -> Chunk -> Chunk +set b c@(HighDensity i bs) + | H.testBit bs b = c + | otherwise = HighDensity i (H.setBit bs b) +set b c@(LowDensity i bs) + | L.testBit bs b = c + | otherwise = repackChunk $ LowDensity i (L.setBit bs b) --- | Extract the 'Word32's stored in a 'Chunk'. -chunkToBits :: Chunk -> [Word32] -chunkToBits (LowDensity i _ a) = combineWord i <$> U.toList a -chunkToBits (HighDensity i _ a) = U.toList . U.concatMap f $ U.indexed a +toList :: Chunk -> [Word32] +toList (LowDensity i bs) = map (combineWord i) $ L.toList bs +toList (HighDensity i bs) = map (combineWord i) $ H.toList bs + +bits :: Word64 -> [Word16] +bits w = foldr abit [] [0..63] where - f :: (Int, Word64) -> U.Vector Word32 - f (_p,_bs) = U.map (combineWord i) U.empty + abit :: Int -> [Word16] -> [Word16] + abit i l = if testBit w i + then (fromIntegral i) : l + else l +chunkCheck :: Word16 -> Chunk -> Bool +chunkCheck w (LowDensity _ bs) = L.testBit bs w +chunkCheck w (HighDensity _ bs) = H.testBit bs w --- | Get a bit from a 'Chunk'. -chunkGet :: Word16 -> Chunk -> Bool -chunkGet v chunk = case chunk of - LowDensity _ _ a -> U.elem v a - HighDensity{} -> False -- TODO(thsutton) implement +chunkSet :: Word16 -> Chunk -> Chunk +chunkSet w c@(HighDensity i bs) + | H.testBit bs w = c + | otherwise = HighDensity i (H.setBit bs w) +chunkSet w c@(LowDensity i bs) + | L.testBit bs w = c + | otherwise = LowDensity i (L.setBit bs w) --- | Set a bit in a chunk. +chunkClear :: Word16 -> Chunk -> Chunk +chunkClear w c + | chunkCheck w c = + case c of + LowDensity i bs -> LowDensity i (L.clearBit bs w) + HighDensity i bs -> HighDensity i (H.clearBit bs w) + | otherwise = c + +-- | Take the union of two 'Chunk's. -- --- TODO(thsutton) Promote LowDensity chunk when it rises above threshold. -chunkSet :: Word16 -> Chunk -> Chunk -chunkSet v chunk = case chunk of - LowDensity i c a -> LowDensity i c (setL v a) - HighDensity i c a -> HighDensity i c (setH v a) +-- Postcondition: popCount (a `union` b) >= (popCount a) + (popCount b) +union :: Chunk -> Chunk -> Chunk +union a b + | chunkIndex a == chunkIndex b = work a b + | otherwise = error "Cannot take union of chunks with different indexes!" where - setL :: Word16 -> U.Vector Word16 -> U.Vector Word16 - setL i a = uvInsert a i - setH :: Word16 -> U.Vector Word64 -> U.Vector Word64 - setH _ a = a -- TODO(thsutton) implement + work (HighDensity i as) (HighDensity _ bs) = + HighDensity i (as `H.union` bs) + work (HighDensity i as) (LowDensity _ bs) = + HighDensity i (as `H.union` toHDVector bs) + work (LowDensity i as) (HighDensity _ bs) = + HighDensity i (toHDVector as `H.union` bs) + work (LowDensity i as) (LowDensity _ bs) = + repackChunk $ LowDensity i (as `L.union` bs) --- | Clear a bit in a 'Chunk'. +-- | Take the intersection of two 'Chunk's. -- --- TODO(thsutton) Demote HighDensity chunk when it falls below threshold. -chunkClear :: Word16 -> Chunk -> Chunk -chunkClear v chunk = case chunk of - LowDensity i _ a -> - let a' = clearL v a - c' = U.length a' - in LowDensity i c' a' - HighDensity i _ a -> - let a' = clearH v a - c' = U.sum $ U.map popCount a' - in HighDensity i c' a' +-- TODO: Maintain the density invariant. +intersection a b + | chunkIndex a == chunkIndex b = work a b + | otherwise = error "Cannot take intersection of chunks with different indexes!" where - clearL :: Word16 -> U.Vector Word16 -> U.Vector Word16 - clearL i a = uvDelete a i - clearH :: Word16 -> U.Vector Word64 -> U.Vector Word64 - clearH _ a = a -- TODO(thsutton) implement - --- | Take the union of two 'Chunk's, raising an 'error' if they do not share an --- index. -mergeChunks :: Chunk -> Chunk -> Chunk -mergeChunks c1 c2 = - if chunkIndex c1 == chunkIndex c2 - then merge c1 c2 - else error "Attempting to merge incompatible chunks!" + work (LowDensity ia a) (LowDensity ib b) = + LowDensity ia (a `L.intersection` b) + work (HighDensity ia a) (LowDensity ib b) = + LowDensity ia (toLDVector a `L.intersection` b) + work (LowDensity ia a) (HighDensity ib b) = + LowDensity ia (a `L.intersection` toLDVector b) + work (HighDensity ia a) (HighDensity ib b) = + repackChunk $ HighDensity ia (a `H.intersection` b) + +xor :: Chunk -> Chunk -> Chunk +xor a b + | chunkIndex a == chunkIndex b = work a b + | otherwise = error "Cannot take xor of chunks with different indexes!" + where + work :: Chunk -> Chunk -> Chunk + work (LowDensity ia as) (LowDensity ib bs) = + repackChunk $ LowDensity ia (as `L.xor` bs) + work (LowDensity ia as) (HighDensity ib bs) = + repackChunk $ HighDensity ia (toHDVector as `H.xor` bs) + work (HighDensity ia as) (LowDensity ib bs) = + repackChunk $ HighDensity ia (as `H.xor` toHDVector bs) + work (HighDensity ia as) (HighDensity ib bs) = + repackChunk $ HighDensity ia (as `H.xor` bs) + +-- * Queries + +null :: Chunk -> Bool +null c = popCount c == 0 + +-- * Utility + +-- | Repack a 'Chunk' to enforce the density invariant. +repackChunk :: Chunk -> Chunk +repackChunk c@(LowDensity ix v) + | L.popCount v >= 4096 = HighDensity ix (toHDVector v) + | otherwise = c +repackChunk c@(HighDensity ix v) + | H.popCount v < 4096 = LowDensity ix (toLDVector v) + | otherwise = c + +-- | Pack a low-density vector into a high-density vector. +toHDVector :: LDVector -> HDVector +toHDVector (LDVector bs) = U.foldl' (\v b -> H.setBit v b) H.empty bs + +-- | Unpack a high-density vector to a low-density vector. +toLDVector :: HDVector -> LDVector +toLDVector v@(HDVector ws) = + let n = H.popCount v + bs = U.generate n (\i -> fromIntegral $ select i v) + in LDVector bs where - aPop :: U.Vector Word64 -> Int - aPop = U.sum . U.map popCount - aSet :: Word16 -> U.Vector Word64 -> U.Vector Word64 - aSet _i v = v - packA :: U.Vector Word16 -> U.Vector Word64 - packA _ = mempty - merge (HighDensity i _ a1) (HighDensity _ _ a2) = - let a' = U.zipWith (.|.) a1 a2 in HighDensity i (aPop a') a' - merge (HighDensity i _ ah) (LowDensity _ _ al) = - let a' = U.foldr' aSet ah al in HighDensity i (aPop a') a' - merge (LowDensity i _ al) (HighDensity _ _ ah) = - let a' = U.foldr' aSet ah al in HighDensity i (aPop a') a' - merge (LowDensity i _ a1) (LowDensity _ _ a2) = - let a' = vMerge a1 a2 - n' = U.length a' - -- TODO(thsutton): Is this eager enough? - in if n' <= 4096 - then LowDensity i n' a' - else HighDensity i n' (packA a') + -- | Select the nth set bit. + select :: Int -> HDVector -> Int + select i (HDVector v) = + let runningCount = evalState (U.mapM (\c -> modify (+ popCount c) >> get) v) 0 + (p,r) = U.span (\a -> a < i) runningCount + in -1 diff --git a/lib/Data/BitMap/Roaring/Chunk/High.hs b/lib/Data/BitMap/Roaring/Chunk/High.hs new file mode 100644 index 0000000..064ae2c --- /dev/null +++ b/lib/Data/BitMap/Roaring/Chunk/High.hs @@ -0,0 +1,94 @@ +-- | +module Data.BitMap.Roaring.Chunk.High where + +import Data.Bits ((.&.), (.|.)) +import qualified Data.Bits as B +import qualified Data.Vector.Algorithms.Heap as S +import qualified Data.Vector.Unboxed as U +import qualified Data.Vector.Unboxed.Mutable as M +import Data.Word + +-- | "High density" bits packed into a vector. +newtype HDVector = HDVector (U.Vector Word64) + deriving (Eq, Show) + +-- * Construct + +-- | Empty high-density vector. +empty :: HDVector +empty = + HDVector (U.replicate 1024 0) + +-- | New high-density vector with a single bit set. +singleton :: Word16 -> HDVector +singleton i = + let (wi, bi) = (fromIntegral i) `divMod` 64 + mk i = if i == wi + then B.bit bi + else 0 + in HDVector (U.generate 1024 mk) + +-- * Modify + +-- | Set a bit in a high-density vector. +setBit :: HDVector -> Word16 -> HDVector +setBit (HDVector bs) ix = + let (w, b) = (fromIntegral ix :: Int) `divMod` 64 + in HDVector (U.modify (\v -> M.read v w >>= M.write v w . flip B.setBit b) bs) + +-- | Clear a bit in a high-density vector. +clearBit :: HDVector -> Word16 -> HDVector +clearBit (HDVector bs) ix = + let (w, b) = (fromIntegral ix :: Int) `divMod` 64 + in HDVector (U.modify (\v -> M.read v w >>= M.write v w . flip B.clearBit b) bs) + +-- | Flip a bit in a high-density vector. +-- +-- TODO Specialise implementation. +complementBit :: HDVector -> Word16 -> HDVector +complementBit v ix + | testBit v ix = clearBit v ix + | otherwise = setBit v ix + +-- * Operators + +-- | Take the intersection of two high-density vectors. +-- +-- NOTE: Callers must check and enforce the density invariant. +intersection :: HDVector -> HDVector -> HDVector +intersection (HDVector as) (HDVector bs) = + HDVector (U.zipWith (.&.) as bs) + +-- | Take the union of two high-density vectors. +union :: HDVector -> HDVector -> HDVector +union (HDVector as) (HDVector bs) = + HDVector (U.zipWith (.|.) as bs) + +-- | Take the exclusive or of two high-density vectors. +-- +-- NOTE: Callers must check and enforce the density invariant. +xor :: HDVector -> HDVector -> HDVector +xor (HDVector as) (HDVector bs) = + HDVector (U.zipWith B.xor as bs) + +-- * Query + +-- | Test a bit in a high-density vector. +testBit :: HDVector -> Word16 -> Bool +testBit (HDVector bs) ix = + let (wi, bi) = (fromIntegral ix) `divMod` 64 + in B.testBit (bs U.! wi) bi + +-- | Query number of set bits. +popCount :: HDVector -> Int +popCount (HDVector v) = U.foldl' (\a b -> a + B.popCount b) 0 v + +-- * Conversions + +-- | Unpack the 'Word16's set in a 'HDVector'. +toList :: HDVector -> [Word16] +toList (HDVector v) = [] + where + unpackWord :: Word64 -> [Word16] + unpackWord w = map fst . filter snd $ map (\bi -> (fromIntegral bi, B.testBit w bi)) [0..63] + diff --git a/lib/Data/BitMap/Roaring/Chunk/Low.hs b/lib/Data/BitMap/Roaring/Chunk/Low.hs new file mode 100644 index 0000000..5449ec1 --- /dev/null +++ b/lib/Data/BitMap/Roaring/Chunk/Low.hs @@ -0,0 +1,100 @@ +module Data.BitMap.Roaring.Chunk.Low where + +import qualified Data.Vector.Algorithms.Heap as S +import qualified Data.Vector.Unboxed as U +import qualified Data.Vector.Unboxed.Mutable as M +import Data.Word + +import Data.BitMap.Roaring.Utility + +-- | "Low density" bits stored in a vector. +newtype LDVector = LDVector { unwrapVector :: U.Vector Word16 } + deriving (Eq, Show) + +-- * Constructors + +-- | An empty low-density vector. +empty :: LDVector +empty = LDVector U.empty + +-- | A singleton low-density vector. +singleton :: Word16 -> LDVector +singleton b = LDVector (U.singleton b) + +-- * Modifying vectors + +-- | Set a word in a low-density vector. +-- +-- TODO: Implement in O(log n). +setBit :: LDVector -> Word16 -> LDVector +setBit lv@(LDVector bs) ix + | testBit lv ix = lv + | otherwise = LDVector (U.modify (S.sortBy (compare)) $ U.cons ix bs) + +-- | Clear a word in a low-density vector. +-- +-- Pre-condition: The word is present in the vector. +-- Post-condition: The word is not present in the vector. +-- +-- TODO: Implement in O(log n) +clearBit :: LDVector -> Word16 -> LDVector +clearBit (LDVector bs) ix = + LDVector (U.filter (/= ix) bs) + +-- | Flip a bit in a low-density vector. +-- +-- TODO Implement in O(log n) +complementBit :: LDVector -> Word16 -> LDVector +complementBit lv ix + | testBit lv ix = clearBit lv ix + | otherwise = setBit lv ix + +-- * Queries + +-- | Check whether a word is present in a low-density vector. +-- +-- TODO: Implement in O(log n) +testBit :: LDVector -> Word16 -> Bool +testBit (LDVector bs) ix = U.elem ix bs + +-- | Query the number of set bits in a low-density vector. +popCount :: LDVector -> Int +popCount (LDVector v) = U.length v + +-- * Operators + +-- | Take the intersection of two low-density vectors. +intersection :: LDVector -> LDVector -> LDVector +intersection (LDVector as) (LDVector bs) = + LDVector (vMergeWith merge as bs) + where + merge (Just a) (Just b) = Just a + merge (Just a) Nothing = Nothing + merge Nothing (Just b) = Nothing + merge Nothing Nothing = Nothing + +-- | Take the union of two low-density vectors. +-- +-- NOTE: Callers must check and enforce the density invariant. +union :: LDVector -> LDVector -> LDVector +union (LDVector v1) (LDVector v2) = + LDVector (vMergeWith merge v1 v2) + where + merge (Just a) (Just b) = Just a + merge Nothing a = a + merge a Nothing = a + +-- | Take the exclusive-or of two low-density vectors. +xor :: LDVector -> LDVector -> LDVector +xor (LDVector as) (LDVector bs) = + LDVector (vMergeWith merge as bs) + where + merge a Nothing = a + merge Nothing b = b + merge _ _ = Nothing + +-- * Conversions + +-- | Unpack the 'Word16's set in a 'HDVector'. +toList :: LDVector -> [Word16] +toList (LDVector v) = U.toList v diff --git a/lib/Data/BitMap/Roaring/Utility.hs b/lib/Data/BitMap/Roaring/Utility.hs index f19cd2f..3d3eaac 100644 --- a/lib/Data/BitMap/Roaring/Utility.hs +++ b/lib/Data/BitMap/Roaring/Utility.hs @@ -1,12 +1,13 @@ module Data.BitMap.Roaring.Utility where -import Data.Bits -import Data.Convertible -import Data.Monoid -import qualified Data.Vector as V +import Data.Bits +import Data.Convertible +import Data.Monoid +import qualified Data.Vector as V import qualified Data.Vector.Algorithms.Heap as VAH -import qualified Data.Vector.Unboxed as U -import Data.Word +import qualified Data.Vector.Generic as G +import qualified Data.Vector.Unboxed as U +import Data.Word -- * Words @@ -24,17 +25,44 @@ combineWord h l = rotate (convert h) (-16) .|. convert l -- * Vectors -vMerge :: (U.Unbox e, Ord e) => U.Vector e -> U.Vector e -> U.Vector e -vMerge as bs - | U.null as = bs - | U.null bs = as +-- | Merge two sorted vectors. +-- +-- The right element of a pair which are equal according to 'compare' +-- will be discarded. +-- +-- Postcondition: sorted (vMerge xs ys) +-- Postcondition: length (vMerge xs ys) >= max (length xs) (length ys) +vMerge :: (G.Vector vector e, Ord e) => vector e -> vector e -> vector e +vMerge = vMergeWith merge + where + merge :: Maybe e -> Maybe e -> Maybe e + merge Nothing a = a + merge a Nothing = a + merge (Just a) (Just b) = Just a + +-- | Merge two sorted vectors. +vMergeWith + :: (G.Vector vector e, Ord e) + => (Maybe e -> Maybe e -> Maybe e) + -> vector e + -> vector e + -> vector e +vMergeWith f as bs + | G.null as = bs + | G.null bs = as | otherwise = - let a = U.head as - b = U.head bs + let a = G.head as + b = G.head bs in case a `compare` b of - LT -> a `U.cons` vMerge (U.tail as) bs - EQ -> a `U.cons` vMerge (U.tail as) (U.tail bs) - GT -> b `U.cons` vMerge as (U.tail bs) + LT -> case f (Just a) Nothing of + Nothing -> vMergeWith f (G.tail as) bs + Just r -> r `G.cons` vMergeWith f (G.tail as) bs + EQ -> case f (Just a) (Just b) of + Nothing -> vMergeWith f (G.tail as) (G.tail bs) + Just r -> r `G.cons` vMergeWith f (G.tail as) (G.tail bs) + GT -> case f Nothing (Just b) of + Nothing -> vMergeWith f as (G.tail bs) + Just r -> r `G.cons` vMergeWith f as (G.tail bs) -- | Alter the 'Chunk' with the given index in a vector of 'Chunk's. -- @@ -58,7 +86,9 @@ vAlter f p v = case vLookup p v of -- | Search for a 'Chunk' with a specific index. -- --- TODO(thsutton) better search algorithm. +-- /O(log n)/ +-- +-- TODO(thsutton) Make the complexity claim be true. vLookup :: Ord a => (a -> Bool) -> V.Vector a -> Maybe (Int, a) vLookup p v = case V.findIndex p v of Nothing -> Nothing diff --git a/rawr.cabal b/rawr.cabal index 674fd6c..8581e7f 100644 --- a/rawr.cabal +++ b/rawr.cabal @@ -7,7 +7,7 @@ license: BSD3 license-file: LICENSE author: Thomas Sutton maintainer: me@thomas-sutton.id.au --- copyright: +copyright: (c) 2015 Thomas Sutton category: Data build-type: Simple extra-source-files: README.md @@ -20,16 +20,17 @@ source-repository HEAD library default-language: Haskell2010 hs-source-dirs: lib - exposed-modules: - Data.BitMap.Roaring - Data.BitMap.Roaring.Chunk - Data.BitMap.Roaring.Utility - build-depends: - base >=4.7 && <4.8 - , convertible - , containers - , vector - , vector-algorithms + exposed-modules: Data.BitMap.Roaring + Data.BitMap.Roaring.Chunk + Data.BitMap.Roaring.Chunk.High + Data.BitMap.Roaring.Chunk.Low + Data.BitMap.Roaring.Utility + build-depends: base >=4.5 && <4.10 + , containers + , convertible + , transformers + , vector + , vector-algorithms test-suite properties type: exitcode-stdio-1.0 diff --git a/test/properties.hs b/test/properties.hs index 4bb73d4..63b31db 100644 --- a/test/properties.hs +++ b/test/properties.hs @@ -2,15 +2,15 @@ module Main where -import Control.Monad -import Data.List -import Data.Monoid -import qualified Data.Set as S -import Data.Word -import System.Exit -import Test.QuickCheck - -import qualified Data.BitMap.Roaring as R +import Control.Monad +import Data.List +import Data.Monoid +import qualified Data.Set as S +import Data.Word +import System.Exit +import Test.QuickCheck + +import qualified Data.BitMap.Roaring as R import qualified Data.BitMap.Roaring.Utility as R -- * Check utility functions From 482198ad2cd7343e5ce236444b0287b621041581 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 19:50:29 +1100 Subject: [PATCH 03/10] Add some properties for intersection on chunk and bitmap --- test/properties.hs | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/test/properties.hs b/test/properties.hs index 63b31db..a18799a 100644 --- a/test/properties.hs +++ b/test/properties.hs @@ -11,6 +11,7 @@ import System.Exit import Test.QuickCheck import qualified Data.BitMap.Roaring as R +import qualified Data.BitMap.Roaring.Chunk as C import qualified Data.BitMap.Roaring.Utility as R -- * Check utility functions @@ -40,6 +41,10 @@ prop_size_empty = 0 == R.size R.empty prop_size_singleton :: Word32 -> Bool prop_size_singleton i = 1 == R.size (R.singleton i) +-- | Sets have size of list length. +prop_size_fromList :: NonEmptyList Word32 -> Bool +prop_size_fromList (NonEmpty is) = length (nub is) == R.size (R.fromList is) + -- | Singletons have size 1, then size 0 when deleted. prop_size_delete_singleton :: Word32 -> Bool prop_size_delete_singleton i = @@ -55,17 +60,17 @@ prop_null_delete_singleton i = -- | 'toAscList' produces sorted lists. prop_toAscList_sorted :: NonEmptyList Word32 -> Bool prop_toAscList_sorted (NonEmpty l) = - let l' = S.toAscList (S.fromList l) + let l' = R.toAscList (R.fromList l) in l' == sort l' -- | 'toDescList' produces sorted lists. prop_toDescList_sorted :: NonEmptyList Word32 -> Bool prop_toDescList_sorted (NonEmpty l) = - let l' = S.toDescList (S.fromList l) + let l' = R.toDescList (R.fromList l) in l' == sortBy (flip compare) l' --- | "Data.IntSet" and "Data.BitMap.Roaring" agree about a set when building --- from the same list of inputs. +-- | "Data.Set" and "Data.BitMap.Roaring" agree about a set when +-- building from the same list of inputs. prop_intset_roaring_agree :: NonEmptyList Word32 -> Bool prop_intset_roaring_agree (NonEmpty l) = let r = R.toAscList $ R.fromList l @@ -78,7 +83,7 @@ prop_map_elem_fromList (NonEmpty l) = let r = R.fromList l in all (`R.member` r) l --- | union s1 s2 == fromList (toList s1 <> toList s2) +-- | union (fromList s1) (fromList s2) == fromList (s1 <> s2) prop_union_fromList :: NonEmptyList Word32 -> NonEmptyList Word32 -> Bool prop_union_fromList (NonEmpty as) (NonEmpty bs) = let q = R.fromList as @@ -86,6 +91,24 @@ prop_union_fromList (NonEmpty as) (NonEmpty bs) = qr = R.fromList (as <> bs) in (R.toAscList qr == R.toAscList (q `R.union` r)) +prop_intersection_fromList :: NonEmptyList Word32 -> NonEmptyList Word32 -> Bool +prop_intersection_fromList (NonEmpty al) (NonEmpty bl) = + let am = R.fromList al + bm = R.fromList bl + im = R.intersection am bm + as = S.fromList al + bs = S.fromList bl + is = S.intersection as bs + in (R.toList im) == (S.toList is) + +prop_ld_chunk_intersection :: NonEmptyList Word16 -> NonEmptyList Word16 -> Bool +prop_ld_chunk_intersection (NonEmpty al) (NonEmpty bl) = + let is = S.intersection (S.fromList al) (S.fromList bl) + cc = C.chunkClear 0 (C.chunkNew 0 0) + fromList = foldl' (\c b -> C.chunkSet b c) cc + ms = C.intersection (fromList al) (fromList bl) + in (C.toList ms) == (map fromIntegral $ S.toList is) + -- -- Use Template Haskell to automatically run all of the properties above. -- From 3bf23b129d6e06aabf9755befc826bcafc49be7f Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 19:51:38 +1100 Subject: [PATCH 04/10] Fix vMergeWith bug: process all elements, even when short-circuiting null vectors --- lib/Data/BitMap/Roaring/Utility.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Data/BitMap/Roaring/Utility.hs b/lib/Data/BitMap/Roaring/Utility.hs index 3d3eaac..ffb8805 100644 --- a/lib/Data/BitMap/Roaring/Utility.hs +++ b/lib/Data/BitMap/Roaring/Utility.hs @@ -42,14 +42,14 @@ vMerge = vMergeWith merge -- | Merge two sorted vectors. vMergeWith - :: (G.Vector vector e, Ord e) - => (Maybe e -> Maybe e -> Maybe e) - -> vector e + :: (G.Vector vector e, G.Vector vector r, Ord e) + => (Maybe e -> Maybe e -> Maybe r) -> vector e -> vector e + -> vector r vMergeWith f as bs - | G.null as = bs - | G.null bs = as + | G.null as = G.concatMap (\e -> maybe G.empty G.singleton $ f Nothing (Just e)) bs + | G.null bs = G.concatMap (\e -> maybe G.empty G.singleton $ f (Just e) Nothing) as | otherwise = let a = G.head as b = G.head bs From 0173e44ce09af3f333cbf2d53eda6d8602ace940 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 20:28:25 +1100 Subject: [PATCH 05/10] HLint --- HLint.hs | 1 + lib/Data/BitMap/Roaring/Chunk.hs | 10 +++++----- lib/Data/BitMap/Roaring/Chunk/High.hs | 10 +++++----- lib/Data/BitMap/Roaring/Chunk/Low.hs | 2 +- lib/Data/BitMap/Roaring/Utility.hs | 2 +- rawr.cabal | 12 ++++++++++-- stack.yaml | 2 ++ test/check-hlint.hs | 17 +++++++++++++++++ test/properties.hs | 8 ++++---- 9 files changed, 46 insertions(+), 18 deletions(-) create mode 100644 test/check-hlint.hs diff --git a/HLint.hs b/HLint.hs index 3ab0a2e..83f8b90 100644 --- a/HLint.hs +++ b/HLint.hs @@ -9,3 +9,4 @@ import "hint" HLint.Generalise ignore "Use if" ignore "Use liftM" +ignore "Use &&&" diff --git a/lib/Data/BitMap/Roaring/Chunk.hs b/lib/Data/BitMap/Roaring/Chunk.hs index ac4a57c..15a5c7e 100644 --- a/lib/Data/BitMap/Roaring/Chunk.hs +++ b/lib/Data/BitMap/Roaring/Chunk.hs @@ -75,15 +75,15 @@ set b c@(LowDensity i bs) | otherwise = repackChunk $ LowDensity i (L.setBit bs b) toList :: Chunk -> [Word32] -toList (LowDensity i bs) = map (combineWord i) $ L.toList bs -toList (HighDensity i bs) = map (combineWord i) $ H.toList bs +toList (LowDensity i bs) = combineWord i <$> L.toList bs +toList (HighDensity i bs) = combineWord i <$> H.toList bs bits :: Word64 -> [Word16] bits w = foldr abit [] [0..63] where abit :: Int -> [Word16] -> [Word16] abit i l = if testBit w i - then (fromIntegral i) : l + then fromIntegral i : l else l chunkCheck :: Word16 -> Chunk -> Bool @@ -172,7 +172,7 @@ repackChunk c@(HighDensity ix v) -- | Pack a low-density vector into a high-density vector. toHDVector :: LDVector -> HDVector -toHDVector (LDVector bs) = U.foldl' (\v b -> H.setBit v b) H.empty bs +toHDVector (LDVector bs) = U.foldl' H.setBit H.empty bs -- | Unpack a high-density vector to a low-density vector. toLDVector :: HDVector -> LDVector @@ -185,5 +185,5 @@ toLDVector v@(HDVector ws) = select :: Int -> HDVector -> Int select i (HDVector v) = let runningCount = evalState (U.mapM (\c -> modify (+ popCount c) >> get) v) 0 - (p,r) = U.span (\a -> a < i) runningCount + (p,r) = U.span (< i) runningCount in -1 diff --git a/lib/Data/BitMap/Roaring/Chunk/High.hs b/lib/Data/BitMap/Roaring/Chunk/High.hs index 064ae2c..20ad52f 100644 --- a/lib/Data/BitMap/Roaring/Chunk/High.hs +++ b/lib/Data/BitMap/Roaring/Chunk/High.hs @@ -22,7 +22,7 @@ empty = -- | New high-density vector with a single bit set. singleton :: Word16 -> HDVector singleton i = - let (wi, bi) = (fromIntegral i) `divMod` 64 + let (wi, bi) = fromIntegral i `divMod` 64 mk i = if i == wi then B.bit bi else 0 @@ -33,13 +33,13 @@ singleton i = -- | Set a bit in a high-density vector. setBit :: HDVector -> Word16 -> HDVector setBit (HDVector bs) ix = - let (w, b) = (fromIntegral ix :: Int) `divMod` 64 + let (w, b) = fromIntegral ix `divMod` 64 in HDVector (U.modify (\v -> M.read v w >>= M.write v w . flip B.setBit b) bs) -- | Clear a bit in a high-density vector. clearBit :: HDVector -> Word16 -> HDVector clearBit (HDVector bs) ix = - let (w, b) = (fromIntegral ix :: Int) `divMod` 64 + let (w, b) = fromIntegral ix `divMod` 64 in HDVector (U.modify (\v -> M.read v w >>= M.write v w . flip B.clearBit b) bs) -- | Flip a bit in a high-density vector. @@ -76,7 +76,7 @@ xor (HDVector as) (HDVector bs) = -- | Test a bit in a high-density vector. testBit :: HDVector -> Word16 -> Bool testBit (HDVector bs) ix = - let (wi, bi) = (fromIntegral ix) `divMod` 64 + let (wi, bi) = fromIntegral ix `divMod` 64 in B.testBit (bs U.! wi) bi -- | Query number of set bits. @@ -90,5 +90,5 @@ toList :: HDVector -> [Word16] toList (HDVector v) = [] where unpackWord :: Word64 -> [Word16] - unpackWord w = map fst . filter snd $ map (\bi -> (fromIntegral bi, B.testBit w bi)) [0..63] + unpackWord w = fmap fst . filter snd $ fmap (\bi -> (fromIntegral bi, B.testBit w bi)) [0..63] diff --git a/lib/Data/BitMap/Roaring/Chunk/Low.hs b/lib/Data/BitMap/Roaring/Chunk/Low.hs index 5449ec1..d5ded89 100644 --- a/lib/Data/BitMap/Roaring/Chunk/Low.hs +++ b/lib/Data/BitMap/Roaring/Chunk/Low.hs @@ -29,7 +29,7 @@ singleton b = LDVector (U.singleton b) setBit :: LDVector -> Word16 -> LDVector setBit lv@(LDVector bs) ix | testBit lv ix = lv - | otherwise = LDVector (U.modify (S.sortBy (compare)) $ U.cons ix bs) + | otherwise = LDVector (U.modify (S.sortBy compare) $ U.cons ix bs) -- | Clear a word in a low-density vector. -- diff --git a/lib/Data/BitMap/Roaring/Utility.hs b/lib/Data/BitMap/Roaring/Utility.hs index ffb8805..53aa4bc 100644 --- a/lib/Data/BitMap/Roaring/Utility.hs +++ b/lib/Data/BitMap/Roaring/Utility.hs @@ -48,7 +48,7 @@ vMergeWith -> vector e -> vector r vMergeWith f as bs - | G.null as = G.concatMap (\e -> maybe G.empty G.singleton $ f Nothing (Just e)) bs + | G.null as = G.concatMap (maybe G.empty G.singleton . f Nothing . Just) bs | G.null bs = G.concatMap (\e -> maybe G.empty G.singleton $ f (Just e) Nothing) as | otherwise = let a = G.head as diff --git a/rawr.cabal b/rawr.cabal index 8581e7f..5928b3a 100644 --- a/rawr.cabal +++ b/rawr.cabal @@ -2,7 +2,7 @@ name: rawr version: 0.1.0.0 synopsis: Roaring Bitmaps compressed bitmap data-structure. description: Roaring Bitmaps compressed bitmap data-structure. -homepage: https://github.com/thsutton/rawr/ +homepage: https://github.com/thsutton/rawr#readme license: BSD3 license-file: LICENSE author: Thomas Sutton @@ -15,7 +15,7 @@ cabal-version: >=1.10 source-repository HEAD type: git - location: https://github.com/thsutton/rawr.git + location: https://github.com/thsutton/rawr/ library default-language: Haskell2010 @@ -42,3 +42,11 @@ test-suite properties , QuickCheck , containers , rawr + +test-suite zzz-check-hlint + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: check-hlint.hs + build-depends: base + , hlint diff --git a/stack.yaml b/stack.yaml index 517b659..0d760e7 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,6 @@ resolver: lts-6.7 +packages: +- '.' extra-deps: [] flags: {} extra-package-dbs: [] diff --git a/test/check-hlint.hs b/test/check-hlint.hs new file mode 100644 index 0000000..b1b488e --- /dev/null +++ b/test/check-hlint.hs @@ -0,0 +1,17 @@ +module Main (main) where + +import Language.Haskell.HLint (hlint) +import System.Exit (exitFailure, exitSuccess) + +arguments :: [String] +arguments = + [ "lib" + , "test" + ] + +main :: IO () +main = do + hints <- hlint arguments + if null hints + then exitSuccess + else exitFailure diff --git a/test/properties.hs b/test/properties.hs index a18799a..2f7ddbd 100644 --- a/test/properties.hs +++ b/test/properties.hs @@ -89,7 +89,7 @@ prop_union_fromList (NonEmpty as) (NonEmpty bs) = let q = R.fromList as r = R.fromList bs qr = R.fromList (as <> bs) - in (R.toAscList qr == R.toAscList (q `R.union` r)) + in R.toAscList qr == R.toAscList (q `R.union` r) prop_intersection_fromList :: NonEmptyList Word32 -> NonEmptyList Word32 -> Bool prop_intersection_fromList (NonEmpty al) (NonEmpty bl) = @@ -99,15 +99,15 @@ prop_intersection_fromList (NonEmpty al) (NonEmpty bl) = as = S.fromList al bs = S.fromList bl is = S.intersection as bs - in (R.toList im) == (S.toList is) + in R.toList im == S.toList is prop_ld_chunk_intersection :: NonEmptyList Word16 -> NonEmptyList Word16 -> Bool prop_ld_chunk_intersection (NonEmpty al) (NonEmpty bl) = let is = S.intersection (S.fromList al) (S.fromList bl) cc = C.chunkClear 0 (C.chunkNew 0 0) - fromList = foldl' (\c b -> C.chunkSet b c) cc + fromList = foldl' (flip C.chunkSet) cc ms = C.intersection (fromList al) (fromList bl) - in (C.toList ms) == (map fromIntegral $ S.toList is) + in C.toList ms == (fromIntegral <$> S.toList is) -- -- Use Template Haskell to automatically run all of the properties above. From 6c28410579770d74aa6a34b5e5c79a290e87c99a Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 20:31:19 +1100 Subject: [PATCH 06/10] Add doctests test suite --- rawr.cabal | 19 ++++++++++++++----- test/doctests.hs | 4 ++++ 2 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 test/doctests.hs diff --git a/rawr.cabal b/rawr.cabal index 5928b3a..23cbe4c 100644 --- a/rawr.cabal +++ b/rawr.cabal @@ -37,11 +37,20 @@ test-suite properties default-language: Haskell2010 hs-source-dirs: test main-is: properties.hs - build-depends: - base - , QuickCheck - , containers - , rawr + build-depends: base + , QuickCheck + , containers + , rawr + +test-suite doctests + default-language: Haskell2010 + hs-source-dirs: test + type: exitcode-stdio-1.0 + ghc-options: -threaded + main-is: doctests.hs + build-depends: base + , QuickCheck + , doctest >= 0.9 test-suite zzz-check-hlint default-language: Haskell2010 diff --git a/test/doctests.hs b/test/doctests.hs new file mode 100644 index 0000000..bd8b9ee --- /dev/null +++ b/test/doctests.hs @@ -0,0 +1,4 @@ +import Test.DocTest + +main :: IO () +main = doctest ["-ilib", "lib"] From 9217dd22b67adfaeb065feb44461b48ff0ddf969 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 20:43:09 +1100 Subject: [PATCH 07/10] Stylish --- lib/Data/BitMap/Roaring/Utility.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Data/BitMap/Roaring/Utility.hs b/lib/Data/BitMap/Roaring/Utility.hs index 53aa4bc..18abe94 100644 --- a/lib/Data/BitMap/Roaring/Utility.hs +++ b/lib/Data/BitMap/Roaring/Utility.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE MultiParamTypeClasses #-} module Data.BitMap.Roaring.Utility where import Data.Bits From 07322f9edece1168498c1105adfbc770ad3b9015 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 20:43:16 +1100 Subject: [PATCH 08/10] Update README --- README.md | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5c11c84..a01c284 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,42 @@ Rawr ==== -[![Build Status][3]][2] +[![Build status][travis-badge]][travis-link] -This is an implementation of the [roaring bitmaps][1] data structure in -Haskell. Roaring bitmaps is a compressed bitmap data structure which offers -better compression and performance than other compressed bitmaps in many -situations. +[*Rawr*][1] is a Haskell implementation of the [Roaring Bitmaps][2] +data structure. Roaring Bitmaps are a compressed bitmap data structure +offering better space and time performance than other compressed +bitmaps in many situation. -[1]: http://www.roaringbitmap.org/ -[2]: https://travis-ci.org/thsutton/rawr -[3]: https://travis-ci.org/thsutton/rawr.svg?branch=master +For more information about *rawr* see the [documentation][3] or refer +to the [Roaring Bitmaps][2] web-site for other implementations and +publications about the data structure. + +**Please note:** This is a work in progress and is not yet ready for +use. When complete it will be released on Hackage. + +Structure +--------- + +The *Roaring Bitmap* structure divides the 32-bit keys into two 16-bit +values. One, the high order bits, identifies a *chunk* within the +map and the other, the low order bits, identifies a *bit* within +the chunk. + +There are two chunk representations: + +1. A sparse chunk contains a `Word16` for each *bit* present in the + chunk. + +2. A dense chunk contains 4096 `Word16`s which contains exactly one + bit for every possible *bit* which can be present in the chunk. + +The structure will convert the representation of each chunk as *bit*s +are set and cleared from the map. + +[1]: https://github.com/thsutton/rawr +[2]: http://www.roaringbitmaps.org/ +[3]: https://hackage.haskell.org/package/rawr/docs/Data-BitMap-Roaring.html + +[travis-link]: https://travis-ci.org/thsutton/rawr +[travis-badge]: https://travis-ci.org/thsutton/rawr.svg?branch=master From aa576d96fb81586bf3e54d200079277c55197c89 Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 21:14:50 +1100 Subject: [PATCH 09/10] Projects in sub-directories: rawr, rawr-format --- .stylish-haskell.yaml | 106 ++++++++++++++- .travis.yml | 21 +-- README.md | 31 +---- LICENSE => rawr-format/LICENSE | 2 +- rawr-format/README.md | 20 +++ Setup.hs => rawr-format/Setup.hs | 0 rawr-format/lib/Lib.hs | 6 + rawr-format/rawr-format.cabal | 53 ++++++++ rawr-format/test/Spec.hs | 2 + {test => rawr-format/test}/check-hlint.hs | 0 {test => rawr-format/test}/doctests.hs | 0 {test => rawr-format/test}/properties.hs | 0 HLint.hs => rawr/HLint.hs | 0 rawr/LICENSE | 30 +++++ rawr/README.md | 42 ++++++ rawr/Setup.hs | 2 + {lib => rawr/lib}/Data/BitMap/Roaring.hs | 0 .../lib}/Data/BitMap/Roaring/Chunk.hs | 0 .../lib}/Data/BitMap/Roaring/Chunk/High.hs | 0 .../lib}/Data/BitMap/Roaring/Chunk/Low.hs | 0 .../lib}/Data/BitMap/Roaring/Utility.hs | 0 rawr.cabal => rawr/rawr.cabal | 0 rawr/test/check-hlint.hs | 17 +++ rawr/test/doctests.hs | 4 + rawr/test/properties.hs | 123 ++++++++++++++++++ stack.yaml | 5 +- 26 files changed, 421 insertions(+), 43 deletions(-) rename LICENSE => rawr-format/LICENSE (97%) create mode 100644 rawr-format/README.md rename Setup.hs => rawr-format/Setup.hs (100%) create mode 100644 rawr-format/lib/Lib.hs create mode 100644 rawr-format/rawr-format.cabal create mode 100644 rawr-format/test/Spec.hs rename {test => rawr-format/test}/check-hlint.hs (100%) rename {test => rawr-format/test}/doctests.hs (100%) rename {test => rawr-format/test}/properties.hs (100%) rename HLint.hs => rawr/HLint.hs (100%) create mode 100644 rawr/LICENSE create mode 100644 rawr/README.md create mode 100644 rawr/Setup.hs rename {lib => rawr/lib}/Data/BitMap/Roaring.hs (100%) rename {lib => rawr/lib}/Data/BitMap/Roaring/Chunk.hs (100%) rename {lib => rawr/lib}/Data/BitMap/Roaring/Chunk/High.hs (100%) rename {lib => rawr/lib}/Data/BitMap/Roaring/Chunk/Low.hs (100%) rename {lib => rawr/lib}/Data/BitMap/Roaring/Utility.hs (100%) rename rawr.cabal => rawr/rawr.cabal (100%) create mode 100644 rawr/test/check-hlint.hs create mode 100644 rawr/test/doctests.hs create mode 100644 rawr/test/properties.hs diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml index e48c5d5..51a508f 100644 --- a/.stylish-haskell.yaml +++ b/.stylish-haskell.yaml @@ -15,6 +15,14 @@ steps: # # true. # add_language_pragma: true + # Align the right hand side of some elements. This is quite conservative + # and only applies to statements where each element occupies a single + # line. + - simple_align: + cases: true + top_level_patterns: true + records: true + # Import cleanup - imports: # There are different ways we can align names and lists. @@ -22,6 +30,9 @@ steps: # - global: Align the import names and import list throughout the entire # file. # + # - file: Like global, but don't add padding when there are no qualified + # imports in the file. + # # - group: Only align the imports per group (a group is formed by adjacent # import lines). # @@ -30,6 +41,73 @@ steps: # Default: global. align: group + # Folowing options affect only import list alignment. + # + # List align has following options: + # + # - after_alias: Import list is aligned with end of import including + # 'as' and 'hiding' keywords. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - with_alias: Import list is aligned with start of alias or hiding. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - new_line: Import list starts always on new line. + # + # > import qualified Data.List as List + # > (concat, foldl, foldr, head, init, last, length) + # + # Default: after_alias + list_align: after_alias + + # Long list align style takes effect when import is too long. This is + # determined by 'columns' setting. + # + # - inline: This option will put as much specs on same line as possible. + # + # - new_line: Import list will start on new line. + # + # - new_line_multiline: Import list will start on new line when it's + # short enough to fit to single line. Otherwise it'll be multiline. + # + # - multiline: One line per import list entry. + # Type with contructor list acts like single import. + # + # > import qualified Data.Map as M + # > ( empty + # > , singleton + # > , ... + # > , delete + # > ) + # + # Default: inline + long_list_align: inline + + # List padding determines indentation of import list on lines after import. + # This option affects 'list_align' and 'long_list_align'. + list_padding: 4 + + # Separate lists option affects formating of import list for type + # or class. The only difference is single space between type and list + # of constructors, selectors and class functions. + # + # - true: There is single space between Foldable type and list of it's + # functions. + # + # > import Data.Foldable (Foldable (fold, foldl, foldMap)) + # + # - false: There is no space between Foldable type and list of it's + # functions. + # + # > import Data.Foldable (Foldable(fold, foldl, foldMap)) + # + # Default: true + separate_lists: true + # Language pragmas - language_pragmas: # We can generate different styles of language pragma lists. @@ -38,16 +116,26 @@ steps: # # - compact: A more compact style. # + # - compact_line: Similar to compact, but wrap each line with + # `{-#LANGUAGE #-}'. + # # Default: vertical. style: vertical + # Align affects alignment of closing pragma brackets. + # + # - true: Brackets are aligned in same collumn. + # + # - false: Brackets are not aligned together. There is only one space + # between actual import and closing bracket. + # + # Default: true + align: true + # stylish-haskell can detect redundancy of some language pragmas. If this # is set to true, it will remove those redundant pragmas. Default: true. remove_redundant: true - # Align the types in record declarations - - records: {} - # Replace tabs by spaces. This is disabled by default. - tabs: # Number of spaces to use for each tab. Default: 8, as specified by the @@ -61,6 +149,18 @@ steps: # to. Different steps take this into account. Default: 80. columns: 78 +# By default, line endings are converted according to the OS. You can override +# preferred format here. +# +# - native: Native newline format. CRLF on Windows, LF on other OSes. +# +# - lf: Convert to LF ("\n"). +# +# - crlf: Convert to CRLF ("\r\n"). +# +# Default: native. +newline: native + # Sometimes, language extensions are specified in a cabal file or from the # command line instead of using language pragmas in the file. stylish-haskell # needs to be aware of these, so it can parse the file correctly. diff --git a/.travis.yml b/.travis.yml index 5d3061e..a3d63b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,6 +29,7 @@ env: - STACK_RESOLVER=lts-3 - STACK_RESOLVER=lts-5 - STACK_RESOLVER=lts-6 + - STACK_RESOLVER=lts-7 # Download the latest stack command. before_install: @@ -42,13 +43,13 @@ install: # Here starts the actual work to be performed for the package under test; any # command which exits with a non-zero exit code causes the build to fail. script: - - cabal check - - cabal sdist - - export SRC=$(cabal info . | awk '{print $2;exit}') - - tar -xzf "dist/$SRC.tar.gz" - - cd "$SRC" - - travis_retry ../stack --no-terminal --resolver $STACK_RESOLVER setup - - travis_retry ../stack --no-terminal --resolver $STACK_RESOLVER install --only-snapshot -j4 --verbosity info - - ../stack --no-terminal --resolver $STACK_RESOLVER build - - ../stack --no-terminal --resolver $STACK_RESOLVER haddock --no-haddock-deps - - ../stack --no-terminal --resolver $STACK_RESOLVER test +# - cabal check +# - cabal sdist +# - export SRC=$(cabal info . | awk '{print $2;exit}') +# - tar -xzf "dist/$SRC.tar.gz" +# - cd "$SRC" + - travis_retry ./stack --no-terminal --resolver $STACK_RESOLVER setup + - travis_retry ./stack --no-terminal --resolver $STACK_RESOLVER install --only-snapshot -j4 --verbosity info + - ./stack --no-terminal --resolver $STACK_RESOLVER build + - ./stack --no-terminal --resolver $STACK_RESOLVER haddock --no-haddock-deps + - ./stack --no-terminal --resolver $STACK_RESOLVER test diff --git a/README.md b/README.md index a01c284..93eb342 100644 --- a/README.md +++ b/README.md @@ -4,39 +4,16 @@ Rawr [![Build status][travis-badge]][travis-link] [*Rawr*][1] is a Haskell implementation of the [Roaring Bitmaps][2] -data structure. Roaring Bitmaps are a compressed bitmap data structure -offering better space and time performance than other compressed -bitmaps in many situation. - -For more information about *rawr* see the [documentation][3] or refer -to the [Roaring Bitmaps][2] web-site for other implementations and -publications about the data structure. +data structure and [serialisation format][3]. Roaring Bitmaps are a +compressed bitmap data structure offering better space and time +performance than other compressed bitmaps in many situations. **Please note:** This is a work in progress and is not yet ready for use. When complete it will be released on Hackage. -Structure ---------- - -The *Roaring Bitmap* structure divides the 32-bit keys into two 16-bit -values. One, the high order bits, identifies a *chunk* within the -map and the other, the low order bits, identifies a *bit* within -the chunk. - -There are two chunk representations: - -1. A sparse chunk contains a `Word16` for each *bit* present in the - chunk. - -2. A dense chunk contains 4096 `Word16`s which contains exactly one - bit for every possible *bit* which can be present in the chunk. - -The structure will convert the representation of each chunk as *bit*s -are set and cleared from the map. - [1]: https://github.com/thsutton/rawr [2]: http://www.roaringbitmaps.org/ -[3]: https://hackage.haskell.org/package/rawr/docs/Data-BitMap-Roaring.html +[3]: https://github.com/RoaringBitmap/RoaringFormatSpec [travis-link]: https://travis-ci.org/thsutton/rawr [travis-badge]: https://travis-ci.org/thsutton/rawr.svg?branch=master diff --git a/LICENSE b/rawr-format/LICENSE similarity index 97% rename from LICENSE rename to rawr-format/LICENSE index d80172e..f284b4b 100644 --- a/LICENSE +++ b/rawr-format/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2015, Thomas Sutton +Copyright Thomas Sutton (c) 2017 All rights reserved. diff --git a/rawr-format/README.md b/rawr-format/README.md new file mode 100644 index 0000000..a22cc20 --- /dev/null +++ b/rawr-format/README.md @@ -0,0 +1,20 @@ +Rawr Format +=========== + +[![Build status][travis-badge]][travis-link] + +[*Rawr Format*][1] is a Haskell implementation of the Roaring Bitmaps +[serialisation format][3]. Roaring Bitmaps are a compressed bitmap +data structure offering better space and time performance than other +compressed bitmaps in many situations. This library implements the +interoperable serialisation format supported by many implementations. + +**Please note:** This is a work in progress and is not yet ready for +use. When complete it will be released on Hackage. + +[1]: https://github.com/thsutton/rawr +[2]: http://www.roaringbitmaps.org/ +[3]: https://github.com/RoaringBitmap/RoaringFormatSpec + +[travis-link]: https://travis-ci.org/thsutton/rawr +[travis-badge]: https://travis-ci.org/thsutton/rawr.svg?branch=master diff --git a/Setup.hs b/rawr-format/Setup.hs similarity index 100% rename from Setup.hs rename to rawr-format/Setup.hs diff --git a/rawr-format/lib/Lib.hs b/rawr-format/lib/Lib.hs new file mode 100644 index 0000000..d36ff27 --- /dev/null +++ b/rawr-format/lib/Lib.hs @@ -0,0 +1,6 @@ +module Lib + ( someFunc + ) where + +someFunc :: IO () +someFunc = putStrLn "someFunc" diff --git a/rawr-format/rawr-format.cabal b/rawr-format/rawr-format.cabal new file mode 100644 index 0000000..0c54cc5 --- /dev/null +++ b/rawr-format/rawr-format.cabal @@ -0,0 +1,53 @@ +name: rawr-format +version: 0.1.0.0 +synopsis: Interoperable Roaring Bitmaps serialisation for rawr +description: Please see README.md +homepage: https://github.com/thsutton/rawr#readme +license: BSD3 +license-file: LICENSE +author: Thomas Sutton +maintainer: me@thomas-sutton.id.au +copyright: Copyright: (c) 2016 Thomas Sutton +category: Web +build-type: Simple +extra-source-files: README.md +cabal-version: >=1.10 + +source-repository head + type: git + location: https://github.com/thsutton/rawr + +library + default-language: Haskell2010 + hs-source-dirs: lib + exposed-modules: Lib + build-depends: base >= 4.5 && < 5 + , rawr + +test-suite doctests + default-language: Haskell2010 + hs-source-dirs: test + type: exitcode-stdio-1.0 + ghc-options: -threaded + main-is: doctests.hs + build-depends: base + , QuickCheck + , doctest >= 0.9 + +test-suite zzz-check-hlint + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: check-hlint.hs + build-depends: base + , hlint + +test-suite rawr-format-test + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Spec.hs + build-depends: base + , rawr-format + ghc-options: -threaded -rtsopts -with-rtsopts=-N + default-language: Haskell2010 + diff --git a/rawr-format/test/Spec.hs b/rawr-format/test/Spec.hs new file mode 100644 index 0000000..cd4753f --- /dev/null +++ b/rawr-format/test/Spec.hs @@ -0,0 +1,2 @@ +main :: IO () +main = putStrLn "Test suite not yet implemented" diff --git a/test/check-hlint.hs b/rawr-format/test/check-hlint.hs similarity index 100% rename from test/check-hlint.hs rename to rawr-format/test/check-hlint.hs diff --git a/test/doctests.hs b/rawr-format/test/doctests.hs similarity index 100% rename from test/doctests.hs rename to rawr-format/test/doctests.hs diff --git a/test/properties.hs b/rawr-format/test/properties.hs similarity index 100% rename from test/properties.hs rename to rawr-format/test/properties.hs diff --git a/HLint.hs b/rawr/HLint.hs similarity index 100% rename from HLint.hs rename to rawr/HLint.hs diff --git a/rawr/LICENSE b/rawr/LICENSE new file mode 100644 index 0000000..f284b4b --- /dev/null +++ b/rawr/LICENSE @@ -0,0 +1,30 @@ +Copyright Thomas Sutton (c) 2017 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Thomas Sutton nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/rawr/README.md b/rawr/README.md new file mode 100644 index 0000000..65d95d2 --- /dev/null +++ b/rawr/README.md @@ -0,0 +1,42 @@ +Rawr +==== + +[![Build status][travis-badge]][travis-link] + +[*Rawr*][1] is a Haskell implementation of the [Roaring Bitmaps][2] +data structure. Roaring Bitmaps are a compressed bitmap data structure +offering better space and time performance than other compressed +bitmaps in many situations. + +For more information about *rawr* see the [documentation][3] or refer +to the [Roaring Bitmaps][2] web-site for other implementations and +publications about the data structure. + +**Please note:** This is a work in progress and is not yet ready for +use. When complete it will be released on Hackage. + +Structure +--------- + +The *Roaring Bitmap* structure divides the 32-bit keys into two 16-bit +values. One, the high order bits, identifies a *chunk* within the +map and the other, the low order bits, identifies a *bit* within +the chunk. + +There are two chunk representations: + +1. A sparse chunk contains a `Word16` for each *bit* present in the + chunk. + +2. A dense chunk contains 4096 `Word16`s which contains exactly one + bit for every possible *bit* which can be present in the chunk. + +The structure will convert the representation of each chunk as *bit*s +are set and cleared from the map. + +[1]: https://github.com/thsutton/rawr +[2]: http://www.roaringbitmaps.org/ +[3]: https://hackage.haskell.org/package/rawr/docs/Data-BitMap-Roaring.html + +[travis-link]: https://travis-ci.org/thsutton/rawr +[travis-badge]: https://travis-ci.org/thsutton/rawr.svg?branch=master diff --git a/rawr/Setup.hs b/rawr/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/rawr/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/lib/Data/BitMap/Roaring.hs b/rawr/lib/Data/BitMap/Roaring.hs similarity index 100% rename from lib/Data/BitMap/Roaring.hs rename to rawr/lib/Data/BitMap/Roaring.hs diff --git a/lib/Data/BitMap/Roaring/Chunk.hs b/rawr/lib/Data/BitMap/Roaring/Chunk.hs similarity index 100% rename from lib/Data/BitMap/Roaring/Chunk.hs rename to rawr/lib/Data/BitMap/Roaring/Chunk.hs diff --git a/lib/Data/BitMap/Roaring/Chunk/High.hs b/rawr/lib/Data/BitMap/Roaring/Chunk/High.hs similarity index 100% rename from lib/Data/BitMap/Roaring/Chunk/High.hs rename to rawr/lib/Data/BitMap/Roaring/Chunk/High.hs diff --git a/lib/Data/BitMap/Roaring/Chunk/Low.hs b/rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs similarity index 100% rename from lib/Data/BitMap/Roaring/Chunk/Low.hs rename to rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs diff --git a/lib/Data/BitMap/Roaring/Utility.hs b/rawr/lib/Data/BitMap/Roaring/Utility.hs similarity index 100% rename from lib/Data/BitMap/Roaring/Utility.hs rename to rawr/lib/Data/BitMap/Roaring/Utility.hs diff --git a/rawr.cabal b/rawr/rawr.cabal similarity index 100% rename from rawr.cabal rename to rawr/rawr.cabal diff --git a/rawr/test/check-hlint.hs b/rawr/test/check-hlint.hs new file mode 100644 index 0000000..b1b488e --- /dev/null +++ b/rawr/test/check-hlint.hs @@ -0,0 +1,17 @@ +module Main (main) where + +import Language.Haskell.HLint (hlint) +import System.Exit (exitFailure, exitSuccess) + +arguments :: [String] +arguments = + [ "lib" + , "test" + ] + +main :: IO () +main = do + hints <- hlint arguments + if null hints + then exitSuccess + else exitFailure diff --git a/rawr/test/doctests.hs b/rawr/test/doctests.hs new file mode 100644 index 0000000..bd8b9ee --- /dev/null +++ b/rawr/test/doctests.hs @@ -0,0 +1,4 @@ +import Test.DocTest + +main :: IO () +main = doctest ["-ilib", "lib"] diff --git a/rawr/test/properties.hs b/rawr/test/properties.hs new file mode 100644 index 0000000..2f7ddbd --- /dev/null +++ b/rawr/test/properties.hs @@ -0,0 +1,123 @@ +{-# LANGUAGE TemplateHaskell #-} + +module Main where + +import Control.Monad +import Data.List +import Data.Monoid +import qualified Data.Set as S +import Data.Word +import System.Exit +import Test.QuickCheck + +import qualified Data.BitMap.Roaring as R +import qualified Data.BitMap.Roaring.Chunk as C +import qualified Data.BitMap.Roaring.Utility as R + +-- * Check utility functions + +-- | id == uncurry combineWord . splitWord +prop_splitWord_combineWord_id :: Word32 -> Bool +prop_splitWord_combineWord_id w = + w == (uncurry R.combineWord . R.splitWord $ w) + +-- | The empty set is null. +prop_null_empty :: Bool +prop_null_empty = R.null R.empty + +-- | Singleton sets are not null. +prop_not_null_singleton :: Word32 -> Bool +prop_not_null_singleton i = not . R.null $ R.singleton i + +-- | Larger sets are not null. +prop_not_null_fromList :: NonEmptyList Word32 -> Bool +prop_not_null_fromList (NonEmpty is) = not . R.null $ R.fromList is + +-- | Empty sets have size zero. +prop_size_empty :: Bool +prop_size_empty = 0 == R.size R.empty + +-- | Singletons have size one. +prop_size_singleton :: Word32 -> Bool +prop_size_singleton i = 1 == R.size (R.singleton i) + +-- | Sets have size of list length. +prop_size_fromList :: NonEmptyList Word32 -> Bool +prop_size_fromList (NonEmpty is) = length (nub is) == R.size (R.fromList is) + +-- | Singletons have size 1, then size 0 when deleted. +prop_size_delete_singleton :: Word32 -> Bool +prop_size_delete_singleton i = + let s = R.singleton i + s' = R.delete i s + in R.size s == 1 && R.size s' == 0 + +-- | Singletons are empty when the sole item is deleted. +prop_null_delete_singleton :: Word32 -> Bool +prop_null_delete_singleton i = + R.null . R.delete i $ R.singleton i + +-- | 'toAscList' produces sorted lists. +prop_toAscList_sorted :: NonEmptyList Word32 -> Bool +prop_toAscList_sorted (NonEmpty l) = + let l' = R.toAscList (R.fromList l) + in l' == sort l' + +-- | 'toDescList' produces sorted lists. +prop_toDescList_sorted :: NonEmptyList Word32 -> Bool +prop_toDescList_sorted (NonEmpty l) = + let l' = R.toDescList (R.fromList l) + in l' == sortBy (flip compare) l' + +-- | "Data.Set" and "Data.BitMap.Roaring" agree about a set when +-- building from the same list of inputs. +prop_intset_roaring_agree :: NonEmptyList Word32 -> Bool +prop_intset_roaring_agree (NonEmpty l) = + let r = R.toAscList $ R.fromList l + s = S.toAscList $ S.fromList l + in r == s + +-- | Every item in the source list should be an element. +prop_map_elem_fromList :: NonEmptyList Word32 -> Bool +prop_map_elem_fromList (NonEmpty l) = + let r = R.fromList l + in all (`R.member` r) l + +-- | union (fromList s1) (fromList s2) == fromList (s1 <> s2) +prop_union_fromList :: NonEmptyList Word32 -> NonEmptyList Word32 -> Bool +prop_union_fromList (NonEmpty as) (NonEmpty bs) = + let q = R.fromList as + r = R.fromList bs + qr = R.fromList (as <> bs) + in R.toAscList qr == R.toAscList (q `R.union` r) + +prop_intersection_fromList :: NonEmptyList Word32 -> NonEmptyList Word32 -> Bool +prop_intersection_fromList (NonEmpty al) (NonEmpty bl) = + let am = R.fromList al + bm = R.fromList bl + im = R.intersection am bm + as = S.fromList al + bs = S.fromList bl + is = S.intersection as bs + in R.toList im == S.toList is + +prop_ld_chunk_intersection :: NonEmptyList Word16 -> NonEmptyList Word16 -> Bool +prop_ld_chunk_intersection (NonEmpty al) (NonEmpty bl) = + let is = S.intersection (S.fromList al) (S.fromList bl) + cc = C.chunkClear 0 (C.chunkNew 0 0) + fromList = foldl' (flip C.chunkSet) cc + ms = C.intersection (fromList al) (fromList bl) + in C.toList ms == (fromIntegral <$> S.toList is) + +-- +-- Use Template Haskell to automatically run all of the properties above. +-- + +return [] +runTests :: IO Bool +runTests = $quickCheckAll + +main :: IO () +main = do + result <- runTests + unless result exitFailure diff --git a/stack.yaml b/stack.yaml index 0d760e7..ad27add 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,6 +1,7 @@ -resolver: lts-6.7 +resolver: lts-7.14 packages: -- '.' +- 'rawr' +- 'rawr-format' extra-deps: [] flags: {} extra-package-dbs: [] From 782dc19b7233577d664fc429ed3b02dcf384831f Mon Sep 17 00:00:00 2001 From: Thomas Sutton Date: Mon, 2 Jan 2017 21:15:14 +1100 Subject: [PATCH 10/10] Stylish apply alignment --- rawr/lib/Data/BitMap/Roaring/Chunk.hs | 8 ++++---- rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs | 12 ++++++------ rawr/lib/Data/BitMap/Roaring/Utility.hs | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/rawr/lib/Data/BitMap/Roaring/Chunk.hs b/rawr/lib/Data/BitMap/Roaring/Chunk.hs index 15a5c7e..7edb203 100644 --- a/rawr/lib/Data/BitMap/Roaring/Chunk.hs +++ b/rawr/lib/Data/BitMap/Roaring/Chunk.hs @@ -48,12 +48,12 @@ instance Bits Chunk where (.|.) = union (.&.) = intersection - testBit (LowDensity ix a) i = L.testBit a (fromIntegral i) + testBit (LowDensity ix a) i = L.testBit a (fromIntegral i) testBit (HighDensity ix a) i = H.testBit a (fromIntegral i) bit i = singleton (fromIntegral i) - popCount (LowDensity ix a) = L.popCount a + popCount (LowDensity ix a) = L.popCount a popCount (HighDensity ix a) = H.popCount a singleton :: Word32 -> Chunk @@ -75,7 +75,7 @@ set b c@(LowDensity i bs) | otherwise = repackChunk $ LowDensity i (L.setBit bs b) toList :: Chunk -> [Word32] -toList (LowDensity i bs) = combineWord i <$> L.toList bs +toList (LowDensity i bs) = combineWord i <$> L.toList bs toList (HighDensity i bs) = combineWord i <$> H.toList bs bits :: Word64 -> [Word16] @@ -87,7 +87,7 @@ bits w = foldr abit [] [0..63] else l chunkCheck :: Word16 -> Chunk -> Bool -chunkCheck w (LowDensity _ bs) = L.testBit bs w +chunkCheck w (LowDensity _ bs) = L.testBit bs w chunkCheck w (HighDensity _ bs) = H.testBit bs w chunkSet :: Word16 -> Chunk -> Chunk diff --git a/rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs b/rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs index d5ded89..d7a7fd5 100644 --- a/rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs +++ b/rawr/lib/Data/BitMap/Roaring/Chunk/Low.hs @@ -69,9 +69,9 @@ intersection (LDVector as) (LDVector bs) = LDVector (vMergeWith merge as bs) where merge (Just a) (Just b) = Just a - merge (Just a) Nothing = Nothing - merge Nothing (Just b) = Nothing - merge Nothing Nothing = Nothing + merge (Just a) Nothing = Nothing + merge Nothing (Just b) = Nothing + merge Nothing Nothing = Nothing -- | Take the union of two low-density vectors. -- @@ -81,8 +81,8 @@ union (LDVector v1) (LDVector v2) = LDVector (vMergeWith merge v1 v2) where merge (Just a) (Just b) = Just a - merge Nothing a = a - merge a Nothing = a + merge Nothing a = a + merge a Nothing = a -- | Take the exclusive-or of two low-density vectors. xor :: LDVector -> LDVector -> LDVector @@ -91,7 +91,7 @@ xor (LDVector as) (LDVector bs) = where merge a Nothing = a merge Nothing b = b - merge _ _ = Nothing + merge _ _ = Nothing -- * Conversions diff --git a/rawr/lib/Data/BitMap/Roaring/Utility.hs b/rawr/lib/Data/BitMap/Roaring/Utility.hs index 18abe94..4f4da35 100644 --- a/rawr/lib/Data/BitMap/Roaring/Utility.hs +++ b/rawr/lib/Data/BitMap/Roaring/Utility.hs @@ -37,8 +37,8 @@ vMerge :: (G.Vector vector e, Ord e) => vector e -> vector e -> vector e vMerge = vMergeWith merge where merge :: Maybe e -> Maybe e -> Maybe e - merge Nothing a = a - merge a Nothing = a + merge Nothing a = a + merge a Nothing = a merge (Just a) (Just b) = Just a -- | Merge two sorted vectors. @@ -57,13 +57,13 @@ vMergeWith f as bs in case a `compare` b of LT -> case f (Just a) Nothing of Nothing -> vMergeWith f (G.tail as) bs - Just r -> r `G.cons` vMergeWith f (G.tail as) bs + Just r -> r `G.cons` vMergeWith f (G.tail as) bs EQ -> case f (Just a) (Just b) of Nothing -> vMergeWith f (G.tail as) (G.tail bs) Just r -> r `G.cons` vMergeWith f (G.tail as) (G.tail bs) GT -> case f Nothing (Just b) of Nothing -> vMergeWith f as (G.tail bs) - Just r -> r `G.cons` vMergeWith f as (G.tail bs) + Just r -> r `G.cons` vMergeWith f as (G.tail bs) -- | Alter the 'Chunk' with the given index in a vector of 'Chunk's. --