Skip to content

Commit

Permalink
functions in Dim2Search simplified and comment added
Browse files Browse the repository at this point in the history
  • Loading branch information
Uwe Schmidt committed Feb 27, 2014
1 parent a0528b7 commit 2dbe37a
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 67 deletions.
6 changes: 3 additions & 3 deletions Data/StringMap/Base.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Maintainer : Uwe Schmidt ([email protected])
Stability : experimental
Portability: not portable
Portability: portable
An efficient implementation of maps from strings to arbitrary values.
Expand Down Expand Up @@ -592,9 +592,9 @@ lookupLE k0 = look k0 . norm
look _ _ = normError "lookupLE"

-- | Combination of 'lookupLE' and 'lookupGE'
--
--
-- > keys $ lookupRange "a" "b" $ fromList $ zip ["", "a", "ab", "b", "ba", "c"] [1..] = ["a","ab","b"]
--
--
-- For all keys in @k = keys $ lookupRange lb ub m@, this property holts true: @k >= ub && k <= lb@

lookupRange :: Key -> Key -> StringMap a -> StringMap a
Expand Down
224 changes: 167 additions & 57 deletions Data/StringMap/Dim2Search.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,51 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
-- ----------------------------------------------------------------------------

{- |
Module : Data.StringMap.Dim2Search
Copyright : Copyright (C) 2014 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt ([email protected])
Stability : experimental
Portability: portable
2-dimensional range search of numeric values, e.g. pairs of Ints or Doubles
using StringMap and prefix search
Assumption: The coordinates, e.g. Int values are converted into strings
of equal length such that the ordering is preserved by the lexikographic ordering.
Example: convert an Int (>= 0) into a String
@intToString = reverse . take 19 . (++ repeat '0') . reverse . show@
Do this for both coordinates of a tuple
@(x,y)::(Int,Int)@
and merge the two strings character by character.
The resulting string is used as key and stored together with an attribute
in a StringMap.
A range search for all keys within a rectangle @(p1, p2) = ((x1,y1),(x2,y2))@
in a map @m@ can be done by @lookupGE p1' . lookupLE p2' $ m@ with
@p1'@ and @p2'@ as the to string converted points of the rectangle.
@lookupGE p1'@ throws away all keys not located in the quadrant with @p1@
as lower left corner, @lookupLE p2'@ all key not located in the quadrant
with @p2@ as upper right corner. So the combination (@lookupRange@) computed
the intersection of these two quadrants.
Efficiency of these two function is about the same as a normal lookup
from StringMap.Base.
This module should be imported @qualified@, the names in Data.StringMap.Dim2Search are the
same as theirs siblings in Data.StringMap:
> import Data.StringMap (StringMap)
> import qualified Data.StringMap as M
> import qualified Data.StringMap.Dim2Search as Dim2
-}

-- ----------------------------------------------------------------------------

module Data.StringMap.Dim2Search
-- {-
Expand All @@ -17,133 +63,197 @@ import Data.StringMap.Base hiding (lookupGE, lookupLE, lookupRange)
-- | remove all entries from the map with key less than the argument key

lookupGE :: Key -> StringMap a -> StringMap a
lookupGE = lookupGEX
lookupGE = lookupGE'

-- start comparing first dimension

lookupGEX :: Key -> StringMap a -> StringMap a
lookupGEX k0 = look k0 . norm
lookupGE' :: Key -> StringMap a -> StringMap a
lookupGE' k0 = look k0 . norm
where
look [] (Val v' _t') = (Val v' empty)

-- take all values in tree t, they are larger than the key
look [] t = t

look k@(c : k1) (Branch c' s' n')
-- this dimension fits for s', the other dimension has to be checked
-- with lookupGE2, process has to be repeated for the rest
| c < c' = branch c' (lookupGE2 k1 s') rest
| c == c' = branch c' (lookupGEY k1 s') rest
| otherwise = rest
where
rest = lookupGEX k n'
look _ Empty = empty
look k (Val _v' t') = lookupGEX k t'
look _ _ = normError "lookupGEX"

-- continue comparing second dimension
-- symbols are equal, no info about ordering gathered, repeat the
-- the same lookup for the subtree s'
-- the rest in n' has to be processed the same way as this branch
| c == c' = branch c' (lookupGE' k1 s') rest

lookupGEY :: Key -> StringMap a -> StringMap a
lookupGEY k0 = look k0 . norm
where
look [] (Val v' _t') = (Val v' empty)
look [] t = t
look k@(c : k1) (Branch c' s' n')
| c < c' = branch c' (lookupGE2 k1 s') rest
| c == c' = branch c' (lookupGEX k1 s') rest
-- this dimension does not fit, throw away this branch and continue with n'
| otherwise = rest
where
rest = lookupGEY k n'
rest = lookupGE' k n'

-- empty remains empty
look _ Empty = empty
look k (Val _v' t') = lookupGEY k t'
look _ _ = normError "lookupGEY"

-- ordering of one dim is o.k., check only 2. dim (every second char)
-- throw away the value, its smaller than required
look k (Val _v' t') = lookupGE' k t'

-- the impossible has happened
look _ _ = normError "lookupGE'"

lookupGE2 :: Key -> StringMap a -> StringMap a
lookupGE2 k0 = look k0 . norm
where
look [] (Val v' _t') = (Val v' empty)
-- key is empty, all values in t are larger, so they are included
look [] t = t

look k@(c : k1) t@(Branch c' s' n')
-- tree s' and all others in n' contain values larger than required
-- take them
| c < c' = t

-- the 1. symbols are equal, so lookup has to continue,
-- but only along this dimension, so skip the next key symbol (lookupLE1) and
-- repeat this comparison procedure (call of lookupLE2 in lookupLE1)
-- the rest (n') is taken like in the 1. case
| c == c' = branch c' (lookupGE1 k1 s') n'

-- the 1. symbol in the key is larger, so cut off this subtree (s')
-- and repeat lookup for the rest (n')
| otherwise = lookupGE2 k n'

-- empty remains empty
look _ Empty = empty

-- throw away the value, its smaller than required
look k (Val _v' t') = lookupGE2 k t'
look _ _ = normError "lookupGE2"

-- skip char _c and continue with comparison of next char
-- the impossible has happened
look _ _ = normError "lookupGE2"

lookupGE1 :: Key -> StringMap a -> StringMap a
lookupGE1 k0 = look k0 . norm
where
look [] (Val v' _t') = (Val v' empty)
-- like above
look [] t = t

-- ignore the 1. symbol of the key, take the subtree s' and
-- continue comparison of every other symbol,
-- do the same for all remaining trees in n'
look k@(_c : k1) (Branch c' s' n')
= branch c' (lookupGE2 k1 s') $ lookupGE1 k n'

-- like above
look _ Empty = empty

-- like above
look k (Val _v' t') = lookupGE1 k t'

-- like above
look _ _ = normError "lookupGE1"

-- ----------------------------------------
--
-- the same stuff for less or equal

lookupLE :: Key -> StringMap a -> StringMap a
lookupLE = lookupLEX
lookupLE = lookupLE'

-- start comparing first dimension

lookupLEX :: Key -> StringMap a -> StringMap a
lookupLEX k0 = look k0 . norm
lookupLE' :: Key -> StringMap a -> StringMap a
lookupLE' k0 = look k0 . norm
where

-- if key is empty and node stores a value
-- take this value, it's the upper limit,
-- all other values in the subtree _t' are larger and thrown away
look [] (Val v' _t') = (Val v' empty)

-- key is empty, all remaining values in _t are larger and thrown away
look [] _t = empty

look k@(c : k1) (Branch c' s' n')
-- the char c' is larger than the 1. char in the search key
-- so this and all other others (n') are cut off
| c < c' = empty
| c == c' = branch c' (lookupLEY k1 s') empty
| otherwise = branch c' (lookupLE2 k1 s') (lookupLEX k n')
look _ Empty = empty
look k (Val v' t') = val v' (lookupLEX k t')
look _ _ = normError "lookupLEX"

-- continue comparing second dimension
-- the char c and c' are the same, so search for this subtree s' must
-- continue, but all further trees (n') are cut off
| c == c' = branch c' (lookupLE' k1 s') empty

lookupLEY :: Key -> StringMap a -> StringMap a
lookupLEY k0 = look k0 . norm
where
look [] (Val v' _t') = (Val v' empty)
look [] _t = empty
look k@(c : k1) (Branch c' s' n')
| c < c' = empty
| c == c' = branch c' (lookupLEX k1 s') empty
| otherwise = branch c' (lookupLE2 k1 s') (lookupLEY k n')
-- the char c' is smaller than the 1. char in the search key
-- so concerning this dimension, the elements must be included into the
-- result, but the other dimension must be checked (with lookupLE2)
-- all remaining values in n' have also to be taken, therfore the rec. call with n'
| otherwise = branch c' (lookupLE2 k1 s') (lookupLE' k n')

-- the empty tree remains empty
look _ Empty = empty
look k (Val v' t') = val v' (lookupLEY k t')
look _ _ = normError "lookupLEY"

-- ordering of one dim is o.k., check only 2. dim (every second char)
-- the values v' are included into the result, and the lookup process
-- continues with the subtree t'
-- this case will not occur, when the 2-dim keys are normalized and all
-- are of the same length, in that case the values occur only on leaf nodes not in inner nodes
look k (Val v' t') = val v' (lookupLE' k t')

-- the impossible has happend
look _ _ = normError "lookupLE'"

lookupLE2 :: Key -> StringMap a -> StringMap a
lookupLE2 k0 = look k0 . norm
where

-- if key is empty and node stores a value
-- take this value, it's the upper limit,
-- all other values in the subtree _t' are larger and thrown away
look [] (Val v' _t') = (Val v' empty)

-- key is empty, all remaining values in _t are larger and thrown away
look [] _t = empty

look k@(c : k1) (Branch c' s' n')
-- tree s' and all others in n' contain values larger than required
-- throw them away
| c < c' = empty

-- the 1. symbols are equal, so lookup has to continue,
-- but only along this dimension, so skip the next key symbol (lookupLE1) and
-- repeat this comparison procedure (call of lookupLE2 in lookupLE1)
-- the rest (n') can be thrown away like in the 1. case
| c == c' = branch c' (lookupLE1 k1 s') empty

-- the 1. symbol in the key is larger, so take this subtree (s')
-- and repeat lookup for the rest (n')
| otherwise = branch c' s' (lookupLE2 k n')

-- the empty tree remains empty
look _ Empty = empty

-- the values v' are included into the result, and the lookup process
-- continues with the subtree t'
-- this case will not occur, when the 2-dim keys are normalized and all
-- are of the same length, in that case the values occur only on leaf nodes not in inner nodes
look k (Val v' t') = val v' (lookupLE2 k t')
look _ _ = normError "lookupLE2"

-- skip char _c and continue with comparison of next char
-- the impossible has happend
look _ _ = normError "lookupLE2"

lookupLE1 :: Key -> StringMap a -> StringMap a
lookupLE1 k0 = look k0 . norm
where
-- like above
look [] (Val v' _t') = (Val v' empty)

-- like above
look [] t = t

-- ignore the 1. symbol of the key, take the subtree s' and
-- continue comparison of every other symbol,
-- do the same for all remaining trees in n'
look k@(_c : k1) (Branch c' s' n')
= branch c' (lookupLE2 k1 s') (lookupLE1 k n')

-- like above
look _ Empty = empty

-- like above
look k (Val v' t') = val v' (lookupLE1 k t')

-- like above
look _ _ = normError "lookupLE1"


Expand Down
2 changes: 1 addition & 1 deletion data-stringmap.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: data-stringmap
version: 1.0.1
version: 1.0.1.1
license: MIT
license-file: LICENSE
author: Uwe Schmidt, Sebastian Philipp
Expand Down
13 changes: 7 additions & 6 deletions tests/Dim2Test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

module Main where

import Data.List (nub, sort)
import Data.List (sort)
import qualified Data.StringMap as M
import qualified Data.StringMap.Dim2Search as D2

Expand Down Expand Up @@ -59,13 +59,13 @@ instance Ord Point' where
| x1 < x2 && y1 < y2 = True
| otherwise = False

leX [] [] = True -- the result for the Y dimension is already known
leX [] [] = True -- the result for the Y dimension is already known
leX (x1 : y1 : ds1) (x2 : y2 : ds2)
| x1 == x2 = ds1 `leX` ds2
| x1 < x2 = True
| otherwise = False

leY [] [] = True -- the result for the X dimension is already known
leY [] [] = True -- the result for the X dimension is already known
leY (x1 : y1 : ds1) (x2 : y2 : ds2)
| y1 == y2 = ds1 `leY` ds2
| y1 < y2 = True
Expand All @@ -76,7 +76,7 @@ instance Ord Point' where
toPoint' :: Point -> Point'
toPoint' (P p) = P' $ intPairToKey base len p
where
base = 2 -- or 10
base = 2 -- or 10
len = 10 -- or 3 (or something else)

fromPoint' :: Point' -> Point
Expand Down Expand Up @@ -105,7 +105,7 @@ test1 = null $ propTest 20

newtype Point = P {unP :: (Int, Int) } deriving (Eq)
newtype PointSet = PS {unPS :: [Point] } deriving (Eq)
-- assuming only smart constructor mkPS is used
-- assuming only smart constructor mkPS is used

newtype Point' = P' {unP' :: String } deriving (Eq)
newtype PointSet' = PS' {unPS' :: M.StringMap ()} deriving (Eq)
Expand Down Expand Up @@ -148,7 +148,7 @@ instance Lookup Point' PointSet' where
-- the bijection between Point and Point'

pToP' :: Point -> Point'
pToP' = P' . intPairToKey 10 5 . unP -- base 10, 5 digits
pToP' = P' . intPairToKey 10 5 . unP -- base 10, 5 digits

p'ToP :: Point' -> Point
p'ToP (P' p') = P (intFromKey xs, intFromKey ys)
Expand Down Expand Up @@ -264,3 +264,4 @@ main :: IO ()
main = print theTest >> return ()

-- ----------------------------------------

0 comments on commit 2dbe37a

Please sign in to comment.