diff --git a/alpacc.cabal b/alpacc.cabal
index c0a0089..bd2e71d 100644
--- a/alpacc.cabal
+++ b/alpacc.cabal
@@ -72,6 +72,7 @@ library
     Alpacc.Lexer.DFA
     Alpacc.Lexer.FSA
     Alpacc.Lexer.ParallelLexing
+    Alpacc.Lexer.DFAParallelLexer
     Alpacc.Lexer.Encode

  hs-source-dirs: src
diff --git a/src/Alpacc/CFG.hs b/src/Alpacc/CFG.hs
index bc75ecb..37a6f02 100644
--- a/src/Alpacc/CFG.hs
+++ b/src/Alpacc/CFG.hs
@@ -104,7 +104,7 @@ cfgToDFALexer cfg@(CFG {tRules}) = do
   let x = find (producesEpsilon . snd) t_rule_tuples
   case x of
     Just (t, _) -> Left [i|Error: #{t} may not produce empty strings.|]
-    Nothing -> parallelLexerDFA order_map (0 :: Int) terminal_map
+    Nothing -> Right $ parallelLexerDFA order_map (0 :: Int) terminal_map

 type Parser = Parsec Void Text

diff --git a/src/Alpacc/Generator/Futhark/Lexer.hs b/src/Alpacc/Generator/Futhark/Lexer.hs
index 31b7ca3..9770565 100644
--- a/src/Alpacc/Generator/Futhark/Lexer.hs
+++ b/src/Alpacc/Generator/Futhark/Lexer.hs
@@ -4,13 +4,14 @@ module Alpacc.Generator.Futhark.Lexer

 import Alpacc.Grammar
 import Alpacc.Lexer.DFA
+import Alpacc.Lexer.ParallelLexing
 import Alpacc.Lexer.Encode
+import Alpacc.Lexer.DFAParallelLexer
 import Data.Map (Map)
 import Data.Map qualified as Map
 import Data.String.Interpolate (i)
 import Data.FileEmbed
 import Data.Word (Word8)
-import Alpacc.Lexer.ParallelLexing
 import Data.List qualified as List
 import Data.Either.Extra
 import Alpacc.Types
@@ -64,7 +65,7 @@ generateLexer ::
   IInt ->
   Either String String
 generateLexer lexer terminal_index_map terminal_type = do
-  int_parallel_lexer <- intParallelLexer new_token_map lexer
+  int_parallel_lexer <- intDfaParallelLexer new_token_map lexer
   let ParallelLexerMasks
         { tokenMask = token_mask
         , tokenOffset = token_offset
diff --git a/src/Alpacc/Lexer/DFA.hs b/src/Alpacc/Lexer/DFA.hs
index 6ffb5e2..6fadc6a 100644
--- a/src/Alpacc/Lexer/DFA.hs
+++ b/src/Alpacc/Lexer/DFA.hs
@@ -6,7 +6,7 @@ module Alpacc.Lexer.DFA
   , DFALexer
   , fromRegExToDFA
   , transitions'
-  , ParallelDFALexer (parDFALexer, producesToken, deadState)
+  , ParallelDFALexer (parDFALexer, producesToken)
   , parallelLexerDFA
   , enumerateParLexer
   )
@@ -27,7 +27,6 @@ import Data.Set qualified as Set hiding (Set)
 import Data.Text (Text)
 import Data.Text qualified as Text
 import Data.Foldable
-import Data.Either.Extra
 import Data.Maybe (mapMaybe)

 type DFA t s = FSA Identity Identity t s
@@ -36,7 +35,6 @@ type DFALexer t s k = Lexer Identity Identity t s k
 data ParallelDFALexer t s k = ParallelDFALexer
   { parDFALexer :: DFALexer t s k
   , producesToken :: Set (s, t)
-  , deadState :: s
   } deriving (Eq, Ord, Show)

 transitions' :: (Ord s, Ord t) => DFA t s -> Map (s, t) s
@@ -231,7 +229,6 @@ enumerateParLexer s lexer =
   ParallelDFALexer
     { parDFALexer = fsaLexerSecond toPrime $ parDFALexer lexer
     , producesToken = Set.map (first toPrime) $ producesToken lexer
-    , deadState = toPrime $ deadState lexer
     }
   where
     toPrime = (mapping Map.!)
@@ -316,26 +313,19 @@ tokenProducingTransitions dfa = new_transitions

 toParallelDFALexer ::
   (Enum s, Ord s, Ord t) =>
   DFALexer t s k ->
-  Either String (ParallelDFALexer t s k)
-toParallelDFALexer lexer = do
-  dead_state <-
-    maybeToEither "Error: Can not add a dead state to an empty DFA."
-      maybe_dead_state
-  return $
-    ParallelDFALexer
-      { parDFALexer = new_lexer
-      , producesToken = produces_token
-      , deadState = dead_state
-      }
+  ParallelDFALexer t s k
+toParallelDFALexer lexer =
+  ParallelDFALexer
+    { parDFALexer = new_lexer
+    , producesToken = produces_token
+    }
   where
     dfa = fsa lexer
     token_producing_trans = tokenProducingTransitions dfa
     _transitions = transitions' dfa
     produces_token = Map.keysSet token_producing_trans
     new_trans = Map.union _transitions token_producing_trans
-    (new_dfa, maybe_dead_state) =
-      mkDFATotal
-        $ dfa { transitions = addIdentity new_trans }
+    new_dfa = dfa { transitions = addIdentity new_trans }
     new_lexer = lexer { fsa = new_dfa }

 parallelLexerDFA ::
@@ -343,7 +333,7 @@ parallelLexerDFA ::
   Map k o ->
   s ->
   Map k (RegEx (NonEmpty t)) ->
-  Either String (ParallelDFALexer t s k)
+  ParallelDFALexer t s k
 parallelLexerDFA terminal_to_order start_state regex_map =
   toParallelDFALexer
     $ lexerDFA terminal_to_order start_state regex_map
@@ -358,7 +348,7 @@ lexerDFA terminal_to_order start_state regex_map =
   reenumerateLexer start_state $
     Lexer
       { fsa = dfa
-      , terminalMap = dfa_terminal_map
+      , tokenMap = dfa_token_map
       }
   where
     auxiliary =
@@ -383,7 +373,7 @@ lexerDFA terminal_to_order start_state regex_map =
     minimumSet =
       minimumBy (on compare (terminal_to_order Map.!))

-    dfa_terminal_map =
+    dfa_token_map =
       fmap minimumSet
         . Map.unionsWith Set.union
         . concat
diff --git a/src/Alpacc/Lexer/DFAParallelLexer.hs b/src/Alpacc/Lexer/DFAParallelLexer.hs
new file mode 100644
index 0000000..553986c
--- /dev/null
+++ b/src/Alpacc/Lexer/DFAParallelLexer.hs
@@ -0,0 +1,100 @@
+module Alpacc.Lexer.DFAParallelLexer
+  (dfaParallelLexer
+  ,intDfaParallelLexer)
+where
+
+import Alpacc.Lexer.FSA
+import Alpacc.Lexer.DFA
+import Alpacc.Lexer.Encode
+import Alpacc.Lexer.ParallelLexing
+import Data.Map.Strict (Map)
+import Data.Map.Strict qualified as Map hiding (Map)
+import Data.Set qualified as Set hiding (Set)
+import Data.Maybe
+import Data.Array.Base (IArray (..))
+import Data.Array.Unboxed (UArray)
+import Data.Array.Unboxed qualified as UArray hiding (UArray)
+
+errorMessage :: String
+errorMessage = "Error: Happened during parallel lexing generation, contact a maintainer."
+
+data Endomorphism =
+  Endomorphism
+  {-# UNPACK #-} !(UArray S S)
+  {-# UNPACK #-} !(UArray S Bool) deriving (Eq, Ord, Show)
+
+type S = Int
+
+deadState :: S
+deadState = 0
+
+initState :: S
+initState = 1
+
+endomorphismTable ::
+  (Enum t, Bounded t, Ord t, Ord k) =>
+  ParallelDFALexer t S k ->
+  Map t Endomorphism
+endomorphismTable lexer =
+  Map.fromList
+    $ map statesFromChar
+    $ Set.toList _alphabet
+  where
+    dfa = fsa $ parDFALexer lexer
+    produces_set = producesToken lexer
+    _transitions = transitions' dfa
+    _states = states dfa
+    _alphabet = alphabet dfa
+    first_index = deadState
+    last_index = maximum _states
+    tableLookUp key =
+      fromMaybe deadState
+        $ Map.lookup key _transitions
+    statesFromChar t = (t, Endomorphism ss bs)
+      where
+        ss =
+          UArray.array (first_index, last_index)
+            $ zip [first_index..last_index]
+            $ map (tableLookUp . (, t))
+            $ Set.toAscList _states
+        bs =
+          UArray.array (first_index, last_index)
+            $ zip [first_index..last_index]
+            $ map ((`Set.member` produces_set) . (, t))
+            $ Set.toAscList _states
+
+instance Semigroup Endomorphism where
+  (Endomorphism a a') <> (Endomorphism b b') = Endomorphism c c'
+    where
+      c = UArray.array (0, numElements a - 1)
+        $ map auxiliary [0..(numElements a - 1)]
+      c' = UArray.array (0, numElements a' - 1)
+        $ map auxiliary' [0..(numElements a' - 1)]
+      auxiliary i = (i, b UArray.! (a UArray.! i))
+      auxiliary' i = (i, b' UArray.! (a UArray.! i))
+
+instance Sim Endomorphism S where
+  toState s endo =
+    if a <= s && s <= b then
+      (producing UArray.! s, endo' UArray.! s)
+    else
+      error errorMessage
+    where
+      (Endomorphism endo' producing) = endo
+      (a, b) = bounds endo'
+
+dfaParallelLexer ::
+  (Ord t, Ord s, Enum t, Bounded t, Ord k) =>
+  ParallelDFALexer t s k ->
+  ParallelLexer t (EndoData k)
+dfaParallelLexer lexer' = parallelLexer lexer endo_table
+  where
+    lexer = enumerateParLexer initState lexer'
+    endo_table = endomorphismTable lexer
+
+intDfaParallelLexer ::
+  (Ord t, Ord s, Enum t, Bounded t, Ord k) =>
+  Map (Maybe k) Int ->
+  ParallelDFALexer t s k ->
+  Either String (IntParallelLexer t)
+intDfaParallelLexer m = intParallelLexer m . dfaParallelLexer
diff --git a/src/Alpacc/Lexer/Encode.hs b/src/Alpacc/Lexer/Encode.hs
index 77d0f16..b00e891 100644
--- a/src/Alpacc/Lexer/Encode.hs
+++ b/src/Alpacc/Lexer/Encode.hs
@@ -6,7 +6,6 @@ module Alpacc.Lexer.Encode
   )
 where

-import Alpacc.Lexer.DFA
 import Alpacc.Types
 import Alpacc.Lexer.ParallelLexing
 import Data.Map.Strict (Map)
@@ -144,12 +143,11 @@ data IntParallelLexer t =

 intParallelLexer ::
-  (Enum t, Bounded t, Ord t, Ord s, Ord k) =>
+  (Ord t, Ord k) =>
   Map (Maybe k) Int ->
-  ParallelDFALexer t s k ->
+  ParallelLexer t (EndoData k) ->
   Either String (IntParallelLexer t)
-intParallelLexer to_int lexer = do
-  let parallel_lexer = parallelLexer lexer
+intParallelLexer to_int parallel_lexer = do
   ms <- lexerMasks parallel_lexer
   let encode = encodeEndoData ms to_int
   let new_compositions = fmap encode <$> compositions parallel_lexer
diff --git a/src/Alpacc/Lexer/FSA.hs b/src/Alpacc/Lexer/FSA.hs
index 3e5c3d8..5a4bdf6 100644
--- a/src/Alpacc/Lexer/FSA.hs
+++ b/src/Alpacc/Lexer/FSA.hs
@@ -32,7 +32,7 @@ data FSA f f' t s = FSA

 data Lexer f f' t s k = Lexer
   { fsa :: FSA f f' t s,
-    terminalMap :: Map s k
+    tokenMap :: Map s k
   }
   deriving (Ord, Eq, Show)
@@ -81,10 +81,10 @@ instance LexerMap Lexer where
   fsaLexerMap g f fsa_lexer =
     fsa_lexer
       { fsa = fsaMap g f $ fsa fsa_lexer,
-        terminalMap = Map.mapKeys f terminal_map
+        tokenMap = Map.mapKeys f token_map
       }
     where
-      terminal_map = terminalMap fsa_lexer
+      token_map = tokenMap fsa_lexer

 reenumerateFSA ::
   (FSAMappable FSA f f' t s', FSAMappable FSA f f' t s, Enum s) =>
diff --git a/src/Alpacc/Lexer/NFA.hs b/src/Alpacc/Lexer/NFA.hs
index 898bbbd..54ccadd 100644
--- a/src/Alpacc/Lexer/NFA.hs
+++ b/src/Alpacc/Lexer/NFA.hs
@@ -113,7 +113,6 @@ mkNFA regex = do
   let accept_list = toList s'
   mapM_ (\_s -> mkNFA' s _s regex) accept_list

-
 fromRegExToNFA :: (Ord s, Ord t, Enum s) => s -> RegEx (NonEmpty t) -> NFA t s
 fromRegExToNFA start_state regex = execState (mkNFA regex) init_nfa
   where
diff --git a/src/Alpacc/Lexer/ParallelLexing.hs b/src/Alpacc/Lexer/ParallelLexing.hs
index 28868a6..87cb1e9 100644
--- a/src/Alpacc/Lexer/ParallelLexing.hs
+++ b/src/Alpacc/Lexer/ParallelLexing.hs
@@ -3,6 +3,7 @@ module Alpacc.Lexer.ParallelLexing
   , parallelLexer
   , EndoData (..)
   , listCompositions
+  , Sim (..)
   )
 where

@@ -18,17 +19,12 @@ import Data.IntSet qualified as IntSet hiding (IntSet)
 import Data.Set (Set)
 import Data.Set qualified as Set hiding (Set)
 import Data.Maybe
-import Data.Array.Base (IArray (..))
-import Data.Array.Unboxed (UArray)
 import Data.Array.Unboxed qualified as UArray hiding (UArray)
 import Data.Tuple (swap)
 import Control.Monad.State.Strict
 import Data.List qualified as List
+import Data.Array.Unboxed (UArray)

-errorMessage :: String
-errorMessage = "Error: Happend during Parallel Lexing genration, contact a maintainer."
-
-type S = Int
 type E = Int

 identityE :: E
@@ -58,11 +54,6 @@ deadEndo =
 initE :: E
 initE = 2

-data Endomorphism =
-  Endomorphism
-  {-# UNPACK #-} !(UArray S S)
-  {-# UNPACK #-} !(UArray S Bool) deriving (Eq, Ord, Show)
-
 data EndoData k =
   EndoData
   { endo :: !E
@@ -71,13 +62,16 @@ data EndoData k =
   , isProducing :: !Bool
   } deriving (Show, Eq, Ord, Functor)

+class (Ord t, Ord s, Semigroup t) => Sim t s where
+  toState :: s -> t -> (Bool, s)
+
 toEndoData ::
-  (Ord k) =>
-  S ->
-  Map S k ->
-  Set S ->
+  (Ord s, Sim t s) =>
+  s ->
+  Map s k ->
+  Set s ->
   E ->
-  Endomorphism ->
+  t ->
   EndoData k
 toEndoData initial_state token_map accept_states e endo =
   EndoData
@@ -87,14 +81,7 @@ toEndoData initial_state token_map accept_states e endo =
     , isProducing = is_producing
     }
   where
-    (is_producing, s) =
-      let (Endomorphism endo' producing) = endo
-          (a, b) = bounds endo'
-       in if a <= initial_state && initial_state <= b then
-            (producing UArray.! initial_state
-            ,endo' UArray.! initial_state)
-          else
-            error errorMessage
+    (is_producing, s) = toState initial_state endo

 data ParallelLexer t e =
   ParallelLexer
@@ -107,18 +94,18 @@ data ParallelLexer t e =
   , acceptArray :: !(UArray E Bool)
   } deriving (Show, Eq, Ord)

-data EndoCtx k =
+data EndoCtx t s k =
   EndoCtx
   { ecCompositions :: !(IntMap (IntMap (EndoData k)))
-  , ecEndoMap :: !(Map Endomorphism E)
-  , ecEndoData :: !(IntMap (EndoData k))
-  , ecInverseEndoMap :: !(IntMap Endomorphism)
+  , ecEndoMap :: !(Map t E)
+  , ecInverseEndoMap :: !(IntMap t)
   , ecConnectedMap :: !(IntMap IntSet)
   , ecInverseConnectedMap :: !(IntMap IntSet)
   , ecMaxE :: !E
-  , ecInitialState :: !S
-  , ecTokenMap :: !(Map S k)
-  , ecAcceptStates :: !(Set S)
+  , ecInitialState :: !s
+  , ecTokenMap :: !(Map s k)
+  , ecAcceptStates :: !(Set s)
+  , ecEndoData :: !(IntMap (EndoData k))
   } deriving (Show, Eq, Ord)

 lookupComposition ::
@@ -139,10 +126,10 @@ listCompositions parallel_lexer =
     upper = endomorphismsSize parallel_lexer - 1

 endoInsert ::
-  Ord k =>
+  (Sim t s, Ord k) =>
   E ->
-  Endomorphism ->
-  State (EndoCtx k) (EndoData k)
+  t ->
+  State (EndoCtx t s k) (EndoData k)
 endoInsert e endo = do
   inv_map <- gets ecInverseEndoMap
   initial_state <- gets ecInitialState
@@ -163,12 +150,12 @@ endoInsert e endo = do
         , ecEndoData = new_endo_data }
   pure d

-eLookup :: E -> State (EndoCtx k) Endomorphism
+eLookup :: E -> State (EndoCtx t s k) t
 eLookup e = do
   inv_map <- gets ecInverseEndoMap
   pure $ inv_map IntMap.! e

-connectedUpdate :: IntMap IntSet -> IntMap IntSet -> State (EndoCtx k) ()
+connectedUpdate :: IntMap IntSet -> IntMap IntSet -> State (EndoCtx t s k) ()
 connectedUpdate add_map add_inv_map = do
   old_map <- gets ecConnectedMap
   old_inv_map <- gets ecInverseConnectedMap
@@ -180,7 +167,7 @@ connectedUpdate add_map add_inv_map = do
     s { ecConnectedMap = new_map
       , ecInverseConnectedMap = new_inv_map }

-insertComposition :: E -> E -> EndoData k -> State (EndoCtx k) ()
+insertComposition :: E -> E -> EndoData k -> State (EndoCtx t s k) ()
 insertComposition e e' e'' = do
   _map <- gets ecCompositions
   let new_map =
@@ -190,7 +177,7 @@ insertComposition e e' e'' = do
         IntMap.insert e (IntMap.singleton e' e'') _map
   modify $ \s -> s { ecCompositions = new_map }

-preSets :: E -> E -> State (EndoCtx k) (IntMap IntSet, IntMap IntSet)
+preSets :: E -> E -> State (EndoCtx t s k) (IntMap IntSet, IntMap IntSet)
 preSets e'' e = do
   m <- gets ecInverseConnectedMap
   let e_set = m IntMap.! e
@@ -200,7 +187,7 @@ preSets e'' e = do
   let e_m'' = IntMap.fromList $ (, IntSet.singleton e'') <$> IntSet.elems new_set
   pure (e_m'', e_inv_m'')

-postSets :: E -> E -> State (EndoCtx k) (IntMap IntSet, IntMap IntSet)
+postSets :: E -> E -> State (EndoCtx t s k) (IntMap IntSet, IntMap IntSet)
 postSets e'' e' = do
   m <- gets ecConnectedMap
   let e_set' = m IntMap.! e'
@@ -211,23 +198,14 @@ postSets e'' e' = do
   pure (e_m'', e_inv_m'')

 endomorphismLookup ::
-  Endomorphism ->
-  State (EndoCtx k) (Maybe E)
+  Ord t =>
+  t ->
+  State (EndoCtx t s k) (Maybe E)
 endomorphismLookup endomorphism = do
   _map <- gets ecEndoMap
   pure $ Map.lookup endomorphism _map

-compose :: Endomorphism -> Endomorphism -> Endomorphism
-compose (Endomorphism a a') (Endomorphism b b') = Endomorphism c c'
-  where
-    c = UArray.array (0, numElements a - 1)
-      $ map auxiliary [0..(numElements a - 1)]
-    c' = UArray.array (0, numElements a' - 1)
-      $ map auxiliary' [0..(numElements a' - 1)]
-    auxiliary i = (i, b UArray.! (a UArray.! i))
-    auxiliary' i = (i, b' UArray.! (a UArray.! i))
-
-endoNext :: Ord k => Endomorphism -> State (EndoCtx k) (EndoData k)
+endoNext :: (Sim t s, Ord k) => t -> State (EndoCtx t s k) (EndoData k)
 endoNext endo = do
   maybe_e <- endomorphismLookup endo
   case maybe_e of
@@ -241,15 +219,15 @@ endoNext endo = do
   pure d

 endoCompose ::
-  Ord k =>
+  (Sim t s, Ord k) =>
   E ->
   E ->
-  State (EndoCtx k) (IntMap IntSet)
+  State (EndoCtx t s k) (IntMap IntSet)
 endoCompose e e' = do
   _comps <- gets ecCompositions
   case lookupComposition _comps e e' of
     Nothing -> do
-      endo'' <- {-# SCC compose #-} compose <$> eLookup e <*> eLookup e'
+      endo'' <- (<>) <$> eLookup e <*> eLookup e'
       e'' <- endoNext endo''
       insertComposition e e' e''
       (pre_map, pre_inv_map) <- preSets (endo e'') e
@@ -273,9 +251,9 @@ popElement _map =
     Nothing -> Nothing

 endoCompositionsTable ::
-  Ord k =>
+  (Sim t s, Ord k) =>
   IntMap IntSet ->
-  State (EndoCtx k) ()
+  State (EndoCtx t s k) ()
 endoCompositionsTable _map =
   case popElement _map of
     Just ((e, e'), map') -> do
@@ -284,40 +262,7 @@ endoCompositionsTable _map =
       endoCompositionsTable map'''
     Nothing -> pure ()

-endomorphismTable ::
-  (Enum t, Bounded t, Ord t, Ord k) =>
-  ParallelDFALexer t S k ->
-  Map t Endomorphism
-endomorphismTable lexer =
-  Map.fromList
-    $ map statesFromChar
-    $ Set.toList _alphabet
-  where
-    dfa = fsa $ parDFALexer lexer
-    produces_set = producesToken lexer
-    dead_state = deadState lexer
-    _transitions = transitions' dfa
-    _states = states dfa
-    _alphabet = alphabet dfa
-    first_index = minimum _states
-    last_index = maximum _states
-    tableLookUp key =
-      fromMaybe dead_state
-        $ Map.lookup key _transitions
-    statesFromChar t = (t, Endomorphism ss bs)
-      where
-        ss =
-          UArray.array (first_index, last_index)
-            $ zip [first_index..last_index]
-            $ map (tableLookUp . (, t))
-            $ Set.toAscList _states
-        bs =
-          UArray.array (first_index, last_index)
-            $ zip [first_index..last_index]
-            $ map ((`Set.member` produces_set) . (, t))
-            $ Set.toAscList _states
-
-connectedTable :: Ord t => ParallelDFALexer t S k -> Map t (Set t)
+connectedTable :: (Ord s, Ord t) => ParallelDFALexer t s k -> Map t (Set t)
 connectedTable lexer =
   Map.fromList
     $ auxiliary <$> _alphabet
@@ -385,13 +330,12 @@ mapMapSet ::
 mapMapSet f =
   Map.mapKeys f . fmap (Set.map f)

-initEndoData :: Ord k => S -> Map S k -> Set S -> IntMap Endomorphism -> IntMap (EndoData k)
+initEndoData :: (Sim t s, Ord k) => s -> Map s k -> Set s -> IntMap t -> IntMap (EndoData k)
 initEndoData initial_state token_map accept_states =
   IntMap.insert identityE identityEndo
     . IntMap.insert deadE deadEndo
     . IntMap.mapWithKey (toEndoData initial_state token_map accept_states)
-

 initCompositions :: [EndoData k] -> IntMap (IntMap (EndoData k))
 initCompositions ls =
   IntMap.unionsWith IntMap.union
@@ -401,10 +345,10 @@ initEndoCtx ::
-  (Enum t, Bounded t, Ord t, Ord k) =>
-  ParallelDFALexer t S k ->
-  Map t Endomorphism ->
-  EndoCtx k
+  (Ord t', Ord t, Sim t' s, Ord k) =>
+  ParallelDFALexer t s k ->
+  Map t t' ->
+  EndoCtx t' s k
 initEndoCtx lexer endo_table =
   EndoCtx
     { ecCompositions = initCompositions $ IntMap.elems endo_data
@@ -421,7 +365,7 @@ initEndoCtx lexer endo_table =
   where
     endo_data = initEndoData initial_state token_map accept_states e_to_endo
     initial_state = initial $ fsa $ parDFALexer lexer
-    token_map = terminalMap $ parDFALexer lexer
+    token_map = tokenMap $ parDFALexer lexer
     accept_states = accepting $ fsa $ parDFALexer lexer
     e_to_endo = enumerate initE $ List.nub $ Map.elems endo_table
     endo_to_e = invertBijection $ intMapToMap e_to_endo
@@ -440,21 +384,21 @@ addDead =
   (`Map.union` unknown_transitions)
       $ map (,deadEndo) [minBound..maxBound]

 parallelLexer ::
-  (Ord t, Enum t, Bounded t, Ord s, Ord k) =>
+  (Ord t, Enum t, Bounded t, Sim t' s, Ord k) =>
   ParallelDFALexer t s k ->
+  Map t t' ->
   ParallelLexer t (EndoData k)
-parallelLexer lexer' =
+parallelLexer lexer endo_table =
   ParallelLexer
     { compositions = _compositions
     , endomorphisms = transition_to_endo
     , identity = identityEndo
     , endomorphismsSize = IntMap.size endo_data
     , dead = deadEndo
-    , tokenSize = Map.size $ terminalMap $ parDFALexer lexer
+    , tokenSize = Map.size $ tokenMap $ parDFALexer lexer
     , acceptArray = accept_array
     }
   where
-    lexer = enumerateParLexer 0 lexer'
     accept_array = toAcceptArray endo_data
     ctx = initEndoCtx lexer endo_table
     connected_map = ecConnectedMap ctx
@@ -463,7 +407,5 @@ parallelLexer lexer' =
         , ecEndoMap = endo_map
         , ecEndoData = endo_data
         }) = execState (endoCompositionsTable connected_map) ctx
-    endo_table = endomorphismTable lexer
     transition_to_endo =
       addDead $ (endo_data IntMap.!) . (endo_map Map.!) <$> endo_table
-
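Note on the Sim abstraction introduced above: Alpacc.Lexer.ParallelLexing no longer fixes the endomorphism representation to the UArray-backed Endomorphism. It only requires a Semigroup t that can be run from a state s via toState, yielding whether the step produced a token and the state reached; the DFA-specific representation moves to the new Alpacc.Lexer.DFAParallelLexer module. The following is a self-contained toy sketch of that contract, not code from the patch; FunEndo and the transition values are invented for illustration, with composition oriented the same way as the patch's (<>) on Endomorphism (apply the left endomorphism first, take the producing flag from the right one):

{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ImportQualifiedPost #-}

import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map

-- Same shape as the class exported from Alpacc.Lexer.ParallelLexing.
class (Ord t, Ord s, Semigroup t) => Sim t s where
  toState :: s -> t -> (Bool, s)

-- Invented stand-in for the patch's UArray-backed Endomorphism: for each
-- state, the state reached on one input chunk and whether a token was
-- produced along the way.
newtype FunEndo = FunEndo (Map Int (Bool, Int))
  deriving (Eq, Ord, Show)

-- Apply the left endomorphism first, then the right one from wherever the
-- left one landed; the producing flag of the composite comes from the
-- right side, mirroring auxiliary' in the patch.
instance Semigroup FunEndo where
  FunEndo f <> FunEndo g = FunEndo (fmap (\(_, s) -> g Map.! s) f)

instance Sim FunEndo Int where
  toState s (FunEndo f) = f Map.! s

-- Two steps from state 0 collapse into a single lookup, which is the
-- property the precomputed composition table exploits.
main :: IO ()
main = do
  let f = FunEndo (Map.fromList [(0, (False, 1)), (1, (True, 0))])
  print (toState 0 (f <> f))  -- prints (True,0)

Under this contract, parallelLexer only ever composes table entries with (<>) and reads them back through toState, which is why the Endomorphism type, its composition, and endomorphismTable could all move out of the generic module.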