diff --git a/src/swarm-lang/Swarm/Language/Load.hs b/src/swarm-lang/Swarm/Language/Load.hs index 007fcbcfb..9a45ae2c9 100644 --- a/src/swarm-lang/Swarm/Language/Load.hs +++ b/src/swarm-lang/Swarm/Language/Load.hs @@ -25,6 +25,7 @@ import Data.Map (Map) import Data.Map qualified as M import Data.Set (Set) import Data.Set qualified as S +import Data.Text (Text) import Data.Text.Encoding qualified as T import GHC.Generics (Generic) import Network.HTTP.Simple (getResponseBody, httpBS, parseRequest) @@ -40,6 +41,10 @@ import Swarm.Pretty (prettyText) import Swarm.Util (readFileMayT, showT) import Swarm.Util.Graph (findCycle) import Witch (into) +import Swarm.Language.InternCache +import Control.Monad.IO.Class (MonadIO, liftIO) +import System.IO.Unsafe (unsafePerformIO) +import Swarm.Language.InternCache qualified as IC type ResLoc = ImportLoc Import.Resolved @@ -153,6 +158,11 @@ resolveImports :: m (Set (ImportLoc Import.Resolved), Syntax Resolved) resolveImports parent = runAccum S.empty . traverseSyntax pure (resolveImport parent) +-- | Cache imported modules. TODO: fix for module tree +moduleCache :: (MonadIO m) => InternCache m (ImportLoc Import.Resolved) (Module Resolved) +moduleCache = unsafePerformIO $ hoist liftIO <$> IC.newInternCache @IO @(ImportLoc Import.Resolved) @(Module Resolved) +{-# NOINLINE moduleCache #-} + -- | Given a parent directory relative to which any local imports -- should be interpreted, load an import and all its imports, -- transitively. Also return a canonicalized version of the import @@ -170,31 +180,33 @@ resolveImport parent loc = do -- Compute the canonicalized location for the import, and record it canonicalLoc <- resolveImportLoc (unresolveImportDir parent loc) add $ S.singleton canonicalLoc + + sendIO (IC.lookupCached moduleCache canonicalLoc) >>= \case + Just cachedModule -> do + sendIO $ putStr "C\a" + modify @(SourceMap Resolved) (M.insert canonicalLoc cachedModule) + Nothing -> M.lookup canonicalLoc <$> get @(SourceMap Resolved) >>= \case + Just _ -> pure () -- Already loaded - do nothing + Nothing -> do + -- Record this import loc in the source map using a temporary, empty module, + -- to prevent it from attempting to load itself recursively + modify @(SourceMap Resolved) (M.insert canonicalLoc $ Module Nothing () mempty) - srcMap <- get @(SourceMap Resolved) - resMod <- case M.lookup canonicalLoc srcMap of - Just m -> pure m -- Already loaded - do nothing - Nothing -> do - -- Record this import loc in the source map using a temporary, empty module, - -- to prevent it from attempting to load itself recursively - modify @(SourceMap Resolved) (M.insert canonicalLoc $ Module Nothing () mempty) - - -- Read it from network/disk - mt <- readLoc canonicalLoc + -- Read it from network/disk + mt <- readLoc canonicalLoc - -- Recursively resolve any imports it contains - mres <- traverse (resolveImports (importDir canonicalLoc)) mt - -- sequence :: Maybe (Set a, b) -> (Set a, Maybe b) - let (imps, mt') = sequence mres + -- Recursively resolve any imports it contains + mres <- traverse (resolveImports (importDir canonicalLoc)) mt + -- sequence :: Maybe (Set a, b) -> (Set a, Maybe b) + let (imps, mt') = sequence mres - -- Finally, record the loaded module in the SourceMap. - let m = Module mt' () imps - modify @(SourceMap Resolved) (M.insert canonicalLoc m) + -- Finally, record the loaded module in the SourceMap. + let m = Module mt' () imps + modify @(SourceMap Resolved) (M.insert canonicalLoc m) + sendIO $ IC.insertCached moduleCache canonicalLoc m - pure m - - -- Make sure imports are pure, i.e. contain ONLY defs + imports. - validateImport canonicalLoc resMod + -- Make sure imports are pure, i.e. contain ONLY defs + imports. + validateImport canonicalLoc m pure canonicalLoc @@ -217,6 +229,11 @@ validateImport loc = maybe (pure ()) validate . moduleTerm TConst Noop -> pure () t -> throwError $ ImpureImport loc (prettyText t) +-- | Cache file contents for import location. +fileCache :: (MonadIO m) => InternCache m (ImportLoc Import.Resolved) Text +fileCache = unsafePerformIO $ hoist liftIO <$> IC.newInternCache @IO @(ImportLoc Import.Resolved) @Text +{-# NOINLINE fileCache #-} + -- | Try to read and parse a term from a specific import location, -- either over the network or on disk. readLoc :: @@ -224,26 +241,31 @@ readLoc :: ImportLoc Import.Resolved -> m (Maybe (Syntax Raw)) readLoc loc = do - let path = locToFilePath loc - badImport :: Has (Throw SystemFailure) sig m => LoadingFailure -> m a - badImport = throwError . AssetNotLoaded (Data Script) path - withBadImport :: Has (Throw SystemFailure) sig m => (e -> LoadingFailure) -> Either e a -> m a - withBadImport f = either (badImport . f) pure - -- Try to read the file from network/disk, depending on the anchor - src <- case importAnchor loc of - -- Read from network - Web_ {} -> do - -- Try to parse the URL - req <- parseRequest (into @String path) & withBadImport (BadURL . showT) - -- Send HTTP request - resp <- sendIO $ httpBS req - -- Try to decode the response - T.decodeUtf8' (getResponseBody resp) & withBadImport CanNotDecodeUTF8 - - -- Read from disk - _ -> sendIO (readFileMayT path) >>= maybe (badImport (DoesNotExist File)) pure - + src <- sendIO (IC.lookupCached fileCache loc) >>= \case + Just cachedSrc -> pure cachedSrc + Nothing -> do + s <- case importAnchor loc of + Web_ {} -> readFromNet + _ -> readFromDisk + _ <- sendIO $ IC.insertCached fileCache loc s + pure s + -- Finally, try to parse the contents readTerm' (defaultParserConfig & importLoc ?~ loc) src & withBadImport (SystemFailure . CanNotParseMegaparsec) + + where + path = locToFilePath loc + badImport :: Has (Throw SystemFailure) sig m => LoadingFailure -> m a + badImport = throwError . AssetNotLoaded (Data Script) path + withBadImport :: Has (Throw SystemFailure) sig m => (e -> LoadingFailure) -> Either e a -> m a + withBadImport f = either (badImport . f) pure + readFromDisk = sendIO (readFileMayT path) >>= maybe (badImport (DoesNotExist File)) pure + readFromNet = do + -- Try to parse the URL + req <- parseRequest (into @String path) & withBadImport (BadURL . showT) + -- Send HTTP request + resp <- sendIO $ httpBS req + -- Try to decode the response + T.decodeUtf8' (getResponseBody resp) & withBadImport CanNotDecodeUTF8 diff --git a/src/swarm-util/Swarm/Language/InternCache.hs b/src/swarm-util/Swarm/Language/InternCache.hs new file mode 100644 index 000000000..527299a88 --- /dev/null +++ b/src/swarm-util/Swarm/Language/InternCache.hs @@ -0,0 +1,80 @@ +{-# LANGUAGE BlockArguments #-} +module Swarm.Language.InternCache + ( InternCache, + newInternCache, + lookupCached, + insertCached, + intern, + hoist, + ) +where + +import Control.Monad.IO.Class (MonadIO (..)) +import Data.HashMap.Strict (HashMap) +import Data.HashMap.Strict qualified as HashMap +import Data.Hashable (Hashable) +import System.Mem.Weak +import UnliftIO.STM + +-- | Parameterized by the monad in which it operates, the key type, +-- and the value type. +data InternCache m k v = InternCache + { lookupCached :: k -> m (Maybe v), + insertCached :: k -> v -> m () + } + +-- | Creates an 'InternCache' which uses weak references to only +-- keep values in the cache for as long as they're reachable by +-- something else in the app. +-- +-- This means you don't need to worry about a value not being +-- GC'd because it's in the cache. +newInternCache :: + forall m k v. (MonadIO m, Hashable k) + => m (InternCache m k v) +newInternCache = do + var <- newTVarIO mempty + pure $ + InternCache + { lookupCached = lookupCachedImpl var, + insertCached = insertCachedImpl var + } + where + lookupCachedImpl :: TVar (HashMap k (Weak v)) -> k -> m (Maybe v) + lookupCachedImpl var ch = liftIO $ do + cache <- readTVarIO var + case HashMap.lookup ch cache of + Nothing -> pure Nothing + Just weakRef -> do + deRefWeak weakRef + + insertCachedImpl :: TVar (HashMap k (Weak v)) -> k -> v -> m () + insertCachedImpl var k v = liftIO $ do + wk <- mkWeakPtr v (Just $ removeDeadVal var k) + atomically $ modifyTVar' var (HashMap.insert k wk) + + -- Use this as a finalizer to remove the key from the map + -- when its value gets GC'd + removeDeadVal :: TVar (HashMap k (Weak v)) -> k -> IO () + removeDeadVal var k = liftIO do + atomically $ modifyTVar' var (HashMap.delete k) + +-- | Changing the monad in which the cache operates with a natural transformation. +hoist :: (forall x. m x -> n x) -> InternCache m k v -> InternCache n k v +hoist f (InternCache lookup' insert') = + InternCache + { lookupCached = f . lookup', + insertCached = \k v -> f $ insert' k v + } + +-- | When a value is its own key, this ensures that the given value +-- is in the cache and always returns the single canonical in-memory +-- instance of that value, garbage collecting any others. +intern :: (Hashable k, Monad m) => InternCache m k k -> k -> m k +intern cache k = do + mVal <- lookupCached cache k + case mVal of + Just v -> pure v + Nothing -> do + insertCached cache k k + pure k \ No newline at end of file diff --git a/swarm.cabal b/swarm.cabal index 9972ec85f..b0647e2f3 100644 --- a/swarm.cabal +++ b/swarm.cabal @@ -384,6 +384,9 @@ common transformers common unicode-show build-depends: unicode-show >=0.1 && <0.2 +common unliftio + build-depends: unliftio >=0.2 && <0.3 + common unordered-containers build-depends: unordered-containers >=0.2.14 && <0.3 @@ -942,6 +945,8 @@ library swarm-util template-haskell, text, transformers, + unliftio, + unordered-containers, vector, witch, witherable, @@ -955,6 +960,7 @@ library swarm-util Swarm.Effect.Metric Swarm.Effect.Time Swarm.Failure + Swarm.Language.InternCache Swarm.Language.Phase Swarm.Language.Syntax.Direction Swarm.Language.Syntax.Import