Avoid parsing the same import multiple times

In some scenarios the hierarchy of schemas may be highly interconnected,
and redundant parsing of the same imported files adds a huge performance
overhead to code generation (this change showed a 20x improvement when
compiling a sample set of ~900 interconnected schemas).
This commit is contained in:
Adam Sapek 2015-02-11 17:19:55 -08:00
Родитель 0c4d4ad4fd
Коммит c764d993e0
1 изменённый файл: 16 добавлений и 10 удалений

Просмотреть файл

@ -22,8 +22,11 @@ import Bond.Lexer
import Bond.Schema import Bond.Schema
-- parser state, mutable and global -- parser state, mutable and global
-- list of structs, enums and aliases declared in the current and all imported files data Symbols =
type Symbols = [Declaration] Symbols
{ symbols :: [Declaration] -- list of structs, enums and aliases declared in the current and all imported files
, imports :: [FilePath] -- list of imported files
}
-- parser environment, immutable but contextual -- parser environment, immutable but contextual
data Environment = data Environment =
@ -39,7 +42,7 @@ newEnvironment = Environment [] []
type Parser a = ParsecT String Symbols (ReaderT Environment IO) a type Parser a = ParsecT String Symbols (ReaderT Environment IO) a
parseBond = runParserT bond [] parseBond = runParserT bond $ Symbols [] []
data Bond = Bond [Import] [Namespace] [Declaration] data Bond = Bond [Import] [Namespace] [Declaration]
@ -63,13 +66,16 @@ import_ = do
setPosition pos setPosition pos
return i return i
processImport :: Import -> Parser Bond processImport :: Import -> Parser()
processImport (Import file) = do processImport (Import file) = do
Environment { currentFile = currentFile, resolveImport = resolveImport } <- ask Environment { currentFile = currentFile, resolveImport = resolveImport } <- ask
(path, content) <- liftIO $ resolveImport currentFile file (path, content) <- liftIO $ resolveImport currentFile file
setInput content Symbols { imports = imports } <- getState
setPosition $ initialPos path if path `elem` imports then return () else do
local (\e -> e { currentFile = path }) bond modifyState (\u -> u { imports = path:imports } )
setInput content
setPosition $ initialPos path
void $ local (\e -> e { currentFile = path }) bond
-- parser for struct, enum or type alias declaration/definition -- parser for struct, enum or type alias declaration/definition
declaration :: Parser Declaration declaration :: Parser Declaration
@ -83,7 +89,7 @@ declaration = do
return decl return decl
updateSymbols decl = do updateSymbols decl = do
(previous, symbols) <- partition (duplicateDeclaration decl) <$> getState (previous, symbols) <- partition (duplicateDeclaration decl) <$> symbols <$> getState
case reconcile previous decl of case reconcile previous decl of
(False, _) -> fail $ "The " ++ show decl ++ " has been previously defined as " ++ show (head previous) (False, _) -> fail $ "The " ++ show decl ++ " has been previously defined as " ++ show (head previous)
(True, f) -> modifyState (f symbols) (True, f) -> modifyState (f symbols)
@ -98,14 +104,14 @@ updateSymbols decl = do
-- paths which are unreliable. -- paths which are unreliable.
reconcile [x] y = (x == y, const id) reconcile [x] y = (x == y, const id)
paramsMatch = (==) `on` (map paramConstraint . declParams) paramsMatch = (==) `on` (map paramConstraint . declParams)
add x xs _ = x:xs add x xs u = u { symbols = x:xs }
findSymbol :: QualifiedName -> Parser Declaration findSymbol :: QualifiedName -> Parser Declaration
findSymbol name = doFind <?> "qualified name" findSymbol name = doFind <?> "qualified name"
where where
doFind = do doFind = do
namespaces <- asks currentNamespaces namespaces <- asks currentNamespaces
symbols <- getState Symbols { symbols = symbols } <- getState
case find (delcMatching namespaces name) symbols of case find (delcMatching namespaces name) symbols of
Just decl -> return decl Just decl -> return decl
Nothing -> fail $ "Unknown symbol: " ++ showQualifiedName name Nothing -> fail $ "Unknown symbol: " ++ showQualifiedName name