From c734bfd63583657ae14210cbfab5a9985980dc75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20W=C3=BClker?= Date: Sat, 20 Jan 2024 17:05:46 +0100 Subject: [PATCH] js: Track some static semantics during parsing --- crates/js/src/parser/identifiers.rs | 32 +++++++++++++++++++++++++-- crates/js/src/parser/script.rs | 1 + crates/js/src/parser/tokenizer.rs | 34 ++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/crates/js/src/parser/identifiers.rs b/crates/js/src/parser/identifiers.rs index 5c33bbdb..f8b6d325 100644 --- a/crates/js/src/parser/identifiers.rs +++ b/crates/js/src/parser/identifiers.rs @@ -1,6 +1,6 @@ //! -use super::{SyntaxError, Tokenizer}; +use super::{tokenizer::GoalSymbol, SyntaxError, Tokenizer}; const RESERVED_WORDS: [&str; 37] = [ "await", @@ -47,11 +47,17 @@ pub(crate) fn parse_binding_identifier( tokenizer: &mut Tokenizer<'_>, ) -> Result { let binding_identifier = if let Ok(identifier) = tokenizer.attempt(Identifier::parse) { + if tokenizer.is_strict() && matches!(identifier.0.as_str(), "arguments" | "eval") { + return Err(tokenizer.syntax_error()); + } + identifier.0 } else { let identifier_name = tokenizer.consume_identifier()?; - if matches!(identifier_name.as_str(), "yield" | "await") { + if !YIELD && identifier_name.as_str() == "yield" { + identifier_name + } else if !AWAIT && identifier_name.as_str() == "await" { identifier_name } else { return Err(tokenizer.syntax_error()); @@ -65,6 +71,18 @@ pub(crate) fn parse_binding_identifier( #[derive(Clone, Debug)] pub struct Identifier(String); +const DISALLOWED_IDENTIFIERS_IN_STRICT_MODE: [&str; 9] = [ + "implements", + "interface", + "let", + "package", + "private", + "protected", + "public", + "static", + "yield", +]; + impl Identifier { /// pub(crate) fn parse(tokenizer: &mut Tokenizer<'_>) -> Result { @@ -73,6 +91,16 @@ impl Identifier { return Err(tokenizer.syntax_error()); } + if tokenizer.is_strict() + && DISALLOWED_IDENTIFIERS_IN_STRICT_MODE.contains(&identifier_name.as_str()) + { + return Err(tokenizer.syntax_error()); + } + + if tokenizer.goal_symbol() == GoalSymbol::Module && identifier_name.as_str() == "await" { + return Err(tokenizer.syntax_error()); + } + Ok(Self(identifier_name)) } } diff --git a/crates/js/src/parser/script.rs b/crates/js/src/parser/script.rs index 6b32d86c..20efc69d 100644 --- a/crates/js/src/parser/script.rs +++ b/crates/js/src/parser/script.rs @@ -89,6 +89,7 @@ impl CompileToBytecode for StatementListItem { impl CompileToBytecode for Statement { fn compile(&self, builder: &mut bytecode::Builder) -> Self::Result { + _ = builder; todo!() } } diff --git a/crates/js/src/parser/tokenizer.rs b/crates/js/src/parser/tokenizer.rs index 8d313f4c..851da235 100644 --- a/crates/js/src/parser/tokenizer.rs +++ b/crates/js/src/parser/tokenizer.rs @@ -96,19 +96,43 @@ pub enum Punctuator { CurlyBraceClose, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum GoalSymbol { + Script, + Module, +} + #[derive(Clone, Copy)] pub struct Tokenizer<'a> { source: ReversibleCharIterator<&'a str>, + strict: bool, + goal_symbol: GoalSymbol, } impl<'a> Tokenizer<'a> { #[must_use] - pub fn new(source_text: &'a str) -> Self { + pub fn new(source_text: &'a str, goal_symbol: GoalSymbol) -> Self { Self { source: ReversibleCharIterator::new(source_text), + strict: false, + goal_symbol, } } + #[must_use] + pub const fn is_strict(&self) -> bool { + self.strict + } + + #[must_use] + pub const fn goal_symbol(&self) -> GoalSymbol { + self.goal_symbol + } + + pub fn set_strict(&mut self, strict: bool) { + self.strict = strict; + } + #[must_use] pub fn syntax_error(&self) -> SyntaxError { SyntaxError::from_position(self.source.position()) @@ -493,12 +517,12 @@ mod tests { #[test] fn tokenize_punctuator() { assert_eq!( - Tokenizer::new("?.").consume_punctuator(), + Tokenizer::new("?.", GoalSymbol::Script).consume_punctuator(), Ok(Punctuator::OptionalChaining) ); assert_ne!( - Tokenizer::new("?.5").consume_punctuator(), + Tokenizer::new("?.5", GoalSymbol::Script).consume_punctuator(), Ok(Punctuator::OptionalChaining) ); } @@ -506,14 +530,14 @@ mod tests { #[test] fn tokenize_string_literal() { assert_eq!( - Tokenizer::new("\"foobar\"") + Tokenizer::new("\"foobar\"", GoalSymbol::Script) .consume_string_literal() .as_deref(), Ok("foobar") ); assert_eq!( - Tokenizer::new("'foobar'") + Tokenizer::new("'foobar'", GoalSymbol::Script) .consume_string_literal() .as_deref(), Ok("foobar")