js: Track some static semantics during parsing
simonwuelker committed Jan 20, 2024
1 parent c95c806 commit c734bfd
Showing 3 changed files with 60 additions and 7 deletions.
crates/js/src/parser/identifiers.rs (30 additions, 2 deletions)
@@ -1,6 +1,6 @@
 //! <https://262.ecma-international.org/14.0/#sec-identifiers>
-use super::{SyntaxError, Tokenizer};
+use super::{tokenizer::GoalSymbol, SyntaxError, Tokenizer};
 
 const RESERVED_WORDS: [&str; 37] = [
     "await",
@@ -47,11 +47,17 @@ pub(crate) fn parse_binding_identifier<const YIELD: bool, const AWAIT: bool>(
     tokenizer: &mut Tokenizer<'_>,
 ) -> Result<String, SyntaxError> {
     let binding_identifier = if let Ok(identifier) = tokenizer.attempt(Identifier::parse) {
+        if tokenizer.is_strict() && matches!(identifier.0.as_str(), "arguments" | "eval") {
+            return Err(tokenizer.syntax_error());
+        }
+
         identifier.0
     } else {
         let identifier_name = tokenizer.consume_identifier()?;
 
-        if matches!(identifier_name.as_str(), "yield" | "await") {
+        if !YIELD && identifier_name.as_str() == "yield" {
             identifier_name
+        } else if !AWAIT && identifier_name.as_str() == "await" {
+            identifier_name
         } else {
             return Err(tokenizer.syntax_error());
@@ -65,6 +71,18 @@ pub(crate) fn parse_binding_identifier<const YIELD: bool, const AWAIT: bool>(
 #[derive(Clone, Debug)]
 pub struct Identifier(String);
 
+const DISALLOWED_IDENTIFIERS_IN_STRICT_MODE: [&str; 9] = [
+    "implements",
+    "interface",
+    "let",
+    "package",
+    "private",
+    "protected",
+    "public",
+    "static",
+    "yield",
+];
+
 impl Identifier {
     /// <https://262.ecma-international.org/14.0/#prod-Identifier>
     pub(crate) fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, SyntaxError> {
@@ -73,6 +91,16 @@ impl Identifier {
             return Err(tokenizer.syntax_error());
         }
 
+        if tokenizer.is_strict()
+            && DISALLOWED_IDENTIFIERS_IN_STRICT_MODE.contains(&identifier_name.as_str())
+        {
+            return Err(tokenizer.syntax_error());
+        }
+
+        if tokenizer.goal_symbol() == GoalSymbol::Module && identifier_name.as_str() == "await" {
+            return Err(tokenizer.syntax_error());
+        }
+
         Ok(Self(identifier_name))
     }
 }
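
For context, here is a hedged sketch of how the new static-semantics checks in identifiers.rs could be exercised from an in-crate test. The test module, its placement, and the import paths are assumptions, and the sloppy-mode expectations depend on the full RESERVED_WORDS list, which is collapsed in this diff; only Tokenizer, GoalSymbol, Identifier, parse_binding_identifier, and set_strict come from the code above.

#[cfg(test)]
mod static_semantics_sketch {
    // Assumed paths: `parser` is the parent module of the files in this commit.
    use super::{parse_binding_identifier, Identifier};
    use crate::parser::tokenizer::{GoalSymbol, Tokenizer};

    #[test]
    fn strict_mode_rejects_disallowed_identifiers() {
        // "let" parses as an Identifier in sloppy mode ...
        let mut sloppy = Tokenizer::new("let", GoalSymbol::Script);
        assert!(Identifier::parse(&mut sloppy).is_ok());

        // ... but it is in DISALLOWED_IDENTIFIERS_IN_STRICT_MODE, so strict mode rejects it.
        let mut strict = Tokenizer::new("let", GoalSymbol::Script);
        strict.set_strict(true);
        assert!(Identifier::parse(&mut strict).is_err());
    }

    #[test]
    fn binding_identifier_checks() {
        // With the [Yield] parameter absent (YIELD = false), `yield` may still be bound.
        let mut tokenizer = Tokenizer::new("yield", GoalSymbol::Script);
        assert_eq!(
            parse_binding_identifier::<false, false>(&mut tokenizer).as_deref(),
            Ok("yield")
        );

        // Binding `eval` becomes a syntax error once the tokenizer is strict.
        let mut strict = Tokenizer::new("eval", GoalSymbol::Script);
        strict.set_strict(true);
        assert!(parse_binding_identifier::<false, false>(&mut strict).is_err());
    }
}
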
crates/js/src/parser/script.rs (1 addition, 0 deletions)
@@ -89,6 +89,7 @@ impl CompileToBytecode for StatementListItem {
 
 impl CompileToBytecode for Statement {
     fn compile(&self, builder: &mut bytecode::Builder) -> Self::Result {
+        _ = builder;
         todo!()
     }
 }
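
The added `_ = builder;` simply discards the otherwise unused `builder` parameter so the `todo!()` stub compiles without an unused-variable warning; it does not change behavior.
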
crates/js/src/parser/tokenizer.rs (29 additions, 5 deletions)
@@ -96,19 +96,43 @@ pub enum Punctuator {
     CurlyBraceClose,
 }
 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum GoalSymbol {
+    Script,
+    Module,
+}
+
 #[derive(Clone, Copy)]
 pub struct Tokenizer<'a> {
     source: ReversibleCharIterator<&'a str>,
+    strict: bool,
+    goal_symbol: GoalSymbol,
 }
 
 impl<'a> Tokenizer<'a> {
     #[must_use]
-    pub fn new(source_text: &'a str) -> Self {
+    pub fn new(source_text: &'a str, goal_symbol: GoalSymbol) -> Self {
         Self {
             source: ReversibleCharIterator::new(source_text),
+            strict: false,
+            goal_symbol,
         }
     }
 
+    #[must_use]
+    pub const fn is_strict(&self) -> bool {
+        self.strict
+    }
+
+    #[must_use]
+    pub const fn goal_symbol(&self) -> GoalSymbol {
+        self.goal_symbol
+    }
+
+    pub fn set_strict(&mut self, strict: bool) {
+        self.strict = strict;
+    }
+
     #[must_use]
     pub fn syntax_error(&self) -> SyntaxError {
         SyntaxError::from_position(self.source.position())
@@ -493,27 +517,27 @@ mod tests {
     #[test]
     fn tokenize_punctuator() {
         assert_eq!(
-            Tokenizer::new("?.").consume_punctuator(),
+            Tokenizer::new("?.", GoalSymbol::Script).consume_punctuator(),
             Ok(Punctuator::OptionalChaining)
         );
 
         assert_ne!(
-            Tokenizer::new("?.5").consume_punctuator(),
+            Tokenizer::new("?.5", GoalSymbol::Script).consume_punctuator(),
             Ok(Punctuator::OptionalChaining)
        );
    }
 
     #[test]
     fn tokenize_string_literal() {
         assert_eq!(
-            Tokenizer::new("\"foobar\"")
+            Tokenizer::new("\"foobar\"", GoalSymbol::Script)
                 .consume_string_literal()
                 .as_deref(),
             Ok("foobar")
         );
 
         assert_eq!(
-            Tokenizer::new("'foobar'")
+            Tokenizer::new("'foobar'", GoalSymbol::Script)
                 .consume_string_literal()
                 .as_deref(),
             Ok("foobar")
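
Since Tokenizer::new now takes a goal symbol, call sites have to choose between GoalSymbol::Script and GoalSymbol::Module explicitly. A hypothetical caller sketch follows; the tokenizer_for_module helper and its import path are not part of this commit, only Tokenizer, GoalSymbol, and set_strict are.

use crate::parser::tokenizer::{GoalSymbol, Tokenizer};

/// Hypothetical helper: build a tokenizer for module source text.
fn tokenizer_for_module(source_text: &str) -> Tokenizer<'_> {
    let mut tokenizer = Tokenizer::new(source_text, GoalSymbol::Module);
    // Module code is always strict mode code per ECMA-262, so a caller
    // would likely flip the strict flag introduced by this commit right away.
    tokenizer.set_strict(true);
    tokenizer
}
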
