From c0ce0b0c483ca04f2b6fee78f4c77ba8fe626cd3 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Thu, 22 Sep 2022 13:59:06 -0400
Subject: [PATCH] Enable automatic `noqa` insertion (#256)

---
 src/linter.rs |  37 ++++++++++---
 src/main.rs   |  42 +++++++++++++++
 src/noqa.rs   | 145 +++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 210 insertions(+), 14 deletions(-)

diff --git a/src/linter.rs b/src/linter.rs
index 4b264e0502613..a10b87f3e782a 100644
--- a/src/linter.rs
+++ b/src/linter.rs
@@ -11,23 +11,22 @@ use crate::check_ast::check_ast;
 use crate::check_lines::check_lines;
 use crate::checks::{Check, CheckCode, CheckKind, LintSource};
 use crate::message::Message;
+use crate::noqa::add_noqa;
 use crate::settings::Settings;
 use crate::{cache, fs, noqa};
 
 fn check_path(
     path: &Path,
     contents: &str,
+    tokens: Vec<LexResult>,
     settings: &Settings,
     autofix: &fixer::Mode,
 ) -> Vec<Check> {
     // Aggregate all checks.
     let mut checks: Vec<Check> = vec![];
 
-    // Tokenize once.
-    let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
-
     // Determine the noqa line for every line in the source.
-    let noqa_line_for = noqa::extract_noqa_line_for(&lxr);
+    let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
 
     // Run the AST-based checks.
     if settings
@@ -35,7 +34,7 @@ fn check_path(
         .iter()
         .any(|check_code| matches!(check_code.lint_source(), LintSource::AST))
     {
-        match parser::parse_program_tokens(lxr, "<filename>") {
+        match parser::parse_program_tokens(tokens, "<filename>") {
             Ok(python_ast) => {
                 checks.extend(check_ast(&python_ast, contents, settings, autofix, path))
             }
@@ -73,8 +72,11 @@ pub fn lint_path(
     // Read the file from disk.
     let contents = fs::read_file(path)?;
 
+    // Tokenize once.
+    let tokens: Vec<LexResult> = lexer::make_tokenizer(&contents).collect();
+
     // Generate checks.
-    let mut checks = check_path(path, &contents, settings, autofix);
+    let mut checks = check_path(path, &contents, tokens, settings, autofix);
 
     // Apply autofix.
     if matches!(autofix, fixer::Mode::Apply) {
@@ -96,11 +98,29 @@ pub fn lint_path(
     Ok(messages)
 }
 
+pub fn add_noqa_to_path(path: &Path, settings: &Settings) -> Result<usize> {
+    // Read the file from disk.
+    let contents = fs::read_file(path)?;
+
+    // Tokenize once.
+    let tokens: Vec<LexResult> = lexer::make_tokenizer(&contents).collect();
+
+    // Determine the noqa line for every line in the source.
+    let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
+
+    // Generate checks.
+    let checks = check_path(path, &contents, tokens, settings, &fixer::Mode::None);
+
+    add_noqa(&checks, &contents, &noqa_line_for, path)
+}
+
 #[cfg(test)]
 mod tests {
     use std::path::Path;
 
     use anyhow::Result;
+    use rustpython_parser::lexer;
+    use rustpython_parser::lexer::LexResult;
 
     use crate::autofix::fixer;
     use crate::checks::{Check, CheckCode};
@@ -114,7 +134,10 @@ mod tests {
         autofix: &fixer::Mode,
     ) -> Result<Vec<Check>> {
         let contents = fs::read_file(path)?;
-        Ok(linter::check_path(path, &contents, settings, autofix))
+        let tokens: Vec<LexResult> = lexer::make_tokenizer(&contents).collect();
+        Ok(linter::check_path(
+            path, &contents, tokens, settings, autofix,
+        ))
     }
 
     #[test]
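Note: the linter.rs hunks above thread a pre-lexed token stream into
`check_path` instead of tokenizing inside it. A minimal sketch of the
resulting call pattern (an illustrative fragment, not part of the patch;
`path`, `settings`, and `autofix` are assumed to be in scope):

    // Lex once; the token stream is then shared by every consumer.
    let contents = fs::read_file(path)?;
    let tokens: Vec<LexResult> = lexer::make_tokenizer(&contents).collect();

    // `extract_noqa_line_for` only borrows the tokens...
    let noqa_line_for = noqa::extract_noqa_line_for(&tokens);

    // ...while `check_path` takes them by value, because
    // `parser::parse_program_tokens` consumes the vector. The borrow must
    // therefore happen before ownership is handed off.
    let checks = check_path(path, &contents, tokens, settings, autofix);

This is also why `check_path` receives `tokens: Vec<LexResult>` by value
rather than by reference.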
diff --git a/src/main.rs b/src/main.rs
index 8474722b5d7c7..af9a4cf37a074 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,6 +25,7 @@ use ::ruff::printer::{Printer, SerializationFormat};
 use ::ruff::pyproject;
 use ::ruff::settings::{FilePattern, Settings};
 use ::ruff::tell_user;
+use ruff::linter::add_noqa_to_path;
 
 const CARGO_PKG_NAME: &str = env!("CARGO_PKG_NAME");
 const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -81,6 +82,9 @@ struct Cli {
     /// See ruff's settings.
     #[clap(long, action)]
     show_settings: bool,
+    /// Enable automatic additions of noqa directives to failing lines.
+    #[clap(long, action)]
+    add_noqa: bool,
 }
 
 #[cfg(feature = "update-informer")]
@@ -183,6 +187,35 @@ fn run_once(
     Ok(messages)
 }
 
+fn add_noqa(files: &[PathBuf], settings: &Settings) -> Result<usize> {
+    // Collect all the files to check.
+    let start = Instant::now();
+    let paths: Vec<Result<DirEntry, walkdir::Error>> = files
+        .iter()
+        .flat_map(|path| iter_python_files(path, &settings.exclude, &settings.extend_exclude))
+        .collect();
+    let duration = start.elapsed();
+    debug!("Identified files to lint in: {:?}", duration);
+
+    let start = Instant::now();
+    let modifications: usize = paths
+        .par_iter()
+        .map(|entry| match entry {
+            Ok(entry) => {
+                let path = entry.path();
+                add_noqa_to_path(path, settings)
+            }
+            Err(_) => Ok(0),
+        })
+        .flatten()
+        .sum();
+
+    let duration = start.elapsed();
+    debug!("Added noqa to files in: {:?}", duration);
+
+    Ok(modifications)
+}
+
 fn inner_main() -> Result<ExitCode> {
     let cli = Cli::parse();
 
@@ -254,6 +287,10 @@ fn inner_main() -> Result<ExitCode> {
             println!("Warning: --fix is not enabled in watch mode.");
         }
 
+        if cli.add_noqa {
+            println!("Warning: --add-noqa is not enabled in watch mode.");
+        }
+
         if cli.format != SerializationFormat::Text {
             println!("Warning: --format 'text' is used in watch mode.");
         }
@@ -292,6 +329,11 @@ fn inner_main() -> Result<ExitCode> {
                 Err(e) => return Err(e.into()),
             }
         }
+    } else if cli.add_noqa {
+        let modifications = add_noqa(&cli.files, &settings)?;
+        if modifications > 0 {
+            println!("Added {modifications} noqa directives.");
+        }
     } else {
         let messages = run_once(&cli.files, &settings, !cli.no_cache, cli.fix)?;
         if !cli.quiet {
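Note: `add_noqa` fans the rewrite out over rayon's `par_iter`, and
`.flatten()` over the per-file `Result<usize>` values drops any file whose
rewrite fails. A sequential equivalent of that pipeline, as a sketch (not
part of the patch; `paths` and `settings` assumed in scope), makes the
error semantics explicit:

    // Walk errors and per-file rewrite errors both count as zero
    // modifications; neither aborts the run.
    let modifications: usize = paths
        .iter()
        .filter_map(|entry| entry.as_ref().ok())
        .map(|entry| add_noqa_to_path(entry.path(), settings).unwrap_or(0))
        .sum();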
diff --git a/src/noqa.rs b/src/noqa.rs
index 4ce5629798477..6d1b446048f1a 100644
--- a/src/noqa.rs
+++ b/src/noqa.rs
@@ -1,8 +1,13 @@
 use std::cmp::{max, min};
 
+use crate::checks::{Check, CheckCode};
+use anyhow::Result;
 use once_cell::sync::Lazy;
 use regex::Regex;
 use rustpython_parser::lexer::{LexResult, Tok};
+use std::collections::{BTreeMap, BTreeSet};
+use std::fs;
+use std::path::Path;
 
 static NO_QA_REGEX: Lazy<Regex> = Lazy::new(|| {
     Regex::new(r"(?i)(?P<noqa>\s*# noqa(?::\s?(?P<codes>([A-Z]+[0-9]+(?:[,\s]+)?)+))?)")
@@ -38,7 +43,7 @@ pub fn extract_noqa_directive(line: &str) -> Directive {
 }
 
 pub fn extract_noqa_line_for(lxr: &[LexResult]) -> Vec<usize> {
-    let mut line_map: Vec<usize> = vec![];
+    let mut noqa_line_for: Vec<usize> = vec![];
     let mut last_is_string = false;
     let mut last_seen = usize::MIN;
 
@@ -56,10 +61,10 @@ pub fn extract_noqa_line_for(lxr: &[LexResult]) -> Vec<usize> {
 
         // For now, we only care about preserving noqa directives across multi-line
        // strings.
         if last_is_string {
-            line_map.extend(vec![max_line; (max_line + 1) - min_line]);
+            noqa_line_for.extend(vec![max_line; (max_line + 1) - min_line]);
         } else {
             for i in (min_line - 1)..(max_line) {
-                line_map.push(i + 1);
+                noqa_line_for.push(i + 1);
             }
         }
@@ -69,7 +74,7 @@ pub fn extract_noqa_line_for(lxr: &[LexResult]) -> Vec<usize> {
 
         // Handle empty lines.
         if start.row() > last_seen {
             for i in last_seen..(start.row() - 1) {
-                line_map.push(i + 1);
+                noqa_line_for.push(i + 1);
             }
         }
 
@@ -80,19 +85,91 @@ pub fn extract_noqa_line_for(lxr: &[LexResult]) -> Vec<usize> {
         last_is_string = matches!(tok, Tok::String { .. });
     }
 
-    line_map
+    noqa_line_for
 }
 
+fn add_noqa_inner(
+    checks: &Vec<Check>,
+    contents: &str,
+    noqa_line_for: &[usize],
+) -> Result<(usize, String)> {
+    let lines: Vec<&str> = contents.lines().collect();
+    let mut matches_by_line: BTreeMap<usize, BTreeSet<&CheckCode>> = BTreeMap::new();
+    for lineno in 0..lines.len() {
+        let mut codes: BTreeSet<&CheckCode> = BTreeSet::new();
+        for check in checks {
+            if check.location.row() == lineno + 1 {
+                codes.insert(check.kind.code());
+            }
+        }
+
+        // Grab the noqa (logical) line number for the current (physical) line.
+        // If there are newlines at the end of the file, they won't be represented in
+        // `noqa_line_for`, so fallback to the current line.
+        let noqa_lineno = noqa_line_for
+            .get(lineno)
+            .map(|lineno| lineno - 1)
+            .unwrap_or(lineno);
+
+        if !codes.is_empty() {
+            let matches = matches_by_line
+                .entry(noqa_lineno)
+                .or_insert_with(BTreeSet::new);
+            matches.append(&mut codes);
+        }
+    }
+
+    let mut count: usize = 0;
+    let mut output = "".to_string();
+    for (lineno, line) in lines.iter().enumerate() {
+        match matches_by_line.get(&lineno) {
+            None => {
+                output.push_str(line);
+                output.push('\n');
+            }
+            Some(codes) => {
+                match extract_noqa_directive(line) {
+                    Directive::None => {
+                        output.push_str(line);
+                    }
+                    Directive::All(start) => output.push_str(&line[..start]),
+                    Directive::Codes(start, _) => output.push_str(&line[..start]),
+                };
+                let codes: Vec<&str> = codes.iter().map(|code| code.as_str()).collect();
+                output.push_str(" # noqa: ");
+                output.push_str(&codes.join(", "));
+                output.push('\n');
+                count += 1;
+            }
+        }
+    }
+
+    Ok((count, output))
+}
+
+pub fn add_noqa(
+    checks: &Vec<Check>,
+    contents: &str,
+    noqa_line_for: &[usize],
+    path: &Path,
+) -> Result<usize> {
+    let (count, output) = add_noqa_inner(checks, contents, noqa_line_for)?;
+    fs::write(path, output)?;
+    Ok(count)
+}
+
 #[cfg(test)]
 mod tests {
+    use crate::checks::{Check, CheckKind};
     use anyhow::Result;
+    use rustpython_parser::ast::Location;
    use rustpython_parser::lexer;
     use rustpython_parser::lexer::LexResult;
 
-    use crate::noqa::extract_noqa_line_for;
+    use crate::noqa::{add_noqa_inner, extract_noqa_line_for};
 
     #[test]
-    fn line_map() -> Result<()> {
+    fn extraction() -> Result<()> {
         let lxr: Vec<LexResult> = lexer::make_tokenizer(
             "x = 1
 y = 2
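Note: `noqa_line_for` maps each physical line to the (possibly later) line
on which its noqa directive must sit. A sketch of that contract for a
triple-quoted string (illustrative, not one of the patch's test cases; the
expected vector is inferred from the multi-line-string branch above):

    // Lines 1-3 form one logical line ending in a string, so all three
    // map to the final physical line; a directive for a check reported
    // on line 1 lands after the closing quotes.
    let lxr: Vec<LexResult> =
        lexer::make_tokenizer("x = \"\"\"\nhello\n\"\"\"\ny = 1").collect();
    assert_eq!(extract_noqa_line_for(&lxr), vec![3, 3, 3, 4]);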
contents = "x = 1 # noqa"; + let noqa_line_for = vec![1]; + let (count, output) = add_noqa_inner(&checks, contents, &noqa_line_for)?; + assert_eq!(count, 1); + assert_eq!(output.trim(), "x = 1 # noqa: E741, F841".trim()); + + Ok(()) + } }