-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Extend is_case_difference
to handle digit-letter confusables
#144691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -262,19 +262,11 @@ pub trait Emitter { | |
format!("help: {msg}") | ||
} else { | ||
// Show the default suggestion text with the substitution | ||
format!( | ||
"help: {}{}: `{}`", | ||
msg, | ||
if self | ||
.source_map() | ||
.is_some_and(|sm| is_case_difference(sm, snippet, part.span,)) | ||
{ | ||
" (notice the capitalization)" | ||
} else { | ||
"" | ||
}, | ||
snippet, | ||
) | ||
let confusion_type = self | ||
.source_map() | ||
.map(|sm| detect_confusion_type(sm, snippet, part.span)) | ||
.unwrap_or(ConfusionType::None); | ||
format!("help: {}{}: `{}`", msg, confusion_type.label_text(), snippet,) | ||
}; | ||
primary_span.push_span_label(part.span, msg); | ||
|
||
|
@@ -2028,12 +2020,12 @@ impl HumanEmitter { | |
buffer.append(0, ": ", Style::HeaderMsg); | ||
|
||
let mut msg = vec![(suggestion.msg.to_owned(), Style::NoStyle)]; | ||
if suggestions | ||
.iter() | ||
.take(MAX_SUGGESTIONS) | ||
.any(|(_, _, _, only_capitalization)| *only_capitalization) | ||
if let Some(confusion_type) = | ||
suggestions.iter().take(MAX_SUGGESTIONS).find_map(|(_, _, _, confusion_type)| { | ||
if confusion_type.has_confusion() { Some(*confusion_type) } else { None } | ||
}) | ||
{ | ||
msg.push((" (notice the capitalization difference)".into(), Style::NoStyle)); | ||
msg.push((confusion_type.label_text().into(), Style::NoStyle)); | ||
} | ||
self.msgs_to_buffer( | ||
&mut buffer, | ||
|
@@ -3528,24 +3520,107 @@ pub fn is_different(sm: &SourceMap, suggested: &str, sp: Span) -> bool { | |
} | ||
|
||
/// Whether the original and suggested code are visually similar enough to warrant extra wording. | ||
pub fn is_case_difference(sm: &SourceMap, suggested: &str, sp: Span) -> bool { | ||
// FIXME: this should probably be extended to also account for `FO0` → `FOO` and unicode. | ||
pub fn detect_confusion_type(sm: &SourceMap, suggested: &str, sp: Span) -> ConfusionType { | ||
let found = match sm.span_to_snippet(sp) { | ||
Ok(snippet) => snippet, | ||
Err(e) => { | ||
warn!(error = ?e, "Invalid span {:?}", sp); | ||
return false; | ||
return ConfusionType::None; | ||
} | ||
}; | ||
let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z']; | ||
// All the chars that differ in capitalization are confusable (above): | ||
let confusable = iter::zip(found.chars(), suggested.chars()) | ||
.filter(|(f, s)| f != s) | ||
.all(|(f, s)| ascii_confusables.contains(&f) || ascii_confusables.contains(&s)); | ||
confusable && found.to_lowercase() == suggested.to_lowercase() | ||
// FIXME: We sometimes suggest the same thing we already have, which is a | ||
// bug, but be defensive against that here. | ||
&& found != suggested | ||
|
||
let mut has_case_confusion = false; | ||
let mut has_digit_letter_confusion = false; | ||
|
||
if found.len() == suggested.len() { | ||
let mut has_case_diff = false; | ||
let mut has_digit_letter_confusable = false; | ||
let mut has_other_diff = false; | ||
|
||
let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z']; | ||
|
||
let digit_letter_confusables = [('0', 'O'), ('1', 'l'), ('5', 'S'), ('8', 'B'), ('9', 'g')]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about 1 and capital i? And lowercase L and capital i? |
||
|
||
for (f, s) in iter::zip(found.chars(), suggested.chars()) { | ||
if f != s { | ||
if f.to_lowercase().to_string() == s.to_lowercase().to_string() { | ||
// Check for case differences (any character that differs only in case) | ||
if ascii_confusables.contains(&f) || ascii_confusables.contains(&s) { | ||
has_case_diff = true; | ||
} else { | ||
has_other_diff = true; | ||
} | ||
} else if digit_letter_confusables.contains(&(f, s)) | ||
|| digit_letter_confusables.contains(&(s, f)) | ||
{ | ||
// Check for digit-letter confusables (like 0 vs O, 1 vs l, etc.) | ||
has_digit_letter_confusable = true; | ||
} else { | ||
has_other_diff = true; | ||
} | ||
} | ||
} | ||
|
||
// If we have case differences and no other differences | ||
if has_case_diff && !has_other_diff && found != suggested { | ||
has_case_confusion = true; | ||
} | ||
if has_digit_letter_confusable && !has_other_diff && found != suggested { | ||
has_digit_letter_confusion = true; | ||
} | ||
} | ||
|
||
match (has_case_confusion, has_digit_letter_confusion) { | ||
(true, true) => ConfusionType::Both, | ||
(true, false) => ConfusionType::Case, | ||
(false, true) => ConfusionType::DigitLetter, | ||
(false, false) => ConfusionType::None, | ||
} | ||
} | ||
|
||
/// Represents the type of confusion detected between original and suggested code. | ||
#[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||
pub enum ConfusionType { | ||
/// No confusion detected | ||
None, | ||
/// Only case differences (e.g., "hello" vs "Hello") | ||
Case, | ||
/// Only digit-letter confusion (e.g., "0" vs "O", "1" vs "l") | ||
DigitLetter, | ||
/// Both case and digit-letter confusion | ||
Both, | ||
} | ||
Comment on lines
+3582
to
+3592
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I define a enum to handle different cases. |
||
|
||
impl ConfusionType { | ||
/// Returns the appropriate label text for this confusion type. | ||
pub fn label_text(&self) -> &'static str { | ||
match self { | ||
ConfusionType::None => "", | ||
ConfusionType::Case => " (notice the capitalization)", | ||
ConfusionType::DigitLetter => " (notice the digit/letter confusion)", | ||
ConfusionType::Both => " (notice the capitalization and digit/letter confusion)", | ||
} | ||
} | ||
|
||
/// Combines two confusion types. If either is `Both`, the result is `Both`. | ||
/// If one is `Case` and the other is `DigitLetter`, the result is `Both`. | ||
/// Otherwise, returns the non-`None` type, or `None` if both are `None`. | ||
pub fn combine(self, other: ConfusionType) -> ConfusionType { | ||
match (self, other) { | ||
(ConfusionType::None, other) => other, | ||
(this, ConfusionType::None) => this, | ||
(ConfusionType::Both, _) | (_, ConfusionType::Both) => ConfusionType::Both, | ||
(ConfusionType::Case, ConfusionType::DigitLetter) | ||
| (ConfusionType::DigitLetter, ConfusionType::Case) => ConfusionType::Both, | ||
(ConfusionType::Case, ConfusionType::Case) => ConfusionType::Case, | ||
(ConfusionType::DigitLetter, ConfusionType::DigitLetter) => ConfusionType::DigitLetter, | ||
} | ||
} | ||
|
||
/// Returns true if this confusion type represents any kind of confusion. | ||
pub fn has_confusion(&self) -> bool { | ||
*self != ConfusionType::None | ||
} | ||
} | ||
|
||
pub(crate) fn should_show_source_code( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ error: keyword `use` is written in the wrong case | |
LL | Use std::ptr::read; | ||
| ^^^ | ||
| | ||
help: write it in the correct case (notice the capitalization difference) | ||
help: write it in the correct case (notice the capitalization) | ||
| | ||
LL - Use std::ptr::read; | ||
LL + use std::ptr::read; | ||
|
@@ -28,7 +28,7 @@ error: keyword `fn` is written in the wrong case | |
LL | async Fn _a() {} | ||
| ^^ | ||
| | ||
help: write it in the correct case (notice the capitalization difference) | ||
help: write it in the correct case (notice the capitalization) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remove |
||
| | ||
LL - async Fn _a() {} | ||
LL + async fn _a() {} | ||
|
@@ -40,7 +40,7 @@ error: keyword `fn` is written in the wrong case | |
LL | Fn _b() {} | ||
| ^^ | ||
| | ||
help: write it in the correct case (notice the capitalization difference) | ||
help: write it in the correct case (notice the capitalization) | ||
| | ||
LL - Fn _b() {} | ||
LL + fn _b() {} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I keep simple and use reverse