Skip to content

Commit 7b667e7

Browse files
committed
Extend is_case_difference to handle digit-letter confusables
Signed-off-by: xizheyin <[email protected]>
1 parent 32e7a4b commit 7b667e7

18 files changed

+290
-58
lines changed

compiler/rustc_errors/src/emitter.rs

Lines changed: 105 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -262,19 +262,11 @@ pub trait Emitter {
262262
format!("help: {msg}")
263263
} else {
264264
// Show the default suggestion text with the substitution
265-
format!(
266-
"help: {}{}: `{}`",
267-
msg,
268-
if self
269-
.source_map()
270-
.is_some_and(|sm| is_case_difference(sm, snippet, part.span,))
271-
{
272-
" (notice the capitalization)"
273-
} else {
274-
""
275-
},
276-
snippet,
277-
)
265+
let confusion_type = self
266+
.source_map()
267+
.map(|sm| detect_confusion_type(sm, snippet, part.span))
268+
.unwrap_or(ConfusionType::None);
269+
format!("help: {}{}: `{}`", msg, confusion_type.label_text(), snippet,)
278270
};
279271
primary_span.push_span_label(part.span, msg);
280272

@@ -2028,12 +2020,12 @@ impl HumanEmitter {
20282020
buffer.append(0, ": ", Style::HeaderMsg);
20292021

20302022
let mut msg = vec![(suggestion.msg.to_owned(), Style::NoStyle)];
2031-
if suggestions
2032-
.iter()
2033-
.take(MAX_SUGGESTIONS)
2034-
.any(|(_, _, _, only_capitalization)| *only_capitalization)
2023+
if let Some(confusion_type) =
2024+
suggestions.iter().take(MAX_SUGGESTIONS).find_map(|(_, _, _, confusion_type)| {
2025+
if confusion_type.has_confusion() { Some(*confusion_type) } else { None }
2026+
})
20352027
{
2036-
msg.push((" (notice the capitalization difference)".into(), Style::NoStyle));
2028+
msg.push((confusion_type.label_text().into(), Style::NoStyle));
20372029
}
20382030
self.msgs_to_buffer(
20392031
&mut buffer,
@@ -3528,24 +3520,107 @@ pub fn is_different(sm: &SourceMap, suggested: &str, sp: Span) -> bool {
35283520
}
35293521

35303522
/// Whether the original and suggested code are visually similar enough to warrant extra wording.
3531-
pub fn is_case_difference(sm: &SourceMap, suggested: &str, sp: Span) -> bool {
3532-
// FIXME: this should probably be extended to also account for `FO0` → `FOO` and unicode.
3523+
pub fn detect_confusion_type(sm: &SourceMap, suggested: &str, sp: Span) -> ConfusionType {
35333524
let found = match sm.span_to_snippet(sp) {
35343525
Ok(snippet) => snippet,
35353526
Err(e) => {
35363527
warn!(error = ?e, "Invalid span {:?}", sp);
3537-
return false;
3528+
return ConfusionType::None;
35383529
}
35393530
};
3540-
let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z'];
3541-
// All the chars that differ in capitalization are confusable (above):
3542-
let confusable = iter::zip(found.chars(), suggested.chars())
3543-
.filter(|(f, s)| f != s)
3544-
.all(|(f, s)| ascii_confusables.contains(&f) || ascii_confusables.contains(&s));
3545-
confusable && found.to_lowercase() == suggested.to_lowercase()
3546-
// FIXME: We sometimes suggest the same thing we already have, which is a
3547-
// bug, but be defensive against that here.
3548-
&& found != suggested
3531+
3532+
let mut has_case_confusion = false;
3533+
let mut has_digit_letter_confusion = false;
3534+
3535+
if found.len() == suggested.len() {
3536+
let mut has_case_diff = false;
3537+
let mut has_digit_letter_confusable = false;
3538+
let mut has_other_diff = false;
3539+
3540+
let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z'];
3541+
3542+
let digit_letter_confusables = [('0', 'O'), ('1', 'l'), ('5', 'S'), ('8', 'B'), ('9', 'g')];
3543+
3544+
for (f, s) in iter::zip(found.chars(), suggested.chars()) {
3545+
if f != s {
3546+
if f.to_lowercase().to_string() == s.to_lowercase().to_string() {
3547+
// Check for case differences (any character that differs only in case)
3548+
if ascii_confusables.contains(&f) || ascii_confusables.contains(&s) {
3549+
has_case_diff = true;
3550+
} else {
3551+
has_other_diff = true;
3552+
}
3553+
} else if digit_letter_confusables.contains(&(f, s))
3554+
|| digit_letter_confusables.contains(&(s, f))
3555+
{
3556+
// Check for digit-letter confusables (like 0 vs O, 1 vs l, etc.)
3557+
has_digit_letter_confusable = true;
3558+
} else {
3559+
has_other_diff = true;
3560+
}
3561+
}
3562+
}
3563+
3564+
// If we have case differences and no other differences
3565+
if has_case_diff && !has_other_diff && found != suggested {
3566+
has_case_confusion = true;
3567+
}
3568+
if has_digit_letter_confusable && !has_other_diff && found != suggested {
3569+
has_digit_letter_confusion = true;
3570+
}
3571+
}
3572+
3573+
match (has_case_confusion, has_digit_letter_confusion) {
3574+
(true, true) => ConfusionType::Both,
3575+
(true, false) => ConfusionType::Case,
3576+
(false, true) => ConfusionType::DigitLetter,
3577+
(false, false) => ConfusionType::None,
3578+
}
3579+
}
3580+
3581+
/// Represents the type of confusion detected between original and suggested code.
3582+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3583+
pub enum ConfusionType {
3584+
/// No confusion detected
3585+
None,
3586+
/// Only case differences (e.g., "hello" vs "Hello")
3587+
Case,
3588+
/// Only digit-letter confusion (e.g., "0" vs "O", "1" vs "l")
3589+
DigitLetter,
3590+
/// Both case and digit-letter confusion
3591+
Both,
3592+
}
3593+
3594+
impl ConfusionType {
3595+
/// Returns the appropriate label text for this confusion type.
3596+
pub fn label_text(&self) -> &'static str {
3597+
match self {
3598+
ConfusionType::None => "",
3599+
ConfusionType::Case => " (notice the capitalization)",
3600+
ConfusionType::DigitLetter => " (notice the digit/letter confusion)",
3601+
ConfusionType::Both => " (notice the capitalization and digit/letter confusion)",
3602+
}
3603+
}
3604+
3605+
/// Combines two confusion types. If either is `Both`, the result is `Both`.
3606+
/// If one is `Case` and the other is `DigitLetter`, the result is `Both`.
3607+
/// Otherwise, returns the non-`None` type, or `None` if both are `None`.
3608+
pub fn combine(self, other: ConfusionType) -> ConfusionType {
3609+
match (self, other) {
3610+
(ConfusionType::None, other) => other,
3611+
(this, ConfusionType::None) => this,
3612+
(ConfusionType::Both, _) | (_, ConfusionType::Both) => ConfusionType::Both,
3613+
(ConfusionType::Case, ConfusionType::DigitLetter)
3614+
| (ConfusionType::DigitLetter, ConfusionType::Case) => ConfusionType::Both,
3615+
(ConfusionType::Case, ConfusionType::Case) => ConfusionType::Case,
3616+
(ConfusionType::DigitLetter, ConfusionType::DigitLetter) => ConfusionType::DigitLetter,
3617+
}
3618+
}
3619+
3620+
/// Returns true if this confusion type represents any kind of confusion.
3621+
pub fn has_confusion(&self) -> bool {
3622+
*self != ConfusionType::None
3623+
}
35493624
}
35503625

35513626
pub(crate) fn should_show_source_code(

compiler/rustc_errors/src/lib.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ pub use diagnostic_impls::{
5050
IndicateAnonymousLifetime, SingleLabelManySpans,
5151
};
5252
pub use emitter::ColorConfig;
53-
use emitter::{DynEmitter, Emitter, is_case_difference, is_different};
53+
use emitter::{ConfusionType, DynEmitter, Emitter, detect_confusion_type, is_different};
5454
use rustc_data_structures::AtomicRef;
5555
use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet};
5656
use rustc_data_structures::stable_hasher::StableHasher;
@@ -308,7 +308,7 @@ impl CodeSuggestion {
308308
pub(crate) fn splice_lines(
309309
&self,
310310
sm: &SourceMap,
311-
) -> Vec<(String, Vec<SubstitutionPart>, Vec<Vec<SubstitutionHighlight>>, bool)> {
311+
) -> Vec<(String, Vec<SubstitutionPart>, Vec<Vec<SubstitutionHighlight>>, ConfusionType)> {
312312
// For the `Vec<Vec<SubstitutionHighlight>>` value, the first level of the vector
313313
// corresponds to the output snippet's lines, while the second level corresponds to the
314314
// substrings within that line that should be highlighted.
@@ -414,14 +414,15 @@ impl CodeSuggestion {
414414
// We need to keep track of the difference between the existing code and the added
415415
// or deleted code in order to point at the correct column *after* substitution.
416416
let mut acc = 0;
417-
let mut only_capitalization = false;
417+
let mut confusion_type = ConfusionType::None;
418418
for part in &mut substitution.parts {
419419
// If this is a replacement of, e.g. `"a"` into `"ab"`, adjust the
420420
// suggestion and snippet to look as if we just suggested to add
421421
// `"b"`, which is typically much easier for the user to understand.
422422
part.trim_trivial_replacements(sm);
423423

424-
only_capitalization |= is_case_difference(sm, &part.snippet, part.span);
424+
let part_confusion = detect_confusion_type(sm, &part.snippet, part.span);
425+
confusion_type = confusion_type.combine(part_confusion);
425426
let cur_lo = sm.lookup_char_pos(part.span.lo());
426427
if prev_hi.line == cur_lo.line {
427428
let mut count =
@@ -511,7 +512,7 @@ impl CodeSuggestion {
511512
if highlights.iter().all(|parts| parts.is_empty()) {
512513
None
513514
} else {
514-
Some((buf, substitution.parts, highlights, only_capitalization))
515+
Some((buf, substitution.parts, highlights, confusion_type))
515516
}
516517
})
517518
.collect()

src/tools/clippy/tests/ui/match_str_case_mismatch.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ error: this `match` arm has a differing case than its expression
1818
LL | "~!@#$%^&*()-_=+Foo" => {},
1919
| ^^^^^^^^^^^^^^^^^^^^
2020
|
21-
help: consider changing the case of this arm to respect `to_ascii_lowercase` (notice the capitalization difference)
21+
help: consider changing the case of this arm to respect `to_ascii_lowercase` (notice the capitalization)
2222
|
2323
LL - "~!@#$%^&*()-_=+Foo" => {},
2424
LL + "~!@#$%^&*()-_=+foo" => {},

tests/ui/error-codes/E0423.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ help: use struct literal syntax instead
5454
LL - let f = Foo();
5555
LL + let f = Foo { a: val };
5656
|
57-
help: a function with a similar name exists (notice the capitalization difference)
57+
help: a function with a similar name exists (notice the capitalization)
5858
|
5959
LL - let f = Foo();
6060
LL + let f = foo();

tests/ui/lint/lint-non-uppercase-usages.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ warning: const parameter `foo` should have an upper case name
2929
LL | fn foo<const foo: u32>() {
3030
| ^^^
3131
|
32-
help: convert the identifier to upper case (notice the capitalization difference)
32+
help: convert the identifier to upper case (notice the capitalization)
3333
|
3434
LL - fn foo<const foo: u32>() {
3535
LL + fn foo<const FOO: u32>() {

tests/ui/parser/item-kw-case-mismatch.stderr

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ error: keyword `use` is written in the wrong case
44
LL | Use std::ptr::read;
55
| ^^^
66
|
7-
help: write it in the correct case (notice the capitalization difference)
7+
help: write it in the correct case (notice the capitalization)
88
|
99
LL - Use std::ptr::read;
1010
LL + use std::ptr::read;
@@ -28,7 +28,7 @@ error: keyword `fn` is written in the wrong case
2828
LL | async Fn _a() {}
2929
| ^^
3030
|
31-
help: write it in the correct case (notice the capitalization difference)
31+
help: write it in the correct case (notice the capitalization)
3232
|
3333
LL - async Fn _a() {}
3434
LL + async fn _a() {}
@@ -40,7 +40,7 @@ error: keyword `fn` is written in the wrong case
4040
LL | Fn _b() {}
4141
| ^^
4242
|
43-
help: write it in the correct case (notice the capitalization difference)
43+
help: write it in the correct case (notice the capitalization)
4444
|
4545
LL - Fn _b() {}
4646
LL + fn _b() {}

tests/ui/parser/kw-in-trait-bounds.stderr

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ error: expected identifier, found keyword `fn`
44
LL | fn _f<F: fn(), G>(_: impl fn(), _: &dyn fn())
55
| ^^
66
|
7-
help: use `Fn` to refer to the trait (notice the capitalization difference)
7+
help: use `Fn` to refer to the trait (notice the capitalization)
88
|
99
LL - fn _f<F: fn(), G>(_: impl fn(), _: &dyn fn())
1010
LL + fn _f<F: Fn(), G>(_: impl fn(), _: &dyn fn())
@@ -16,7 +16,7 @@ error: expected identifier, found keyword `fn`
1616
LL | fn _f<F: fn(), G>(_: impl fn(), _: &dyn fn())
1717
| ^^
1818
|
19-
help: use `Fn` to refer to the trait (notice the capitalization difference)
19+
help: use `Fn` to refer to the trait (notice the capitalization)
2020
|
2121
LL - fn _f<F: fn(), G>(_: impl fn(), _: &dyn fn())
2222
LL + fn _f<F: fn(), G>(_: impl Fn(), _: &dyn fn())
@@ -28,7 +28,7 @@ error: expected identifier, found keyword `fn`
2828
LL | fn _f<F: fn(), G>(_: impl fn(), _: &dyn fn())
2929
| ^^
3030
|
31-
help: use `Fn` to refer to the trait (notice the capitalization difference)
31+
help: use `Fn` to refer to the trait (notice the capitalization)
3232
|
3333
LL - fn _f<F: fn(), G>(_: impl fn(), _: &dyn fn())
3434
LL + fn _f<F: fn(), G>(_: impl fn(), _: &dyn Fn())
@@ -40,7 +40,7 @@ error: expected identifier, found keyword `fn`
4040
LL | G: fn(),
4141
| ^^
4242
|
43-
help: use `Fn` to refer to the trait (notice the capitalization difference)
43+
help: use `Fn` to refer to the trait (notice the capitalization)
4444
|
4545
LL - G: fn(),
4646
LL + G: Fn(),

tests/ui/parser/misspelled-keywords/hrdt.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ error: expected one of `!`, `(`, `+`, `::`, `<`, `where`, or `{`, found keyword
44
LL | Where for<'a> F: Fn(&'a (u8, u16)) -> &'a u8,
55
| ^^^ expected one of 7 possible tokens
66
|
7-
help: write keyword `where` in lowercase (notice the capitalization difference)
7+
help: write keyword `where` in lowercase (notice the capitalization)
88
|
99
LL - Where for<'a> F: Fn(&'a (u8, u16)) -> &'a u8,
1010
LL + where for<'a> F: Fn(&'a (u8, u16)) -> &'a u8,

tests/ui/parser/misspelled-keywords/impl-return.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ error: expected one of `!`, `(`, `+`, `::`, `<`, `where`, or `{`, found `Display
44
LL | fn code() -> Impl Display {}
55
| ^^^^^^^ expected one of 7 possible tokens
66
|
7-
help: write keyword `impl` in lowercase (notice the capitalization difference)
7+
help: write keyword `impl` in lowercase (notice the capitalization)
88
|
99
LL - fn code() -> Impl Display {}
1010
LL + fn code() -> impl Display {}

tests/ui/parser/misspelled-keywords/static.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ error: expected one of `!` or `::`, found `a`
44
LL | Static a = 0;
55
| ^ expected one of `!` or `::`
66
|
7-
help: write keyword `static` in lowercase (notice the capitalization difference)
7+
help: write keyword `static` in lowercase (notice the capitalization)
88
|
99
LL - Static a = 0;
1010
LL + static a = 0;

0 commit comments

Comments
 (0)