|
| 1 | +use std::ops::ControlFlow; |
| 2 | + |
| 3 | +use clippy_utils::diagnostics::span_lint_hir_and_then; |
| 4 | +use clippy_utils::ty::is_type_lang_item; |
| 5 | +use clippy_utils::visitors::for_each_expr; |
| 6 | +use clippy_utils::{eq_expr_value, higher, path_to_local_id}; |
| 7 | +use rustc_errors::{Applicability, MultiSpan}; |
| 8 | +use rustc_hir::{Expr, ExprKind, LangItem, Node, Pat, PatKind}; |
| 9 | +use rustc_lint::LateContext; |
| 10 | +use rustc_middle::ty::Ty; |
| 11 | +use rustc_span::{Span, sym}; |
| 12 | + |
| 13 | +use super::CHARS_ENUMERATE_FOR_BYTE_INDICES; |
| 14 | + |
/// The list of `str` methods we want to lint that have a `usize` argument representing a byte
/// index.
///
/// Note: `String` also has methods that work with byte indices, but they all take `&mut self`
/// and aren't worth considering since the user couldn't have called them while the chars
/// iterator is live anyway.
const BYTE_INDEX_METHODS: &[&str] = &[
    "is_char_boundary",
    "floor_char_boundary",
    "ceil_char_boundary",
    "get",
    "index",
    "index_mut",
    "get_mut",
    "get_unchecked",
    "get_unchecked_mut",
    "slice_unchecked",
    "slice_mut_unchecked",
    "split_at",
    "split_at_mut",
    "split_at_checked",
    "split_at_mut_checked",
];
| 36 | + |
| 37 | +const CONTINUE: ControlFlow<!, ()> = ControlFlow::Continue(()); |
| 38 | + |
| 39 | +pub(super) fn check<'tcx>(cx: &LateContext<'tcx>, pat: &Pat<'_>, iterable: &Expr<'_>, body: &'tcx Expr<'tcx>) { |
| 40 | + if let ExprKind::MethodCall(_, enumerate_recv, _, enumerate_span) = iterable.kind |
| 41 | + && let Some(method_id) = cx.typeck_results().type_dependent_def_id(iterable.hir_id) |
| 42 | + && cx.tcx.is_diagnostic_item(sym::enumerate_method, method_id) |
| 43 | + && let ExprKind::MethodCall(_, chars_recv, _, chars_span) = enumerate_recv.kind |
| 44 | + && let Some(method_id) = cx.typeck_results().type_dependent_def_id(enumerate_recv.hir_id) |
| 45 | + && cx.tcx.is_diagnostic_item(sym::str_chars, method_id) |
| 46 | + { |
| 47 | + if let PatKind::Tuple([pat, _], _) = pat.kind |
| 48 | + && let PatKind::Binding(_, binding_id, ..) = pat.kind |
| 49 | + { |
| 50 | + // Destructured iterator element `(idx, _)`, look for uses of the binding |
| 51 | + for_each_expr(cx, body, |expr| { |
| 52 | + if path_to_local_id(expr, binding_id) { |
| 53 | + check_index_usage(cx, expr, pat, enumerate_span, chars_span, chars_recv); |
| 54 | + } |
| 55 | + CONTINUE |
| 56 | + }); |
| 57 | + } else if let PatKind::Binding(_, binding_id, ..) = pat.kind { |
| 58 | + // Bound as a tuple, look for `tup.0` |
| 59 | + for_each_expr(cx, body, |expr| { |
| 60 | + if let ExprKind::Field(e, field) = expr.kind |
| 61 | + && path_to_local_id(e, binding_id) |
| 62 | + && field.name == sym::integer(0) |
| 63 | + { |
| 64 | + check_index_usage(cx, expr, pat, enumerate_span, chars_span, chars_recv); |
| 65 | + } |
| 66 | + CONTINUE |
| 67 | + }); |
| 68 | + } |
| 69 | + } |
| 70 | +} |
| 71 | + |
| 72 | +fn check_index_usage<'tcx>( |
| 73 | + cx: &LateContext<'tcx>, |
| 74 | + expr: &'tcx Expr<'tcx>, |
| 75 | + pat: &Pat<'_>, |
| 76 | + enumerate_span: Span, |
| 77 | + chars_span: Span, |
| 78 | + chars_recv: &Expr<'_>, |
| 79 | +) { |
| 80 | + let Some(parent_expr) = index_consumed_at(cx, expr) else { |
| 81 | + return; |
| 82 | + }; |
| 83 | + |
| 84 | + let is_string_like = |ty: Ty<'_>| ty.is_str() || is_type_lang_item(cx, ty, LangItem::String); |
| 85 | + let message = match parent_expr.kind { |
| 86 | + ExprKind::MethodCall(segment, recv, ..) |
| 87 | + if cx.typeck_results().expr_ty_adjusted(recv).peel_refs().is_str() |
| 88 | + && BYTE_INDEX_METHODS.contains(&segment.ident.name.as_str()) |
| 89 | + && eq_expr_value(cx, chars_recv, recv) => |
| 90 | + { |
| 91 | + "passing a character position to a method that expects a byte index" |
| 92 | + }, |
| 93 | + ExprKind::Index(target, ..) |
| 94 | + if is_string_like(cx.typeck_results().expr_ty_adjusted(target).peel_refs()) |
| 95 | + && eq_expr_value(cx, chars_recv, target) => |
| 96 | + { |
| 97 | + "indexing into a string with a character position where a byte index is expected" |
| 98 | + }, |
| 99 | + _ => return, |
| 100 | + }; |
| 101 | + |
| 102 | + span_lint_hir_and_then( |
| 103 | + cx, |
| 104 | + CHARS_ENUMERATE_FOR_BYTE_INDICES, |
| 105 | + expr.hir_id, |
| 106 | + expr.span, |
| 107 | + message, |
| 108 | + |diag| { |
| 109 | + diag.note("a character can take up more than one byte, so they are not interchangeable") |
| 110 | + .span_note( |
| 111 | + MultiSpan::from_spans(vec![pat.span, enumerate_span]), |
| 112 | + "position comes from the enumerate iterator", |
| 113 | + ) |
| 114 | + .span_suggestion_verbose( |
| 115 | + chars_span.to(enumerate_span), |
| 116 | + "consider using `.char_indices()` instead", |
| 117 | + "char_indices()", |
| 118 | + Applicability::MaybeIncorrect, |
| 119 | + ); |
| 120 | + }, |
| 121 | + ); |
| 122 | +} |
| 123 | + |
| 124 | +/// Returns the expression which ultimately consumes the index. |
| 125 | +/// This is usually the parent expression, i.e. `.split_at(idx)` for `idx`, |
| 126 | +/// but for `.get(..idx)` we want to consider the method call the consuming expression, |
| 127 | +/// which requires skipping past the range expression. |
| 128 | +fn index_consumed_at<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> Option<&'tcx Expr<'tcx>> { |
| 129 | + for (_, node) in cx.tcx.hir().parent_iter(expr.hir_id) { |
| 130 | + match node { |
| 131 | + Node::Expr(expr) if higher::Range::hir(expr).is_some() => {}, |
| 132 | + Node::ExprField(_) => {}, |
| 133 | + Node::Expr(expr) => return Some(expr), |
| 134 | + _ => break, |
| 135 | + } |
| 136 | + } |
| 137 | + None |
| 138 | +} |
0 commit comments