Skip to content

Commit c6d8ab0

Browse files
committed
Auto merge of #38357 - arielb1:deterministic-hash, r=michaelwoerister
make deterministic_hash host-architecture-independent `DefPath::deterministic_hash` used to call `std::hash::Hash`, which depends on the current architecture in several ways, which would prevent metadata written on one host architecture from being successfully read on another one. Use a hasher we control instead. Fixes #38177. r? @michaelwoerister
2 parents 8ae9040 + e1d4b8f commit c6d8ab0

File tree

14 files changed

+269
-229
lines changed

14 files changed

+269
-229
lines changed

src/librustc/hir/map/definitions.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
use hir::def_id::{CrateNum, DefId, DefIndex, LOCAL_CRATE};
1212
use rustc_data_structures::fx::FxHashMap;
13+
use rustc_data_structures::stable_hasher::StableHasher;
1314
use std::fmt::Write;
1415
use std::hash::{Hash, Hasher};
15-
use std::collections::hash_map::DefaultHasher;
1616
use syntax::ast;
1717
use syntax::symbol::{Symbol, InternedString};
1818
use ty::TyCtxt;
@@ -131,7 +131,8 @@ impl DefPath {
131131
}
132132

133133
pub fn deterministic_hash(&self, tcx: TyCtxt) -> u64 {
134-
let mut state = DefaultHasher::new();
134+
debug!("deterministic_hash({:?})", self);
135+
let mut state = StableHasher::new();
135136
self.deterministic_hash_to(tcx, &mut state);
136137
state.finish()
137138
}
@@ -377,4 +378,3 @@ impl DefPathData {
377378
self.as_interned_str().to_string()
378379
}
379380
}
380-

src/librustc/ty/util.rs

+16-88
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ use util::nodemap::FxHashMap;
2424
use middle::lang_items;
2525

2626
use rustc_const_math::{ConstInt, ConstIsize, ConstUsize};
27+
use rustc_data_structures::stable_hasher::{StableHasher, StableHasherResult};
2728

2829
use std::cell::RefCell;
2930
use std::cmp;
30-
use std::hash::{Hash, Hasher};
31-
use std::collections::hash_map::DefaultHasher;
31+
use std::hash::Hash;
3232
use std::intrinsics;
3333
use syntax::ast::{self, Name};
3434
use syntax::attr::{self, SignedInt, UnsignedInt};
@@ -349,7 +349,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
349349
/// Creates a hash of the type `Ty` which will be the same no matter what crate
350350
/// context it's calculated within. This is used by the `type_id` intrinsic.
351351
pub fn type_id_hash(self, ty: Ty<'tcx>) -> u64 {
352-
let mut hasher = TypeIdHasher::new(self, DefaultHasher::default());
352+
let mut hasher = TypeIdHasher::new(self);
353353
hasher.visit_ty(ty);
354354
hasher.finish()
355355
}
@@ -395,96 +395,26 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
395395
}
396396
}
397397

398-
/// When hashing a type this ends up affecting properties like symbol names. We
399-
/// want these symbol names to be calculated independent of other factors like
400-
/// what architecture you're compiling *from*.
401-
///
402-
/// The hashing just uses the standard `Hash` trait, but the implementations of
403-
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
404-
/// (e.g. they have 4 or 8 bytes). As a result we want to avoid `usize` and
405-
/// `isize` completely when hashing. To ensure that these don't leak in we use a
406-
/// custom hasher implementation here which inflates the size of these to a `u64`
407-
/// and `i64`.
408-
///
409-
/// The same goes for endianness: We always convert multi-byte integers to little
410-
/// endian before hashing.
411-
#[derive(Debug)]
412-
pub struct ArchIndependentHasher<H> {
413-
inner: H,
414-
}
415-
416-
impl<H> ArchIndependentHasher<H> {
417-
pub fn new(inner: H) -> ArchIndependentHasher<H> {
418-
ArchIndependentHasher { inner: inner }
419-
}
420-
421-
pub fn into_inner(self) -> H {
422-
self.inner
423-
}
398+
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, W> {
399+
tcx: TyCtxt<'a, 'gcx, 'tcx>,
400+
state: StableHasher<W>,
424401
}
425402

426-
impl<H: Hasher> Hasher for ArchIndependentHasher<H> {
427-
fn write(&mut self, bytes: &[u8]) {
428-
self.inner.write(bytes)
429-
}
430-
431-
fn finish(&self) -> u64 {
432-
self.inner.finish()
433-
}
434-
435-
fn write_u8(&mut self, i: u8) {
436-
self.inner.write_u8(i)
437-
}
438-
fn write_u16(&mut self, i: u16) {
439-
self.inner.write_u16(i.to_le())
440-
}
441-
fn write_u32(&mut self, i: u32) {
442-
self.inner.write_u32(i.to_le())
443-
}
444-
fn write_u64(&mut self, i: u64) {
445-
self.inner.write_u64(i.to_le())
446-
}
447-
fn write_usize(&mut self, i: usize) {
448-
self.inner.write_u64((i as u64).to_le())
403+
impl<'a, 'gcx, 'tcx, W> TypeIdHasher<'a, 'gcx, 'tcx, W>
404+
where W: StableHasherResult
405+
{
406+
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>) -> Self {
407+
TypeIdHasher { tcx: tcx, state: StableHasher::new() }
449408
}
450-
fn write_i8(&mut self, i: i8) {
451-
self.inner.write_i8(i)
452-
}
453-
fn write_i16(&mut self, i: i16) {
454-
self.inner.write_i16(i.to_le())
455-
}
456-
fn write_i32(&mut self, i: i32) {
457-
self.inner.write_i32(i.to_le())
458-
}
459-
fn write_i64(&mut self, i: i64) {
460-
self.inner.write_i64(i.to_le())
461-
}
462-
fn write_isize(&mut self, i: isize) {
463-
self.inner.write_i64((i as i64).to_le())
464-
}
465-
}
466-
467-
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> {
468-
tcx: TyCtxt<'a, 'gcx, 'tcx>,
469-
state: ArchIndependentHasher<H>,
470-
}
471409

472-
impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
473-
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self {
474-
TypeIdHasher {
475-
tcx: tcx,
476-
state: ArchIndependentHasher::new(state),
477-
}
410+
pub fn finish(self) -> W {
411+
self.state.finish()
478412
}
479413

480414
pub fn hash<T: Hash>(&mut self, x: T) {
481415
x.hash(&mut self.state);
482416
}
483417

484-
pub fn finish(self) -> u64 {
485-
self.state.finish()
486-
}
487-
488418
fn hash_discriminant_u8<T>(&mut self, x: &T) {
489419
let v = unsafe {
490420
intrinsics::discriminant_value(x)
@@ -504,13 +434,11 @@ impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
504434
pub fn def_path(&mut self, def_path: &ast_map::DefPath) {
505435
def_path.deterministic_hash_to(self.tcx, &mut self.state);
506436
}
507-
508-
pub fn into_inner(self) -> H {
509-
self.state.inner
510-
}
511437
}
512438

513-
impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> {
439+
impl<'a, 'gcx, 'tcx, W> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, W>
440+
where W: StableHasherResult
441+
{
514442
fn visit_ty(&mut self, ty: Ty<'tcx>) -> bool {
515443
// Distinguish between the Ty variants uniformly.
516444
self.hash_discriminant_u8(&ty.sty);

src/librustc_data_structures/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ extern crate serialize as rustc_serialize; // used by deriving
4444
#[cfg(unix)]
4545
extern crate libc;
4646

47+
pub use rustc_serialize::hex::ToHex;
48+
4749
pub mod array_vec;
4850
pub mod accumulate_vec;
4951
pub mod small_vec;
@@ -59,6 +61,7 @@ pub mod indexed_vec;
5961
pub mod obligation_forest;
6062
pub mod snapshot_map;
6163
pub mod snapshot_vec;
64+
pub mod stable_hasher;
6265
pub mod transitive_relation;
6366
pub mod unify;
6467
pub mod fnv;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use std::hash::Hasher;
12+
use std::marker::PhantomData;
13+
use std::mem;
14+
use blake2b::Blake2bHasher;
15+
use rustc_serialize::leb128;
16+
17+
fn write_unsigned_leb128_to_buf(buf: &mut [u8; 16], value: u64) -> usize {
18+
leb128::write_unsigned_leb128_to(value, |i, v| buf[i] = v)
19+
}
20+
21+
fn write_signed_leb128_to_buf(buf: &mut [u8; 16], value: i64) -> usize {
22+
leb128::write_signed_leb128_to(value, |i, v| buf[i] = v)
23+
}
24+
25+
/// When hashing something, this ends up affecting properties like symbol names. We
26+
/// want these symbol names to be calculated independent of other factors like
27+
/// what architecture you're compiling *from*.
28+
///
29+
/// The hashing just uses the standard `Hash` trait, but the implementations of
30+
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
31+
/// (e.g. they have 4 or 8 bytes). As a result we want to avoid `usize` and
32+
/// `isize` completely when hashing.
33+
///
34+
/// To do that, we encode all integers to be hashed with some
35+
/// arch-independent encoding.
36+
///
37+
/// At the moment, we pass i8/u8 straight through and encode
38+
/// all other integers using leb128.
39+
///
40+
/// This hasher currently always uses the stable Blake2b algorithm
41+
/// and allows for variable output lengths through its type
42+
/// parameter.
43+
#[derive(Debug)]
44+
pub struct StableHasher<W> {
45+
state: Blake2bHasher,
46+
bytes_hashed: u64,
47+
width: PhantomData<W>,
48+
}
49+
50+
pub trait StableHasherResult: Sized {
51+
fn finish(hasher: StableHasher<Self>) -> Self;
52+
}
53+
54+
impl<W: StableHasherResult> StableHasher<W> {
55+
pub fn new() -> Self {
56+
StableHasher {
57+
state: Blake2bHasher::new(mem::size_of::<W>(), &[]),
58+
bytes_hashed: 0,
59+
width: PhantomData,
60+
}
61+
}
62+
63+
pub fn finish(self) -> W {
64+
W::finish(self)
65+
}
66+
}
67+
68+
impl StableHasherResult for [u8; 20] {
69+
fn finish(mut hasher: StableHasher<Self>) -> Self {
70+
let mut result: [u8; 20] = [0; 20];
71+
result.copy_from_slice(hasher.state.finalize());
72+
result
73+
}
74+
}
75+
76+
impl StableHasherResult for u64 {
77+
fn finish(mut hasher: StableHasher<Self>) -> Self {
78+
hasher.state.finalize();
79+
hasher.state.finish()
80+
}
81+
}
82+
83+
impl<W> StableHasher<W> {
84+
#[inline]
85+
pub fn finalize(&mut self) -> &[u8] {
86+
self.state.finalize()
87+
}
88+
89+
#[inline]
90+
pub fn bytes_hashed(&self) -> u64 {
91+
self.bytes_hashed
92+
}
93+
94+
#[inline]
95+
fn write_uleb128(&mut self, value: u64) {
96+
let mut buf = [0; 16];
97+
let len = write_unsigned_leb128_to_buf(&mut buf, value);
98+
self.state.write(&buf[..len]);
99+
self.bytes_hashed += len as u64;
100+
}
101+
102+
#[inline]
103+
fn write_ileb128(&mut self, value: i64) {
104+
let mut buf = [0; 16];
105+
let len = write_signed_leb128_to_buf(&mut buf, value);
106+
self.state.write(&buf[..len]);
107+
self.bytes_hashed += len as u64;
108+
}
109+
}
110+
111+
// For the non-u8 integer cases we leb128 encode them first. Because small
112+
// integers dominate, this significantly and cheaply reduces the number of
113+
// bytes hashed, which is good because blake2b is expensive.
114+
impl<W> Hasher for StableHasher<W> {
115+
fn finish(&self) -> u64 {
116+
panic!("use StableHasher::finish instead");
117+
}
118+
119+
#[inline]
120+
fn write(&mut self, bytes: &[u8]) {
121+
self.state.write(bytes);
122+
self.bytes_hashed += bytes.len() as u64;
123+
}
124+
125+
#[inline]
126+
fn write_u8(&mut self, i: u8) {
127+
self.state.write_u8(i);
128+
self.bytes_hashed += 1;
129+
}
130+
131+
#[inline]
132+
fn write_u16(&mut self, i: u16) {
133+
self.write_uleb128(i as u64);
134+
}
135+
136+
#[inline]
137+
fn write_u32(&mut self, i: u32) {
138+
self.write_uleb128(i as u64);
139+
}
140+
141+
#[inline]
142+
fn write_u64(&mut self, i: u64) {
143+
self.write_uleb128(i);
144+
}
145+
146+
#[inline]
147+
fn write_usize(&mut self, i: usize) {
148+
self.write_uleb128(i as u64);
149+
}
150+
151+
#[inline]
152+
fn write_i8(&mut self, i: i8) {
153+
self.state.write_i8(i);
154+
self.bytes_hashed += 1;
155+
}
156+
157+
#[inline]
158+
fn write_i16(&mut self, i: i16) {
159+
self.write_ileb128(i as i64);
160+
}
161+
162+
#[inline]
163+
fn write_i32(&mut self, i: i32) {
164+
self.write_ileb128(i as i64);
165+
}
166+
167+
#[inline]
168+
fn write_i64(&mut self, i: i64) {
169+
self.write_ileb128(i);
170+
}
171+
172+
#[inline]
173+
fn write_isize(&mut self, i: isize) {
174+
self.write_ileb128(i as i64);
175+
}
176+
}

0 commit comments

Comments
 (0)