@@ -4,7 +4,6 @@ use crate::{
4
4
Error ,
5
5
} ;
6
6
use core:: {
7
- cell:: UnsafeCell ,
8
7
ffi:: c_void,
9
8
mem:: MaybeUninit ,
10
9
sync:: atomic:: { AtomicI32 , Ordering } ,
@@ -18,159 +17,186 @@ use core::{
18
17
/// - On Haiku and QNX Neutrino they are identical.
19
18
const FILE_PATH : & [ u8 ] = b"/dev/urandom\0 " ;
20
19
21
- // Do not inline this when it is the fallback implementation, but don't mark it
22
- // `#[cold]` because it is hot when it is actually used.
23
- #[ cfg_attr( any( target_os = "android" , target_os = "linux" ) , inline( never) ) ]
20
+ // File descriptor is a "nonnegative integer", so we can safely use negative sentinel values.
21
+ const FD_UNINIT : libc:: c_int = -1 ;
22
+ const FD_ONGOING_INIT : libc:: c_int = -2 ;
23
+
24
+ // In theory `libc::c_int` could be something other than `i32`, but for the
25
+ // targets we currently support that use `use_file`, it is always `i32`.
26
+ // If/when we add support for a target where that isn't the case, we may
27
+ // need to use a different atomic type or make other accommodations. The
28
+ // compiler will let us know if/when that is the case, because the
29
+ // `FD.store(fd)` would fail to compile.
30
+ //
31
+ // The opening of the file, by libc/libstd/etc. may write some unknown
32
+ // state into in-process memory. (Such state may include some sanitizer
33
+ // bookkeeping, or we might be operating in a unikernel-like environment
34
+ // where all the "kernel" file descriptor bookkeeping is done in our
35
+ // process.) `open_or_wait` stores into `FD` using `Ordering::Release` to
36
+ // ensure any such state is synchronized. `getrandom_inner` loads from `FD` with
37
+ // `Ordering::Acquire` to synchronize with it.
38
+ static FD : AtomicI32 = AtomicI32 :: new ( FD_UNINIT ) ;
39
+
24
40
pub fn getrandom_inner ( dest : & mut [ MaybeUninit < u8 > ] ) -> Result < ( ) , Error > {
25
- let fd = get_rng_fd ( ) ?;
41
+ let mut fd = FD . load ( Ordering :: Acquire ) ;
42
+ if fd == FD_UNINIT || fd == FD_ONGOING_INIT {
43
+ fd = open_or_wait ( ) ?;
44
+ }
26
45
sys_fill_exact ( dest, |buf| unsafe {
27
46
libc:: read ( fd, buf. as_mut_ptr ( ) . cast :: < c_void > ( ) , buf. len ( ) )
28
47
} )
29
48
}
30
49
31
- // Returns the file descriptor for the device file used to retrieve random
32
- // bytes. The file will be opened exactly once. All subsequent calls will
33
- // return the same file descriptor. This file descriptor is never closed.
34
- fn get_rng_fd ( ) -> Result < libc:: c_int , Error > {
35
- // std::os::fd::{BorrowedFd, OwnedFd} guarantee that -1 is not a valid file descriptor.
36
- const FD_UNINIT : libc:: c_int = -1 ;
37
-
38
- // In theory `libc::c_int` could be something other than `i32`, but for the
39
- // targets we currently support that use `use_file`, it is always `i32`.
40
- // If/when we add support for a target where that isn't the case, we may
41
- // need to use a different atomic type or make other accomodations. The
42
- // compiler will let us know if/when that is the case, because the
43
- // `FD.store(fd)` would fail to compile.
44
- //
45
- // The opening of the file, by libc/libstd/etc. may write some unknown
46
- // state into in-process memory. (Such state may include some sanitizer
47
- // bookkeeping, or we might be operating in a unikernal-like environment
48
- // where all the "kernel" file descriptor bookkeeping is done in our
49
- // process.) `get_fd_locked` stores into FD using `Ordering::Release` to
50
- // ensure any such state is synchronized. `get_fd` loads from `FD` with
51
- // `Ordering::Acquire` to synchronize with it.
52
- static FD : AtomicI32 = AtomicI32 :: new ( FD_UNINIT ) ;
53
-
54
- fn get_fd ( ) -> Option < libc:: c_int > {
50
+ #[ cold]
51
+ fn open_or_wait ( ) -> Result < libc:: c_int , Error > {
52
+ loop {
55
53
match FD . load ( Ordering :: Acquire ) {
56
- FD_UNINIT => None ,
57
- val => Some ( val) ,
54
+ FD_UNINIT => {
55
+ let res = FD . compare_exchange_weak (
56
+ FD_UNINIT ,
57
+ FD_ONGOING_INIT ,
58
+ Ordering :: AcqRel ,
59
+ Ordering :: Relaxed ,
60
+ ) ;
61
+ if res. is_ok ( ) {
62
+ break ;
63
+ }
64
+ }
65
+ FD_ONGOING_INIT => sync:: wait ( ) ,
66
+ fd => return Ok ( fd) ,
58
67
}
59
68
}
60
69
61
- #[ cold]
62
- fn get_fd_locked ( ) -> Result < libc:: c_int , Error > {
63
- // This mutex is used to prevent multiple threads from opening file
64
- // descriptors concurrently, which could run into the limit on the
65
- // number of open file descriptors. Our goal is to have no more than one
66
- // file descriptor open, ever.
67
- //
68
- // SAFETY: We use the mutex only in this method, and we always unlock it
69
- // before returning, making sure we don't violate the pthread_mutex_t API.
70
- static MUTEX : Mutex = Mutex :: new ( ) ;
71
- unsafe { MUTEX . lock ( ) } ;
72
- let _guard = DropGuard ( || unsafe { MUTEX . unlock ( ) } ) ;
73
-
74
- if let Some ( fd) = get_fd ( ) {
75
- return Ok ( fd) ;
76
- }
77
-
78
- // On Linux, /dev/urandom might return insecure values.
79
- #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
80
- wait_until_rng_ready ( ) ?;
70
+ let res = open_fd ( ) ;
71
+ let val = match res {
72
+ Ok ( fd) => fd,
73
+ Err ( _) => FD_UNINIT ,
74
+ } ;
75
+ FD . store ( val, Ordering :: Release ) ;
81
76
82
- let fd = open_readonly ( FILE_PATH ) ?;
83
- debug_assert ! ( fd != FD_UNINIT ) ;
84
- FD . store ( fd, Ordering :: Release ) ;
77
+ // On non-Linux targets `wait` is just 1 ms sleep,
78
+ // so we don't need any explicit wake up in addition
79
+ // to updating value of `FD`.
80
+ #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
81
+ sync:: wake ( ) ;
85
82
86
- Ok ( fd)
87
- }
88
-
89
- // Use double-checked locking to avoid acquiring the lock if possible.
90
- if let Some ( fd) = get_fd ( ) {
91
- Ok ( fd)
92
- } else {
93
- get_fd_locked ( )
94
- }
83
+ res
95
84
}
96
85
97
- // Polls /dev/random to make sure it is ok to read from /dev/urandom.
98
- //
99
- // Polling avoids draining the estimated entropy from /dev/random;
100
- // short-lived processes reading even a single byte from /dev/random could
101
- // be problematic if they are being executed faster than entropy is being
102
- // collected.
103
- //
104
- // OTOH, reading a byte instead of polling is more compatible with
105
- // sandboxes that disallow `poll()` but which allow reading /dev/random,
106
- // e.g. sandboxes that assume that `poll()` is for network I/O. This way,
107
- // fewer applications will have to insert pre-sandbox-initialization logic.
108
- // Often (blocking) file I/O is not allowed in such early phases of an
109
- // application for performance and/or security reasons.
110
- //
111
- // It is hard to write a sandbox policy to support `libc::poll()` because
112
- // it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with
113
- // newer versions of glibc), and/or (rarely, and probably only on ancient
114
- // systems) `select`. depending on the libc implementation (e.g. glibc vs
115
- // musl), libc version, potentially the kernel version at runtime, and/or
116
- // the target architecture.
117
- //
118
- // BoringSSL and libstd don't try to protect against insecure output from
119
- // `/dev/urandom'; they don't open `/dev/random` at all.
120
- //
121
- // OpenSSL uses `libc::select()` unless the `dev/random` file descriptor
122
- // is too large; if it is too large then it does what we do here.
123
- //
124
- // libsodium uses `libc::poll` similarly to this.
125
- #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
126
- fn wait_until_rng_ready ( ) -> Result < ( ) , Error > {
127
- let fd = open_readonly ( b"/dev/random\0 " ) ?;
128
- let mut pfd = libc:: pollfd {
129
- fd,
130
- events : libc:: POLLIN ,
131
- revents : 0 ,
132
- } ;
133
- let _guard = DropGuard ( || unsafe {
134
- libc:: close ( fd) ;
135
- } ) ;
86
+ fn open_fd ( ) -> Result < libc:: c_int , Error > {
87
+ #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
88
+ sync:: wait_until_rng_ready ( ) ?;
89
+ let fd = open_readonly ( FILE_PATH ) ?;
90
+ debug_assert ! ( fd >= 0 ) ;
91
+ Ok ( fd)
92
+ }
136
93
137
- loop {
138
- // A negative timeout means an infinite timeout.
139
- let res = unsafe { libc:: poll ( & mut pfd, 1 , -1 ) } ;
140
- if res >= 0 {
141
- debug_assert_eq ! ( res, 1 ) ; // We only used one fd, and cannot timeout.
142
- return Ok ( ( ) ) ;
143
- }
144
- let err = crate :: util_libc:: last_os_error ( ) ;
145
- match err. raw_os_error ( ) {
146
- Some ( libc:: EINTR ) | Some ( libc:: EAGAIN ) => continue ,
147
- _ => return Err ( err) ,
94
+ #[ cfg( not( any( target_os = "android" , target_os = "linux" ) ) ) ]
95
+ mod sync {
96
+ /// Sleep 1 ms before checking `FD` again.
97
+ ///
98
+ /// On non-Linux targets the critical section only opens file,
99
+ /// which should not block, so in the unlikely contended case,
100
+ /// we can sleep-wait for the opening operation to finish.
101
+ pub ( super ) fn wait ( ) {
102
+ let rqtp = libc:: timespec {
103
+ tv_sec : 0 ,
104
+ tv_nsec : 1_000_000 ,
105
+ } ;
106
+ let mut rmtp = libc:: timespec {
107
+ tv_sec : 0 ,
108
+ tv_nsec : 0 ,
109
+ } ;
110
+ // We do not care if sleep gets interrupted, so the return value is ignored
111
+ unsafe {
112
+ libc:: nanosleep ( & rqtp, & mut rmtp) ;
148
113
}
149
114
}
150
115
}
151
116
152
- struct Mutex ( UnsafeCell < libc:: pthread_mutex_t > ) ;
153
-
154
- impl Mutex {
155
- const fn new ( ) -> Self {
156
- Self ( UnsafeCell :: new ( libc:: PTHREAD_MUTEX_INITIALIZER ) )
157
- }
158
- unsafe fn lock ( & self ) {
159
- let r = libc:: pthread_mutex_lock ( self . 0 . get ( ) ) ;
160
- debug_assert_eq ! ( r, 0 ) ;
161
- }
162
- unsafe fn unlock ( & self ) {
163
- let r = libc:: pthread_mutex_unlock ( self . 0 . get ( ) ) ;
164
- debug_assert_eq ! ( r, 0 ) ;
117
+ #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
118
+ mod sync {
119
+ use super :: { Error , FD , FD_ONGOING_INIT } ;
120
+ use crate :: util_libc:: { last_os_error, open_readonly} ;
121
+
122
+ /// Wait for atomic `FD` to change value from `FD_ONGOING_INIT` to something else.
123
+ ///
124
+ /// Futex syscall with `FUTEX_WAIT` op puts the current thread to sleep
125
+ /// until futex syscall with `FUTEX_WAKE` op gets executed for `FD`.
126
+ /// It may also return early (signal, or `FD` already changed), so callers must re-check `FD`.
127
+ /// For more information read: https://www.man7.org/linux/man-pages/man2/futex.2.html
128
+ pub ( super ) fn wait ( ) {
129
+ let op = libc:: FUTEX_WAIT | libc:: FUTEX_PRIVATE_FLAG ;
130
+ let timeout_ptr = core:: ptr:: null :: < libc:: timespec > ( ) ;
131
+ let ret = unsafe { libc:: syscall ( libc:: SYS_futex , & FD , op, FD_ONGOING_INIT , timeout_ptr) } ;
132
+ // FUTEX_WAIT returns 0, or fails with EAGAIN (`FD` already changed) or EINTR (signal)
133
+ debug_assert ! ( {
134
+ match ret {
135
+ 0 => true ,
136
+ -1 => match last_os_error( ) . raw_os_error( ) { Some ( libc:: EAGAIN ) | Some ( libc:: EINTR ) => true , _ => false } ,
137
+ _ => false ,
138
+ }
139
+ } ) ;
165
140
}
166
- }
167
-
168
- unsafe impl Sync for Mutex { }
169
141
170
- struct DropGuard < F : FnMut ( ) > ( F ) ;
142
+ /// Wake up all threads which wait for value of atomic `FD` to change.
143
+ pub ( super ) fn wake ( ) {
144
+ let op = libc:: FUTEX_WAKE | libc:: FUTEX_PRIVATE_FLAG ;
145
+ let ret = unsafe { libc:: syscall ( libc:: SYS_futex , & FD , op, libc:: INT_MAX ) } ;
146
+ debug_assert ! ( ret >= 0 ) ;
147
+ }
171
148
172
- impl < F : FnMut ( ) > Drop for DropGuard < F > {
173
- fn drop ( & mut self ) {
174
- self . 0 ( )
149
+ // Polls /dev/random to make sure it is ok to read from /dev/urandom.
150
+ //
151
+ // Polling avoids draining the estimated entropy from /dev/random;
152
+ // short-lived processes reading even a single byte from /dev/random could
153
+ // be problematic if they are being executed faster than entropy is being
154
+ // collected.
155
+ //
156
+ // OTOH, reading a byte instead of polling is more compatible with
157
+ // sandboxes that disallow `poll()` but which allow reading /dev/random,
158
+ // e.g. sandboxes that assume that `poll()` is for network I/O. This way,
159
+ // fewer applications will have to insert pre-sandbox-initialization logic.
160
+ // Often (blocking) file I/O is not allowed in such early phases of an
161
+ // application for performance and/or security reasons.
162
+ //
163
+ // It is hard to write a sandbox policy to support `libc::poll()` because
164
+ // it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with
165
+ // newer versions of glibc), and/or (rarely, and probably only on ancient
166
+ // systems) `select`, depending on the libc implementation (e.g. glibc vs
167
+ // musl), libc version, potentially the kernel version at runtime, and/or
168
+ // the target architecture.
169
+ //
170
+ // BoringSSL and libstd don't try to protect against insecure output from
171
+ // `/dev/urandom`; they don't open `/dev/random` at all.
172
+ //
173
+ // OpenSSL uses `libc::select()` unless the `/dev/random` file descriptor
174
+ // is too large; if it is too large then it does what we do here.
175
+ //
176
+ // libsodium uses `libc::poll` similarly to this.
177
+ pub ( super ) fn wait_until_rng_ready ( ) -> Result < ( ) , Error > {
178
+ let fd = open_readonly ( b"/dev/random\0 " ) ?;
179
+ let mut pfd = libc:: pollfd {
180
+ fd,
181
+ events : libc:: POLLIN ,
182
+ revents : 0 ,
183
+ } ;
184
+
185
+ let res = loop {
186
+ // A negative timeout means an infinite timeout.
187
+ let res = unsafe { libc:: poll ( & mut pfd, 1 , -1 ) } ;
188
+ if res >= 0 {
189
+ // We only used one fd, and cannot timeout.
190
+ debug_assert_eq ! ( res, 1 ) ;
191
+ break Ok ( ( ) ) ;
192
+ }
193
+ let err = last_os_error ( ) ;
194
+ match err. raw_os_error ( ) {
195
+ Some ( libc:: EINTR ) | Some ( libc:: EAGAIN ) => continue ,
196
+ _ => break Err ( err) ,
197
+ }
198
+ } ;
199
+ unsafe { libc:: close ( fd) } ;
200
+ res
175
201
}
176
202
}
0 commit comments