From 67567982307ebfaa5483eced8e9c4b8f29a284a3 Mon Sep 17 00:00:00 2001
From: disarchive <disarchive@protonmail.com>
Date: Mon, 17 Feb 2025 11:42:09 +0100
Subject: [PATCH] Allow recording the raw HTTP/1 headers sent and received

This allows feeding them into a WARC file
(https://en.wikipedia.org/wiki/WARC_(file_format)). The rest of the
request and response is already available, as it is either the
explicitly set request body or the received response body.

The recorded bytes include the final `\r\n\r\n` that separates the
headers from the body (so that it can be distinguished from a bare
`\n\n`).

Needed for a similar request in reqwest:
https://github.com/seanmonstar/reqwest/issues/1229.
---
 src/client/conn/http1.rs | 26 +++++++++++++++++
 src/ext/mod.rs           | 32 +++++++++++++++++++++
 src/proto/h1/conn.rs     | 24 ++++++++++++++++
 src/proto/h1/io.rs       |  4 +++
 src/proto/h1/mod.rs      |  2 ++
 src/proto/h1/role.rs     | 62 +++++++++++++++++++++++++++++++++++++---
 6 files changed, 146 insertions(+), 4 deletions(-)

diff --git a/src/client/conn/http1.rs b/src/client/conn/http1.rs
index ecfe6eb8fb..a13b8a9d0e 100644
--- a/src/client/conn/http1.rs
+++ b/src/client/conn/http1.rs
@@ -112,6 +112,8 @@ pub struct Builder {
     h1_parser_config: ParserConfig,
     h1_writev: Option<bool>,
     h1_title_case_headers: bool,
+    h1_record_raw_request_headers: bool,
+    h1_record_raw_response_headers: bool,
     h1_preserve_header_case: bool,
     h1_max_headers: Option<usize>,
     #[cfg(feature = "ffi")]
@@ -312,6 +314,8 @@ impl Builder {
             h1_read_buf_exact_size: None,
             h1_parser_config: Default::default(),
             h1_title_case_headers: false,
+            h1_record_raw_request_headers: false,
+            h1_record_raw_response_headers: false,
             h1_preserve_header_case: false,
             h1_max_headers: None,
             #[cfg(feature = "ffi")]
@@ -428,6 +432,22 @@ impl Builder {
         self
     }
 
+    /// Set whether to record the raw bytes of the request headers as they are sent.
+    ///
+    /// Default is false. When enabled, responses carry them as a `RawRequestHeaders` extension.
+    pub fn record_raw_request_headers(&mut self, enabled: bool) -> &mut Builder {
+        self.h1_record_raw_request_headers = enabled;
+        self
+    }
+
+    /// Set whether to record the raw bytes of the response headers as they are received.
+    ///
+    /// Default is false. When enabled, responses carry them as a `RawResponseHeaders` extension.
+    pub fn record_raw_response_headers(&mut self, enabled: bool) -> &mut Builder {
+        self.h1_record_raw_response_headers = enabled;
+        self
+    }
+
     /// Set whether to support preserving original header cases.
     ///
     /// Currently, this will record the original cases received, and store them
@@ -539,6 +559,12 @@ impl Builder {
             if opts.h1_title_case_headers {
                 conn.set_title_case_headers();
             }
+            if opts.h1_record_raw_request_headers {
+                conn.set_record_raw_request_headers();
+            }
+            if opts.h1_record_raw_response_headers {
+                conn.set_record_raw_response_headers();
+            }
             if opts.h1_preserve_header_case {
                 conn.set_preserve_header_case();
             }
diff --git a/src/ext/mod.rs b/src/ext/mod.rs
index da28da64a5..eef5b7722c 100644
--- a/src/ext/mod.rs
+++ b/src/ext/mod.rs
@@ -86,6 +86,38 @@ impl fmt::Debug for Protocol {
     }
 }
 
+/// Raw request headers as sent over the connection, copied from the encoded headers buffer.
+#[cfg(all(any(feature = "client", feature = "server"), feature = "http1"))]
+#[derive(Clone, Debug)]
+pub struct RawRequestHeaders(Bytes);
+
+#[cfg(all(any(feature = "client", feature = "server"), feature = "http1"))]
+impl RawRequestHeaders {
+    /// Returns the raw bytes sent for the headers of the request.
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.0
+    }
+    pub(crate) fn from(bytes: Bytes) -> Self {
+        Self(bytes)
+    }
+}
+
+/// Raw response headers as received over the connection, captured before obs-fold rewriting.
+#[cfg(all(any(feature = "client", feature = "server"), feature = "http1"))]
+#[derive(Clone, Debug)]
+pub struct RawResponseHeaders(Bytes);
+
+#[cfg(all(any(feature = "client", feature = "server"), feature = "http1"))]
+impl RawResponseHeaders {
+    /// Returns the raw header bytes received for the response, including the final `\r\n\r\n`.
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.0
+    }
+    pub(crate) fn from(bytes: Bytes) -> Self {
+        Self(bytes)
+    }
+}
+
 /// A map from header names to their original casing as received in an HTTP message.
 ///
 /// If an HTTP/1 response `res` is parsed on a connection whose option
diff --git a/src/proto/h1/conn.rs b/src/proto/h1/conn.rs
index bea8faa221..a7267073b9 100644
--- a/src/proto/h1/conn.rs
+++ b/src/proto/h1/conn.rs
@@ -68,6 +68,9 @@ where
                 date_header: true,
                 #[cfg(feature = "server")]
                 timer: Time::Empty,
+                raw_request_headers: None,
+                record_raw_request_headers: false,
+                record_raw_response_headers: false,
                 preserve_header_case: false,
                 #[cfg(feature = "ffi")]
                 preserve_header_order: false,
@@ -123,6 +126,14 @@ where
         self.state.title_case_headers = true;
     }
 
+    pub(crate) fn set_record_raw_request_headers(&mut self) {
+        self.state.record_raw_request_headers = true; // consulted after each outgoing head is encoded
+    }
+
+    pub(crate) fn set_record_raw_response_headers(&mut self) {
+        self.state.record_raw_response_headers = true; // passed to the parser as `record_raw_headers`
+    }
+
     pub(crate) fn set_preserve_header_case(&mut self) {
         self.state.preserve_header_case = true;
     }
@@ -241,6 +252,8 @@ where
                 req_method: &mut self.state.method,
                 h1_parser_config: self.state.h1_parser_config.clone(),
                 h1_max_headers: self.state.h1_max_headers,
+                raw_request_headers: self.state.raw_request_headers.as_ref(),
+                record_raw_headers: self.state.record_raw_response_headers,
                 preserve_header_case: self.state.preserve_header_case,
                 #[cfg(feature = "ffi")]
                 preserve_header_order: self.state.preserve_header_order,
@@ -617,6 +630,7 @@ where
         self.enforce_version(&mut head);
 
         let buf = self.io.headers_buf();
+        let headers_start = buf.len();
         match super::role::encode_headers::<T>(
             Encode {
                 head: &mut head,
@@ -633,6 +647,13 @@ where
             Ok(encoder) => {
                 debug_assert!(self.state.cached_headers.is_none());
                 debug_assert!(head.headers.is_empty());
+                if self.state.record_raw_request_headers {
+                    self.state.raw_request_headers = Some(crate::ext::RawRequestHeaders::from(
+                        Bytes::copy_from_slice(&buf[headers_start..]),
+                    ));
+                } else {
+                    self.state.raw_request_headers = None;
+                }
                 self.state.cached_headers = Some(head.headers);
 
                 #[cfg(feature = "client")]
@@ -934,6 +955,9 @@ struct State {
     date_header: bool,
     #[cfg(feature = "server")]
     timer: Time,
+    raw_request_headers: Option<crate::ext::RawRequestHeaders>,
+    record_raw_request_headers: bool,
+    record_raw_response_headers: bool,
     preserve_header_case: bool,
     #[cfg(feature = "ffi")]
     preserve_header_order: bool,
diff --git a/src/proto/h1/io.rs b/src/proto/h1/io.rs
index d5afba683a..4357626689 100644
--- a/src/proto/h1/io.rs
+++ b/src/proto/h1/io.rs
@@ -184,6 +184,8 @@ where
                     req_method: parse_ctx.req_method,
                     h1_parser_config: parse_ctx.h1_parser_config.clone(),
                     h1_max_headers: parse_ctx.h1_max_headers,
+                    raw_request_headers: parse_ctx.raw_request_headers,
+                    record_raw_headers: parse_ctx.record_raw_headers,
                     preserve_header_case: parse_ctx.preserve_header_case,
                     #[cfg(feature = "ffi")]
                     preserve_header_order: parse_ctx.preserve_header_order,
@@ -706,6 +708,8 @@ mod tests {
                 req_method: &mut None,
                 h1_parser_config: Default::default(),
                 h1_max_headers: None,
+                raw_request_headers: None,
+                record_raw_headers: false,
                 preserve_header_case: false,
                 #[cfg(feature = "ffi")]
                 preserve_header_order: false,
diff --git a/src/proto/h1/mod.rs b/src/proto/h1/mod.rs
index a8f36f5fd9..6010562684 100644
--- a/src/proto/h1/mod.rs
+++ b/src/proto/h1/mod.rs
@@ -73,6 +73,8 @@ pub(crate) struct ParseContext<'a> {
     req_method: &'a mut Option<Method>,
     h1_parser_config: ParserConfig,
     h1_max_headers: Option<usize>,
+    record_raw_headers: bool,
+    raw_request_headers: Option<&'a crate::ext::RawRequestHeaders>,
     preserve_header_case: bool,
     #[cfg(feature = "ffi")]
     preserve_header_order: bool,
diff --git a/src/proto/h1/role.rs b/src/proto/h1/role.rs
index 1674e26bc6..f4f458a28f 100644
--- a/src/proto/h1/role.rs
+++ b/src/proto/h1/role.rs
@@ -20,6 +20,7 @@ use crate::error::Parse;
 use crate::ext::HeaderCaseMap;
 #[cfg(feature = "ffi")]
 use crate::ext::OriginalHeaderOrder;
+use crate::ext::RawResponseHeaders;
 use crate::headers;
 use crate::proto::h1::{
     Encode, Encoder, Http1Transaction, ParseContext, ParseResult, ParsedMessage,
@@ -1056,19 +1057,32 @@ impl Http1Transaction for Client {
             };
 
             let mut slice = buf.split_to(len);
+            let raw_headers;
 
-            if ctx
+            let slice = if ctx
                 .h1_parser_config
                 .obsolete_multiline_headers_in_responses_are_allowed()
             {
+                raw_headers = if ctx.record_raw_headers {
+                    Some(RawResponseHeaders::from(slice.clone().freeze()))
+                } else {
+                    None
+                };
                 for header in &mut headers_indices[..headers_len] {
                     // SAFETY: array is valid up to `headers_len`
                     let header = unsafe { header.assume_init_mut() };
                     Client::obs_fold_line(&mut slice, header);
                 }
-            }
-
-            let slice = slice.freeze();
+                slice.freeze()
+            } else {
+                let slice = slice.freeze();
+                raw_headers = if ctx.record_raw_headers {
+                    Some(RawResponseHeaders::from(slice.clone()))
+                } else {
+                    None
+                };
+                slice
+            };
 
             let mut headers = ctx.cached_headers.take().unwrap_or_default();
 
@@ -1119,6 +1133,14 @@ impl Http1Transaction for Client {
 
             let mut extensions = http::Extensions::default();
 
+            if let Some(raw_request_headers) = ctx.raw_request_headers {
+                extensions.insert(raw_request_headers.clone());
+            }
+
+            if let Some(raw_headers) = raw_headers {
+                extensions.insert(raw_headers);
+            }
+
             if let Some(header_case_map) = header_case_map {
                 extensions.insert(header_case_map);
             }
@@ -1656,6 +1678,8 @@ mod tests {
                 req_method: &mut method,
                 h1_parser_config: Default::default(),
                 h1_max_headers: None,
+                raw_request_headers: None,
+                record_raw_headers: false,
                 preserve_header_case: false,
                 #[cfg(feature = "ffi")]
                 preserve_header_order: false,
@@ -1684,6 +1708,8 @@ mod tests {
             req_method: &mut Some(crate::Method::GET),
             h1_parser_config: Default::default(),
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: false,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1708,6 +1734,8 @@ mod tests {
             req_method: &mut None,
             h1_parser_config: Default::default(),
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: false,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1729,6 +1757,8 @@ mod tests {
             req_method: &mut Some(crate::Method::GET),
             h1_parser_config: Default::default(),
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: false,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1752,6 +1782,8 @@ mod tests {
             req_method: &mut Some(crate::Method::GET),
             h1_parser_config: Default::default(),
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: false,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1779,6 +1811,8 @@ mod tests {
             req_method: &mut Some(crate::Method::GET),
             h1_parser_config,
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: false,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1803,6 +1837,8 @@ mod tests {
             req_method: &mut Some(crate::Method::GET),
             h1_parser_config: Default::default(),
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: false,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1823,6 +1859,8 @@ mod tests {
             req_method: &mut None,
             h1_parser_config: Default::default(),
             h1_max_headers: None,
+            raw_request_headers: None,
+            record_raw_headers: false,
             preserve_header_case: true,
             #[cfg(feature = "ffi")]
             preserve_header_order: false,
@@ -1862,6 +1900,8 @@ mod tests {
                     req_method: &mut None,
                     h1_parser_config: Default::default(),
                     h1_max_headers: None,
+                    raw_request_headers: None,
+                    record_raw_headers: false,
                     preserve_header_case: false,
                     #[cfg(feature = "ffi")]
                     preserve_header_order: false,
@@ -1883,6 +1923,8 @@ mod tests {
                     req_method: &mut None,
                     h1_parser_config: Default::default(),
                     h1_max_headers: None,
+                    raw_request_headers: None,
+                    record_raw_headers: false,
                     preserve_header_case: false,
                     #[cfg(feature = "ffi")]
                     preserve_header_order: false,
@@ -2113,6 +2155,8 @@ mod tests {
                     req_method: &mut Some(Method::GET),
                     h1_parser_config: Default::default(),
                     h1_max_headers: None,
+                    raw_request_headers: None,
+                    record_raw_headers: false,
                     preserve_header_case: false,
                     #[cfg(feature = "ffi")]
                     preserve_header_order: false,
@@ -2134,6 +2178,8 @@ mod tests {
                     req_method: &mut Some(m),
                     h1_parser_config: Default::default(),
                     h1_max_headers: None,
+                    raw_request_headers: None,
+                    record_raw_headers: false,
                     preserve_header_case: false,
                     #[cfg(feature = "ffi")]
                     preserve_header_order: false,
@@ -2155,6 +2201,8 @@ mod tests {
                     req_method: &mut Some(Method::GET),
                     h1_parser_config: Default::default(),
                     h1_max_headers: None,
+                    raw_request_headers: None,
+                    record_raw_headers: false,
                     preserve_header_case: false,
                     #[cfg(feature = "ffi")]
                     preserve_header_order: false,
@@ -2725,6 +2773,8 @@ mod tests {
                 req_method: &mut Some(Method::GET),
                 h1_parser_config: Default::default(),
                 h1_max_headers: None,
+                raw_request_headers: None,
+                record_raw_headers: false,
                 preserve_header_case: false,
                 #[cfg(feature = "ffi")]
                 preserve_header_order: false,
@@ -2769,6 +2819,8 @@ mod tests {
                         req_method: &mut None,
                         h1_parser_config: Default::default(),
                         h1_max_headers: max_headers,
+                        raw_request_headers: None,
+                        record_raw_headers: false,
                         preserve_header_case: false,
                         #[cfg(feature = "ffi")]
                         preserve_header_order: false,
@@ -2793,6 +2845,8 @@ mod tests {
                         req_method: &mut None,
                         h1_parser_config: Default::default(),
                         h1_max_headers: max_headers,
+                        raw_request_headers: None,
+                        record_raw_headers: false,
                         preserve_header_case: false,
                         #[cfg(feature = "ffi")]
                         preserve_header_order: false,