Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions newsfragments/5405.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
added conversion for &Cstr and Cstring
204 changes: 201 additions & 3 deletions src/conversions/std/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::{borrow::Cow, convert::Infallible};
#[cfg(feature = "experimental-inspect")]
use crate::inspect::types::TypeInfo;
use crate::{
conversion::IntoPyObject, instance::Bound, types::PyString, Borrowed, FromPyObject, PyAny,
PyErr, Python,
conversion::IntoPyObject, exceptions::PyValueError, ffi, instance::Bound, types::PyString,
Borrowed, FromPyObject, PyAny, PyErr, Python,
};

impl<'py> IntoPyObject<'py> for &str {
Expand Down Expand Up @@ -233,7 +233,152 @@ impl FromPyObject<'_, '_> for char {
}
}

#[cfg(test)]
// FFI conversions for CStr and CString

impl<'py> IntoPyObject<'py> for &std::ffi::CStr {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Infallible;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: &'static str = "str";

#[inline]
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
// Convert cstr to &str, it's is safe because cstr is guaranteed to be valid UTF-8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true, a CStr can safely be constructed from any &[u8] without interior null bytes an thus may not contain valid utf8.

Instead of unwrapping this should use Utf8Error as its Error type, to forward the error to the caller.

Ok(PyString::new(py, self.to_str().unwrap()))
}

#[cfg(feature = "experimental-inspect")]
fn type_output() -> TypeInfo {
<String>::type_output()
}
}

impl<'py> IntoPyObject<'py> for std::ffi::CString {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Infallible;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: &'static str = "str";

#[inline]
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
// Convert cstring to &str, it's safe because cstring is guaranteed to be valid UTF-8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (and below) should internally make use of the CStr conversion above

Ok(PyString::new(py, self.to_str().unwrap()))
}

#[cfg(feature = "experimental-inspect")]
fn type_output() -> TypeInfo {
<String>::type_output()
}
}

impl<'py> IntoPyObject<'py> for &std::ffi::CString {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Infallible;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: &'static str = "str";

#[inline]
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
Ok(PyString::new(py, self.to_str().unwrap()))
}

#[cfg(feature = "experimental-inspect")]
fn type_output() -> TypeInfo {
<String>::type_output()
}
}

/// Allows extracting cstr from Python objects.
/// Accepts Python `str` objects and converts them to cstr.
/// Fails if the string contains interior nul bytes.
impl<'a> FromPyObject<'a, '_> for &'a std::ffi::CStr {
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const INPUT_TYPE: &'static str = "str";

fn extract(obj: Borrowed<'a, '_, PyAny>) -> Result<Self, Self::Error> {
let py_string = obj.cast::<PyString>()?;

// Use PyUnicode_AsUTF8AndSize to get the raw bytes with guaranteed NUL termination
#[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
{
let mut size: ffi::Py_ssize_t = 0;
let data: *const u8 =
unsafe { ffi::PyUnicode_AsUTF8AndSize(py_string.as_ptr(), &mut size).cast() };
if data.is_null() {
return Err(crate::PyErr::fetch(obj.py()));
}

// Create a slice that includes the NUL terminator
let bytes_with_nul = unsafe { std::slice::from_raw_parts(data, (size + 1) as usize) };

// Check for interior NUL bytes (excluding the final NUL)
for &byte in &bytes_with_nul[..size as usize] {
if byte == 0 {
return Err(PyValueError::new_err("string contains interior NUL bytes"));
}
}
Comment on lines +322 to +327
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to do this manually, we should make use of CStr::from_bytes_with_nul and just map the error.


// Now create the CStr from the bytes with NUL
Ok(unsafe { std::ffi::CStr::from_ptr(data as *const i8) })
}

#[cfg(not(any(Py_3_10, not(Py_LIMITED_API))))]
{
// Fallback for older Python versions
use std::ffi::FromBytesWithNulError;
let cow = py_string.to_cow()?;
let bytes = cow.as_bytes();
Comment on lines +335 to +338
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this works, since the underlying buffer will not be null terminated. We may have to gate the whole impl under #[cfg(any(Py_3_10, not(Py_LIMITED_API)))]


// Try to create a CStr from the bytes
// This will fail if there are interior NUL bytes
std::ffi::CStr::from_bytes_with_nul(bytes).map_err(|e| match e {
FromBytesWithNulError::InteriorNul { .. } => {
PyValueError::new_err("string contains interior NUL bytes")
}
FromBytesWithNulError::NotNulTerminated => {
PyValueError::new_err("string is not NUL-terminated")
}
})
}
}

#[cfg(feature = "experimental-inspect")]
fn type_input() -> TypeInfo {
<String as FromPyObject>::type_input()
}
}

/// Allows extracting CString from Python objects.
/// Accepts Python `str` objects and converts them to CString.
/// Fails if the string contains NUL bytes.
impl FromPyObject<'_, '_> for std::ffi::CString {
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const INPUT_TYPE: &'static str = "str";

fn extract(obj: Borrowed<'_, '_, PyAny>) -> Result<Self, Self::Error> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should make use of the CStr impl above and just call to_owned on the result.

let py_string = obj.cast::<PyString>()?;
let cow = py_string.to_cow()?;
let bytes = cow.as_bytes();
std::ffi::CString::new(bytes)
.map_err(|_| PyValueError::new_err("string contains NUL bytes"))
}

#[cfg(feature = "experimental-inspect")]
fn type_input() -> TypeInfo {
<String as FromPyObject>::type_input()
}
}

mod tests {
use crate::types::any::PyAnyMethods;
use crate::{IntoPyObject, Python};
Expand Down Expand Up @@ -267,6 +412,28 @@ mod tests {

let s2: Cow<'_, str> = py_string.extract().unwrap();
assert_eq!(s, s2);

let cstr: &std::ffi::CStr = py_string.extract().unwrap();
assert_eq!(s, cstr.to_str().unwrap());

let cstring: std::ffi::CString = py_string.extract().unwrap();
assert_eq!(s, cstring.to_str().unwrap());
})
}

#[test]
fn test_extract_str_unicode() {
Python::attach(|py| {
let s = "Hello 🐍 Python";
let py_string = s.into_pyobject(py).unwrap();
let s2: Cow<'_, str> = py_string.extract().unwrap();
assert_eq!(s, s2);

let cstr: &std::ffi::CStr = py_string.extract().unwrap();
assert_eq!(s, cstr.to_str().unwrap());

let cstring: std::ffi::CString = py_string.extract().unwrap();
assert_eq!(s, cstring.to_str().unwrap());
})
}

Expand Down Expand Up @@ -320,6 +487,37 @@ mod tests {
.extract::<Cow<'_, str>>()
.unwrap()
);

let cstr = std::ffi::CStr::from_bytes_with_nul(b"Hello Python\0").unwrap();
let py_string = cstr.into_pyobject(py).unwrap();
assert_eq!(s, py_string.extract::<String>().unwrap());

let cstring = std::ffi::CString::new("Hello Python").unwrap();
let py_string = cstring.clone().into_pyobject(py).unwrap();
assert_eq!(s, py_string.extract::<String>().unwrap());

let py_string = (&cstring).into_pyobject(py).unwrap();
assert_eq!(s, py_string.extract::<String>().unwrap());
})
}

#[test]
fn test_extract_with_nul_error() {
Python::attach(|py| {
let s = "Hello\0Python";
let py_string = s.into_pyobject(py).unwrap();

let err: crate::PyResult<&std::ffi::CStr> = py_string.extract();
assert!(err
.unwrap_err()
.to_string()
.contains("string contains interior NUL bytes"));

let err: crate::PyResult<std::ffi::CString> = py_string.extract();
assert!(err
.unwrap_err()
.to_string()
.contains("string contains NUL bytes"));
})
}
}
Loading