pyo3/conversions/std/
osstr.rs

1use crate::conversion::IntoPyObject;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::Bound;
4use crate::types::any::PyAnyMethods;
5use crate::types::PyString;
6use crate::{ffi, FromPyObject, PyAny, PyObject, PyResult, Python};
7#[allow(deprecated)]
8use crate::{IntoPy, ToPyObject};
9use std::borrow::Cow;
10use std::convert::Infallible;
11use std::ffi::{OsStr, OsString};
12
13#[allow(deprecated)]
14impl ToPyObject for OsStr {
15    #[inline]
16    fn to_object(&self, py: Python<'_>) -> PyObject {
17        self.into_pyobject(py).unwrap().into_any().unbind()
18    }
19}
20
21impl<'py> IntoPyObject<'py> for &OsStr {
22    type Target = PyString;
23    type Output = Bound<'py, Self::Target>;
24    type Error = Infallible;
25
26    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
27        // If the string is UTF-8, take the quick and easy shortcut
28        if let Some(valid_utf8_path) = self.to_str() {
29            return valid_utf8_path.into_pyobject(py);
30        }
31
32        // All targets besides windows support the std::os::unix::ffi::OsStrExt API:
33        // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59
34        #[cfg(not(windows))]
35        {
36            #[cfg(target_os = "wasi")]
37            let bytes = std::os::wasi::ffi::OsStrExt::as_bytes(self);
38            #[cfg(not(target_os = "wasi"))]
39            let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self);
40
41            let ptr = bytes.as_ptr().cast();
42            let len = bytes.len() as ffi::Py_ssize_t;
43            unsafe {
44                // DecodeFSDefault automatically chooses an appropriate decoding mechanism to
45                // parse os strings losslessly (i.e. surrogateescape most of the time)
46                Ok(ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len)
47                    .assume_owned(py)
48                    .downcast_into_unchecked::<PyString>())
49            }
50        }
51
52        #[cfg(windows)]
53        {
54            let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect();
55
56            unsafe {
57                // This will not panic because the data from encode_wide is well-formed Windows
58                // string data
59
60                Ok(
61                    ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t)
62                        .assume_owned(py)
63                        .downcast_into_unchecked::<PyString>(),
64                )
65            }
66        }
67    }
68}
69
70impl<'py> IntoPyObject<'py> for &&OsStr {
71    type Target = PyString;
72    type Output = Bound<'py, Self::Target>;
73    type Error = Infallible;
74
75    #[inline]
76    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
77        (*self).into_pyobject(py)
78    }
79}
80
// There is no `FromPyObject` implementation for `&OsStr`: although it would be possible on
// Unix, it cannot be implemented on Windows, so it is omitted entirely.
83
84impl FromPyObject<'_> for OsString {
85    fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
86        let pystring = ob.downcast::<PyString>()?;
87
88        #[cfg(not(windows))]
89        {
90            // Decode from Python's lossless bytes string representation back into raw bytes
91            let fs_encoded_bytes = unsafe {
92                crate::Py::<crate::types::PyBytes>::from_owned_ptr(
93                    ob.py(),
94                    ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()),
95                )
96            };
97
98            // Create an OsStr view into the raw bytes from Python
99            #[cfg(target_os = "wasi")]
100            let os_str: &OsStr =
101                std::os::wasi::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
102            #[cfg(not(target_os = "wasi"))]
103            let os_str: &OsStr =
104                std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
105
106            Ok(os_str.to_os_string())
107        }
108
109        #[cfg(windows)]
110        {
111            use crate::types::string::PyStringMethods;
112
113            // Take the quick and easy shortcut if UTF-8
114            if let Ok(utf8_string) = pystring.to_cow() {
115                return Ok(utf8_string.into_owned().into());
116            }
117
118            // Get an owned allocated wide char buffer from PyString, which we have to deallocate
119            // ourselves
120            let size =
121                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
122            crate::err::error_on_minusone(ob.py(), size)?;
123
124            let mut buffer = vec![0; size as usize];
125            let bytes_read =
126                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
127            assert_eq!(bytes_read, size);
128
129            // Copy wide char buffer into OsString
130            let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer);
131
132            Ok(os_string)
133        }
134    }
135}
136
137#[allow(deprecated)]
138impl IntoPy<PyObject> for &'_ OsStr {
139    #[inline]
140    fn into_py(self, py: Python<'_>) -> PyObject {
141        self.into_pyobject(py).unwrap().into_any().unbind()
142    }
143}
144
145#[allow(deprecated)]
146impl ToPyObject for Cow<'_, OsStr> {
147    #[inline]
148    fn to_object(&self, py: Python<'_>) -> PyObject {
149        self.into_pyobject(py).unwrap().into_any().unbind()
150    }
151}
152
153#[allow(deprecated)]
154impl IntoPy<PyObject> for Cow<'_, OsStr> {
155    #[inline]
156    fn into_py(self, py: Python<'_>) -> PyObject {
157        self.into_pyobject(py).unwrap().into_any().unbind()
158    }
159}
160
161impl<'py> IntoPyObject<'py> for Cow<'_, OsStr> {
162    type Target = PyString;
163    type Output = Bound<'py, Self::Target>;
164    type Error = Infallible;
165
166    #[inline]
167    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
168        (*self).into_pyobject(py)
169    }
170}
171
172impl<'py> IntoPyObject<'py> for &Cow<'_, OsStr> {
173    type Target = PyString;
174    type Output = Bound<'py, Self::Target>;
175    type Error = Infallible;
176
177    #[inline]
178    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
179        (&**self).into_pyobject(py)
180    }
181}
182
183#[allow(deprecated)]
184impl ToPyObject for OsString {
185    #[inline]
186    fn to_object(&self, py: Python<'_>) -> PyObject {
187        self.into_pyobject(py).unwrap().into_any().unbind()
188    }
189}
190
191#[allow(deprecated)]
192impl IntoPy<PyObject> for OsString {
193    #[inline]
194    fn into_py(self, py: Python<'_>) -> PyObject {
195        self.into_pyobject(py).unwrap().into_any().unbind()
196    }
197}
198
199impl<'py> IntoPyObject<'py> for OsString {
200    type Target = PyString;
201    type Output = Bound<'py, Self::Target>;
202    type Error = Infallible;
203
204    #[inline]
205    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
206        self.as_os_str().into_pyobject(py)
207    }
208}
209
210#[allow(deprecated)]
211impl IntoPy<PyObject> for &OsString {
212    #[inline]
213    fn into_py(self, py: Python<'_>) -> PyObject {
214        self.into_pyobject(py).unwrap().into_any().unbind()
215    }
216}
217
218impl<'py> IntoPyObject<'py> for &OsString {
219    type Target = PyString;
220    type Output = Bound<'py, Self::Target>;
221    type Error = Infallible;
222
223    #[inline]
224    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
225        self.as_os_str().into_pyobject(py)
226    }
227}
228
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyString, PyStringMethods};
    use crate::{BoundObject, IntoPyObject, Python};
    use std::fmt::Debug;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    /// A byte sequence that is not valid UTF-8 must survive a trip through Python unchanged.
    #[test]
    #[cfg(not(windows))]
    fn test_non_utf8_conversion() {
        #[cfg(not(target_os = "wasi"))]
        use std::os::unix::ffi::OsStrExt;
        #[cfg(target_os = "wasi")]
        use std::os::wasi::ffi::OsStrExt;

        Python::with_gil(|py| {
            // These bytes do not form valid UTF-8.
            let invalid_utf8 = &[250, 251, 252, 253, 254, 255, 0, 255];
            let original = OsStr::from_bytes(invalid_utf8);

            // Convert to a Python string, extract it again, and compare with the input.
            let as_python = original.into_pyobject(py).unwrap();
            let back_in_rust: OsString = as_python.extract().unwrap();
            assert_eq!(original, back_in_rust);
        });
    }

    /// Every OS-string flavour converts to a `str` with the same contents and extracts back
    /// to an equal `OsString`.
    #[test]
    fn test_intopyobject_roundtrip() {
        /// Converts `value` to Python, checks the result is a `str` with matching lossy
        /// text, then extracts it and checks it equals the input.
        fn assert_roundtrip<'py, T>(py: Python<'py>, value: T)
        where
            T: IntoPyObject<'py> + AsRef<OsStr> + Debug + Clone,
            T::Error: Debug,
        {
            let as_any = value.clone().into_pyobject(py).unwrap().into_any();
            let as_str = as_any.as_borrowed().downcast::<PyString>().unwrap();
            assert_eq!(as_str.to_string_lossy(), value.as_ref().to_string_lossy());

            let extracted: OsString = as_str.extract().unwrap();
            assert_eq!(value.as_ref(), extracted.as_os_str());
        }

        Python::with_gil(|py| {
            let sample = OsStr::new("Hello\0\nšŸ");

            let borrowed_cow: Cow<'_, OsStr> = Cow::Borrowed(sample);
            let owned_cow: Cow<'_, OsStr> = Cow::Owned(sample.to_os_string());

            assert_roundtrip::<&OsStr>(py, sample);
            assert_roundtrip::<Cow<'_, OsStr>>(py, borrowed_cow);
            assert_roundtrip::<Cow<'_, OsStr>>(py, owned_cow);
            assert_roundtrip::<OsString>(py, sample.to_os_string());
        });
    }
}