pyo3_ffi/cpython/
unicodeobject.rs

1#[cfg(any(Py_3_11, not(PyPy)))]
2use crate::Py_hash_t;
3use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_ssize_t};
4use libc::wchar_t;
5#[cfg(Py_3_14)]
6use std::os::raw::c_ushort;
7use std::os::raw::{c_char, c_int, c_uint, c_void};
8
9// skipped Py_UNICODE_ISSPACE()
10// skipped Py_UNICODE_ISLOWER()
11// skipped Py_UNICODE_ISUPPER()
12// skipped Py_UNICODE_ISTITLE()
13// skipped Py_UNICODE_ISLINEBREAK
14// skipped Py_UNICODE_TOLOWER
15// skipped Py_UNICODE_TOUPPER
16// skipped Py_UNICODE_TOTITLE
17// skipped Py_UNICODE_ISDECIMAL
18// skipped Py_UNICODE_ISDIGIT
19// skipped Py_UNICODE_ISNUMERIC
20// skipped Py_UNICODE_ISPRINTABLE
21// skipped Py_UNICODE_TODECIMAL
22// skipped Py_UNICODE_TODIGIT
23// skipped Py_UNICODE_TONUMERIC
24// skipped Py_UNICODE_ISALPHA
25// skipped Py_UNICODE_ISALNUM
26// skipped Py_UNICODE_COPY
27// skipped Py_UNICODE_FILL
28// skipped Py_UNICODE_IS_SURROGATE
29// skipped Py_UNICODE_IS_HIGH_SURROGATE
30// skipped Py_UNICODE_IS_LOW_SURROGATE
31// skipped Py_UNICODE_JOIN_SURROGATES
32// skipped Py_UNICODE_HIGH_SURROGATE
33// skipped Py_UNICODE_LOW_SURROGATE
34
35// generated by bindgen v0.63.0 (with small adaptations)
/// Raw storage for a C bitfield, with helpers to read and write individual bits and
/// multi-bit fields.
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` unchanged.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        BitfieldUnit { storage }
    }
}

#[cfg(not(GraalPy))]
impl<Storage> BitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Position of bit `index` inside its byte: counted from the most significant bit on
    /// big-endian targets and from the least significant bit otherwise.
    #[inline]
    fn shift_in_byte(index: usize) -> usize {
        let within_byte = index % 8;
        if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        }
    }

    /// Returns whether the bit at `index` is set.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        let bytes = self.storage.as_ref();
        debug_assert!(index / 8 < bytes.len());
        let shift = Self::shift_in_byte(index);
        ((bytes[index / 8] >> shift) & 1) == 1
    }

    /// Sets (`val == true`) or clears (`val == false`) the bit at `index`.
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let bit: u8 = 1 << Self::shift_in_byte(index);
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = if val { *slot | bit } else { *slot & !bit };
    }

    /// Reads the `bit_width`-bit field starting at `bit_offset` and returns it as a `u64`.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        let width = bit_width as usize;
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        (0..width)
            .filter(|&i| self.get_bit(bit_offset + i))
            .fold(0u64, |acc, i| {
                // On big-endian targets the field's bits are stored in reverse order.
                let dest = if cfg!(target_endian = "big") {
                    width - 1 - i
                } else {
                    i
                };
                acc | (1u64 << dest)
            })
    }

    /// Writes the low `bit_width` bits of `val` into the field starting at `bit_offset`.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        let width = bit_width as usize;
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        for i in 0..width {
            let bit_is_set = ((val >> i) & 1) == 1;
            // Mirror of the ordering used by `get`.
            let dest = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            self.set_bit(bit_offset + dest, bit_is_set);
        }
    }
}
121
// Bit layout of the `state` bitfield. Every field starts where the previous one ends, so each
// index is the previous field's index plus the previous field's width.
#[cfg(not(GraalPy))]
const STATE_INTERNED_INDEX: usize = 0;
#[cfg(not(any(GraalPy, Py_3_14)))]
const STATE_INTERNED_WIDTH: u8 = 2;
#[cfg(all(Py_3_14, not(GraalPy)))]
const STATE_INTERNED_WIDTH: u8 = 16;

#[cfg(not(GraalPy))]
const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
#[cfg(not(GraalPy))]
const STATE_KIND_WIDTH: u8 = 3;

#[cfg(not(GraalPy))]
const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
#[cfg(not(GraalPy))]
const STATE_COMPACT_WIDTH: u8 = 1;

#[cfg(not(GraalPy))]
const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
#[cfg(not(GraalPy))]
const STATE_ASCII_WIDTH: u8 = 1;

// From 3.12 on, the bit after `ascii` is `statically_allocated` ...
#[cfg(all(Py_3_12, not(GraalPy)))]
const STATE_STATICALLY_ALLOCATED_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
#[cfg(all(Py_3_12, not(GraalPy)))]
const STATE_STATICALLY_ALLOCATED_WIDTH: u8 = 1;

// ... whereas before 3.12 the same position holds `ready`.
#[cfg(not(any(GraalPy, Py_3_12)))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
#[cfg(not(any(GraalPy, Py_3_12)))]
const STATE_READY_WIDTH: u8 = 1;
156
157// generated by bindgen v0.63.0 (with small adaptations)
158// The same code is generated for Python 3.7, 3.8, 3.9, 3.10, and 3.11, but the "ready" field
159// has been removed from Python 3.12.
160
/// Wrapper around the `PyASCIIObject.state` bitfield with getters and setters that work
/// on most little- and big-endian architectures.
///
/// Memory layout of C bitfields is implementation defined, so these functions are still
/// unsafe. Users must verify that they work as expected on the architectures they target.
///
/// The wrapper is four bytes wide and four-byte aligned; it is converted to and from the
/// plain `u32` stored in `PyASCIIObject::state` through the `From` impls, using native byte
/// order.
#[repr(C)]
#[repr(align(4))]
struct PyASCIIObjectState {
    // Zero-length array: takes up no space in the struct.
    bitfield_align: [u8; 0],
    // The four raw bytes of the `state` value that the accessors read and modify.
    bitfield: BitfieldUnit<[u8; 4usize]>,
}
172
173// c_uint and u32 are not necessarily the same type on all targets / architectures
174#[cfg(not(GraalPy))]
175#[allow(clippy::useless_transmute)]
176impl PyASCIIObjectState {
177    #[inline]
178    #[cfg(not(Py_3_14))]
179    unsafe fn interned(&self) -> c_uint {
180        std::mem::transmute(
181            self.bitfield
182                .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
183        )
184    }
185
186    #[inline]
187    #[cfg(not(Py_3_14))]
188    unsafe fn set_interned(&mut self, val: c_uint) {
189        let val: u32 = std::mem::transmute(val);
190        self.bitfield
191            .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
192    }
193
194    #[inline]
195    #[cfg(Py_3_14)]
196    unsafe fn interned(&self) -> u16 {
197        std::mem::transmute(
198            self.bitfield
199                .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u16,
200        )
201    }
202
203    #[inline]
204    #[cfg(Py_3_14)]
205    unsafe fn set_interned(&mut self, val: u16) {
206        let val: u16 = std::mem::transmute(val);
207        self.bitfield
208            .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
209    }
210
211    #[inline]
212    #[cfg(not(Py_3_14))]
213    unsafe fn kind(&self) -> c_uint {
214        std::mem::transmute(self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
215    }
216
217    #[inline]
218    #[cfg(Py_3_14)]
219    unsafe fn kind(&self) -> c_ushort {
220        std::mem::transmute(self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as c_ushort)
221    }
222
223    #[inline]
224    #[cfg(not(Py_3_14))]
225    unsafe fn set_kind(&mut self, val: c_uint) {
226        let val: u32 = std::mem::transmute(val);
227        self.bitfield
228            .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
229    }
230
231    #[inline]
232    #[cfg(Py_3_14)]
233    unsafe fn set_kind(&mut self, val: c_ushort) {
234        let val: c_ushort = std::mem::transmute(val);
235        self.bitfield
236            .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
237    }
238
239    #[inline]
240    #[cfg(not(Py_3_14))]
241    unsafe fn compact(&self) -> c_uint {
242        std::mem::transmute(self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
243    }
244
245    #[inline]
246    #[cfg(Py_3_14)]
247    unsafe fn compact(&self) -> c_ushort {
248        std::mem::transmute(self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as c_ushort)
249    }
250
251    #[inline]
252    #[cfg(not(Py_3_14))]
253    unsafe fn set_compact(&mut self, val: c_uint) {
254        let val: u32 = std::mem::transmute(val);
255        self.bitfield
256            .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
257    }
258
259    #[inline]
260    #[cfg(Py_3_14)]
261    unsafe fn set_compact(&mut self, val: c_ushort) {
262        let val: c_ushort = std::mem::transmute(val);
263        self.bitfield
264            .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
265    }
266
267    #[inline]
268    #[cfg(not(Py_3_14))]
269    unsafe fn ascii(&self) -> c_uint {
270        std::mem::transmute(self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
271    }
272
273    #[inline]
274    #[cfg(not(Py_3_14))]
275    unsafe fn set_ascii(&mut self, val: c_uint) {
276        let val: u32 = std::mem::transmute(val);
277        self.bitfield
278            .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
279    }
280
281    #[inline]
282    #[cfg(Py_3_14)]
283    unsafe fn ascii(&self) -> c_ushort {
284        std::mem::transmute(self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as c_ushort)
285    }
286
287    #[inline]
288    #[cfg(Py_3_14)]
289    unsafe fn set_ascii(&mut self, val: c_ushort) {
290        let val: c_ushort = std::mem::transmute(val);
291        self.bitfield
292            .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
293    }
294
295    #[cfg(all(Py_3_12, not(Py_3_14)))]
296    #[inline]
297    unsafe fn statically_allocated(&self) -> c_uint {
298        std::mem::transmute(self.bitfield.get(
299            STATE_STATICALLY_ALLOCATED_INDEX,
300            STATE_STATICALLY_ALLOCATED_WIDTH,
301        ) as u32)
302    }
303
304    #[cfg(all(Py_3_12, not(Py_3_14)))]
305    #[inline]
306    unsafe fn set_statically_allocated(&mut self, val: c_uint) {
307        let val: u32 = std::mem::transmute(val);
308        self.bitfield.set(
309            STATE_STATICALLY_ALLOCATED_INDEX,
310            STATE_STATICALLY_ALLOCATED_WIDTH,
311            val as u64,
312        )
313    }
314
315    #[inline]
316    #[cfg(Py_3_14)]
317    unsafe fn statically_allocated(&self) -> c_ushort {
318        std::mem::transmute(self.bitfield.get(
319            STATE_STATICALLY_ALLOCATED_INDEX,
320            STATE_STATICALLY_ALLOCATED_WIDTH,
321        ) as c_ushort)
322    }
323
324    #[inline]
325    #[cfg(Py_3_14)]
326    unsafe fn set_statically_allocated(&mut self, val: c_ushort) {
327        let val: c_ushort = std::mem::transmute(val);
328        self.bitfield.set(
329            STATE_STATICALLY_ALLOCATED_INDEX,
330            STATE_STATICALLY_ALLOCATED_WIDTH,
331            val as u64,
332        )
333    }
334
335    #[cfg(not(Py_3_12))]
336    #[inline]
337    unsafe fn ready(&self) -> c_uint {
338        std::mem::transmute(self.bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
339    }
340
341    #[cfg(not(Py_3_12))]
342    #[inline]
343    unsafe fn set_ready(&mut self, val: c_uint) {
344        let val: u32 = std::mem::transmute(val);
345        self.bitfield
346            .set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
347    }
348}
349
350impl From<u32> for PyASCIIObjectState {
351    #[inline]
352    fn from(value: u32) -> Self {
353        PyASCIIObjectState {
354            bitfield_align: [],
355            bitfield: BitfieldUnit::new(value.to_ne_bytes()),
356        }
357    }
358}
359
360impl From<PyASCIIObjectState> for u32 {
361    #[inline]
362    fn from(value: PyASCIIObjectState) -> Self {
363        u32::from_ne_bytes(value.bitfield.storage)
364    }
365}
366
/// `#[repr(C)]` declaration of the `PyASCIIObject` struct.
///
/// The set of fields depends on the build configuration: `hash` is left out on PyPy before
/// 3.11, and `wstr` only exists before 3.12.
#[repr(C)]
pub struct PyASCIIObject {
    pub ob_base: PyObject,
    pub length: Py_ssize_t,
    #[cfg(any(Py_3_11, not(PyPy)))]
    pub hash: Py_hash_t,
    /// A bit field with various properties.
    ///
    /// Rust doesn't expose bitfields, so the value is stored as a plain `u32` and the
    /// individual fields are read and written through accessor functions.
    ///
    /// Before 3.12:
    /// ```text
    /// unsigned int interned:2; // SSTATE_* constants.
    /// unsigned int kind:3;     // PyUnicode_*_KIND constants.
    /// unsigned int compact:1;
    /// unsigned int ascii:1;
    /// unsigned int ready:1;
    /// unsigned int :24;
    /// ```
    ///
    /// 3.12 and 3.13:
    /// ```text
    /// unsigned int interned:2; // SSTATE_* constants.
    /// unsigned int kind:3;     // PyUnicode_*_KIND constants.
    /// unsigned int compact:1;
    /// unsigned int ascii:1;
    /// unsigned int statically_allocated:1;
    /// unsigned int :24;
    /// ```
    ///
    /// 3.14 and later:
    /// ```text
    /// uint16_t interned;   // SSTATE_* constants.
    /// unsigned short kind:3; // PyUnicode_*_KIND constants.
    /// unsigned short compact:1;
    /// unsigned short ascii:1;
    /// unsigned int statically_allocated:1;
    /// unsigned int :10;
    /// ```
    pub state: u32,
    #[cfg(not(Py_3_12))]
    pub wstr: *mut wchar_t,
}
405
406/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
407#[cfg(not(GraalPy))]
408impl PyASCIIObject {
409    #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] // SSTATE_INTERNED_IMMORTAL_STATIC requires 3.12
410    /// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
411    ///
412    /// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`],
413    /// [`SSTATE_INTERNED_IMMORTAL`], or [`SSTATE_INTERNED_IMMORTAL_STATIC`].
414    #[inline]
415    #[cfg(not(Py_3_14))]
416    pub unsafe fn interned(&self) -> c_uint {
417        PyASCIIObjectState::from(self.state).interned()
418    }
419
420    #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] // SSTATE_INTERNED_IMMORTAL_STATIC requires 3.12
421    /// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
422    ///
423    /// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
424    /// [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`], or
425    /// [`SSTATE_INTERNED_IMMORTAL_STATIC`] is invalid.
426    #[inline]
427    #[cfg(not(Py_3_14))]
428    pub unsafe fn set_interned(&mut self, val: c_uint) {
429        let mut state = PyASCIIObjectState::from(self.state);
430        state.set_interned(val);
431        self.state = u32::from(state);
432    }
433
434    #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] // SSTATE_INTERNED_IMMORTAL_STATIC requires 3.12
435    /// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
436    ///
437    /// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`],
438    /// [`SSTATE_INTERNED_IMMORTAL`], or [`SSTATE_INTERNED_IMMORTAL_STATIC`].
439    #[inline]
440    #[cfg(Py_3_14)]
441    pub unsafe fn interned(&self) -> u16 {
442        PyASCIIObjectState::from(self.state).interned()
443    }
444
445    #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] // SSTATE_INTERNED_IMMORTAL_STATIC requires 3.12
446    /// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
447    ///
448    /// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
449    /// [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`], or
450    /// [`SSTATE_INTERNED_IMMORTAL_STATIC`] is invalid.
451    #[inline]
452    #[cfg(Py_3_14)]
453    pub unsafe fn set_interned(&mut self, val: u16) {
454        let mut state = PyASCIIObjectState::from(self.state);
455        state.set_interned(val);
456        self.state = u32::from(state);
457    }
458
459    /// Get the `kind` field of the [`PyASCIIObject`] state bitfield.
460    ///
461    /// Returns one of:
462    #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
463    /// [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`].
464    #[inline]
465    #[cfg(not(Py_3_14))]
466    pub unsafe fn kind(&self) -> c_uint {
467        PyASCIIObjectState::from(self.state).kind()
468    }
469
470    /// Get the `kind` field of the [`PyASCIIObject`] state bitfield.
471    ///
472    /// Returns one of:
473    #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
474    /// [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`].
475    #[inline]
476    #[cfg(Py_3_14)]
477    pub unsafe fn kind(&self) -> c_ushort {
478        PyASCIIObjectState::from(self.state).kind()
479    }
480
481    /// Set the `kind` field of the [`PyASCIIObject`] state bitfield.
482    ///
483    /// Calling this function with an argument that is not
484    #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
485    /// [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`] is invalid.
486    #[inline]
487    #[cfg(not(Py_3_14))]
488    pub unsafe fn set_kind(&mut self, val: c_uint) {
489        let mut state = PyASCIIObjectState::from(self.state);
490        state.set_kind(val);
491        self.state = u32::from(state);
492    }
493
494    /// Set the `kind` field of the [`PyASCIIObject`] state bitfield.
495    ///
496    /// Calling this function with an argument that is not
497    #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
498    /// [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`] is invalid.
499    #[inline]
500    #[cfg(Py_3_14)]
501    pub unsafe fn set_kind(&mut self, val: c_ushort) {
502        let mut state = PyASCIIObjectState::from(self.state);
503        state.set_kind(val);
504        self.state = u32::from(state);
505    }
506
507    /// Get the `compact` field of the [`PyASCIIObject`] state bitfield.
508    ///
509    /// Returns either `0` or `1`.
510    #[inline]
511    #[cfg(not(Py_3_14))]
512    pub unsafe fn compact(&self) -> c_uint {
513        PyASCIIObjectState::from(self.state).compact()
514    }
515
516    /// Get the `compact` field of the [`PyASCIIObject`] state bitfield.
517    ///
518    /// Returns either `0` or `1`.
519    #[inline]
520    #[cfg(Py_3_14)]
521    pub unsafe fn compact(&self) -> c_ushort {
522        PyASCIIObjectState::from(self.state).compact()
523    }
524
525    /// Set the `compact` flag of the [`PyASCIIObject`] state bitfield.
526    ///
527    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
528    #[inline]
529    #[cfg(not(Py_3_14))]
530    pub unsafe fn set_compact(&mut self, val: c_uint) {
531        let mut state = PyASCIIObjectState::from(self.state);
532        state.set_compact(val);
533        self.state = u32::from(state);
534    }
535
536    /// Set the `compact` flag of the [`PyASCIIObject`] state bitfield.
537    ///
538    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
539    #[inline]
540    #[cfg(Py_3_14)]
541    pub unsafe fn set_compact(&mut self, val: c_ushort) {
542        let mut state = PyASCIIObjectState::from(self.state);
543        state.set_compact(val);
544        self.state = u32::from(state);
545    }
546
547    /// Get the `ascii` field of the [`PyASCIIObject`] state bitfield.
548    ///
549    /// Returns either `0` or `1`.
550    #[inline]
551    #[cfg(not(Py_3_14))]
552    pub unsafe fn ascii(&self) -> c_uint {
553        PyASCIIObjectState::from(self.state).ascii()
554    }
555
556    /// Set the `ascii` flag of the [`PyASCIIObject`] state bitfield.
557    ///
558    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
559    #[inline]
560    #[cfg(not(Py_3_14))]
561    pub unsafe fn set_ascii(&mut self, val: c_uint) {
562        let mut state = PyASCIIObjectState::from(self.state);
563        state.set_ascii(val);
564        self.state = u32::from(state);
565    }
566
567    /// Get the `ascii` field of the [`PyASCIIObject`] state bitfield.
568    ///
569    /// Returns either `0` or `1`.
570    #[inline]
571    #[cfg(Py_3_14)]
572    pub unsafe fn ascii(&self) -> c_ushort {
573        PyASCIIObjectState::from(self.state).ascii()
574    }
575
576    /// Set the `ascii` flag of the [`PyASCIIObject`] state bitfield.
577    ///
578    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
579    #[inline]
580    #[cfg(Py_3_14)]
581    pub unsafe fn set_ascii(&mut self, val: c_ushort) {
582        let mut state = PyASCIIObjectState::from(self.state);
583        state.set_ascii(val);
584        self.state = u32::from(state);
585    }
586
587    /// Get the `ready` field of the [`PyASCIIObject`] state bitfield.
588    ///
589    /// Returns either `0` or `1`.
590    #[cfg(not(Py_3_12))]
591    #[inline]
592    pub unsafe fn ready(&self) -> c_uint {
593        PyASCIIObjectState::from(self.state).ready()
594    }
595
596    /// Set the `ready` flag of the [`PyASCIIObject`] state bitfield.
597    ///
598    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
599    #[cfg(not(Py_3_12))]
600    #[inline]
601    pub unsafe fn set_ready(&mut self, val: c_uint) {
602        let mut state = PyASCIIObjectState::from(self.state);
603        state.set_ready(val);
604        self.state = u32::from(state);
605    }
606
607    /// Get the `statically_allocated` field of the [`PyASCIIObject`] state bitfield.
608    ///
609    /// Returns either `0` or `1`.
610    #[inline]
611    #[cfg(all(Py_3_12, not(Py_3_14)))]
612    pub unsafe fn statically_allocated(&self) -> c_uint {
613        PyASCIIObjectState::from(self.state).statically_allocated()
614    }
615
616    /// Set the `statically_allocated` flag of the [`PyASCIIObject`] state bitfield.
617    ///
618    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
619    #[inline]
620    #[cfg(all(Py_3_12, not(Py_3_14)))]
621    pub unsafe fn set_statically_allocated(&mut self, val: c_uint) {
622        let mut state = PyASCIIObjectState::from(self.state);
623        state.set_statically_allocated(val);
624        self.state = u32::from(state);
625    }
626
627    /// Get the `statically_allocated` field of the [`PyASCIIObject`] state bitfield.
628    ///
629    /// Returns either `0` or `1`.
630    #[inline]
631    #[cfg(Py_3_14)]
632    pub unsafe fn statically_allocated(&self) -> c_ushort {
633        PyASCIIObjectState::from(self.state).statically_allocated()
634    }
635
636    /// Set the `statically_allocated` flag of the [`PyASCIIObject`] state bitfield.
637    ///
638    /// Calling this function with an argument that is neither `0` nor `1` is invalid.
639    #[inline]
640    #[cfg(Py_3_14)]
641    pub unsafe fn set_statically_allocated(&mut self, val: c_ushort) {
642        let mut state = PyASCIIObjectState::from(self.state);
643        state.set_statically_allocated(val);
644        self.state = u32::from(state);
645    }
646}
647
/// `#[repr(C)]` declaration of `PyCompactUnicodeObject`: a [`PyASCIIObject`] header followed
/// by additional fields.
#[repr(C)]
pub struct PyCompactUnicodeObject {
    // Embedded first, so a pointer to this struct can also be read as a `PyASCIIObject`.
    pub _base: PyASCIIObject,
    // NOTE(review): presumably the byte length of the buffer behind `utf8` — confirm against
    // CPython's `unicodeobject.h`.
    pub utf8_length: Py_ssize_t,
    pub utf8: *mut c_char,
    // Only part of the layout before Python 3.12.
    #[cfg(not(Py_3_12))]
    pub wstr_length: Py_ssize_t,
}
656
/// Data pointer of a [`PyUnicodeObject`], viewable as an untyped pointer or as a pointer to
/// `Py_UCS1`, `Py_UCS2` or `Py_UCS4` units.
///
/// All variants are raw pointers sharing the same storage; reading any of them requires
/// `unsafe`, as with every Rust union.
#[repr(C)]
pub union PyUnicodeObjectData {
    pub any: *mut c_void,
    pub latin1: *mut Py_UCS1,
    pub ucs2: *mut Py_UCS2,
    pub ucs4: *mut Py_UCS4,
}
664
/// `#[repr(C)]` declaration of `PyUnicodeObject`: a [`PyCompactUnicodeObject`] header followed
/// by a data pointer.
#[repr(C)]
pub struct PyUnicodeObject {
    // Embedded first, so a pointer to this struct can also be read as the compact/ASCII header.
    pub _base: PyCompactUnicodeObject,
    // Returned by `_PyUnicode_NONCOMPACT_DATA` through its `any` variant.
    pub data: PyUnicodeObjectData,
}
670
extern "C" {
    // Not declared for PyPy or GraalPy builds.
    // NOTE(review): the meaning of `check_content` and of the return value is defined by
    // CPython, not by this file — consult the CPython sources before relying on them.
    #[cfg(not(any(PyPy, GraalPy)))]
    pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
}
675
676// skipped PyUnicode_GET_SIZE
677// skipped PyUnicode_GET_DATA_SIZE
678// skipped PyUnicode_AS_UNICODE
679// skipped PyUnicode_AS_DATA
680
// Values of the `interned` field of the `PyASCIIObject` state bitfield, as returned by
// `PyASCIIObject::interned` and accepted by `PyASCIIObject::set_interned`.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
// Only defined for Python 3.12 and later.
#[cfg(Py_3_12)]
pub const SSTATE_INTERNED_IMMORTAL_STATIC: c_uint = 3;
686
687#[cfg(all(not(GraalPy), not(Py_3_14)))]
688#[inline]
689pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
690    debug_assert!(crate::PyUnicode_Check(op) != 0);
691    #[cfg(not(Py_3_12))]
692    debug_assert!(PyUnicode_IS_READY(op) != 0);
693
694    (*(op as *mut PyASCIIObject)).ascii()
695}
696
697#[cfg(all(not(GraalPy), not(Py_3_14)))]
698#[inline]
699pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
700    (*(op as *mut PyASCIIObject)).compact()
701}
702
/// Returns the `ascii` flag of the object's state bitfield (`0` or `1`).
///
/// This is the Python 3.14+ variant, where the flag is exposed as a `c_ushort`.
///
/// # Safety
///
/// `op` is dereferenced as a `PyASCIIObject`, so it must point to a valid object with that
/// layout.
#[cfg(all(not(GraalPy), Py_3_14))]
#[inline]
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_ushort {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    // No readiness assertion: it only applies before 3.12, and `Py_3_14` is never set
    // without `Py_3_12`, so it could not be compiled into this variant.

    (*(op as *mut PyASCIIObject)).ascii()
}
712
/// Returns the `compact` flag of the object's state bitfield (`0` or `1`).
///
/// This is the Python 3.14+ variant, where the flag is exposed as a `c_ushort`.
///
/// # Safety
///
/// `op` is dereferenced as a `PyASCIIObject`, so it must point to a valid object with that
/// layout.
#[cfg(all(Py_3_14, not(GraalPy)))]
#[inline]
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_ushort {
    let ascii_obj = op.cast::<PyASCIIObject>();
    (*ascii_obj).compact()
}
718
719#[cfg(not(GraalPy))]
720#[inline]
721pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
722    ((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
723}
724
// Values of the `kind` field of the `PyASCIIObject` state bitfield.
//
// The constants have the same type as the value returned by `PyASCIIObject::kind`:
// `c_uint` before Python 3.14 and `c_ushort` from 3.14 on.

// Only defined before Python 3.12.
#[cfg(not(Py_3_12))]
#[deprecated(note = "Removed in Python 3.12")]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;

#[cfg(not(Py_3_14))]
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
#[cfg(not(Py_3_14))]
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
#[cfg(not(Py_3_14))]
pub const PyUnicode_4BYTE_KIND: c_uint = 4;

#[cfg(Py_3_14)]
pub const PyUnicode_1BYTE_KIND: c_ushort = 1;
#[cfg(Py_3_14)]
pub const PyUnicode_2BYTE_KIND: c_ushort = 2;
#[cfg(Py_3_14)]
pub const PyUnicode_4BYTE_KIND: c_ushort = 4;
742
743#[cfg(not(any(GraalPy, PyPy)))]
744#[inline]
745pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
746    PyUnicode_DATA(op) as *mut Py_UCS1
747}
748
749#[cfg(not(any(GraalPy, PyPy)))]
750#[inline]
751pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
752    PyUnicode_DATA(op) as *mut Py_UCS2
753}
754
755#[cfg(not(any(GraalPy, PyPy)))]
756#[inline]
757pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
758    PyUnicode_DATA(op) as *mut Py_UCS4
759}
760
761#[cfg(all(not(GraalPy), not(Py_3_14)))]
762#[inline]
763pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
764    debug_assert!(crate::PyUnicode_Check(op) != 0);
765    #[cfg(not(Py_3_12))]
766    debug_assert!(PyUnicode_IS_READY(op) != 0);
767
768    (*(op as *mut PyASCIIObject)).kind()
769}
770
/// Returns the `kind` field of the object's state bitfield.
///
/// This is the Python 3.14+ variant, where the field is exposed as a `c_ushort`.
///
/// # Safety
///
/// `op` is dereferenced as a `PyASCIIObject`, so it must point to a valid object with that
/// layout.
#[cfg(all(not(GraalPy), Py_3_14))]
#[inline]
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_ushort {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    // No readiness assertion: it only applies before 3.12, and `Py_3_14` is never set
    // without `Py_3_12`, so it could not be compiled into this variant.

    (*(op as *mut PyASCIIObject)).kind()
}
780
781#[cfg(not(GraalPy))]
782#[inline]
783pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
784    if PyUnicode_IS_ASCII(op) != 0 {
785        (op as *mut PyASCIIObject).offset(1) as *mut c_void
786    } else {
787        (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
788    }
789}
790
791#[cfg(not(any(GraalPy, PyPy)))]
792#[inline]
793pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
794    debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
795
796    (*(op as *mut PyUnicodeObject)).data.any
797}
798
799#[cfg(not(any(GraalPy, PyPy)))]
800#[inline]
801pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
802    debug_assert!(crate::PyUnicode_Check(op) != 0);
803
804    if PyUnicode_IS_COMPACT(op) != 0 {
805        _PyUnicode_COMPACT_DATA(op)
806    } else {
807        _PyUnicode_NONCOMPACT_DATA(op)
808    }
809}
810
811// skipped PyUnicode_WRITE
812// skipped PyUnicode_READ
813// skipped PyUnicode_READ_CHAR
814
815#[cfg(not(GraalPy))]
816#[inline]
817pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
818    debug_assert!(crate::PyUnicode_Check(op) != 0);
819    #[cfg(not(Py_3_12))]
820    debug_assert!(PyUnicode_IS_READY(op) != 0);
821
822    (*(op as *mut PyASCIIObject)).length
823}
824
/// Always returns `1`; the argument is ignored.
///
/// This variant is used on Python 3.12+ and on GraalPy.
#[cfg(any(GraalPy, Py_3_12))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    // CPython keeps this function around for backwards compatibility.
    const ALWAYS_READY: c_uint = 1;
    ALWAYS_READY
}
831
832#[cfg(not(any(GraalPy, Py_3_12)))]
833#[inline]
834pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
835    (*(op as *mut PyASCIIObject)).ready()
836}
837
/// Always returns `0`; the argument is ignored.
///
/// This variant is used on Python 3.12+ and on GraalPy.
#[cfg(any(GraalPy, Py_3_12))]
#[inline]
pub unsafe fn PyUnicode_READY(_op: *mut PyObject) -> c_int {
    const NOTHING_TO_DO: c_int = 0;
    NOTHING_TO_DO
}
843
844#[cfg(not(any(Py_3_12, GraalPy)))]
845#[inline]
846pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
847    debug_assert!(crate::PyUnicode_Check(op) != 0);
848
849    if PyUnicode_IS_READY(op) != 0 {
850        0
851    } else {
852        _PyUnicode_Ready(op)
853    }
854}
855
856// skipped PyUnicode_MAX_CHAR_VALUE
857// skipped _PyUnicode_get_wstr_length
858// skipped PyUnicode_WSTR_LENGTH
859
// Declarations of C functions working on unicode objects.
//
// On PyPy, a declaration carrying `cfg_attr(PyPy, link_name = ...)` is linked against the
// `PyPy`-prefixed symbol named there; declarations marked `cfg(not(PyPy))` are not available
// on PyPy at all. The argument and return-value semantics are defined by the C
// implementation, not by this file.
extern "C" {
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
    pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
    // Called by the pre-3.12 `PyUnicode_READY` when the `ready` flag is not set.
    #[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
    pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;

    // skipped _PyUnicode_Copy

    #[cfg(not(PyPy))]
    pub fn PyUnicode_CopyCharacters(
        to: *mut PyObject,
        to_start: Py_ssize_t,
        from: *mut PyObject,
        from_start: Py_ssize_t,
        how_many: Py_ssize_t,
    ) -> Py_ssize_t;

    // skipped _PyUnicode_FastCopyCharacters

    #[cfg(not(PyPy))]
    pub fn PyUnicode_Fill(
        unicode: *mut PyObject,
        start: Py_ssize_t,
        length: Py_ssize_t,
        fill_char: Py_UCS4,
    ) -> Py_ssize_t;

    // skipped _PyUnicode_FastFill

    // Deprecated; only declared before Python 3.12.
    #[cfg(not(Py_3_12))]
    #[deprecated]
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
    pub fn PyUnicode_FromUnicode(u: *const wchar_t, size: Py_ssize_t) -> *mut PyObject;

    // `kind` is a `c_int` here, whereas the `PyUnicode_*_KIND` constants in this file are
    // `c_uint` (or `c_ushort` from 3.14), so callers have to convert.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
    pub fn PyUnicode_FromKindAndData(
        kind: c_int,
        buffer: *const c_void,
        size: Py_ssize_t,
    ) -> *mut PyObject;

    // skipped _PyUnicode_FromASCII
    // skipped _PyUnicode_FindMaxChar

    // Deprecated; only declared before Python 3.12.
    #[cfg(not(Py_3_12))]
    #[deprecated]
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
    pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut wchar_t;

    // skipped _PyUnicode_AsUnicode

    // Deprecated; only declared before Python 3.12.
    #[cfg(not(Py_3_12))]
    #[deprecated]
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
    pub fn PyUnicode_AsUnicodeAndSize(
        unicode: *mut PyObject,
        size: *mut Py_ssize_t,
    ) -> *mut wchar_t;

    // skipped PyUnicode_GetMax
}
921
922// skipped _PyUnicodeWriter
923// skipped _PyUnicodeWriter_Init
924// skipped _PyUnicodeWriter_Prepare
925// skipped _PyUnicodeWriter_PrepareInternal
926// skipped _PyUnicodeWriter_PrepareKind
927// skipped _PyUnicodeWriter_PrepareKindInternal
928// skipped _PyUnicodeWriter_WriteChar
929// skipped _PyUnicodeWriter_WriteStr
930// skipped _PyUnicodeWriter_WriteSubstring
931// skipped _PyUnicodeWriter_WriteASCIIString
932// skipped _PyUnicodeWriter_WriteLatin1String
933// skipped _PyUnicodeWriter_Finish
934// skipped _PyUnicodeWriter_Dealloc
935// skipped _PyUnicode_FormatAdvancedWriter
936
937extern "C" {
938    // skipped _PyUnicode_AsStringAndSize
939
940    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
941    pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
942
943    // skipped _PyUnicode_AsString
944
945    pub fn PyUnicode_Encode(
946        s: *const wchar_t,
947        size: Py_ssize_t,
948        encoding: *const c_char,
949        errors: *const c_char,
950    ) -> *mut PyObject;
951
952    pub fn PyUnicode_EncodeUTF7(
953        data: *const wchar_t,
954        length: Py_ssize_t,
955        base64SetO: c_int,
956        base64WhiteSpace: c_int,
957        errors: *const c_char,
958    ) -> *mut PyObject;
959
960    // skipped _PyUnicode_EncodeUTF7
961    // skipped _PyUnicode_AsUTF8String
962
963    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
964    pub fn PyUnicode_EncodeUTF8(
965        data: *const wchar_t,
966        length: Py_ssize_t,
967        errors: *const c_char,
968    ) -> *mut PyObject;
969
970    pub fn PyUnicode_EncodeUTF32(
971        data: *const wchar_t,
972        length: Py_ssize_t,
973        errors: *const c_char,
974        byteorder: c_int,
975    ) -> *mut PyObject;
976
977    // skipped _PyUnicode_EncodeUTF32
978
979    pub fn PyUnicode_EncodeUTF16(
980        data: *const wchar_t,
981        length: Py_ssize_t,
982        errors: *const c_char,
983        byteorder: c_int,
984    ) -> *mut PyObject;
985
986    // skipped _PyUnicode_EncodeUTF16
987    // skipped _PyUnicode_DecodeUnicodeEscape
988
989    pub fn PyUnicode_EncodeUnicodeEscape(data: *const wchar_t, length: Py_ssize_t)
990        -> *mut PyObject;
991
992    pub fn PyUnicode_EncodeRawUnicodeEscape(
993        data: *const wchar_t,
994        length: Py_ssize_t,
995    ) -> *mut PyObject;
996
997    // skipped _PyUnicode_AsLatin1String
998
999    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
1000    pub fn PyUnicode_EncodeLatin1(
1001        data: *const wchar_t,
1002        length: Py_ssize_t,
1003        errors: *const c_char,
1004    ) -> *mut PyObject;
1005
1006    // skipped _PyUnicode_AsASCIIString
1007
1008    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
1009    pub fn PyUnicode_EncodeASCII(
1010        data: *const wchar_t,
1011        length: Py_ssize_t,
1012        errors: *const c_char,
1013    ) -> *mut PyObject;
1014
1015    pub fn PyUnicode_EncodeCharmap(
1016        data: *const wchar_t,
1017        length: Py_ssize_t,
1018        mapping: *mut PyObject,
1019        errors: *const c_char,
1020    ) -> *mut PyObject;
1021
1022    // skipped _PyUnicode_EncodeCharmap
1023
1024    pub fn PyUnicode_TranslateCharmap(
1025        data: *const wchar_t,
1026        length: Py_ssize_t,
1027        table: *mut PyObject,
1028        errors: *const c_char,
1029    ) -> *mut PyObject;
1030
1031    // skipped PyUnicode_EncodeMBCS
1032
1033    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
1034    pub fn PyUnicode_EncodeDecimal(
1035        s: *mut wchar_t,
1036        length: Py_ssize_t,
1037        output: *mut c_char,
1038        errors: *const c_char,
1039    ) -> c_int;
1040
1041    #[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
1042    pub fn PyUnicode_TransformDecimalToASCII(s: *mut wchar_t, length: Py_ssize_t) -> *mut PyObject;
1043
1044    // skipped _PyUnicode_TransformDecimalAndSpaceToASCII
1045}
1046
1047// skipped _PyUnicode_JoinArray
1048// skipped _PyUnicode_EqualToASCIIId
1049// skipped _PyUnicode_EqualToASCIIString
1050// skipped _PyUnicode_XStrip
1051// skipped _PyUnicode_InsertThousandsGrouping
1052
1053// skipped _Py_ascii_whitespace
1054
1055// skipped _PyUnicode_IsLowercase
1056// skipped _PyUnicode_IsUppercase
1057// skipped _PyUnicode_IsTitlecase
1058// skipped _PyUnicode_IsXidStart
1059// skipped _PyUnicode_IsXidContinue
1060// skipped _PyUnicode_IsWhitespace
1061// skipped _PyUnicode_IsLinebreak
1062// skipped _PyUnicode_ToLowercase
1063// skipped _PyUnicode_ToUppercase
1064// skipped _PyUnicode_ToTitlecase
1065// skipped _PyUnicode_ToLowerFull
1066// skipped _PyUnicode_ToTitleFull
1067// skipped _PyUnicode_ToUpperFull
1068// skipped _PyUnicode_ToFoldedFull
1069// skipped _PyUnicode_IsCaseIgnorable
1070// skipped _PyUnicode_IsCased
1071// skipped _PyUnicode_ToDecimalDigit
1072// skipped _PyUnicode_ToDigit
1073// skipped _PyUnicode_ToNumeric
1074// skipped _PyUnicode_IsDecimalDigit
1075// skipped _PyUnicode_IsDigit
1076// skipped _PyUnicode_IsNumeric
1077// skipped _PyUnicode_IsPrintable
1078// skipped _PyUnicode_IsAlpha
1079// skipped Py_UNICODE_strlen
1080// skipped Py_UNICODE_strcpy
1081// skipped Py_UNICODE_strcat
1082// skipped Py_UNICODE_strncpy
1083// skipped Py_UNICODE_strcmp
1084// skipped Py_UNICODE_strncmp
1085// skipped Py_UNICODE_strchr
1086// skipped Py_UNICODE_strrchr
1087// skipped _PyUnicode_FormatLong
1088// skipped PyUnicode_AsUnicodeCopy
1089// skipped _PyUnicode_FromId
1090// skipped _PyUnicode_EQ
1091// skipped _PyUnicode_ScanIdentifier
// ⚠️ Internal docs — not public API. See the official PyO3 documentation instead.