readstat/
err.rs

//! Error types for the readstat crate.
//!
//! [`ReadStatCError`] maps the 41 status codes of the `ReadStat` C library (success
//! plus 40 error codes) to Rust enum variants. [`ReadStatError`] is the main error
//! type, wrapping C library errors alongside Arrow, Parquet, I/O, and other failure modes.
6
7use num_derive::FromPrimitive;
8
9/// Error codes returned by the `ReadStat` C library.
10///
11/// Each variant maps directly to a `readstat_error_t` value. A value of
12/// [`READSTAT_OK`](ReadStatCError::READSTAT_OK) indicates success; all other
13/// variants represent specific failure conditions.
14#[derive(Debug, FromPrimitive)]
15#[allow(non_camel_case_types)]
16pub enum ReadStatCError {
17    /// Operation completed successfully.
18    READSTAT_OK = 0,
19    /// Failed to open the file.
20    READSTAT_ERROR_OPEN = 1,
21    /// Failed to read from the file.
22    READSTAT_ERROR_READ = 2,
23    /// Memory allocation failure.
24    READSTAT_ERROR_MALLOC = 3,
25    /// User-initiated abort via callback return value.
26    READSTAT_ERROR_USER_ABORT = 4,
27    /// General parse error in the file structure.
28    READSTAT_ERROR_PARSE = 5,
29    /// File uses an unsupported compression method.
30    READSTAT_ERROR_UNSUPPORTED_COMPRESSION = 6,
31    /// File uses an unsupported character set.
32    READSTAT_ERROR_UNSUPPORTED_CHARSET = 7,
33    /// Column count in header does not match actual columns.
34    READSTAT_ERROR_COLUMN_COUNT_MISMATCH = 8,
35    /// Row count in header does not match actual rows.
36    READSTAT_ERROR_ROW_COUNT_MISMATCH = 9,
37    /// Row width in header does not match actual width.
38    READSTAT_ERROR_ROW_WIDTH_MISMATCH = 10,
39    /// Invalid or unrecognized format string.
40    READSTAT_ERROR_BAD_FORMAT_STRING = 11,
41    /// Value type does not match expected type.
42    READSTAT_ERROR_VALUE_TYPE_MISMATCH = 12,
43    /// Failed to write output.
44    READSTAT_ERROR_WRITE = 13,
45    /// Writer was not properly initialized before use.
46    READSTAT_ERROR_WRITER_NOT_INITIALIZED = 14,
47    /// Failed to seek within the file.
48    READSTAT_ERROR_SEEK = 15,
49    /// Character encoding conversion failed.
50    READSTAT_ERROR_CONVERT = 16,
51    /// Conversion failed due to invalid string data.
52    READSTAT_ERROR_CONVERT_BAD_STRING = 17,
53    /// String is too short for conversion.
54    READSTAT_ERROR_CONVERT_SHORT_STRING = 18,
55    /// String is too long for conversion.
56    READSTAT_ERROR_CONVERT_LONG_STRING = 19,
57    /// Numeric value is outside the representable range.
58    READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE = 20,
59    /// Tagged missing value is outside the valid range.
60    READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE = 21,
61    /// String value exceeds the maximum allowed length.
62    READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG = 22,
63    /// Tagged missing values are not supported by this format.
64    READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED = 23,
65    /// File format version is not supported.
66    READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION = 24,
67    /// Variable name begins with an illegal character.
68    READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER = 25,
69    /// Variable name contains an illegal character.
70    READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER = 26,
71    /// Variable name is a reserved word.
72    READSTAT_ERROR_NAME_IS_RESERVED_WORD = 27,
73    /// Variable name exceeds the maximum allowed length.
74    READSTAT_ERROR_NAME_IS_TOO_LONG = 28,
75    /// Timestamp string could not be parsed.
76    READSTAT_ERROR_BAD_TIMESTAMP_STRING = 29,
77    /// Invalid frequency weight specification.
78    READSTAT_ERROR_BAD_FREQUENCY_WEIGHT = 30,
79    /// Too many missing value definitions for a variable.
80    READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS = 31,
81    /// Note text exceeds the maximum allowed length.
82    READSTAT_ERROR_NOTE_IS_TOO_LONG = 32,
83    /// String references are not supported by this format.
84    READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED = 33,
85    /// A string reference is required but was not provided.
86    READSTAT_ERROR_STRING_REF_IS_REQUIRED = 34,
87    /// Row is too wide for a single page.
88    READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE = 35,
89    /// File has too few columns.
90    READSTAT_ERROR_TOO_FEW_COLUMNS = 36,
91    /// File has too many columns.
92    READSTAT_ERROR_TOO_MANY_COLUMNS = 37,
93    /// Variable name is empty (zero length).
94    READSTAT_ERROR_NAME_IS_ZERO_LENGTH = 38,
95    /// Timestamp value is invalid.
96    READSTAT_ERROR_BAD_TIMESTAMP_VALUE = 39,
97    /// Invalid multiple response (MR) set string.
98    READSTAT_ERROR_BAD_MR_STRING = 40,
99}
100
101/// The main error type for the readstat crate.
102///
103/// Wraps errors from the `ReadStat` C library, Arrow/Parquet processing,
104/// I/O operations, and other subsystems into a single error enum.
105#[derive(Debug, thiserror::Error)]
106pub enum ReadStatError {
107    /// Error from the `ReadStat` C library.
108    #[error("ReadStat C library error: {0:?}")]
109    CLibrary(ReadStatCError),
110
111    /// Unrecognized C error code not mapped to [`ReadStatCError`].
112    #[error("Unknown C error code: {0}")]
113    UnknownCError(i32),
114
115    /// Arithmetic overflow during SAS-to-Unix epoch date/time conversion.
116    #[error("Date arithmetic overflow")]
117    DateOverflow,
118
119    /// Integer conversion error (e.g. `u32` to `i32` overflow).
120    #[error("Integer conversion failed: {0}")]
121    IntConversion(#[from] std::num::TryFromIntError),
122
123    /// Error from the Arrow library.
124    #[error("{0}")]
125    Arrow(#[from] arrow::error::ArrowError),
126
127    /// Error from the Parquet library.
128    #[cfg(feature = "parquet")]
129    #[error("{0}")]
130    Parquet(#[from] parquet::errors::ParquetError),
131
132    /// I/O error.
133    #[error("{0}")]
134    Io(#[from] std::io::Error),
135
136    /// Path resolution error.
137    #[cfg(not(target_arch = "wasm32"))]
138    #[error("{0}")]
139    PathAbs(#[from] path_abs::Error),
140
141    /// JSON serialization/deserialization error.
142    #[error("{0}")]
143    SerdeJson(#[from] serde_json::Error),
144
145    /// Rayon thread pool build error.
146    #[cfg(not(target_arch = "wasm32"))]
147    #[error("{0}")]
148    Rayon(#[from] rayon::ThreadPoolBuildError),
149
150    /// Null byte found in a string intended for C FFI.
151    #[error("{0}")]
152    NulError(#[from] std::ffi::NulError),
153
154    /// One or more specified column names were not found in the dataset.
155    #[error("Column(s) not found: {requested:?}\nAvailable columns: {available:?}")]
156    ColumnsNotFound {
157        /// The column names that were requested but not found.
158        requested: Vec<String>,
159        /// All available column names in the dataset.
160        available: Vec<String>,
161    },
162
163    /// Error from the DataFusion SQL engine.
164    #[cfg(feature = "sql")]
165    #[error("{0}")]
166    DataFusion(#[from] datafusion::error::DataFusionError),
167
168    /// Catch-all error with a custom message.
169    #[error("{0}")]
170    Other(String),
171}
172
173/// Check a readstat C error code, returning Ok(()) for `READSTAT_OK`
174/// or an appropriate error variant otherwise.
175pub(crate) fn check_c_error(code: i32) -> Result<(), ReadStatError> {
176    use num_traits::FromPrimitive;
177    match FromPrimitive::from_i32(code) {
178        Some(ReadStatCError::READSTAT_OK) => Ok(()),
179        Some(e) => Err(ReadStatError::CLibrary(e)),
180        None => Err(ReadStatError::UnknownCError(code)),
181    }
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Code 0 must be reported as success.
    #[test]
    fn check_c_error_ok() {
        assert!(matches!(check_c_error(0), Ok(())));
    }

    /// Every code from 1 through 40 must surface as a `CLibrary` error.
    #[test]
    fn check_c_error_known_errors() {
        for code in 1..=40 {
            let other = check_c_error(code).unwrap_err();
            assert!(
                matches!(other, ReadStatError::CLibrary(_)),
                "Expected CLibrary error for code {code}, got {other:?}"
            );
        }
    }

    /// Code 1 maps to the "open" failure variant.
    #[test]
    fn check_c_error_open() {
        let outcome = check_c_error(1);
        assert!(matches!(
            outcome,
            Err(ReadStatError::CLibrary(ReadStatCError::READSTAT_ERROR_OPEN))
        ));
    }

    /// Code 5 maps to the "parse" failure variant.
    #[test]
    fn check_c_error_parse() {
        let outcome = check_c_error(5);
        assert!(matches!(
            outcome,
            Err(ReadStatError::CLibrary(ReadStatCError::READSTAT_ERROR_PARSE))
        ));
    }

    /// A positive code beyond the known range keeps its raw value.
    #[test]
    fn check_c_error_unknown_positive() {
        let outcome = check_c_error(999);
        assert!(matches!(outcome, Err(ReadStatError::UnknownCError(999))));
    }

    /// A negative code keeps its raw value as well.
    #[test]
    fn check_c_error_unknown_negative() {
        let outcome = check_c_error(-1);
        assert!(matches!(outcome, Err(ReadStatError::UnknownCError(-1))));
    }

    /// `Display` output of the simple variants matches the declared messages.
    #[test]
    fn error_display_messages() {
        let cases = [
            (ReadStatError::Other("test error".to_string()), "test error"),
            (ReadStatError::DateOverflow, "Date arithmetic overflow"),
            (ReadStatError::UnknownCError(99), "Unknown C error code: 99"),
        ];
        for (err, expected) in cases {
            assert_eq!(err.to_string(), expected);
        }
    }

    /// The `ColumnsNotFound` message lists the requested names and the
    /// "Available columns" section.
    #[test]
    fn error_columns_not_found_display() {
        let requested: Vec<String> = vec!["foo".into(), "bar".into()];
        let available: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
        let msg = ReadStatError::ColumnsNotFound {
            requested,
            available,
        }
        .to_string();

        for needle in ["foo", "bar", "Available columns"] {
            assert!(msg.contains(needle));
        }
    }
}