Skip to main content

readstat/
err.rs

1//! Error types for the readstat crate.
2//!
3//! [`ReadStatCError`] maps the 41 error codes from the `ReadStat` C library to Rust
4//! enum variants. [`ReadStatError`] is the main error type, wrapping C library errors
5//! alongside Arrow, Parquet, I/O, and other failure modes.
6
7use num_derive::FromPrimitive;
8
9/// Error codes returned by the `ReadStat` C library.
10///
11/// Each variant maps directly to a `readstat_error_t` value. A value of
12/// [`READSTAT_OK`](ReadStatCError::READSTAT_OK) indicates success; all other
13/// variants represent specific failure conditions.
14#[derive(Debug, FromPrimitive)]
15#[allow(non_camel_case_types)]
16pub enum ReadStatCError {
17    /// Operation completed successfully.
18    READSTAT_OK = 0,
19    /// Failed to open the file.
20    READSTAT_ERROR_OPEN = 1,
21    /// Failed to read from the file.
22    READSTAT_ERROR_READ = 2,
23    /// Memory allocation failure.
24    READSTAT_ERROR_MALLOC = 3,
25    /// User-initiated abort via callback return value.
26    READSTAT_ERROR_USER_ABORT = 4,
27    /// General parse error in the file structure.
28    READSTAT_ERROR_PARSE = 5,
29    /// File uses an unsupported compression method.
30    READSTAT_ERROR_UNSUPPORTED_COMPRESSION = 6,
31    /// File uses an unsupported character set.
32    READSTAT_ERROR_UNSUPPORTED_CHARSET = 7,
33    /// Column count in header does not match actual columns.
34    READSTAT_ERROR_COLUMN_COUNT_MISMATCH = 8,
35    /// Row count in header does not match actual rows.
36    READSTAT_ERROR_ROW_COUNT_MISMATCH = 9,
37    /// Row width in header does not match actual width.
38    READSTAT_ERROR_ROW_WIDTH_MISMATCH = 10,
39    /// Invalid or unrecognized format string.
40    READSTAT_ERROR_BAD_FORMAT_STRING = 11,
41    /// Value type does not match expected type.
42    READSTAT_ERROR_VALUE_TYPE_MISMATCH = 12,
43    /// Failed to write output.
44    READSTAT_ERROR_WRITE = 13,
45    /// Writer was not properly initialized before use.
46    READSTAT_ERROR_WRITER_NOT_INITIALIZED = 14,
47    /// Failed to seek within the file.
48    READSTAT_ERROR_SEEK = 15,
49    /// Character encoding conversion failed.
50    READSTAT_ERROR_CONVERT = 16,
51    /// Conversion failed due to invalid string data.
52    READSTAT_ERROR_CONVERT_BAD_STRING = 17,
53    /// String is too short for conversion.
54    READSTAT_ERROR_CONVERT_SHORT_STRING = 18,
55    /// String is too long for conversion.
56    READSTAT_ERROR_CONVERT_LONG_STRING = 19,
57    /// Numeric value is outside the representable range.
58    READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE = 20,
59    /// Tagged missing value is outside the valid range.
60    READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE = 21,
61    /// String value exceeds the maximum allowed length.
62    READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG = 22,
63    /// Tagged missing values are not supported by this format.
64    READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED = 23,
65    /// File format version is not supported.
66    READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION = 24,
67    /// Variable name begins with an illegal character.
68    READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER = 25,
69    /// Variable name contains an illegal character.
70    READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER = 26,
71    /// Variable name is a reserved word.
72    READSTAT_ERROR_NAME_IS_RESERVED_WORD = 27,
73    /// Variable name exceeds the maximum allowed length.
74    READSTAT_ERROR_NAME_IS_TOO_LONG = 28,
75    /// Timestamp string could not be parsed.
76    READSTAT_ERROR_BAD_TIMESTAMP_STRING = 29,
77    /// Invalid frequency weight specification.
78    READSTAT_ERROR_BAD_FREQUENCY_WEIGHT = 30,
79    /// Too many missing value definitions for a variable.
80    READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS = 31,
81    /// Note text exceeds the maximum allowed length.
82    READSTAT_ERROR_NOTE_IS_TOO_LONG = 32,
83    /// String references are not supported by this format.
84    READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED = 33,
85    /// A string reference is required but was not provided.
86    READSTAT_ERROR_STRING_REF_IS_REQUIRED = 34,
87    /// Row is too wide for a single page.
88    READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE = 35,
89    /// File has too few columns.
90    READSTAT_ERROR_TOO_FEW_COLUMNS = 36,
91    /// File has too many columns.
92    READSTAT_ERROR_TOO_MANY_COLUMNS = 37,
93    /// Variable name is empty (zero length).
94    READSTAT_ERROR_NAME_IS_ZERO_LENGTH = 38,
95    /// Timestamp value is invalid.
96    READSTAT_ERROR_BAD_TIMESTAMP_VALUE = 39,
97    /// Invalid multiple response (MR) set string.
98    READSTAT_ERROR_BAD_MR_STRING = 40,
99}
100
101/// The main error type for the readstat crate.
102///
103/// Wraps errors from the `ReadStat` C library, Arrow/Parquet processing,
104/// I/O operations, and other subsystems into a single error enum.
105///
106/// This enum is `#[non_exhaustive]`: new variants may be added in minor
107/// releases without a semver-breaking change. Match with a wildcard arm
108/// (`_ => ...`) to remain forward-compatible.
109#[non_exhaustive]
110#[derive(Debug, thiserror::Error)]
111pub enum ReadStatError {
112    /// Error from the `ReadStat` C library.
113    #[error("ReadStat C library error: {0:?}")]
114    CLibrary(ReadStatCError),
115
116    /// Unrecognized C error code not mapped to [`ReadStatCError`].
117    #[error("Unknown C error code: {0}")]
118    UnknownCError(i32),
119
120    /// Arithmetic overflow during SAS-to-Unix epoch date/time conversion.
121    #[error("Date arithmetic overflow")]
122    DateOverflow,
123
124    /// Integer conversion error (e.g. `u32` to `i32` overflow).
125    #[error("Integer conversion failed: {0}")]
126    IntConversion(#[from] std::num::TryFromIntError),
127
128    /// Error from the Arrow library.
129    #[error("{0}")]
130    Arrow(#[from] arrow::error::ArrowError),
131
132    /// Error from the Parquet library.
133    #[cfg(feature = "parquet")]
134    #[error("{0}")]
135    Parquet(#[from] parquet::errors::ParquetError),
136
137    /// I/O error.
138    #[error("{0}")]
139    Io(#[from] std::io::Error),
140
141    /// Path resolution error.
142    #[cfg(not(target_arch = "wasm32"))]
143    #[error("{0}")]
144    PathAbs(#[from] path_abs::Error),
145
146    /// JSON serialization/deserialization error.
147    #[error("{0}")]
148    SerdeJson(#[from] serde_json::Error),
149
150    /// Rayon thread pool build error.
151    #[cfg(not(target_arch = "wasm32"))]
152    #[error("{0}")]
153    Rayon(#[from] rayon::ThreadPoolBuildError),
154
155    /// Null byte found in a string intended for C FFI.
156    #[error("{0}")]
157    NulError(#[from] std::ffi::NulError),
158
159    /// One or more specified column names were not found in the dataset.
160    #[error("Column(s) not found: {requested:?}\nAvailable columns: {available:?}")]
161    ColumnsNotFound {
162        /// The column names that were requested but not found.
163        requested: Vec<String>,
164        /// All available column names in the dataset.
165        available: Vec<String>,
166    },
167
168    /// Error from the DataFusion SQL engine.
169    #[cfg(feature = "sql")]
170    #[error("{0}")]
171    DataFusion(#[from] datafusion::error::DataFusionError),
172
173    /// Catch-all error with a custom message.
174    #[error("{0}")]
175    Other(String),
176}
177
178/// Check a readstat C error code, returning Ok(()) for `READSTAT_OK`
179/// or an appropriate error variant otherwise.
180pub(crate) fn check_c_error(code: i32) -> Result<(), ReadStatError> {
181    use num_traits::FromPrimitive;
182    match FromPrimitive::from_i32(code) {
183        Some(ReadStatCError::READSTAT_OK) => Ok(()),
184        Some(e) => Err(ReadStatError::CLibrary(e)),
185        None => Err(ReadStatError::UnknownCError(code)),
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Code `0` is reported as success.
    #[test]
    fn check_c_error_ok() {
        assert!(matches!(check_c_error(0), Ok(())));
    }

    /// Every code in `1..=40` surfaces as a `CLibrary` error.
    #[test]
    fn check_c_error_known_errors() {
        (1..=40).for_each(|code| match check_c_error(code).unwrap_err() {
            ReadStatError::CLibrary(_) => {}
            other => panic!("Expected CLibrary error for code {code}, got {other:?}"),
        });
    }

    /// Code `1` decodes to `READSTAT_ERROR_OPEN`.
    #[test]
    fn check_c_error_open() {
        let outcome = check_c_error(1);
        assert!(matches!(
            outcome,
            Err(ReadStatError::CLibrary(ReadStatCError::READSTAT_ERROR_OPEN))
        ));
    }

    /// Code `5` decodes to `READSTAT_ERROR_PARSE`.
    #[test]
    fn check_c_error_parse() {
        let outcome = check_c_error(5);
        assert!(matches!(
            outcome,
            Err(ReadStatError::CLibrary(ReadStatCError::READSTAT_ERROR_PARSE))
        ));
    }

    /// A positive code with no enum variant is preserved verbatim.
    #[test]
    fn check_c_error_unknown_positive() {
        let outcome = check_c_error(999);
        assert!(matches!(outcome, Err(ReadStatError::UnknownCError(999))));
    }

    /// A negative code with no enum variant is preserved verbatim.
    #[test]
    fn check_c_error_unknown_negative() {
        let outcome = check_c_error(-1);
        assert!(matches!(outcome, Err(ReadStatError::UnknownCError(-1))));
    }

    /// `Display` output for the variants with fixed or simple messages.
    #[test]
    fn error_display_messages() {
        let cases = [
            (ReadStatError::Other("test error".to_string()), "test error"),
            (ReadStatError::DateOverflow, "Date arithmetic overflow"),
            (ReadStatError::UnknownCError(99), "Unknown C error code: 99"),
        ];

        for (err, expected) in cases {
            assert_eq!(err.to_string(), expected);
        }
    }

    /// The `ColumnsNotFound` message mentions the requested names and the
    /// "Available columns" label.
    #[test]
    fn error_columns_not_found_display() {
        let rendered = ReadStatError::ColumnsNotFound {
            requested: vec!["foo".into(), "bar".into()],
            available: vec!["a".into(), "b".into(), "c".into()],
        }
        .to_string();

        for needle in ["foo", "bar", "Available columns"] {
            assert!(rendered.contains(needle));
        }
    }
}