Skip to main content

readstat/
err.rs

1//! Error types for the readstat crate.
2//!
3//! [`ReadStatCError`] maps the 41 error codes from the `ReadStat` C library to Rust
4//! enum variants. [`ReadStatError`] is the main error type, wrapping C library errors
5//! alongside Arrow, Parquet, I/O, and other failure modes.
6
7use std::path::PathBuf;
8
9use num_derive::FromPrimitive;
10
11/// Error codes returned by the `ReadStat` C library.
12///
13/// Each variant maps directly to a `readstat_error_t` value. A value of
14/// [`READSTAT_OK`](ReadStatCError::READSTAT_OK) indicates success; all other
15/// variants represent specific failure conditions.
16///
17/// This enum is `#[non_exhaustive]`: the C library adds new error codes over
18/// time, so new variants may appear in minor releases. Match with a wildcard
19/// arm (`_ => ...`) to remain forward-compatible.
20#[non_exhaustive]
21#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive)]
22#[allow(non_camel_case_types)]
23pub enum ReadStatCError {
24    /// Operation completed successfully.
25    READSTAT_OK = 0,
26    /// Failed to open the file.
27    READSTAT_ERROR_OPEN = 1,
28    /// Failed to read from the file.
29    READSTAT_ERROR_READ = 2,
30    /// Memory allocation failure.
31    READSTAT_ERROR_MALLOC = 3,
32    /// User-initiated abort via callback return value.
33    READSTAT_ERROR_USER_ABORT = 4,
34    /// General parse error in the file structure.
35    READSTAT_ERROR_PARSE = 5,
36    /// File uses an unsupported compression method.
37    READSTAT_ERROR_UNSUPPORTED_COMPRESSION = 6,
38    /// File uses an unsupported character set.
39    READSTAT_ERROR_UNSUPPORTED_CHARSET = 7,
40    /// Column count in header does not match actual columns.
41    READSTAT_ERROR_COLUMN_COUNT_MISMATCH = 8,
42    /// Row count in header does not match actual rows.
43    READSTAT_ERROR_ROW_COUNT_MISMATCH = 9,
44    /// Row width in header does not match actual width.
45    READSTAT_ERROR_ROW_WIDTH_MISMATCH = 10,
46    /// Invalid or unrecognized format string.
47    READSTAT_ERROR_BAD_FORMAT_STRING = 11,
48    /// Value type does not match expected type.
49    READSTAT_ERROR_VALUE_TYPE_MISMATCH = 12,
50    /// Failed to write output.
51    READSTAT_ERROR_WRITE = 13,
52    /// Writer was not properly initialized before use.
53    READSTAT_ERROR_WRITER_NOT_INITIALIZED = 14,
54    /// Failed to seek within the file.
55    READSTAT_ERROR_SEEK = 15,
56    /// Character encoding conversion failed.
57    READSTAT_ERROR_CONVERT = 16,
58    /// Conversion failed due to invalid string data.
59    READSTAT_ERROR_CONVERT_BAD_STRING = 17,
60    /// String is too short for conversion.
61    READSTAT_ERROR_CONVERT_SHORT_STRING = 18,
62    /// String is too long for conversion.
63    READSTAT_ERROR_CONVERT_LONG_STRING = 19,
64    /// Numeric value is outside the representable range.
65    READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE = 20,
66    /// Tagged missing value is outside the valid range.
67    READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE = 21,
68    /// String value exceeds the maximum allowed length.
69    READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG = 22,
70    /// Tagged missing values are not supported by this format.
71    READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED = 23,
72    /// File format version is not supported.
73    READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION = 24,
74    /// Variable name begins with an illegal character.
75    READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER = 25,
76    /// Variable name contains an illegal character.
77    READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER = 26,
78    /// Variable name is a reserved word.
79    READSTAT_ERROR_NAME_IS_RESERVED_WORD = 27,
80    /// Variable name exceeds the maximum allowed length.
81    READSTAT_ERROR_NAME_IS_TOO_LONG = 28,
82    /// Timestamp string could not be parsed.
83    READSTAT_ERROR_BAD_TIMESTAMP_STRING = 29,
84    /// Invalid frequency weight specification.
85    READSTAT_ERROR_BAD_FREQUENCY_WEIGHT = 30,
86    /// Too many missing value definitions for a variable.
87    READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS = 31,
88    /// Note text exceeds the maximum allowed length.
89    READSTAT_ERROR_NOTE_IS_TOO_LONG = 32,
90    /// String references are not supported by this format.
91    READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED = 33,
92    /// A string reference is required but was not provided.
93    READSTAT_ERROR_STRING_REF_IS_REQUIRED = 34,
94    /// Row is too wide for a single page.
95    READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE = 35,
96    /// File has too few columns.
97    READSTAT_ERROR_TOO_FEW_COLUMNS = 36,
98    /// File has too many columns.
99    READSTAT_ERROR_TOO_MANY_COLUMNS = 37,
100    /// Variable name is empty (zero length).
101    READSTAT_ERROR_NAME_IS_ZERO_LENGTH = 38,
102    /// Timestamp value is invalid.
103    READSTAT_ERROR_BAD_TIMESTAMP_VALUE = 39,
104    /// Invalid multiple response (MR) set string.
105    READSTAT_ERROR_BAD_MR_STRING = 40,
106}
107
108impl std::fmt::Display for ReadStatCError {
109    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110        let msg = match self {
111            Self::READSTAT_OK => "operation completed successfully",
112            Self::READSTAT_ERROR_OPEN => "failed to open the file",
113            Self::READSTAT_ERROR_READ => "failed to read from the file",
114            Self::READSTAT_ERROR_MALLOC => "memory allocation failure",
115            Self::READSTAT_ERROR_USER_ABORT => "user-initiated abort via callback return value",
116            Self::READSTAT_ERROR_PARSE => "general parse error in the file structure",
117            Self::READSTAT_ERROR_UNSUPPORTED_COMPRESSION => {
118                "file uses an unsupported compression method"
119            }
120            Self::READSTAT_ERROR_UNSUPPORTED_CHARSET => "file uses an unsupported character set",
121            Self::READSTAT_ERROR_COLUMN_COUNT_MISMATCH => {
122                "column count in header does not match actual columns"
123            }
124            Self::READSTAT_ERROR_ROW_COUNT_MISMATCH => {
125                "row count in header does not match actual rows"
126            }
127            Self::READSTAT_ERROR_ROW_WIDTH_MISMATCH => {
128                "row width in header does not match actual width"
129            }
130            Self::READSTAT_ERROR_BAD_FORMAT_STRING => "invalid or unrecognized format string",
131            Self::READSTAT_ERROR_VALUE_TYPE_MISMATCH => "value type does not match expected type",
132            Self::READSTAT_ERROR_WRITE => "failed to write output",
133            Self::READSTAT_ERROR_WRITER_NOT_INITIALIZED => {
134                "writer was not properly initialized before use"
135            }
136            Self::READSTAT_ERROR_SEEK => "failed to seek within the file",
137            Self::READSTAT_ERROR_CONVERT => "character encoding conversion failed",
138            Self::READSTAT_ERROR_CONVERT_BAD_STRING => {
139                "conversion failed due to invalid string data"
140            }
141            Self::READSTAT_ERROR_CONVERT_SHORT_STRING => "string is too short for conversion",
142            Self::READSTAT_ERROR_CONVERT_LONG_STRING => "string is too long for conversion",
143            Self::READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE => {
144                "numeric value is outside the representable range"
145            }
146            Self::READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE => {
147                "tagged missing value is outside the valid range"
148            }
149            Self::READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG => {
150                "string value exceeds the maximum allowed length"
151            }
152            Self::READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED => {
153                "tagged missing values are not supported by this format"
154            }
155            Self::READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION => {
156                "file format version is not supported"
157            }
158            Self::READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER => {
159                "variable name begins with an illegal character"
160            }
161            Self::READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER => {
162                "variable name contains an illegal character"
163            }
164            Self::READSTAT_ERROR_NAME_IS_RESERVED_WORD => "variable name is a reserved word",
165            Self::READSTAT_ERROR_NAME_IS_TOO_LONG => {
166                "variable name exceeds the maximum allowed length"
167            }
168            Self::READSTAT_ERROR_BAD_TIMESTAMP_STRING => "timestamp string could not be parsed",
169            Self::READSTAT_ERROR_BAD_FREQUENCY_WEIGHT => "invalid frequency weight specification",
170            Self::READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS => {
171                "too many missing value definitions for a variable"
172            }
173            Self::READSTAT_ERROR_NOTE_IS_TOO_LONG => "note text exceeds the maximum allowed length",
174            Self::READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED => {
175                "string references are not supported by this format"
176            }
177            Self::READSTAT_ERROR_STRING_REF_IS_REQUIRED => {
178                "a string reference is required but was not provided"
179            }
180            Self::READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE => "row is too wide for a single page",
181            Self::READSTAT_ERROR_TOO_FEW_COLUMNS => "file has too few columns",
182            Self::READSTAT_ERROR_TOO_MANY_COLUMNS => "file has too many columns",
183            Self::READSTAT_ERROR_NAME_IS_ZERO_LENGTH => "variable name is empty (zero length)",
184            Self::READSTAT_ERROR_BAD_TIMESTAMP_VALUE => "timestamp value is invalid",
185            Self::READSTAT_ERROR_BAD_MR_STRING => "invalid multiple response (MR) set string",
186        };
187        f.write_str(msg)
188    }
189}
190
191impl std::error::Error for ReadStatCError {}
192
193/// The main error type for the readstat crate.
194///
195/// Wraps errors from the `ReadStat` C library, Arrow/Parquet processing,
196/// I/O operations, and other subsystems into a single error enum.
197///
198/// This enum is `#[non_exhaustive]`: new variants may be added in minor
199/// releases without a semver-breaking change. Match with a wildcard arm
200/// (`_ => ...`) to remain forward-compatible.
201#[non_exhaustive]
202#[derive(Debug, thiserror::Error)]
203pub enum ReadStatError {
204    /// Error from the `ReadStat` C library.
205    #[error("ReadStat C library error: {0}")]
206    CLibrary(ReadStatCError),
207
208    /// Unrecognized C error code not mapped to [`ReadStatCError`].
209    #[error("Unknown C error code: {0}")]
210    UnknownCError(i32),
211
212    /// Arithmetic overflow during SAS-to-Unix epoch date/time conversion.
213    #[error("Date arithmetic overflow")]
214    DateOverflow,
215
216    /// Integer conversion error (e.g. `u32` to `i32` overflow).
217    #[error("Integer conversion failed: {0}")]
218    IntConversion(#[from] std::num::TryFromIntError),
219
220    /// Error from the Arrow library.
221    #[error("{0}")]
222    Arrow(#[from] arrow::error::ArrowError),
223
224    /// Error from the Parquet library.
225    #[cfg(feature = "parquet")]
226    #[error("{0}")]
227    Parquet(#[from] parquet::errors::ParquetError),
228
229    /// I/O error.
230    #[error("{0}")]
231    Io(#[from] std::io::Error),
232
233    /// Path resolution error.
234    #[cfg(not(target_arch = "wasm32"))]
235    #[error("{0}")]
236    PathAbs(#[from] path_abs::Error),
237
238    /// JSON serialization/deserialization error.
239    #[error("{0}")]
240    SerdeJson(#[from] serde_json::Error),
241
242    /// Rayon thread pool build error.
243    #[cfg(not(target_arch = "wasm32"))]
244    #[error("{0}")]
245    Rayon(#[from] rayon::ThreadPoolBuildError),
246
247    /// Null byte found in a string intended for C FFI.
248    #[error("{0}")]
249    NulError(#[from] std::ffi::NulError),
250
251    /// One or more specified column names were not found in the dataset.
252    #[error("Column(s) not found: {requested:?}\nAvailable columns: {available:?}")]
253    ColumnsNotFound {
254        /// The column names that were requested but not found.
255        requested: Vec<String>,
256        /// All available column names in the dataset.
257        available: Vec<String>,
258    },
259
260    /// The input file does not exist.
261    #[error("File {} does not exist!", .0.display())]
262    FileNotFound(PathBuf),
263
264    /// The input file has a missing or unsupported extension.
265    ///
266    /// Only `.sas7bdat` files are supported as input.
267    #[error("File {} does not have the expected .sas7bdat extension!", .0.display())]
268    UnsupportedInputExtension(PathBuf),
269
270    /// The output file path has a missing or mismatched extension for the
271    /// chosen [`OutFormat`](crate::OutFormat).
272    #[error("File {} does not have the expected .{expected} extension!", .path.display())]
273    OutputExtensionMismatch {
274        /// The output path whose extension did not match.
275        path: PathBuf,
276        /// The extension expected for the configured output format.
277        expected: String,
278    },
279
280    /// The output file already exists and overwrite was not requested.
281    #[error("Output file {} already exists! Set overwrite = true to replace it.", .0.display())]
282    OutputFileExists(PathBuf),
283
284    /// The parent directory of the output path does not exist.
285    #[error("The parent directory of the output path {} does not exist", .0.display())]
286    OutputParentMissing(PathBuf),
287
288    /// The format string is not a recognized output format.
289    #[error("Unknown format: {0:?}. Expected one of: csv, feather, ndjson, parquet")]
290    UnknownFormat(String),
291
292    /// The SQL file was empty.
293    #[error("SQL file {} is empty", .0.display())]
294    EmptySqlFile(PathBuf),
295
296    /// The columns file contained no column names (only blanks/comments).
297    #[error("Columns file {} contains no column names", .0.display())]
298    EmptyColumnsFile(PathBuf),
299
300    /// Error from the DataFusion SQL engine.
301    #[cfg(feature = "sql")]
302    #[error("{0}")]
303    DataFusion(#[from] datafusion::error::DataFusionError),
304
305    /// Catch-all error with a custom message.
306    #[error("{0}")]
307    Other(String),
308}
309
310/// Check a readstat C error code, returning Ok(()) for `READSTAT_OK`
311/// or an appropriate error variant otherwise.
312pub(crate) fn check_c_error(code: i32) -> Result<(), ReadStatError> {
313    use num_traits::FromPrimitive;
314    match FromPrimitive::from_i32(code) {
315        Some(ReadStatCError::READSTAT_OK) => Ok(()),
316        Some(e) => Err(ReadStatError::CLibrary(e)),
317        None => Err(ReadStatError::UnknownCError(code)),
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `check_c_error` on `code` and returns the error it is expected to
    /// produce (panics if the call unexpectedly succeeds).
    fn failure_for(code: i32) -> ReadStatError {
        check_c_error(code).unwrap_err()
    }

    #[test]
    fn check_c_error_ok() {
        // Code 0 is `READSTAT_OK`.
        assert!(matches!(check_c_error(0), Ok(())));
    }

    #[test]
    fn check_c_error_known_errors() {
        // Codes 1 through 40 each have a dedicated `ReadStatCError` variant.
        (1..=40).for_each(|code| {
            let err = failure_for(code);
            assert!(
                matches!(&err, ReadStatError::CLibrary(_)),
                "Expected CLibrary error for code {code}, got {err:?}"
            );
        });
    }

    #[test]
    fn check_c_error_open() {
        let is_open_error = matches!(
            failure_for(1),
            ReadStatError::CLibrary(ReadStatCError::READSTAT_ERROR_OPEN)
        );
        assert!(is_open_error);
    }

    #[test]
    fn check_c_error_parse() {
        let is_parse_error = matches!(
            failure_for(5),
            ReadStatError::CLibrary(ReadStatCError::READSTAT_ERROR_PARSE)
        );
        assert!(is_parse_error);
    }

    #[test]
    fn check_c_error_unknown_positive() {
        assert!(matches!(
            failure_for(999),
            ReadStatError::UnknownCError(999)
        ));
    }

    #[test]
    fn check_c_error_unknown_negative() {
        assert!(matches!(failure_for(-1), ReadStatError::UnknownCError(-1)));
    }

    #[test]
    fn error_display_messages() {
        // (error, expected `Display` output)
        let cases = [
            (ReadStatError::Other("test error".to_string()), "test error"),
            (ReadStatError::DateOverflow, "Date arithmetic overflow"),
            (ReadStatError::UnknownCError(99), "Unknown C error code: 99"),
        ];
        for (err, expected) in cases {
            assert_eq!(err.to_string(), expected);
        }
    }

    #[test]
    fn clibrary_display_uses_human_message() {
        // A `CLibrary` error must show the readable description rather than
        // the `Debug` name of the variant. Code 1 is `READSTAT_ERROR_OPEN`.
        let rendered = failure_for(1).to_string();
        assert_eq!(
            rendered,
            "ReadStat C library error: failed to open the file"
        );
        assert!(!rendered.contains("READSTAT_ERROR_OPEN"));
    }

    #[test]
    fn cerror_display_all_variants_nonempty() {
        use num_traits::FromPrimitive;
        for code in 0..=40 {
            let variant = ReadStatCError::from_i32(code).unwrap();
            assert!(
                !variant.to_string().is_empty(),
                "empty Display for code {code}"
            );
        }
    }

    #[test]
    fn error_columns_not_found_display() {
        let requested = vec!["foo".to_string(), "bar".to_string()];
        let available = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let rendered = ReadStatError::ColumnsNotFound {
            requested,
            available,
        }
        .to_string();

        for needle in ["foo", "bar", "Available columns"] {
            assert!(rendered.contains(needle));
        }
    }
}