// readstat/rs_metadata.rs

1//! File-level and variable-level metadata extracted from `.sas7bdat` files.
2//!
3//! [`ReadStatMetadata`] holds file-level properties (row/variable counts, encoding,
4//! compression, timestamps) and per-variable metadata ([`ReadStatVarMetadata`]) including
5//! names, types, labels, and SAS format strings. After parsing, it builds an Arrow
6//! [`Schema`](arrow::datatypes::Schema) that maps SAS types to Arrow data types.
7
8use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
9use log::debug;
10use num_derive::FromPrimitive;
11use serde::Serialize;
12#[cfg(any(not(target_arch = "wasm32"), test))]
13use std::fs::File;
14use std::{
15    collections::{BTreeMap, BTreeSet, HashMap},
16    ffi::{CString, c_void},
17    path::Path,
18};
19
20use crate::cb::{handle_metadata, handle_variable};
21use crate::err::{ReadStatError, check_c_error};
22use crate::rs_buffer_io::ReadStatBufferCtx;
23use crate::rs_parser::ReadStatParser;
24use crate::rs_path::ReadStatPath;
25use crate::rs_var::{ReadStatVarFormatClass, ReadStatVarType, ReadStatVarTypeClass};
26
27/// File-level metadata extracted from a `.sas7bdat` file.
28///
29/// Populated by the `handle_metadata` and `handle_variable` FFI callbacks during parsing.
30/// After parsing, call [`read_metadata`](ReadStatMetadata::read_metadata) to populate
31/// all fields and build the Arrow [`Schema`].
32#[derive(Clone, Debug, Serialize)]
33pub struct ReadStatMetadata {
34    /// Number of rows (observations) in the dataset.
35    pub row_count: i32,
36    /// Number of variables (columns) in the dataset.
37    pub var_count: i32,
38    /// Internal table name from the SAS file header.
39    pub table_name: String,
40    /// User-assigned file label.
41    pub file_label: String,
42    /// Character encoding of the file (e.g. `"UTF-8"`, `"WINDOWS-1252"`).
43    pub file_encoding: String,
44    /// SAS file format version number.
45    pub version: i32,
46    /// Whether the file uses the 64-bit format (`true`) or 32-bit (`false`).
47    pub is_64bit: bool,
48    /// File creation timestamp (formatted as `YYYY-MM-DD HH:MM:SS`).
49    pub creation_time: String,
50    /// File modification timestamp (formatted as `YYYY-MM-DD HH:MM:SS`).
51    pub modified_time: String,
52    /// Compression method used in the file.
53    pub compression: ReadStatCompress,
54    /// Byte order (endianness) of the file.
55    pub endianness: ReadStatEndian,
56    /// Per-variable metadata, keyed by variable index.
57    pub vars: BTreeMap<i32, ReadStatVarMetadata>,
58    /// Arrow schema derived from variable types. Not serialized.
59    #[serde(skip_serializing)]
60    pub schema: Schema,
61}
62
63impl Default for ReadStatMetadata {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl ReadStatMetadata {
70    /// Creates a new `ReadStatMetadata` with default (empty) values.
71    pub fn new() -> Self {
72        Self {
73            row_count: 0,
74            var_count: 0,
75            table_name: String::new(),
76            file_label: String::new(),
77            file_encoding: String::new(),
78            version: 0,
79            is_64bit: false,
80            creation_time: String::new(),
81            modified_time: String::new(),
82            compression: ReadStatCompress::None,
83            endianness: ReadStatEndian::None,
84            vars: BTreeMap::new(),
85            schema: Schema::empty(),
86        }
87    }
88
89    fn initialize_schema(&self) -> Schema {
90        // build up Schema
91        let fields: Vec<Field> = self
92            .vars
93            .values()
94            .map(|vm| {
95                let var_dt = match &vm.var_type {
96                    ReadStatVarType::String
97                    | ReadStatVarType::StringRef
98                    | ReadStatVarType::Unknown => DataType::Utf8,
99                    ReadStatVarType::Int8 | ReadStatVarType::Int16 => DataType::Int16,
100                    ReadStatVarType::Int32 => DataType::Int32,
101                    ReadStatVarType::Float => DataType::Float32,
102                    ReadStatVarType::Double => match &vm.var_format_class {
103                        Some(ReadStatVarFormatClass::Date) => DataType::Date32,
104                        Some(ReadStatVarFormatClass::DateTime) => {
105                            DataType::Timestamp(TimeUnit::Second, None)
106                        }
107                        Some(ReadStatVarFormatClass::DateTimeWithMilliseconds) => {
108                            DataType::Timestamp(TimeUnit::Millisecond, None)
109                        }
110                        Some(ReadStatVarFormatClass::DateTimeWithMicroseconds) => {
111                            DataType::Timestamp(TimeUnit::Microsecond, None)
112                        }
113                        Some(ReadStatVarFormatClass::DateTimeWithNanoseconds) => {
114                            DataType::Timestamp(TimeUnit::Nanosecond, None)
115                        }
116                        Some(ReadStatVarFormatClass::Time) => DataType::Time32(TimeUnit::Second),
117                        Some(ReadStatVarFormatClass::TimeWithMilliseconds) => {
118                            DataType::Time32(TimeUnit::Millisecond)
119                        }
120                        Some(ReadStatVarFormatClass::TimeWithMicroseconds) => {
121                            DataType::Time64(TimeUnit::Microsecond)
122                        }
123                        Some(ReadStatVarFormatClass::TimeWithNanoseconds) => {
124                            DataType::Time64(TimeUnit::Nanosecond)
125                        }
126                        None => DataType::Float64,
127                    },
128                };
129
130                // Build field metadata
131                let mut field = Field::new(&vm.var_name, var_dt, true);
132                let mut metadata = HashMap::new();
133                if !vm.var_label.is_empty() {
134                    metadata.insert("label".to_string(), vm.var_label.clone());
135                }
136                if !vm.var_format.is_empty() {
137                    metadata.insert("sas_format".to_string(), vm.var_format.clone());
138                }
139                metadata.insert("storage_width".to_string(), vm.storage_width.to_string());
140                if vm.display_width != 0 {
141                    metadata.insert("display_width".to_string(), vm.display_width.to_string());
142                }
143                if !metadata.is_empty() {
144                    field = field.with_metadata(metadata);
145                }
146                field
147            })
148            .collect();
149
150        // Add table label as schema metadata if not empty
151        if self.file_label.is_empty() {
152            Schema::new(fields)
153        } else {
154            let mut schema_metadata = HashMap::new();
155            schema_metadata.insert("table_label".to_string(), self.file_label.clone());
156            Schema::new_with_metadata(fields, schema_metadata)
157        }
158    }
159
160    /// Parses metadata from the `.sas7bdat` file referenced by `rsp`.
161    ///
162    /// Sets up the `ReadStat` C parser with metadata and variable handlers, then
163    /// invokes parsing. On success, builds the Arrow [`Schema`] from the
164    /// discovered variable types. If `skip_row_count` is `true`, sets a row
165    /// limit of 1 to skip counting all rows (faster for metadata-only queries).
166    ///
167    /// # Errors
168    ///
169    /// Returns [`ReadStatError`] if FFI parsing fails.
170    #[allow(clippy::cast_possible_wrap, clippy::ptr_as_ptr)]
171    pub fn read_metadata(
172        &mut self,
173        rsp: &ReadStatPath,
174        skip_row_count: bool,
175    ) -> Result<(), ReadStatError> {
176        debug!("Path as C string is {:?}", rsp.cstring_path);
177        let ppath = rsp.cstring_path.as_ptr();
178
179        let ctx = std::ptr::from_mut::<Self>(self) as *mut c_void;
180
181        let error: readstat_sys::readstat_error_t = readstat_sys::readstat_error_e_READSTAT_OK;
182        debug!("Initially, error ==> {error}");
183
184        let row_limit = if skip_row_count { Some(1) } else { None };
185
186        let error = ReadStatParser::new()?
187            .set_metadata_handler(Some(handle_metadata))?
188            .set_variable_handler(Some(handle_variable))?
189            .set_row_limit(row_limit)?
190            .parse_sas7bdat(ppath, ctx);
191
192        check_c_error(error as i32)?;
193
194        // if successful, initialize schema
195        self.schema = self.initialize_schema();
196        Ok(())
197    }
198
199    /// Parses metadata from an in-memory byte slice containing `.sas7bdat` data.
200    ///
201    /// Equivalent to [`read_metadata`](ReadStatMetadata::read_metadata) but reads from
202    /// a `&[u8]` buffer instead of a file path. Useful for WASM targets, cloud storage,
203    /// HTTP uploads, and testing without filesystem access.
204    ///
205    /// # Errors
206    ///
207    /// Returns [`ReadStatError`] if FFI parsing fails.
208    ///
209    /// # Panics
210    ///
211    /// Panics if the dummy path `CString` allocation fails (should never happen).
212    #[allow(clippy::cast_possible_wrap, clippy::ptr_as_ptr)]
213    pub fn read_metadata_from_bytes(
214        &mut self,
215        bytes: &[u8],
216        skip_row_count: bool,
217    ) -> Result<(), ReadStatError> {
218        let mut buffer_ctx = ReadStatBufferCtx::new(bytes);
219
220        let ctx = std::ptr::from_mut::<Self>(self) as *mut c_void;
221
222        let error: readstat_sys::readstat_error_t = readstat_sys::readstat_error_e_READSTAT_OK;
223        debug!("Initially, error ==> {error}");
224
225        let row_limit = if skip_row_count { Some(1) } else { None };
226
227        // Dummy path — custom I/O handlers ignore it
228        let dummy_path = CString::new("").expect("empty string is valid C string");
229
230        let error = buffer_ctx
231            .configure_parser(
232                ReadStatParser::new()?
233                    .set_metadata_handler(Some(handle_metadata))?
234                    .set_variable_handler(Some(handle_variable))?
235                    .set_row_limit(row_limit)?,
236            )?
237            .parse_sas7bdat(dummy_path.as_ptr(), ctx);
238
239        check_c_error(error as i32)?;
240
241        // if successful, initialize schema
242        self.schema = self.initialize_schema();
243        Ok(())
244    }
245
246    /// Parses metadata from a memory-mapped `.sas7bdat` file.
247    ///
248    /// Opens the file at `path` and memory-maps it, avoiding explicit read syscalls.
249    /// The OS loads pages on demand and manages caching automatically. This is
250    /// especially beneficial for large files where it avoids copying file data
251    /// through kernel buffers.
252    ///
253    /// # Safety
254    ///
255    /// Memory mapping is safe as long as the file is not modified or truncated by
256    /// another process while the map is active. This is the standard expectation
257    /// for `.sas7bdat` files, which are read-only artifacts.
258    ///
259    /// # Errors
260    ///
261    /// Returns [`ReadStatError`] if the file cannot be opened, mapped, or parsed.
262    #[cfg(not(target_arch = "wasm32"))]
263    pub fn read_metadata_from_mmap(
264        &mut self,
265        path: &Path,
266        skip_row_count: bool,
267    ) -> Result<(), ReadStatError> {
268        let file = File::open(path)?;
269        let mmap = unsafe { memmap2::Mmap::map(&file)? };
270        self.read_metadata_from_bytes(&mmap, skip_row_count)
271    }
272
273    /// Parses a columns file, returning column names.
274    ///
275    /// Lines starting with `#` are treated as comments and blank lines are skipped.
276    /// Each remaining line is trimmed and used as a column name.
277    ///
278    /// # Errors
279    ///
280    /// Returns [`ReadStatError`] if the file cannot be read.
281    pub fn parse_columns_file(path: &Path) -> Result<Vec<String>, ReadStatError> {
282        let contents = std::fs::read_to_string(path)?;
283        let names: Vec<String> = contents
284            .lines()
285            .map(str::trim)
286            .filter(|line| !line.is_empty() && !line.starts_with('#'))
287            .map(std::string::ToString::to_string)
288            .collect();
289        Ok(names)
290    }
291
292    /// Validates column names against the dataset's variables and returns a mapping
293    /// of original variable index to new contiguous index.
294    ///
295    /// Returns `Ok(None)` if `columns` is `None` (no filtering requested).
296    /// Returns `Err(ColumnsNotFound)` if any requested names are not in the dataset.
297    ///
298    /// # Errors
299    ///
300    /// Returns [`ReadStatError::ColumnsNotFound`] if any requested column names
301    /// do not exist in the dataset.
302    pub fn resolve_selected_columns(
303        &self,
304        columns: Option<Vec<String>>,
305    ) -> Result<Option<BTreeMap<i32, i32>>, ReadStatError> {
306        let Some(columns) = columns else {
307            return Ok(None);
308        };
309
310        // Deduplicate while preserving order isn't needed - we use dataset order
311        let requested: BTreeSet<String> = columns.into_iter().collect();
312
313        // Build a name -> index lookup
314        let name_to_index: HashMap<&str, i32> = self
315            .vars
316            .iter()
317            .map(|(&idx, vm)| (vm.var_name.as_str(), idx))
318            .collect();
319
320        // Check for invalid names
321        let not_found: Vec<String> = requested
322            .iter()
323            .filter(|name| !name_to_index.contains_key(name.as_str()))
324            .cloned()
325            .collect();
326
327        if !not_found.is_empty() {
328            let available: Vec<String> = self.vars.values().map(|vm| vm.var_name.clone()).collect();
329            return Err(ReadStatError::ColumnsNotFound {
330                requested: not_found,
331                available,
332            });
333        }
334
335        // Build mapping: original_var_index -> new_contiguous_index
336        // Iterate in original dataset order (BTreeMap is sorted by key)
337        let mut mapping = BTreeMap::new();
338        let mut new_index = 0i32;
339        for (&orig_index, vm) in &self.vars {
340            if requested.contains(&vm.var_name) {
341                mapping.insert(orig_index, new_index);
342                new_index += 1;
343            }
344        }
345
346        Ok(Some(mapping))
347    }
348
349    /// Returns a new `ReadStatMetadata` with only the selected variables,
350    /// re-keyed with contiguous indices starting from 0.
351    ///
352    /// Constructs the result directly instead of cloning the full struct,
353    /// avoiding a deep clone of unselected variables and the original schema.
354    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
355    pub fn filter_to_selected_columns(&self, mapping: &BTreeMap<i32, i32>) -> Self {
356        let new_vars: BTreeMap<i32, ReadStatVarMetadata> = mapping
357            .iter()
358            .filter_map(|(&orig_idx, &new_idx)| {
359                self.vars.get(&orig_idx).map(|vm| (new_idx, vm.clone()))
360            })
361            .collect();
362
363        let mut filtered = Self {
364            row_count: self.row_count,
365            var_count: mapping.len() as i32,
366            table_name: self.table_name.clone(),
367            file_label: self.file_label.clone(),
368            file_encoding: self.file_encoding.clone(),
369            version: self.version,
370            is_64bit: self.is_64bit,
371            creation_time: self.creation_time.clone(),
372            modified_time: self.modified_time.clone(),
373            compression: self.compression.clone(),
374            endianness: self.endianness.clone(),
375            vars: new_vars,
376            schema: Schema::empty(),
377        };
378        filtered.schema = filtered.initialize_schema();
379        filtered
380    }
381}
382
383/// Compression method used in a `.sas7bdat` file.
384///
385/// This enum is `#[non_exhaustive]`: it mirrors a C library enum that may gain
386/// variants. Match with a wildcard arm to remain forward-compatible.
387#[non_exhaustive]
388#[derive(Clone, Debug, Default, FromPrimitive, Serialize)]
389#[allow(clippy::cast_possible_wrap)]
390pub enum ReadStatCompress {
391    /// No compression.
392    #[default]
393    None = readstat_sys::readstat_compress_e_READSTAT_COMPRESS_NONE as isize,
394    /// Row-level (RLE) compression.
395    Rows = readstat_sys::readstat_compress_e_READSTAT_COMPRESS_ROWS as isize,
396    /// Binary (RDC) compression.
397    Binary = readstat_sys::readstat_compress_e_READSTAT_COMPRESS_BINARY as isize,
398}
399
400/// Byte order (endianness) of a `.sas7bdat` file.
401///
402/// This enum is `#[non_exhaustive]`: it mirrors a C library enum that may gain
403/// variants. Match with a wildcard arm to remain forward-compatible.
404#[non_exhaustive]
405#[derive(Clone, Debug, Default, FromPrimitive, Serialize)]
406#[allow(clippy::cast_possible_wrap)]
407pub enum ReadStatEndian {
408    /// Endianness not specified.
409    #[default]
410    None = readstat_sys::readstat_endian_e_READSTAT_ENDIAN_NONE as isize,
411    /// Little-endian byte order.
412    Little = readstat_sys::readstat_endian_e_READSTAT_ENDIAN_LITTLE as isize,
413    /// Big-endian byte order.
414    Big = readstat_sys::readstat_endian_e_READSTAT_ENDIAN_BIG as isize,
415}
416
417/// Metadata for a single variable (column) in a SAS dataset.
418#[derive(Clone, Debug, Serialize)]
419pub struct ReadStatVarMetadata {
420    /// Variable name as defined in the SAS file.
421    pub var_name: String,
422    /// Storage type of the variable.
423    pub var_type: ReadStatVarType,
424    /// High-level type class (string or numeric).
425    pub var_type_class: ReadStatVarTypeClass,
426    /// User-assigned variable label (may be empty).
427    pub var_label: String,
428    /// SAS format string (e.g. `"DATE9"`, `"BEST12"`).
429    pub var_format: String,
430    /// Semantic format class derived from the format string, if date/time-related.
431    pub var_format_class: Option<ReadStatVarFormatClass>,
432    /// Number of bytes used to store the variable value.
433    /// Always 8 for SAS numeric variables; variable for strings.
434    pub storage_width: usize,
435    /// Display width hint from the file. 0 for sas7bdat; populated for XPORT/SPSS.
436    pub display_width: i32,
437}
438
439impl ReadStatVarMetadata {
440    /// Creates a new `ReadStatVarMetadata` with the given field values.
441    #[allow(clippy::too_many_arguments)]
442    pub fn new(
443        var_name: String,
444        var_type: ReadStatVarType,
445        var_type_class: ReadStatVarTypeClass,
446        var_label: String,
447        var_format: String,
448        var_format_class: Option<ReadStatVarFormatClass>,
449        storage_width: usize,
450        display_width: i32,
451    ) -> Self {
452        Self {
453            var_name,
454            var_type,
455            var_type_class,
456            var_label,
457            var_format,
458            var_format_class,
459            storage_width,
460            display_width,
461        }
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // --- shared fixtures ---

    /// A numeric `Double` variable called `name`: format `BEST12`, no label,
    /// no format class, storage width 8, display width 0.
    fn plain_double(name: &str) -> ReadStatVarMetadata {
        ReadStatVarMetadata::new(
            name.to_string(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            "BEST12".to_string(),
            None,
            8,
            0,
        )
    }

    /// Metadata holding one plain `Double` variable per name, keyed 0, 1, 2, ...,
    /// with `var_count` and `schema` set to match.
    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
    fn test_metadata(var_names: &[&str]) -> ReadStatMetadata {
        let mut md = ReadStatMetadata::new();
        md.vars = var_names
            .iter()
            .zip(0i32..)
            .map(|(name, idx)| (idx, plain_double(name)))
            .collect();
        md.var_count = var_names.len() as i32;
        md.schema = md.initialize_schema();
        md
    }

    /// Metadata containing exactly `var` at index 0 (`schema` left empty).
    fn single_var_metadata(var: ReadStatVarMetadata) -> ReadStatMetadata {
        let mut md = ReadStatMetadata::new();
        md.vars.insert(0, var);
        md.var_count = 1;
        md
    }

    /// Arrow type that `initialize_schema` assigns to `var` when it is the only variable.
    fn only_field_type(var: ReadStatVarMetadata) -> DataType {
        let schema = single_var_metadata(var).initialize_schema();
        schema.fields()[0].data_type().clone()
    }

    /// Calls `resolve_selected_columns` with the given names.
    fn select(
        md: &ReadStatMetadata,
        names: &[&str],
    ) -> Result<Option<BTreeMap<i32, i32>>, ReadStatError> {
        md.resolve_selected_columns(Some(names.iter().copied().map(String::from).collect()))
    }

    /// Writes each entry of `lines` followed by a newline to `cols.txt` in a fresh
    /// temp dir. The returned `TempDir` must be kept alive while the path is in use.
    fn columns_file(lines: &[&str]) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("cols.txt");
        let mut file = File::create(&path).unwrap();
        for line in lines {
            writeln!(file, "{line}").unwrap();
        }
        (dir, path)
    }

    // --- resolve_selected_columns ---

    #[test]
    fn resolve_columns_none_returns_none() {
        let md = test_metadata(&["a", "b", "c"]);
        assert!(md.resolve_selected_columns(None).unwrap().is_none());
    }

    #[test]
    fn resolve_columns_valid_subset() {
        let md = test_metadata(&["a", "b", "c"]);
        let mapping = select(&md, &["a", "c"]).unwrap().unwrap();
        // "a" (original index 0) -> 0, "c" (original index 2) -> 1
        assert_eq!(mapping, BTreeMap::from([(0, 0), (2, 1)]));
    }

    #[test]
    fn resolve_columns_invalid_name_errors() {
        let md = test_metadata(&["a", "b", "c"]);
        match select(&md, &["a", "nonexistent"]).unwrap_err() {
            ReadStatError::ColumnsNotFound {
                requested,
                available,
            } => {
                assert_eq!(requested, vec!["nonexistent"]);
                assert_eq!(available, vec!["a", "b", "c"]);
            }
            other => panic!("Expected ColumnsNotFound, got {other:?}"),
        }
    }

    #[test]
    fn resolve_columns_all_columns() {
        let md = test_metadata(&["x", "y", "z"]);
        let mapping = select(&md, &["x", "y", "z"]).unwrap().unwrap();
        assert_eq!(mapping, BTreeMap::from([(0, 0), (1, 1), (2, 2)]));
    }

    // --- filter_to_selected_columns ---

    #[test]
    fn filter_produces_contiguous_indices() {
        let md = test_metadata(&["a", "b", "c", "d"]);
        let mapping = select(&md, &["b", "d"]).unwrap().unwrap();
        let filtered = md.filter_to_selected_columns(&mapping);

        assert_eq!(filtered.var_count, 2);
        assert_eq!(filtered.vars[&0].var_name, "b");
        assert_eq!(filtered.vars[&1].var_name, "d");
    }

    #[test]
    fn filter_preserves_schema() {
        let md = test_metadata(&["a", "b", "c"]);
        let mapping = select(&md, &["b"]).unwrap().unwrap();
        let filtered = md.filter_to_selected_columns(&mapping);

        assert_eq!(filtered.schema.fields().len(), 1);
        assert_eq!(filtered.schema.fields()[0].name(), "b");
    }

    // --- initialize_schema ---

    #[test]
    fn schema_string_type() {
        let var = ReadStatVarMetadata::new(
            "name".into(),
            ReadStatVarType::String,
            ReadStatVarTypeClass::String,
            String::new(),
            "$30".into(),
            None,
            30,
            0,
        );
        assert_eq!(only_field_type(var), DataType::Utf8);
    }

    #[test]
    fn schema_float64_type() {
        let var = ReadStatVarMetadata::new(
            "value".into(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            "BEST12".into(),
            None,
            8,
            0,
        );
        assert_eq!(only_field_type(var), DataType::Float64);
    }

    #[test]
    fn schema_date_type() {
        let var = ReadStatVarMetadata::new(
            "dt".into(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            "DATE9".into(),
            Some(ReadStatVarFormatClass::Date),
            8,
            0,
        );
        assert_eq!(only_field_type(var), DataType::Date32);
    }

    #[test]
    fn schema_datetime_type() {
        let var = ReadStatVarMetadata::new(
            "ts".into(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            "DATETIME22".into(),
            Some(ReadStatVarFormatClass::DateTime),
            8,
            0,
        );
        assert_eq!(
            only_field_type(var),
            DataType::Timestamp(TimeUnit::Second, None)
        );
    }

    #[test]
    fn schema_time_type() {
        let var = ReadStatVarMetadata::new(
            "tm".into(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            "TIME8".into(),
            Some(ReadStatVarFormatClass::Time),
            8,
            0,
        );
        assert_eq!(only_field_type(var), DataType::Time32(TimeUnit::Second));
    }

    #[test]
    fn schema_int32_type() {
        let var = ReadStatVarMetadata::new(
            "count".into(),
            ReadStatVarType::Int32,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            String::new(),
            None,
            4,
            0,
        );
        assert_eq!(only_field_type(var), DataType::Int32);
    }

    #[test]
    fn schema_with_labels_metadata() {
        let mut md = single_var_metadata(ReadStatVarMetadata::new(
            "col".into(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            "My Label".into(),
            "BEST12".into(),
            None,
            8,
            0,
        ));
        md.file_label = "My Table".into();
        let schema = md.initialize_schema();

        // Field-level metadata carries the variable label.
        let field_meta = schema.fields()[0].metadata();
        assert_eq!(field_meta.get("label").unwrap(), "My Label");

        // Schema-level metadata carries the table label.
        let schema_meta = schema.metadata();
        assert_eq!(schema_meta.get("table_label").unwrap(), "My Table");
    }

    #[test]
    fn schema_no_labels_has_format_and_width_metadata() {
        let schema = single_var_metadata(ReadStatVarMetadata::new(
            "col".into(),
            ReadStatVarType::Double,
            ReadStatVarTypeClass::Numeric,
            String::new(),
            "BEST12".into(),
            None,
            8,
            0,
        ))
        .initialize_schema();

        let field_meta = schema.fields()[0].metadata();
        assert!(!field_meta.contains_key("label"));
        assert_eq!(field_meta.get("sas_format").unwrap(), "BEST12");
        assert_eq!(field_meta.get("storage_width").unwrap(), "8");
        assert!(!field_meta.contains_key("display_width"));
        assert!(schema.metadata().is_empty());
    }

    // --- parse_columns_file ---

    #[test]
    fn parse_columns_file_normal() {
        let (_dir, path) = columns_file(&["col_a", "col_b", "col_c"]);

        let names = ReadStatMetadata::parse_columns_file(&path).unwrap();
        assert_eq!(names, vec!["col_a", "col_b", "col_c"]);
    }

    #[test]
    fn parse_columns_file_with_comments_and_blanks() {
        let (_dir, path) = columns_file(&[
            "# This is a comment",
            "col_a",
            "",
            "  col_b  ",
            "# Another comment",
        ]);

        let names = ReadStatMetadata::parse_columns_file(&path).unwrap();
        assert_eq!(names, vec!["col_a", "col_b"]);
    }

    #[test]
    fn parse_columns_file_empty() {
        let (_dir, path) = columns_file(&[]);

        let names = ReadStatMetadata::parse_columns_file(&path).unwrap();
        assert!(names.is_empty());
    }

    #[test]
    fn parse_columns_file_nonexistent() {
        let path = Path::new("/nonexistent/path/cols.txt");
        assert!(ReadStatMetadata::parse_columns_file(path).is_err());
    }

    // --- ReadStatMetadata defaults ---

    #[test]
    fn default_metadata() {
        let md = ReadStatMetadata::new();
        assert_eq!(md.row_count, 0);
        assert_eq!(md.var_count, 0);
        assert!(md.table_name.is_empty());
        assert!(md.vars.is_empty());
        assert!(md.schema.fields().is_empty());
    }
}
823}