// readstat/cli.rs

//! CLI argument types for the readstat binary.

use clap::{Parser, Subcommand, ValueEnum, ValueHint};
use readstat::{OutFormat, ParquetCompression};
use std::fmt;
use std::path::PathBuf;

8/// 💾 Command-line tool for working with SAS binary files
9///
10/// 🦀 Rust wrapper of `ReadStat` C library
11#[derive(Parser, Debug)]
12#[command(version)]
13#[command(propagate_version = true)]
14pub struct ReadStatCli {
15    #[command(subcommand)]
16    pub command: ReadStatCliCommands,
17}
18
19/// CLI subcommands for readstat.
20#[derive(Debug, Subcommand)]
21pub enum ReadStatCliCommands {
22    /// Display sas7bdat metadata
23    Metadata {
24        /// Path to sas7bdat file
25        #[arg(value_hint = ValueHint::FilePath, value_parser)]
26        input: PathBuf,
27        /// Display sas7bdat metadata as json
28        #[arg(action, long)]
29        as_json: bool,
30        /// Skip calculating row count{n}If only interested in variable metadata speeds up parsing
31        #[arg(action, long)]
32        skip_row_count: bool,
33    },
34    /// Preview sas7bdat data
35    Preview {
36        /// Path to sas7bdat file
37        #[arg(value_hint = ValueHint::FilePath, value_parser)]
38        input: PathBuf,
39        /// Number of rows to write
40        #[arg(default_value = "10", long, value_parser)]
41        rows: u32,
42        /// Type of reader{n}    mem = read all data into memory{n}    stream = read at most stream-rows into memory{n}Defaults to stream
43        #[arg(value_enum, ignore_case = true, long, value_parser)]
44        reader: Option<Reader>,
45        /// Number of rows to stream (read into memory) at a time{n}↑ rows = ↑ memory usage{n}Ignored if reader is set to mem{n}Defaults to 10,000 rows
46        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
47        stream_rows: Option<u32>,
48        /// Do not display progress bar
49        #[arg(action, long)]
50        no_progress: bool,
51        /// Comma-separated list of column names to include in output
52        #[arg(long, value_delimiter = ',', num_args = 1..)]
53        columns: Option<Vec<String>>,
54        /// Path to a file containing column names (one per line, # comments)
55        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with = "columns")]
56        columns_file: Option<PathBuf>,
57        /// SQL query to run against the data (requires sql feature){n}The table name is the input file stem (e.g. "cars" for cars.sas7bdat){n}Mutually exclusive with --columns/--columns-file
58        #[cfg(feature = "sql")]
59        #[arg(long, conflicts_with_all = ["columns", "columns_file"])]
60        sql: Option<String>,
61        /// Path to a file containing a SQL query (requires sql feature){n}Mutually exclusive with --sql and --columns/--columns-file
62        #[cfg(feature = "sql")]
63        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with_all = ["sql", "columns", "columns_file"])]
64        sql_file: Option<PathBuf>,
65    },
66    /// Convert sas7bdat data to csv, feather (or the Arrow IPC format), ndjson, or parquet format
67    Data {
68        /// Path to sas7bdat file
69        #[arg(value_hint = ValueHint::FilePath, value_parser)]
70        input: PathBuf,
71        /// Output file path
72        #[arg(long, short = 'o', value_hint = ValueHint::FilePath, value_parser)]
73        output: Option<PathBuf>,
74        /// Output file format{n}Defaults to csv
75        #[arg(ignore_case = true, long, short = 'f', value_enum, value_parser)]
76        format: Option<CliOutFormat>,
77        /// Overwrite output file if it already exists
78        #[arg(action, long)]
79        overwrite: bool,
80        /// Number of rows to write
81        #[arg(long, value_parser)]
82        rows: Option<u32>,
83        /// Type of reader{n}    mem = read all data into memory{n}    stream = read at most stream-rows into memory{n}Defaults to stream
84        #[arg(ignore_case = true, long, value_enum, value_parser)]
85        reader: Option<Reader>,
86        /// Number of rows to stream (read into memory) at a time{n}↑ rows = ↑ memory usage{n}Ignored if reader is set to mem{n}Defaults to 10,000 rows
87        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
88        stream_rows: Option<u32>,
89        /// Do not display progress bar
90        #[arg(action, long)]
91        no_progress: bool,
92        /// Convert sas7bdat data in parallel
93        #[arg(action, long)]
94        parallel: bool,
95        /// Write Parquet output in parallel{n}Parquet only — ignored for other formats{n}Only effective when --parallel is also enabled{n}Output row order is preserved
96        #[arg(action, long)]
97        parallel_write: bool,
98        /// Memory buffer size in MB before spilling to disk during parallel writes{n}Defaults to 100 MB{n}Only effective when parallel-write is enabled
99        #[arg(long, value_parser = clap::value_parser!(u64).range(1..=10240), default_value = "100")]
100        parallel_write_buffer_mb: u64,
101        /// Parquet compression algorithm
102        #[arg(long, value_enum, value_parser)]
103        compression: Option<CliParquetCompression>,
104        /// Parquet compression level (if applicable)
105        #[arg(long, value_parser = clap::value_parser!(u32).range(0..=22), requires = "compression")]
106        compression_level: Option<u32>,
107        /// Comma-separated list of column names to include in output
108        #[arg(long, value_delimiter = ',', num_args = 1..)]
109        columns: Option<Vec<String>>,
110        /// Path to a file containing column names (one per line, # comments)
111        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with = "columns")]
112        columns_file: Option<PathBuf>,
113        /// SQL query to run against the data (requires sql feature){n}The table name is the input file stem (e.g. "cars" for cars.sas7bdat){n}Mutually exclusive with --columns/--columns-file
114        #[cfg(feature = "sql")]
115        #[arg(long, conflicts_with_all = ["columns", "columns_file"])]
116        sql: Option<String>,
117        /// Path to a file containing a SQL query (requires sql feature){n}Mutually exclusive with --sql and --columns/--columns-file
118        #[cfg(feature = "sql")]
119        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with_all = ["sql", "columns", "columns_file"])]
120        sql_file: Option<PathBuf>,
121    },
122}
123
124/// CLI output file format (with clap `ValueEnum` derive).
125///
126/// Clap's `ValueEnum` derive converts `PascalCase` variants to lowercase
127/// for CLI input (e.g., `Csv` → `csv`).
128#[derive(Debug, Clone, Copy, ValueEnum)]
129pub enum CliOutFormat {
130    /// Comma-separated values.
131    Csv,
132    /// Feather (Arrow IPC) format.
133    Feather,
134    /// Newline-delimited JSON.
135    Ndjson,
136    /// Apache Parquet columnar format.
137    Parquet,
138}
139
140impl From<CliOutFormat> for OutFormat {
141    fn from(f: CliOutFormat) -> Self {
142        match f {
143            CliOutFormat::Csv => Self::Csv,
144            CliOutFormat::Feather => Self::Feather,
145            CliOutFormat::Ndjson => Self::Ndjson,
146            CliOutFormat::Parquet => Self::Parquet,
147        }
148    }
149}
150
151impl fmt::Display for CliOutFormat {
152    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
153        match self {
154            Self::Csv => f.write_str("csv"),
155            Self::Feather => f.write_str("feather"),
156            Self::Ndjson => f.write_str("ndjson"),
157            Self::Parquet => f.write_str("parquet"),
158        }
159    }
160}
161
162/// Strategy for reading SAS data into memory.
163///
164/// Clap's `ValueEnum` derive converts `PascalCase` variants to lowercase
165/// for CLI input (e.g., `Mem` → `mem`).
166#[derive(Debug, Clone, Copy, ValueEnum)]
167pub enum Reader {
168    /// Read all data into memory at once.
169    Mem,
170    /// Stream data in chunks (default, lower memory usage).
171    Stream,
172}
173
174impl fmt::Display for Reader {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        match self {
177            Self::Mem => f.write_str("mem"),
178            Self::Stream => f.write_str("stream"),
179        }
180    }
181}
182
183/// CLI Parquet compression algorithm (with clap `ValueEnum` derive).
184#[derive(Debug, Clone, Copy, ValueEnum)]
185pub enum CliParquetCompression {
186    /// No compression.
187    Uncompressed,
188    /// Snappy compression (fast, moderate ratio).
189    Snappy,
190    /// Gzip compression (levels 0-9).
191    Gzip,
192    /// LZ4 raw compression.
193    Lz4Raw,
194    /// Brotli compression (levels 0-11).
195    Brotli,
196    /// Zstandard compression (levels 0-22).
197    Zstd,
198}
199
200impl From<CliParquetCompression> for ParquetCompression {
201    fn from(c: CliParquetCompression) -> Self {
202        match c {
203            CliParquetCompression::Uncompressed => Self::Uncompressed,
204            CliParquetCompression::Snappy => Self::Snappy,
205            CliParquetCompression::Gzip => Self::Gzip,
206            CliParquetCompression::Lz4Raw => Self::Lz4Raw,
207            CliParquetCompression::Brotli => Self::Brotli,
208            CliParquetCompression::Zstd => Self::Zstd,
209        }
210    }
211}
212
213impl fmt::Display for CliParquetCompression {
214    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
215        match self {
216            Self::Uncompressed => f.write_str("uncompressed"),
217            Self::Snappy => f.write_str("snappy"),
218            Self::Gzip => f.write_str("gzip"),
219            Self::Lz4Raw => f.write_str("lz4-raw"),
220            Self::Brotli => f.write_str("brotli"),
221            Self::Zstd => f.write_str("zstd"),
222        }
223    }
224}