readstat/
cli.rs

1//! CLI argument types for the readstat binary.
2
3use clap::{Parser, Subcommand, ValueEnum, ValueHint};
4use readstat::{OutFormat, ParquetCompression};
5use std::fmt;
6use std::path::PathBuf;
7
/// 💾 Command-line tool for working with SAS binary files
///
/// 🦀 Rust wrapper of `ReadStat` C library
// NOTE: `///` doc comments in this file double as clap-generated help text,
// so their wording is user-visible at runtime — edit with care.
#[derive(Parser, Debug)]
#[command(version)]
// Propagate the crate version so `--version` also works on subcommands.
#[command(propagate_version = true)]
pub struct ReadStatCli {
    // All functionality lives in subcommands; there are no top-level args.
    #[command(subcommand)]
    pub command: ReadStatCliCommands,
}
18
/// CLI subcommands for readstat.
// Field `///` docs below are rendered as clap help text, and `{n}` is clap's
// forced line-break marker inside help strings — leave both intact.
#[derive(Debug, Subcommand)]
pub enum ReadStatCliCommands {
    /// Display sas7bdat metadata
    Metadata {
        /// Path to sas7bdat file
        #[arg(value_hint = ValueHint::FilePath, value_parser)]
        input: PathBuf,
        /// Display sas7bdat metadata as json
        #[arg(action, long)]
        as_json: bool,
        /// Do not display progress bar
        #[arg(action, long)]
        no_progress: bool,
        /// Skip calculating row count{n}If only interested in variable metadata speeds up parsing
        #[arg(action, long)]
        skip_row_count: bool,
    },
    /// Preview sas7bdat data
    // Like `Data`, but writes a fixed number of rows (default 10) and has no
    // output-path/format/compression options.
    Preview {
        /// Path to sas7bdat file
        #[arg(value_hint = ValueHint::FilePath, value_parser)]
        input: PathBuf,
        /// Number of rows to write
        #[arg(default_value = "10", long, value_parser)]
        rows: u32,
        /// Type of reader{n}    mem = read all data into memory{n}    stream = read at most stream-rows into memory{n}Defaults to stream
        #[arg(value_enum, ignore_case = true, long, value_parser)]
        reader: Option<Reader>,
        /// Number of rows to stream (read into memory) at a time{n}↑ rows = ↑ memory usage{n}Ignored if reader is set to mem{n}Defaults to 10,000 rows
        #[arg(long, value_parser)]
        stream_rows: Option<u32>,
        /// Do not display progress bar
        #[arg(action, long)]
        no_progress: bool,
        // Column selection: --columns and --columns-file are mutually
        // exclusive; with the `sql` feature both also conflict with
        // --sql/--sql-file (see conflicts_with/conflicts_with_all below).
        /// Comma-separated list of column names to include in output
        #[arg(long, value_delimiter = ',', num_args = 1..)]
        columns: Option<Vec<String>>,
        /// Path to a file containing column names (one per line, # comments)
        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with = "columns")]
        columns_file: Option<PathBuf>,
        // The SQL options below are compiled in only with the `sql` feature.
        /// SQL query to run against the data (requires sql feature){n}The table name is the input file stem (e.g. "cars" for cars.sas7bdat){n}Mutually exclusive with --columns/--columns-file
        #[cfg(feature = "sql")]
        #[arg(long, conflicts_with_all = ["columns", "columns_file"])]
        sql: Option<String>,
        /// Path to a file containing a SQL query (requires sql feature){n}Mutually exclusive with --sql and --columns/--columns-file
        #[cfg(feature = "sql")]
        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with_all = ["sql", "columns", "columns_file"])]
        sql_file: Option<PathBuf>,
    },
    /// Convert sas7bdat data to csv, feather (or the Arrow IPC format), ndjson, or parquet format
    Data {
        /// Path to sas7bdat file
        #[arg(value_hint = ValueHint::FilePath, value_parser)]
        input: PathBuf,
        /// Output file path
        #[arg(long, short = 'o', value_hint = ValueHint::FilePath, value_parser)]
        output: Option<PathBuf>,
        /// Output file format{n}Defaults to csv
        #[arg(ignore_case = true, long, short = 'f', value_enum, value_parser)]
        format: Option<CliOutFormat>,
        /// Overwrite output file if it already exists
        #[arg(action, long)]
        overwrite: bool,
        /// Number of rows to write
        #[arg(long, value_parser)]
        rows: Option<u32>,
        /// Type of reader{n}    mem = read all data into memory{n}    stream = read at most stream-rows into memory{n}Defaults to stream
        #[arg(ignore_case = true, long, value_enum, value_parser)]
        reader: Option<Reader>,
        /// Number of rows to stream (read into memory) at a time{n}↑ rows = ↑ memory usage{n}Ignored if reader is set to mem{n}Defaults to 10,000 rows
        #[arg(long, value_parser)]
        stream_rows: Option<u32>,
        /// Do not display progress bar
        #[arg(action, long)]
        no_progress: bool,
        /// Convert sas7bdat data in parallel
        #[arg(action, long)]
        parallel: bool,
        /// Write output data in parallel{n}Only effective when parallel is enabled{n}May write batches out of order for Parquet/Feather
        #[arg(action, long)]
        parallel_write: bool,
        /// Memory buffer size in MB before spilling to disk during parallel writes{n}Defaults to 100 MB{n}Only effective when parallel-write is enabled
        #[arg(long, value_parser = clap::value_parser!(u64).range(1..=10240), default_value = "100")]
        parallel_write_buffer_mb: u64,
        /// Parquet compression algorithm
        #[arg(long, value_enum, value_parser)]
        compression: Option<CliParquetCompression>,
        // The 0..=22 range matches zstd's maximum level; gzip (0-9) and
        // brotli (0-11) use narrower ranges. NOTE(review): per-algorithm
        // level validation presumably happens downstream — confirm.
        /// Parquet compression level (if applicable)
        #[arg(long, value_parser = clap::value_parser!(u32).range(0..=22))]
        compression_level: Option<u32>,
        /// Comma-separated list of column names to include in output
        #[arg(long, value_delimiter = ',', num_args = 1..)]
        columns: Option<Vec<String>>,
        /// Path to a file containing column names (one per line, # comments)
        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with = "columns")]
        columns_file: Option<PathBuf>,
        // The SQL options below are compiled in only with the `sql` feature.
        /// SQL query to run against the data (requires sql feature){n}The table name is the input file stem (e.g. "cars" for cars.sas7bdat){n}Mutually exclusive with --columns/--columns-file
        #[cfg(feature = "sql")]
        #[arg(long, conflicts_with_all = ["columns", "columns_file"])]
        sql: Option<String>,
        /// Path to a file containing a SQL query (requires sql feature){n}Mutually exclusive with --sql and --columns/--columns-file
        #[cfg(feature = "sql")]
        #[arg(long, value_hint = ValueHint::FilePath, conflicts_with_all = ["sql", "columns", "columns_file"])]
        sql_file: Option<PathBuf>,
    },
}
126
/// CLI output file format (with clap `ValueEnum` derive).
///
/// Clap's `ValueEnum` derive converts `PascalCase` variants to lowercase
/// for CLI input (e.g., `Csv` → `csv`).
// Mirrors `readstat::OutFormat`; converted via the `From<CliOutFormat>`
// impl in this module.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum CliOutFormat {
    /// Comma-separated values.
    Csv,
    /// Feather (Arrow IPC) format.
    Feather,
    /// Newline-delimited JSON.
    Ndjson,
    /// Apache Parquet columnar format.
    Parquet,
}
142
143impl From<CliOutFormat> for OutFormat {
144    fn from(f: CliOutFormat) -> Self {
145        match f {
146            CliOutFormat::Csv => Self::Csv,
147            CliOutFormat::Feather => Self::Feather,
148            CliOutFormat::Ndjson => Self::Ndjson,
149            CliOutFormat::Parquet => Self::Parquet,
150        }
151    }
152}
153
154impl fmt::Display for CliOutFormat {
155    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156        match self {
157            Self::Csv => f.write_str("csv"),
158            Self::Feather => f.write_str("feather"),
159            Self::Ndjson => f.write_str("ndjson"),
160            Self::Parquet => f.write_str("parquet"),
161        }
162    }
163}
164
/// Strategy for reading SAS data into memory.
///
/// Clap's `ValueEnum` derive converts `PascalCase` variants to lowercase
/// for CLI input (e.g., `Mem` → `mem`).
// Used by the `preview` and `data` subcommands via `--reader`.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum Reader {
    /// Read all data into memory at once.
    Mem,
    /// Stream data in chunks (default, lower memory usage).
    Stream,
}
176
177impl fmt::Display for Reader {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        match self {
180            Self::Mem => f.write_str("mem"),
181            Self::Stream => f.write_str("stream"),
182        }
183    }
184}
185
/// CLI Parquet compression algorithm (with clap `ValueEnum` derive).
// Mirrors `readstat::ParquetCompression`; converted via the
// `From<CliParquetCompression>` impl in this module. Level ranges noted on
// the variants relate to the separate `--compression-level` argument.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum CliParquetCompression {
    /// No compression.
    Uncompressed,
    /// Snappy compression (fast, moderate ratio).
    Snappy,
    /// Gzip compression (levels 0-9).
    Gzip,
    /// LZ4 raw compression.
    Lz4Raw,
    /// Brotli compression (levels 0-11).
    Brotli,
    /// Zstandard compression (levels 0-22).
    Zstd,
}
202
203impl From<CliParquetCompression> for ParquetCompression {
204    fn from(c: CliParquetCompression) -> Self {
205        match c {
206            CliParquetCompression::Uncompressed => Self::Uncompressed,
207            CliParquetCompression::Snappy => Self::Snappy,
208            CliParquetCompression::Gzip => Self::Gzip,
209            CliParquetCompression::Lz4Raw => Self::Lz4Raw,
210            CliParquetCompression::Brotli => Self::Brotli,
211            CliParquetCompression::Zstd => Self::Zstd,
212        }
213    }
214}
215
216impl fmt::Display for CliParquetCompression {
217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218        match self {
219            Self::Uncompressed => f.write_str("uncompressed"),
220            Self::Snappy => f.write_str("snappy"),
221            Self::Gzip => f.write_str("gzip"),
222            Self::Lz4Raw => f.write_str("lz4-raw"),
223            Self::Brotli => f.write_str("brotli"),
224            Self::Zstd => f.write_str("zstd"),
225        }
226    }
227}