Skip to content

Commit 711952d

Browse files
committed
add: automatically detect filetype without relying on the file ending
1 parent 5c6730a commit 711952d

File tree

3 files changed

+25
-17
lines changed

3 files changed

+25
-17
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@ comfy-table = "6.1.2"
2222
datafusion = { version = "35.0", features = ["avro"] }
2323
structopt = "0.3"
2424
tokio = { version = "1.36", features = ["rt-multi-thread"] }
25-
thiserror = "1"
25+
thiserror = "1"
26+
file-format = { version = "0.24.0", features = ["reader-txt"] }

src/utils.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,29 @@ use datafusion::prelude::{
77
AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions,
88
SessionContext,
99
};
10+
use file_format::FileFormat as DetectFileFormat;
1011
use std::path::Path;
1112

1213
pub fn file_format(filename: &str) -> Result<FileFormat, Error> {
13-
match file_ending(filename)?.as_str() {
14-
"avro" => Ok(FileFormat::Avro),
15-
"csv" => Ok(FileFormat::Csv),
16-
"json" => Ok(FileFormat::Json),
17-
"parquet" | "parq" => Ok(FileFormat::Parquet),
18-
other => Err(Error::General(format!(
19-
"unsupported file extension '{}'",
20-
other
21-
))),
14+
match DetectFileFormat::from_file(filename)? {
15+
DetectFileFormat::ApacheAvro => Ok(FileFormat::Avro),
16+
DetectFileFormat::ApacheParquet => Ok(FileFormat::Parquet),
17+
DetectFileFormat::PlainText => match file_ending(filename)?.as_str() {
18+
"json" => Ok(FileFormat::Json),
19+
"csv" => Ok(FileFormat::Csv),
20+
other => Err(Error::General(format!(
21+
"unsupported file extension '{}'",
22+
other
23+
))),
24+
},
25+
other => Err(Error::General(format!("unsupported file type '{}'", other))),
2226
}
2327
}
2428

2529
pub fn file_ending(filename: &str) -> Result<String, Error> {
26-
if let Some(ending) = std::path::Path::new(filename).extension() {
27-
Ok(ending.to_string_lossy().to_string())
28-
} else {
29-
Err(Error::General(
30-
"Could not determine file extension".to_string(),
31-
))
32-
}
30+
Ok(std::path::Path::new(filename)
31+
.extension()
32+
.map_or_else(|| "".to_owned(), |e| e.to_string_lossy().to_string()))
3333
}
3434

3535
pub fn parse_filename(filename: &Path) -> Result<&str, Error> {

0 commit comments

Comments
 (0)