Skip to content

Commit ddb4627

Browse files
jiangliuimeoer
authored and committed
builder: optimize tarfs building speed by skipping file content
The tarfs crate provides a seekable reader for iterating over the entries in a tar file, so optimize tarfs building speed by skipping over file content instead of reading it. Signed-off-by: Jiang Liu <[email protected]>
1 parent 82ebd11 commit ddb4627

File tree

2 files changed

+104
-36
lines changed

2 files changed

+104
-36
lines changed

builder/src/tarball.rs

Lines changed: 94 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
//! - dump the RAFS filesystem tree into RAFS metadata blob
1818
use std::ffi::{OsStr, OsString};
1919
use std::fs::{File, OpenOptions};
20-
use std::io::{BufReader, Read};
20+
use std::io::{BufReader, Read, Seek, SeekFrom};
2121
use std::os::unix::ffi::OsStrExt;
2222
use std::path::{Path, PathBuf};
2323
use std::sync::Mutex;
2424

2525
use anyhow::{anyhow, bail, Context, Result};
2626
use tar::{Archive, Entry, EntryType, Header};
2727

28+
use nydus_api::enosys;
2829
use nydus_rafs::metadata::inode::{InodeWrapper, RafsInodeFlags, RafsV6Inode};
2930
use nydus_rafs::metadata::layout::v5::RafsV5Inode;
3031
use nydus_rafs::metadata::layout::RafsXAttrs;
@@ -46,20 +47,41 @@ use super::core::node::{Node, NodeInfo};
4647
use super::core::tree::Tree;
4748
use super::{build_bootstrap, dump_bootstrap, finalize_blob, Builder, TarBuilder};
4849

50+
enum CompressionType {
51+
None,
52+
Gzip,
53+
}
54+
4955
enum TarReader {
5056
File(File),
51-
Buf(BufReaderInfo<File>),
52-
TarGz(Box<ZlibDecoder<File>>),
53-
Zran(ZranReader<File>),
57+
BufReader(BufReader<File>),
58+
BufReaderInfo(BufReaderInfo<File>),
59+
BufReaderInfoSeekable(BufReaderInfo<File>),
60+
TarGzFile(Box<ZlibDecoder<File>>),
61+
TarGzBufReader(Box<ZlibDecoder<BufReader<File>>>),
62+
ZranReader(ZranReader<File>),
5463
}
5564

5665
impl Read for TarReader {
5766
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
5867
match self {
5968
TarReader::File(f) => f.read(buf),
60-
TarReader::Buf(b) => b.read(buf),
61-
TarReader::TarGz(f) => f.read(buf),
62-
TarReader::Zran(f) => f.read(buf),
69+
TarReader::BufReader(f) => f.read(buf),
70+
TarReader::BufReaderInfo(b) => b.read(buf),
71+
TarReader::BufReaderInfoSeekable(b) => b.read(buf),
72+
TarReader::TarGzFile(f) => f.read(buf),
73+
TarReader::TarGzBufReader(b) => b.read(buf),
74+
TarReader::ZranReader(f) => f.read(buf),
75+
}
76+
}
77+
}
78+
79+
impl Seek for TarReader {
80+
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
81+
match self {
82+
TarReader::File(f) => f.seek(pos),
83+
TarReader::BufReaderInfoSeekable(b) => b.seek(pos),
84+
_ => Err(enosys!("seek() not supported!")),
6385
}
6486
}
6587
}
@@ -98,47 +120,66 @@ impl<'a> TarballTreeBuilder<'a> {
98120
.read(true)
99121
.open(self.ctx.source_path.clone())
100122
.context("tarball: can not open source file for conversion")?;
123+
let mut is_file = match file.metadata() {
124+
Ok(md) => md.file_type().is_file(),
125+
Err(_) => false,
126+
};
101127

102128
let reader = match self.ty {
103-
ConversionType::EStargzToRafs | ConversionType::TargzToRafs => {
104-
TarReader::TarGz(Box::new(ZlibDecoder::new(file)))
105-
}
106-
ConversionType::EStargzToRef | ConversionType::TargzToRef => {
107-
// Use 64K buffer to keep consistence with zlib-random.
108-
let mut buf_reader = BufReader::with_capacity(ZRAN_READER_BUF_SIZE, file);
109-
let mut buf = [0u8; 3];
110-
if buf_reader.read_exact(&mut buf).is_ok()
111-
&& buf[0] == 0x1f
112-
&& buf[1] == 0x8b
113-
&& buf[2] == 0x08
114-
{
115-
buf_reader.seek_relative(-3).unwrap();
129+
ConversionType::EStargzToRef
130+
| ConversionType::TargzToRef
131+
| ConversionType::TarToRef => match Self::detect_compression_algo(file)? {
132+
(CompressionType::Gzip, buf_reader) => {
116133
let generator = ZranContextGenerator::from_buf_reader(buf_reader)?;
117134
let reader = generator.reader();
118135
self.ctx.blob_zran_generator = Some(Mutex::new(generator));
119136
self.ctx.blob_features.insert(BlobFeatures::ZRAN);
120-
TarReader::Zran(reader)
121-
} else {
122-
buf_reader.seek_relative(-3).unwrap();
137+
TarReader::ZranReader(reader)
138+
}
139+
(CompressionType::None, buf_reader) => {
123140
self.ty = ConversionType::TarToRef;
124141
let reader = BufReaderInfo::from_buf_reader(buf_reader);
125142
self.ctx.blob_tar_reader = Some(reader.clone());
126-
TarReader::Buf(reader)
143+
TarReader::BufReaderInfo(reader)
127144
}
128-
}
129-
ConversionType::TarToRafs => TarReader::File(file),
130-
ConversionType::TarToRef => {
131-
let reader = BufReaderInfo::from_buf_reader(BufReader::new(file));
132-
self.ctx.blob_tar_reader = Some(reader.clone());
133-
TarReader::Buf(reader)
134-
}
145+
},
146+
ConversionType::EStargzToRafs
147+
| ConversionType::TargzToRafs
148+
| ConversionType::TarToRafs => match Self::detect_compression_algo(file)? {
149+
(CompressionType::Gzip, buf_reader) => {
150+
if is_file {
151+
let mut file = buf_reader.into_inner();
152+
file.seek(SeekFrom::Start(0))?;
153+
TarReader::TarGzFile(Box::new(ZlibDecoder::new(file)))
154+
} else {
155+
TarReader::TarGzBufReader(Box::new(ZlibDecoder::new(buf_reader)))
156+
}
157+
}
158+
(CompressionType::None, buf_reader) => {
159+
if is_file {
160+
let mut file = buf_reader.into_inner();
161+
file.seek(SeekFrom::Start(0))?;
162+
TarReader::File(file)
163+
} else {
164+
TarReader::BufReader(buf_reader)
165+
}
166+
}
167+
},
135168
ConversionType::TarToTarfs => {
136169
let mut reader = BufReaderInfo::from_buf_reader(BufReader::new(file));
137170
self.ctx.blob_tar_reader = Some(reader.clone());
138171
if !self.ctx.blob_id.is_empty() {
139172
reader.enable_digest_calculation(false);
173+
} else {
174+
// Disable seek when the hash value needs to be calculated.
175+
is_file = false;
176+
}
177+
// Only enable seek when hash computation is disabled.
178+
if is_file {
179+
TarReader::BufReaderInfoSeekable(reader)
180+
} else {
181+
TarReader::BufReaderInfo(reader)
140182
}
141-
TarReader::Buf(reader)
142183
}
143184
_ => return Err(anyhow!("tarball: unsupported image conversion type")),
144185
};
@@ -158,9 +199,13 @@ impl<'a> TarballTreeBuilder<'a> {
158199
let mut tree = Tree::new(root);
159200

160201
// Generate RAFS node for each tar entry, and optionally adding missing parents.
161-
let entries = tar
162-
.entries()
163-
.context("tarball: failed to read entries from tar")?;
202+
let entries = if is_file {
203+
tar.entries_with_seek()
204+
.context("tarball: failed to read entries from tar")?
205+
} else {
206+
tar.entries()
207+
.context("tarball: failed to read entries from tar")?
208+
};
164209
for entry in entries {
165210
let mut entry = entry.context("tarball: failed to read entry from tar")?;
166211
let path = entry
@@ -485,6 +530,20 @@ impl<'a> TarballTreeBuilder<'a> {
485530
let mut node = tree.lock_node();
486531
node.v5_set_dir_size(RafsVersion::V5, &tree.children);
487532
}
533+
534+
fn detect_compression_algo(file: File) -> Result<(CompressionType, BufReader<File>)> {
535+
// Use 64K buffer to keep consistency with zlib-random.
536+
let mut buf_reader = BufReader::with_capacity(ZRAN_READER_BUF_SIZE, file);
537+
let mut buf = [0u8; 3];
538+
buf_reader.read_exact(&mut buf)?;
539+
if buf[0] == 0x1f && buf[1] == 0x8b && buf[2] == 0x08 {
540+
buf_reader.seek_relative(-3).unwrap();
541+
Ok((CompressionType::Gzip, buf_reader))
542+
} else {
543+
buf_reader.seek_relative(-3).unwrap();
544+
Ok((CompressionType::None, buf_reader))
545+
}
546+
}
488547
}
489548

490549
/// Builder to create RAFS filesystems from tarballs.

utils/src/reader.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// SPDX-License-Identifier: Apache-2.0
44

55
use std::fs::File;
6-
use std::io::{BufReader, Read};
6+
use std::io::{BufReader, Read, Seek, SeekFrom};
77
use std::marker::PhantomData;
88
use std::os::unix::io::{AsRawFd, RawFd};
99
use std::sync::{Arc, Mutex};
@@ -98,6 +98,15 @@ impl<R: Read> Read for BufReaderInfo<R> {
9898
}
9999
}
100100

101+
impl<R: Read + Seek> Seek for BufReaderInfo<R> {
102+
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
103+
let mut state = self.state.lock().unwrap();
104+
let pos = state.reader.seek(pos)?;
105+
state.pos = pos;
106+
Ok(pos)
107+
}
108+
}
109+
101110
impl<R: Read> Clone for BufReaderInfo<R> {
102111
fn clone(&self) -> Self {
103112
Self {

0 commit comments

Comments
 (0)