|
| 1 | +use std::{ |
| 2 | + collections::{HashMap, HashSet}, |
| 3 | + fs::File, |
| 4 | + io::{BufWriter, Read}, |
| 5 | + path::PathBuf, |
| 6 | +}; |
| 7 | + |
| 8 | +use clap::Parser; |
| 9 | +use fingerprint::Combined; |
| 10 | +use getset::Getters; |
| 11 | +use serde::{Deserialize, Serialize}; |
| 12 | +use stable_eyre::{eyre::Context, Result}; |
| 13 | +use tar::{Archive, Entry}; |
| 14 | +use tracing::{debug, info, info_span, warn}; |
| 15 | +use typed_builder::TypedBuilder; |
| 16 | + |
// Command-line arguments for this subcommand.
//
// NOTE: plain `//` comments are used on the struct deliberately: clap turns `///`
// doc comments on a `Parser` struct into the command's help text, so adding one
// here would change the CLI output.
//
// `Getters` with `get = "pub"` generates a public `image_tar_file()` accessor,
// which `main` uses to read the field.
#[derive(Debug, Parser, Getters)]
#[getset(get = "pub")]
#[clap(version)]
pub struct Subcommand {
    /// The tar file image to search and fingerprint jars in.
    image_tar_file: PathBuf,
}
| 24 | + |
/// Value stored in the `kind` field of every [`DiscoveredJar`] record.
const JAR_OBSERVATION: &str = "v1.discover.binary.jar";
| 26 | + |
| 27 | +#[derive(Debug, PartialEq, Eq, Serialize, Clone)] |
| 28 | +struct DiscoveredJar { |
| 29 | + kind: &'static str, |
| 30 | + path: PathBuf, |
| 31 | + fingerprints: Combined, |
| 32 | +} |
| 33 | + |
| 34 | +impl DiscoveredJar { |
| 35 | + fn new(path: PathBuf, fingerprints: Combined) -> Self { |
| 36 | + DiscoveredJar { |
| 37 | + kind: JAR_OBSERVATION, |
| 38 | + path, |
| 39 | + fingerprints, |
| 40 | + } |
| 41 | + } |
| 42 | +} |
| 43 | + |
| 44 | +#[derive(Debug, PartialEq, Eq, Deserialize)] |
| 45 | +struct OciManifest { |
| 46 | + #[serde(rename = "Layers")] |
| 47 | + layers: Vec<PathBuf>, |
| 48 | +} |
| 49 | + |
| 50 | +/// The path in the manifest file corresponding to a layer. |
| 51 | +#[derive(Debug, PartialEq, Eq, Serialize, Hash)] |
| 52 | +struct LayerPath(PathBuf); |
| 53 | + |
| 54 | +#[derive(Debug, PartialEq, Eq, Serialize, TypedBuilder)] |
| 55 | +struct JarAnalysis { |
| 56 | + /// Jars and fingerprints associated with each layer in a jar file. |
| 57 | + discovered_jars: HashMap<LayerPath, Vec<DiscoveredJar>>, |
| 58 | +} |
| 59 | + |
| 60 | +#[tracing::instrument] |
| 61 | +pub fn main(opts: Subcommand) -> Result<()> { |
| 62 | + let tar_filename = opts.image_tar_file(); |
| 63 | + let jar_analysis = jars_in_container(opts.image_tar_file()) |
| 64 | + .with_context(|| format!("analyze container: {:?}", tar_filename))?; |
| 65 | + |
| 66 | + let mut stdout = BufWriter::new(std::io::stdout()); |
| 67 | + serde_json::to_writer(&mut stdout, &jar_analysis).context("Serialize Results") |
| 68 | +} |
| 69 | + |
| 70 | +/// Extracts the container (saved via `docker save`) and finds JAR files inside any layer. |
| 71 | +/// For each one found, fingerprints it and reports all those fingerprints along with their |
| 72 | +/// layer and path. |
| 73 | +#[tracing::instrument] |
| 74 | +fn jars_in_container(image_path: &PathBuf) -> Result<JarAnalysis> { |
| 75 | + // Visit each layer and fingerprint the JARs within. |
| 76 | + info!("inspecting container"); |
| 77 | + let layers = list_container_layers(image_path)?; |
| 78 | + let mut discoveries = HashMap::new(); |
| 79 | + |
| 80 | + let mut image = unpack(image_path)?; |
| 81 | + for entry in image.entries().context("iterate entries")? { |
| 82 | + let entry = entry.context("read entry")?; |
| 83 | + let path = entry.path().context("read path")?; |
| 84 | + if !layers.contains(path.as_ref()) { |
| 85 | + debug!(?path, "skipped: not a layer file"); |
| 86 | + continue; |
| 87 | + } |
| 88 | + |
| 89 | + let layer = path.to_path_buf(); |
| 90 | + // Layers should have a form like blob |
| 91 | + let layer_discoveries = |
| 92 | + jars_in_layer(entry).with_context(|| format!("read layer '{layer:?}'"))?; |
| 93 | + discoveries.insert(LayerPath(layer), layer_discoveries); |
| 94 | + } |
| 95 | + |
| 96 | + Ok(JarAnalysis { |
| 97 | + discovered_jars: discoveries, |
| 98 | + }) |
| 99 | +} |
| 100 | + |
| 101 | +/// Open and unpack a file and put it into a tar. |
| 102 | +/// This is done repeatedly because `entries()` can only be read once from an Archive. |
| 103 | +#[tracing::instrument] |
| 104 | +fn unpack(path: &PathBuf) -> Result<Archive<File>> { |
| 105 | + let file = File::open(path).context("open tar file")?; |
| 106 | + Ok(tar::Archive::new(file)) |
| 107 | +} |
| 108 | + |
| 109 | +#[tracing::instrument(skip(entry))] |
| 110 | +fn jars_in_layer(entry: Entry<'_, impl Read>) -> Result<Vec<DiscoveredJar>> { |
| 111 | + let mut discoveries = Vec::new(); |
| 112 | + |
| 113 | + let mut entry_archive = tar::Archive::new(entry); |
| 114 | + for entry in entry_archive.entries().context("list entries in layer")? { |
| 115 | + let entry = entry.context("read entry")?; |
| 116 | + let path = entry.path().context("read path")?; |
| 117 | + if !path.to_string_lossy().ends_with(".jar") { |
| 118 | + debug!(?path, "skipped: not a jar file"); |
| 119 | + continue; |
| 120 | + } |
| 121 | + |
| 122 | + let path = path.to_path_buf(); |
| 123 | + |
| 124 | + info_span!("jar", ?path).in_scope(|| -> Result<()> { |
| 125 | + debug!("fingerprinting"); |
| 126 | + let entry = buffer(entry).context("read jar file")?; |
| 127 | + |
| 128 | + match Combined::from_buffer(entry) { |
| 129 | + Ok(fingerprints) => discoveries.push(DiscoveredJar::new(path, fingerprints)), |
| 130 | + Err(e) => warn!("failed to fingerprint: {e:?}"), |
| 131 | + } |
| 132 | + |
| 133 | + Ok(()) |
| 134 | + })?; |
| 135 | + } |
| 136 | + |
| 137 | + Ok(discoveries) |
| 138 | +} |
| 139 | + |
| 140 | +#[tracing::instrument] |
| 141 | +fn list_container_layers(layer_path: &PathBuf) -> Result<HashSet<PathBuf>> { |
| 142 | + let mut layers = HashSet::new(); |
| 143 | + |
| 144 | + let mut container = unpack(layer_path)?; |
| 145 | + for entry in container.entries().context("list entries")? { |
| 146 | + let entry = match entry { |
| 147 | + Ok(entry) => entry, |
| 148 | + Err(e) => { |
| 149 | + warn!("failed to read entry: {e:?}"); |
| 150 | + continue; |
| 151 | + } |
| 152 | + }; |
| 153 | + |
| 154 | + let path = match entry.path() { |
| 155 | + Ok(path) => path, |
| 156 | + Err(e) => { |
| 157 | + warn!("Failed to read entry path: {e:?}"); |
| 158 | + continue; |
| 159 | + } |
| 160 | + }; |
| 161 | + |
| 162 | + if !path.ends_with("manifest.json") { |
| 163 | + debug!(?path, "skipped: not a manifest file"); |
| 164 | + continue; |
| 165 | + } |
| 166 | + |
| 167 | + info!(?path, "extracting manifests for image"); |
| 168 | + let manifests: Vec<OciManifest> = serde_json::from_reader(entry) |
| 169 | + .with_context(|| format!("parse manifest: {layer_path:?}"))?; |
| 170 | + |
| 171 | + for manifest in manifests { |
| 172 | + layers.extend(manifest.layers); |
| 173 | + } |
| 174 | + |
| 175 | + // There's only one manifest file. |
| 176 | + break; |
| 177 | + } |
| 178 | + |
| 179 | + Ok(layers) |
| 180 | +} |
| 181 | + |
| 182 | +#[tracing::instrument(skip(reader))] |
| 183 | +fn buffer(mut reader: impl Read) -> Result<Vec<u8>> { |
| 184 | + let mut buf = Vec::new(); |
| 185 | + reader.read_to_end(&mut buf).context("Read buffer")?; |
| 186 | + Ok(buf) |
| 187 | +} |
| 188 | + |
#[cfg(test)]
mod tests {
    use serde_json::Value;

    use super::*;

    /// Expected analysis of the test container, as JSON. It is compared after parsing,
    /// so key order and whitespace are irrelevant.
    const MILLHONE_OUT: &str = r#"{
  "discovered_jars": {
    "blobs/sha256/5c079c30beb013e4b2f7729b6bdce6fba57941d28f20db985333fc1dd969f018": [
      {
        "kind": "v1.discover.binary.jar",
        "path": "inner_directory/commons-email2-jakarta-2.0.0-M1.jar",
        "fingerprints": {
          "v1.mavencentral.jar": "6bzpyKql6Q+UxKQgm14pcP4wHGo=",
          "v1.raw.jar": "QA4SAurtJeo+lx1Vqve5uQYnvDKLFx1NgsSuoWKi8pw=",
          "sha_256": "MuEcK3nOFuySTTg4HOJi3vvTpI9bYspfMHa9AK2merQ=",
          "v1.class.jar": "2wRGbMGyGRwEXqNm53h1YK8OO879kvzDxazmJXiAcfI="
        }
      }
    ],
    "blobs/sha256/9733ccc395133a067f01ee6e380003d80fe9f443673e0f992ae6a4a7860a872c": [],
    "blobs/sha256/61aed1a8baa251dee118b9ab203c1e420f0eda0a9b3f9322d67d235dd27a12ee": [
      {
        "kind": "v1.discover.binary.jar",
        "path": "jackson-annotations-2.17.1.jar",
        "fingerprints": {
          "v1.mavencentral.jar": "/KfvYZLJrQXQe8UNqZG/k3qErzo=",
          "sha_256": "/MrYLhMXLA5DhNtxV3IZybhjHAgg9LGNqqVwFvtmHHY=",
          "v1.class.jar": "t2Btr6rNrvzghM5Nud2uldRGVjw0/n5rK9j0xooQQyk=",
          "v1.raw.jar": "wjGJk8cvY4tpKcUC5r8YuO15Wfv+rVuyWANBYCUIeDs="
        }
      }
    ]
  }
}"#;

    /// Scans the checked-in test image and checks the serialized analysis against
    /// the expected JSON.
    #[test]
    fn it_finds_expected_output() {
        let image_tar_file =
            PathBuf::from("../../test/App/Fossa/Container/testdata/jar_test_container.tar");

        let analysis =
            jars_in_container(&image_tar_file).expect("Read jars out of container image.");
        let actual = serde_json::to_value(analysis).expect("encode as json");

        let expected: Value = serde_json::from_str(MILLHONE_OUT).expect("Parse expected json");
        pretty_assertions::assert_eq!(expected, actual);
    }
}
0 commit comments