Skip to content

Commit 638dd0a

Browse files
committed
Add support for rpm2extents transcoder
Two related parts: 1. If the `LIBREPO_TRANSCODE_RPMS` environment variable is set to a program (with parameters), then downloads are piped through it. 2. Transcoded RPMs, by definition, will not have the same bits on disk as were downloaded. This is inherent. The transcoder is tasked with measuring the bits that enter its stdin and storing a copy of the digest(s) it saw in the file footer. `librepo` can then use these stored digests instead, provided the environment variable is set. This is part of the changes described in https://fedoraproject.org/wiki/Changes/RPMCoW
1 parent 4daa959 commit 638dd0a

3 files changed

Lines changed: 254 additions & 4 deletions

File tree

librepo/checksum.c

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
#define BUFFER_SIZE 2048
4040
#define MAX_CHECKSUM_NAME_LEN 7
4141

42+
/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
43+
#define MAGIC 3472329499408095051
44+
4245
LrChecksumType
4346
lr_checksum_type(const char *type)
4447
{
@@ -102,6 +105,100 @@ lr_checksum_type_to_str(LrChecksumType type)
102105
return NULL;
103106
}
104107

108+
char *
109+
lr_checksum_cow_fd(LrChecksumType type, int fd, GError **err)
110+
{
111+
struct __attribute__ ((__packed__)) csum_offset_magic {
112+
off64_t csum_offset;
113+
uint64_t magic;
114+
};
115+
struct __attribute__ ((__packed__)) orig_size_algos_len {
116+
ssize_t orig_size;
117+
uint32_t algos_len;
118+
};
119+
struct __attribute__ ((__packed__)) algo_len_digest_len {
120+
uint32_t algo_len;
121+
uint32_t digest_len;
122+
};
123+
124+
struct csum_offset_magic csum_offset_magic;
125+
struct orig_size_algos_len orig_size_algos_len;
126+
struct algo_len_digest_len algo_len_digest_len;
127+
char *algo, *checksum;
128+
unsigned char *digest;
129+
size_t len = sizeof(csum_offset_magic);
130+
131+
if (g_getenv("LIBREPO_TRANSCODE_RPMS") == NULL) {
132+
g_debug("Transcoding not enabled, skipping path");
133+
return NULL;
134+
}
135+
if (lseek(fd, -len, SEEK_END) == -1) {
136+
g_warning("seek for transcode failed, probably too small");
137+
return NULL;
138+
}
139+
if (read(fd, &csum_offset_magic, len) != len) {
140+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
141+
"Cannot read csum_offset, magic. size = %lu", len);
142+
return NULL;
143+
}
144+
if (csum_offset_magic.magic != MAGIC) {
145+
g_debug("Not transcoded");
146+
return NULL;
147+
}
148+
g_debug("Is transcoded");
149+
if (lseek(fd, csum_offset_magic.csum_offset, SEEK_SET) == -1) {
150+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
151+
"seek for transcode csum_offset failed");
152+
return NULL;
153+
}
154+
len = sizeof(orig_size_algos_len);
155+
if (read(fd, &orig_size_algos_len, len) != len) {
156+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
157+
"Cannot read orig_size_algos_len");
158+
return NULL;
159+
}
160+
while (orig_size_algos_len.algos_len > 0) {
161+
len = sizeof(algo_len_digest_len);
162+
if (read(fd, &algo_len_digest_len, len) != len) {
163+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
164+
"Cannot read algo_len_digest_len");
165+
return NULL;
166+
}
167+
168+
len = algo_len_digest_len.algo_len;
169+
algo = lr_malloc0(len + 1);
170+
if (read(fd, algo, len) != len) {
171+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
172+
"Cannot read algo");
173+
lr_free(algo);
174+
return NULL;
175+
}
176+
len = algo_len_digest_len.digest_len;
177+
digest = lr_malloc0(len);
178+
if (read(fd, digest, len) != len) {
179+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
180+
"Cannot read digest");
181+
lr_free(algo);
182+
lr_free(digest);
183+
return NULL;
184+
}
185+
if (lr_checksum_type(algo) == type) {
186+
/* found it, do the same as lr_checksum_fd does */
187+
checksum = lr_malloc0(sizeof(char) * (len * 2 + 1));
188+
for (size_t x = 0; x < len; x++) {
189+
sprintf(checksum+(x*2), "%02x", digest[x]);
190+
}
191+
lr_free(algo);
192+
lr_free(digest);
193+
return checksum;
194+
}
195+
lr_free(algo);
196+
lr_free(digest);
197+
orig_size_algos_len.algos_len--;
198+
}
199+
return NULL;
200+
}
201+
105202
char *
106203
lr_checksum_fd(LrChecksumType type, int fd, GError **err)
107204
{
@@ -259,9 +356,17 @@ lr_checksum_fd_compare(LrChecksumType type,
259356
}
260357
}
261358

262-
checksum = lr_checksum_fd(type, fd, err);
263-
if (!checksum)
264-
return FALSE;
359+
checksum = lr_checksum_cow_fd(type, fd, err);
360+
if (checksum) {
361+
// if checksum is found in CoW package, do not cache it in xattr
362+
// because looking this up is nearly constant time (cheap) but
363+
// is not valid when CoW is not enabled in RPM.
364+
caching = FALSE;
365+
} else {
366+
checksum = lr_checksum_fd(type, fd, err);
367+
if (!checksum)
368+
return FALSE;
369+
}
265370

266371
*matches = (strcmp(expected, checksum)) ? FALSE : TRUE;
267372

librepo/downloader.c

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <sys/types.h>
3333
#include <sys/stat.h>
3434
#include <sys/time.h>
35+
#include <sys/wait.h>
3536
#include <sys/xattr.h>
3637
#include <fcntl.h>
3738
#include <curl/curl.h>
@@ -151,6 +152,10 @@ typedef struct {
151152
FILE *f; /*!<
152153
fdopened file descriptor from LrDownloadTarget and used
153154
in curl_handle. */
155+
FILE *writef; /*!<
156+
the fd to write data to. Could be a subprocess. */
157+
pid_t pid; /*!<
158+
the pid of a transcoder. */
154159
char errorbuffer[CURL_ERROR_SIZE]; /*!<
155160
Error buffer used in curl handle */
156161
GSList *tried_mirrors; /*!<
@@ -614,7 +619,7 @@ lr_writecb(char *ptr, size_t size, size_t nmemb, void *userdata)
614619
if (range_start <= 0 && range_end <= 0) {
615620
// Write everything curl give to you
616621
target->writecb_recieved += all;
617-
return fwrite(ptr, size, nmemb, target->f);
622+
return fwrite(ptr, size, nmemb, target->writef);
618623
}
619624

620625
/* Deal with situation when user wants only specific byte range of the
@@ -1428,6 +1433,136 @@ open_target_file(LrTarget *target, GError **err)
14281433
return f;
14291434
}
14301435

1436+
/** Maybe transcode the file
1437+
*/
1438+
void
1439+
maybe_transcode(LrTarget *target, GError **err)
1440+
{
1441+
const char *e = g_getenv("LIBREPO_TRANSCODE_RPMS");
1442+
int transcoder_stdin[2], fd;
1443+
pid_t pid;
1444+
FILE *out;
1445+
_cleanup_strv_free_ gchar **args = NULL;
1446+
target->writef = NULL;
1447+
if (!e) {
1448+
g_debug("Not transcoding");
1449+
target->writef = target->f;
1450+
return;
1451+
}
1452+
if (g_str_has_suffix(target->target->path, ".rpm") == FALSE) {
1453+
g_debug("Not transcoding %s due to name", target->target->path);
1454+
target->writef = target->f;
1455+
return;
1456+
}
1457+
g_debug("Transcoding %s", target->target->path);
1458+
args = g_strsplit(e, " ", -1);
1459+
if (args[0] == NULL) {
1460+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1461+
"transcode env empty");
1462+
return;
1463+
}
1464+
if (pipe(transcoder_stdin) != 0) {
1465+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1466+
"input pipe creation failed: %s",
1467+
g_strerror(errno));
1468+
return;
1469+
}
1470+
/** librepo collects the 'write' ends of the pipes. We must mark these as
1471+
* FD_CLOEXEC so a second download/transcode does not inherit them and
1472+
* hold them open, as it'll prevent an EOF and cause a deadlock.
1473+
*/
1474+
if (fcntl(transcoder_stdin[1], F_SETFD, FD_CLOEXEC) != 0) {
1475+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1476+
"input pipe write close-on-fork failed: %s",
1477+
g_strerror(errno));
1478+
return;
1479+
}
1480+
pid = fork();
1481+
if (pid == -1) {
1482+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1483+
"fork failed: %s",
1484+
g_strerror(errno));
1485+
return;
1486+
}
1487+
if (pid == 0) {
1488+
/* child */
1489+
if (dup2(transcoder_stdin[0], STDIN_FILENO) == -1) {
1490+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1491+
"dup2 of stdin failed: %s",
1492+
g_strerror(errno));
1493+
return;
1494+
}
1495+
close(transcoder_stdin[0]);
1496+
close(transcoder_stdin[1]);
1497+
fd = fileno(target->f);
1498+
if (fd == -1) {
1499+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1500+
"fileno for target failed");
1501+
return;
1502+
}
1503+
if (dup2(fd, STDOUT_FILENO) == -1) {
1504+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1505+
"dup2 of stdout failed: %s",
1506+
g_strerror(errno));
1507+
return;
1508+
}
1509+
if (execv(args[0], args) == -1) {
1510+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1511+
"execv failed: %s", g_strerror(errno));
1512+
}
1513+
/* we never get here, but appease static analysis */
1514+
return;
1515+
} else {
1516+
/* parent */
1517+
close(transcoder_stdin[0]);
1518+
out = fdopen(transcoder_stdin[1], "w");
1519+
if (out == NULL) {
1520+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1521+
"fdopen failed: %s",
1522+
g_strerror(errno));
1523+
return;
1524+
}
1525+
target->pid = pid;
1526+
target->writef = out;
1527+
/* resuming a transcode is not yet implemented */
1528+
target->resume = FALSE;
1529+
}
1530+
}
1531+
1532+
void
1533+
cleanup_transcode(LrTarget *target, GError **err)
1534+
{
1535+
int wstatus, trc;
1536+
if (!target->writef) {
1537+
return;
1538+
}
1539+
if (target->writef == target->f) {
1540+
return;
1541+
}
1542+
fclose(target->writef);
1543+
if(waitpid(target->pid, &wstatus, 0) == -1) {
1544+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1545+
"transcode waitpid failed: %s", g_strerror(errno));
1546+
} else if (WIFEXITED(wstatus)) {
1547+
trc = WEXITSTATUS(wstatus);
1548+
if (trc != 0) {
1549+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1550+
"transcode process non-zero exit code %d", trc);
1551+
}
1552+
} else if (WIFSIGNALED(wstatus)) {
1553+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1554+
"transcode process was terminated with a signal: %d",
1555+
WTERMSIG(wstatus));
1556+
} else {
1557+
/* don't think this can happen, but covering all bases */
1558+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1559+
"transcode unhandled circumstance in waitpid");
1560+
}
1561+
target->writef = NULL;
1562+
/* pid is only valid if writef is not NULL */
1563+
/* target->pid = -1; */
1564+
}
1565+
14311566
/** Prepare next transfer
14321567
*/
14331568
static gboolean
@@ -1509,6 +1644,9 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
15091644
target->f = open_target_file(target, err);
15101645
if (!target->f)
15111646
goto fail;
1647+
maybe_transcode(target, err);
1648+
if (!target->writef)
1649+
goto fail;
15121650
target->writecb_recieved = 0;
15131651
target->writecb_required_range_written = FALSE;
15141652

@@ -1684,6 +1822,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
16841822
curl_easy_cleanup(target->curl_handle);
16851823
target->curl_handle = NULL;
16861824
}
1825+
cleanup_transcode(target, err);
16871826
if (target->f != NULL) {
16881827
fclose(target->f);
16891828
target->f = NULL;
@@ -2254,6 +2393,8 @@ check_transfer_statuses(LrDownload *dd, GError **err)
22542393
if (transfer_err) // Transfer was unsuccessful
22552394
goto transfer_error;
22562395

2396+
cleanup_transcode(target, err);
2397+
22572398
//
22582399
// Checksum checking
22592400
//
@@ -2348,6 +2489,7 @@ check_transfer_statuses(LrDownload *dd, GError **err)
23482489
target->curl_handle = NULL;
23492490
g_free(target->headercb_interrupt_reason);
23502491
target->headercb_interrupt_reason = NULL;
2492+
cleanup_transcode(target, err);
23512493
fclose(target->f);
23522494
target->f = NULL;
23532495
if (target->curl_rqheaders) {
@@ -2751,6 +2893,7 @@ lr_download(GSList *targets,
27512893
curl_multi_remove_handle(dd.multi_handle, target->curl_handle);
27522894
curl_easy_cleanup(target->curl_handle);
27532895
target->curl_handle = NULL;
2896+
cleanup_transcode(target, err);
27542897
fclose(target->f);
27552898
target->f = NULL;
27562899
g_free(target->headercb_interrupt_reason);

librepo/rcodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ typedef enum {
125125
key/group not found, ...) */
126126
LRE_ZCK, /*!<
127127
(41) Zchunk error (error reading zchunk file, ...) */
128+
LRE_TRANSCODE, /*!<
129+
(42) Transcode error (env empty, ...) */
128130
LRE_UNKNOWNERROR, /*!<
129131
(xx) unknown error - sentinel of error codes enum */
130132
} LrRc; /*!< Return codes */

0 commit comments

Comments
 (0)