Skip to content

Commit 16dbae4

Browse files
committed
lib: preferably feed w3m some UTF-8 input
w3m cannot cope with UTF-16 or UTF-32, so in at least those cases we need to convert the input to UTF-8 ourselves before handing it off. References: DESK-3063, GXL-589
1 parent 16e846f commit 16dbae4

File tree

1 file changed

+46
-1
lines changed

1 file changed

+46
-1
lines changed

lib/rfbl.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,51 @@ pid_t popenfd(const char *const *argv, int *fdinp, int *fdoutp,
495495
return pid;
496496
}
497497

498+
/**
499+
* @fp: file to emit to
500+
* @src: input data
501+
* @cset: character set of data in @src
502+
*
503+
* Convert @src to UTF-8 if needed (depends on @cset), and writeout to @fp.
504+
* Updates @cset in case w3m should not be given the -I argument.
505+
* Returns 0 for success; other values indicate an error condition.
506+
*/
507+
static int utf8_writeout(FILE *fp, const void *vsrc, size_t src_size, const char *&cset)
508+
{
509+
auto src = const_cast<char *>(static_cast<const char *>(vsrc));
510+
if (cset == nullptr || strcasecmp(cset, "utf8") == 0 ||
511+
strcasecmp(cset, "utf-8") == 0)
512+
return fwrite(src, src_size, 1, fp) == 1 ? 0 : -1;
513+
auto cd = iconv_open("utf-8", cset);
514+
if (cd == iconv_t(-1)) {
515+
/* Dunno how to translate, just feed it as-is to w3m */
516+
cset = nullptr;
517+
return 0;
518+
}
519+
auto cleanup = HX::make_scope_exit([&]() { iconv_close(cd); });
520+
char buffer[4096];
521+
522+
/* Loop copied from iconvtext() */
523+
while (src_size > 0) {
524+
auto dst = buffer;
525+
size_t dst_size = sizeof(buffer);
526+
auto ret = iconv(cd, &src, &src_size, &dst, &dst_size);
527+
if (ret != static_cast<size_t>(-1) || dst_size != sizeof(buffer)) {
528+
if (fwrite(buffer, sizeof(buffer) - dst_size, 1, fp) != 1)
529+
return -1;
530+
continue;
531+
}
532+
if (src_size > 0) {
533+
--src_size;
534+
++src;
535+
}
536+
if (fwrite(buffer, sizeof(buffer) - dst_size, 1, fp) != 1)
537+
return -1;
538+
}
539+
errno = 0;
540+
return 0;
541+
}
542+
498543
/**
499544
* Run an external HTML-to-text converter.
500545
*
@@ -520,7 +565,7 @@ int feed_w3m(const void *inbuf, size_t len, const char *cset,
520565
if (fp == nullptr)
521566
return -1;
522567
auto cl1 = HX::make_scope_exit([&]() { unlink(filename.c_str()); });
523-
if (fwrite(inbuf, len, 1, fp.get()) != 1)
568+
if (utf8_writeout(fp.get(), inbuf, len, cset) != 0)
524569
return -1;
525570
fp.reset();
526571
int fout = -1;

0 commit comments

Comments
 (0)