Skip to content

<failed to translate> on signal #7675

@egrimley-arm

Description

@egrimley-arm

I'm not sure if this is a known bug, but this program seems to reliably crash the DEBUG build of DynamoRIO (f892973) on Graviton3:

#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Invoke "try" up to "tries" times and return how many of those calls
 * reported a hit (returned true). When "stop_on_hit" is true the first
 * hit ends the search immediately.
 *
 * After each call "param" is steered by the sign of the value that "try"
 * stored in *adj: negative raises "param", positive lowers it, and zero
 * leaves it untouched. A lowered "param" never goes below 1, and a raised
 * one saturates at the largest representable value instead of wrapping.
 */
int
keep_trying(bool (*try)(int *adj, unsigned long long param, void *arg),
            int tries, unsigned long long param, void *arg, bool stop_on_hit)
{
    /* The step is normally halved after every call, which amounts to a
     * binary search. A run of identical non-zero verdicts longer than
     * "streak_limit" suggests the target has moved, so the step is doubled
     * instead until the verdict changes.
     */
    const int streak_limit = 4;
    int streak = streak_limit - 1;
    int last_sign = 0;
    unsigned long long step = 1; /* Always a power of two. */
    int hit_count = 0;
    int attempt = 0;

    while (attempt < tries) {
        int adj = 0;

        if (try(&adj, param, arg)) {
            hit_count++;
            if (stop_on_hit)
                return hit_count;
        }

        /* Reduce the callback's verdict to -1, 0 or +1. */
        int sign = (adj > 0) - (adj < 0);

        /* Track how long the same non-zero verdict has persisted; the very
         * first call always counts as continuing the streak.
         */
        bool extends_streak = sign != 0 && (attempt == 0 || sign == last_sign);
        streak = extends_streak ? streak + 1 : 0;
        last_sign = sign;

        /* Resize the step, keeping it non-zero in both directions. */
        if (streak > streak_limit) {
            unsigned long long doubled = step * 2;
            if (doubled != 0)
                step = doubled;
        } else {
            unsigned long long halved = step / 2;
            if (halved != 0)
                step = halved;
        }

        /* Move param for the next call. */
        if (sign > 0) {
            if (param <= step) {
                param = 1;
                step = 1;
            } else {
                param -= step;
            }
        } else if (sign < 0) {
            unsigned long long raised = param + step;
            /* Unsigned wrap-around means overflow: clamp to the maximum. */
            param = raised > param ? raised : -1ULL;
        }

        attempt++;
    }

    return hit_count;
}

/* Flags exchanged between the main flow and the signal handler. They are
 * declared volatile sig_atomic_t because that is the only object type the C
 * standard guarantees can be read and written safely from an asynchronous
 * signal handler.
 */
static volatile sig_atomic_t volatile_src;
static volatile sig_atomic_t volatile_dst;

/* Print "s" followed by the description of the current errno value on
 * stderr (via perror), then terminate the process with exit status 1.
 * Does not return.
 */
void
fail(const char *s)
{
    const int exit_status = 1;

    perror(s);
    exit(exit_status);
}

/* SA_SIGINFO-style signal handler. Its only effect is to copy the current
 * value of volatile_src into volatile_dst, which lets the interrupted code
 * tell afterwards whether the signal has already been handled.
 */
void
handler(int signum, siginfo_t *info, void *ucontext)
{
    /* None of the delivery details are needed. */
    (void)signum;
    (void)info;
    (void)ucontext;

    volatile_dst = volatile_src;
}

/* One probe of the signal-delivery race; passed to keep_trying() as its
 * "try" callback.
 *
 * "arg" points to the timer_t to use. "param" is the one-shot timer delay:
 * it is split into whole seconds and a nanosecond remainder below, so it is
 * a count of nanoseconds.
 *
 * On return *adj holds the value of volatile_dst as read immediately after
 * the arming timer_settime call returned: -1 if handler() had already run
 * by then (the signal came early, so keep_trying raises the delay), or 1 if
 * it had not (so keep_trying lowers the delay). Always returns false, i.e.
 * it never reports a hit, so keep_trying uses all of its tries.
 *
 * Any timer_settime failure terminates the process through fail().
 */
bool try(int *adj, unsigned long long param, void *arg)
{
    timer_t timer = *(timer_t *)arg;

    /* Prime the flags: handler() copies volatile_src into volatile_dst, so
     * volatile_dst reads 1 until the signal has been handled and -1 after.
     */
    volatile_dst = 1;
    volatile_src = -1;

    /* Set timer: zero interval (no repetition), expiry "param" ns from now. */
    struct itimerspec spec = {
        { 0, 0 }, { param / 1000000000, param % 1000000000 }
    };
    if (timer_settime(timer, 0, &spec, NULL))
        fail("timer_settime");

    /* Sample the flag straight after the system call returns; this is the
     * window the search is trying to land the signal in.
     */
    *adj = volatile_dst;

    /* Cancel timer: an all-zero expiry disarms it, so a signal that has not
     * fired yet cannot leak into the next probe. old_spec receives the
     * previous setting but is not examined.
     */
    struct itimerspec spec0 = { { 0, 0 }, { 0, 0 } };
    struct itimerspec old_spec;
    if (timer_settime(timer, 0, &spec0, &old_spec))
        fail("timer_settime");

    /* Debug aid, disabled by default: change the condition to 1 to print
     * each delay and its outcome.
     */
    if (0)
        printf("%llu %d\n", param, *adj);

    return false;
}

int main()
{
    int signum = SIGPIPE;

    /* Set up signal handler. */
    struct sigaction act = { 0 };
    act.sa_flags = SA_SIGINFO;
    act.sa_sigaction = handler;
    if (sigaction(signum, &act, 0) != 0)
        fail("sigaction");

    /* Create timer. */
    struct sigevent sevp = { 0 };
    sevp.sigev_notify = SIGEV_SIGNAL;
    sevp.sigev_signo = signum;
    timer_t timer;
    if (timer_create(CLOCK_MONOTONIC, &sevp, &timer))
        fail("timer_create");

    keep_trying(try, 100000, 10000, &timer, true);

    printf("all done\n");
    return 0;
}

The error is usually:

<failed to translate>
<Application /tmp/bug (647800).  Internal Error: DynamoRIO debug check failure: /.../core/unix/signal.c:3187 false

but sometimes:

<Application /tmp/bug (647810).  Internal Error: DynamoRIO debug check failure: /.../core/translate.c:1116 tdcontext != get_thread_private_dcontext() || INTERNAL_OPTION(stress_recreate_pc) || tdcontext->client_data->is_translating || TEST(FRAG_SELFMOD_SANDBOXED, flags) || TEST(FRAG_WAS_DELETED, flags)

When I enable the printf (by changing `if (0)` to `if (1)` in `try`), I can see that the value of param that crashes DynamoRIO is about 1750, i.e. a timer delay of roughly 1750 ns. The crash is probably caused by the signal arriving just as we return from the timer_settime system call.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions