Find the answer to your Linux question:
Results 1 to 7 of 7
Enjoy an ad free experience by logging in. Not a member yet? Register.
  1. #1

    Circular debugging using ptrace results in deadlock due to race condition


    Hi guys,

    As part of my personal research I am facing a challenging problem.
    I am trying to let two processes be each other's debuggers using the ptrace syscall. However, my proof-of-concept implementation always results in a deadlock state (both processes get stuck in 't+' state as shown by 'ps aux').

    Here is my code, it's pretty simple:
    Code:
    /* C standard headers */
    #include <errno.h>
    #include <inttypes.h>
    #include <setjmp.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Linux headers */
    #include <dirent.h>
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <sys/user.h>
    /* Architecture-specific headers */
    #include <asm/ptrace.h>
    #include <asm/unistd.h>
    
    #include <signal.h>
    #include <assert.h>
    
    typedef void fun_moved_from_context();
    // using namespace std;
    /*
     * Attach to process `pid` as its tracer using PTRACE_ATTACH and log the
     * outcome.
     *
     * pid: process to attach to.
     * id:  label of the calling process, printed as a log prefix.
     *
     * When ptrace() fails the reason is reported through perror(). errno is
     * captured immediately after the call because the printf() in between is
     * allowed to overwrite it.
     */
    void attachTo(pid_t pid, char* id) {
      long ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
      int saved_errno = errno;
      printf("\t%s\tattachTo: %ld\n", id, ret);
      if (ret == -1) {
        errno = saved_errno;
        perror("err: ");
      }
    }
    /*
     * Attach to process `pid` with PTRACE_SEIZE and log the outcome.
     *
     * pid: process to seize.
     * id:  label of the calling process, printed as a log prefix.
     *
     * ptrace() returns 0 on success and -1 on failure for this request, so the
     * sanity check must compare against 0; a "> 0" check can never pass.
     */
    void seizeTo(pid_t pid, char* id) {
      long ret = ptrace(PTRACE_SEIZE, pid, NULL, NULL);
      if (ret == -1) perror("seizeTo");
      assert(ret == 0);
      printf("\t%s\tseizeTo: %ld\n", id, ret);
    }
    /*
     * Issue PTRACE_DETACH for tracee `pid` and print the raw ptrace() result,
     * prefixed with the caller's label `id`.
     */
    void detachFrom(pid_t pid, char* id) {
      const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

      printf("\t%s\tdetachFrom: %ld\n", id, status);
    }
    /*
     * Enable fork/vfork/clone/exec/exit event reporting on tracee `pid` via
     * PTRACE_SETOPTIONS and print the raw ptrace() result, prefixed with the
     * caller's label `id`.
     */
    void setOptions(pid_t pid, char* id) {
      const long wanted = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
                          PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                          PTRACE_O_TRACEEXIT;
      const long status = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)wanted);

      printf("\t%s\tsetOptions: %ld\n", id, status);
    }
    /*
     * Write `data` into tracee `pid` at the address `can_run` using
     * PTRACE_POKEDATA and print the raw ptrace() result, prefixed with the
     * caller's label `id`.
     *
     * NOTE(review): ptrace(2) describes PTRACE_POKEDATA as copying a whole word;
     * confirm the object behind `can_run` is at least word sized.
     */
    void setVarData(pid_t pid, volatile bool* can_run, void* data, char* id) {
      void* remote_addr = (void*)can_run;
      const long status = ptrace(PTRACE_POKEDATA, pid, remote_addr, data);

      printf("\t%s\tsetVarData: %ld\n", id, status);
    }
    /*
     * Resume tracee `pid` with PTRACE_CONT (no signal is passed along) and print
     * the raw ptrace() result, prefixed with the caller's label `id`.
     */
    void cont(pid_t pid, char* id) {
      const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

      printf("\t%s\tcont: %ld\n", id, status);
    }
    /*
     * Issue PTRACE_INTERRUPT for tracee `pid` and print the raw ptrace() result,
     * prefixed with the caller's label `id`.
     */
    void interrupt(pid_t pid, char* id) {
      const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

      printf("\t%s\tinterrupt: %ld\n", id, status);
    }
    /*
     * Tracer event loop: repeatedly wait()s for any child/tracee to change state
     * and resumes it.
     *
     * id: numeric label of the calling process, used as a log prefix.
     *
     * Returns when wait() fails (no more children) or when a tracee reports
     * PTRACE_EVENT_EXIT.
     *
     * Fixes over the previous version:
     *  - `id` is an int but was printed with %s and handed to cont(), which takes
     *    a string label; it is now rendered into a string once.
     *  - `return 0;` is not valid in a void function.
     *  - `%li` was given a pid_t; the argument is now cast to long.
     *  - tracees stopped by a signal other than SIGTRAP were never resumed.
     *  - registers are only modified when PTRACE_GETREGS actually succeeded.
     */
    void debug(int id) {
      char label[16];
      snprintf(label, sizeof label, "%d", id);

      while (true) {
        int status;

        printf("\t%s\twhile\n", label);
        sleep(1);

        pid_t recv = wait(&status);
        if (recv == -1) {
          printf("\t%s\tDebugger exiting\n", label);
          return;
        }
        if (!WIFSTOPPED(status)) {
          continue;
        }

        if (WSTOPSIG(status) != SIGTRAP) {
          /* Any other stop signal: just let the tracee run again. */
          cont(recv, label);
          continue;
        }

        /* Strip SIGTRAP from the high status byte; what is left is event << 8. */
        int event_code = (status >> 8) ^ SIGTRAP;
        switch (event_code) {
          case PTRACE_EVENT_FORK << 8:
            printf("\t%s\tFORK EVENT.\n", label);
            cont(recv, label);
            break;
          case PTRACE_EVENT_EXIT << 8:
            printf("\t%s\t%li exited.\n", label, (long)recv);
            return;
          default: {
            struct user_regs regs;

            printf("\t%s\trecv: %i ; status: %i\n", label, recv, status);
            if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
              perror("PTRACE_GETREGS");
            } else {
              /* uregs[15] is logged as the PC below.
               * NOTE(review): +2 presumably steps over a 2-byte breakpoint
               * instruction; confirm for the instruction set in use. */
              regs.uregs[15] += 2;
              printf("\t%s\tp: new PC: %lx\n", label, regs.uregs[15]);
              ptrace(PTRACE_SETREGS, recv, NULL, &regs);
            }
            cont(recv, label);
            break;
          }
        }
      }
    }
    
    /*
     * Proof of concept: parent (A) and child (B) try to become each other's
     * tracer.
     *
     * B attaches to A first, pokes A's `can_runA` flag and its own pid into A's
     * memory, and resumes A. A then attaches to B, pokes `can_runB`, and resumes
     * B.
     *
     * Fixes over the previous version:
     *  - a failed fork() returned -1 and fell into the "process B" branch.
     *  - the handshake variables are now `long`: PTRACE_POKEDATA stores a whole
     *    word, which overruns a 1-byte bool and may clobber its neighbours.
     *  - integer and pointer arguments to setVarData() are cast explicitly.
     *
     * NOTE(review): the hang reported right after "A attachTo: 0" is plausibly
     * caused by the SIGCHLD that announces B's stop. A is itself traced by B, so
     * delivering that signal would put A into a signal-delivery-stop that only
     * the (now stopped) B could resume. Confirm against ptrace(2).
     */
    int main() {
      volatile long can_runA = 0, can_runB = 0;
      volatile long procB = 0;
      pid_t procA = getpid();

      pid_t child = fork();
      if (child == -1) {
        perror("fork");
        return EXIT_FAILURE;
      }

      if (child > 0) {  // process A
        while (!can_runA) {
          printf("\tA\twaiting to continue...\n");
          sleep(1);
        }
        /* procB was written into this process by B through PTRACE_POKEDATA. */
        attachTo((pid_t)procB, "A");
        waitpid((pid_t)procB, NULL, __WALL);
        setOptions((pid_t)procB, "A");
        setVarData((pid_t)procB, (volatile bool*)&can_runB, (void*)1L, "A");
        cont((pid_t)procB, "A");
        printf("\tA\tfinished\n");
      } else {  // process B
        procB = getpid();
        attachTo(procA, "B");
        waitpid(procA, NULL, __WALL);
        setOptions(procA, "B");
        setVarData(procA, (volatile bool*)&can_runA, (void*)1L, "B");
        setVarData(procA, (volatile bool*)&procB, (void*)procB, "B");
        cont(procA, "B");
        while (!can_runB) {
          printf("\tB\twaiting to continue...\n");
          sleep(1);
        }
        printf("\tB\tfinished\n");
      }
      return 0;
    }
    I have compiled and run this on an ARMv7 developer board with kernel version 3.0.35 (Linaro 13.0.

    The output of the above code is this:

    Code:
      A    waiting to continue... 
      B    attachTo: 0 
      B    setOptions: 0 
      B    setVarData: 0 
      B    setVarData: 0 
      B    cont: 0 
      B    waiting to continue... 
      B    waiting to continue... 
      A    attachTo: 0
    As you can see it never reaches the "finished" printf code, and gets stuck as soon as the other process attempts to attach to the debugger.

    I have done a similar experiment for 3 processes, such that each one attempts to attach to the other in a circular fashion: A -> B -> C -> A
    The result in this case was exactly the same. However, here I was able to detect a race condition, because sometimes the code executed properly without getting stuck in a deadlock (but it's hard to reproduce).

    If you wish, you can test this by using a lightweight debugger I've developed and three console terminals. Here's the code:
    Code:
    /* C standard headers */
    #include <errno.h>
    #include <inttypes.h>
    #include <setjmp.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Linux headers */
    #include <dirent.h>
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <sys/user.h>
    /* Architecture-specific headers */
    #include <asm/ptrace.h>
    #include <asm/unistd.h>
    
    #include <signal.h>
    #include <assert.h>
    
    typedef void fun_moved_from_context();
    //using namespace std;
    /*
     * Issue PTRACE_ATTACH for process `pid` and print the raw ptrace() result,
     * prefixed with the caller's numeric label `id`.
     */
    void attachTo(pid_t pid, int id) {
        const long status = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

        printf("%i  attachTo: %ld\n", id, status);
    }
    /*
     * Issue PTRACE_SEIZE for process `pid` and print the raw ptrace() result,
     * prefixed with the caller's numeric label `id`. The result is only logged,
     * never checked.
     */
    void seizeTo(pid_t pid, int id) {
        const long status = ptrace(PTRACE_SEIZE, pid, NULL, NULL);

        printf("%i  seizeTo: %ld\n", id, status);
    }
    /*
     * Issue PTRACE_DETACH for tracee `pid` and print the raw ptrace() result,
     * prefixed with the caller's numeric label `id`.
     */
    void detachFrom(pid_t pid, int id) {
        const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

        printf("%i  detachFrom: %ld\n", id, status);
    }
    /*
     * Enable fork/vfork/clone/exec/exit event reporting on tracee `pid` via
     * PTRACE_SETOPTIONS and print the raw ptrace() result, prefixed with the
     * caller's numeric label `id`.
     */
    void setOptions(pid_t pid, int id) {
        const long wanted = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
                            PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                            PTRACE_O_TRACEEXIT;
        const long status = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)wanted);

        printf("%i  setOptions: %ld\n", id, status);
    }
    /*
     * Write `data` into tracee `pid` at the address `can_run` using
     * PTRACE_POKEDATA and print the raw ptrace() result, prefixed with the
     * caller's numeric label `id`.
     *
     * NOTE(review): ptrace(2) describes PTRACE_POKEDATA as copying a whole word;
     * confirm the object behind `can_run` is at least word sized.
     */
    void setVarData(pid_t pid, volatile bool* can_run, void* data, int id) {
        void* remote_addr = (void*)can_run;
        const long status = ptrace(PTRACE_POKEDATA, pid, remote_addr, data);

        printf("%i  setVarData: %ld\n", id, status);
    }
    /*
     * Resume tracee `pid` with PTRACE_CONT (no signal is passed along) and print
     * the raw ptrace() result, prefixed with the caller's numeric label `id`.
     */
    void cont(pid_t pid, int id) {
        const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

        printf("%i  cont: %ld\n", id, status);
    }
    /*
     * Issue PTRACE_INTERRUPT for tracee `pid` and print the raw ptrace() result,
     * prefixed with the caller's numeric label `id`.
     */
    void interrupt(pid_t pid, int id) {
        const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

        printf("%i  interrupt: %ld\n", id, status);
    }
    /*
     * Tracer event loop: repeatedly wait()s for any child/tracee to change state
     * and resumes it.
     *
     * id: numeric label of the calling process, used as a log prefix.
     *
     * Returns when wait() fails (no more children) or when a tracee reports
     * PTRACE_EVENT_EXIT.
     *
     * Fixes over the previous version:
     *  - `return 0;` is not valid in a void function.
     *  - `%li` was given a pid_t; the argument is now cast to long.
     *  - registers are only modified when PTRACE_GETREGS actually succeeded
     *    (previously an uninitialised `regs` could be written back).
     */
    void debug(int id) {
        while (true) {
            int status;

            printf("%i  while\n", id);
            sleep(1);

            pid_t recv = wait(&status);
            if (recv == -1) {
                printf("%i  Debugger exiting\n", id);
                return;
            }
            if (!WIFSTOPPED(status)) {
                continue;
            }

            int signal = WSTOPSIG(status);
            printf("%i  signal: %i\n", id, signal);
            if (signal != SIGTRAP) {
                /* Any other stop signal: just let the tracee run again. */
                cont(recv, id);
                continue;
            }

            /* Strip SIGTRAP from the high status byte; what is left is
             * event << 8. */
            int event_code = (status >> 8) ^ SIGTRAP;
            printf("%i  event_code: %i\n", id, event_code);
            switch (event_code) {
                case PTRACE_EVENT_FORK << 8:
                    printf("%i  FORK EVENT.\n", id);
                    cont(recv, id);
                    break;
                case PTRACE_EVENT_EXIT << 8:
                    printf("%i  %li exited.\n", id, (long)recv);
                    return;
                default: {
                    struct user_regs regs;

                    printf("%i  recv: %i ; status: %i\n", id, recv, status);
                    if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
                        perror("PTRACE_GETREGS");
                    } else {
                        /* uregs[15] is logged as the PC below.
                         * NOTE(review): +2 presumably steps over a 2-byte
                         * breakpoint instruction; confirm for the instruction
                         * set in use. */
                        regs.uregs[15] += 2;
                        printf("%i  p: new PC: %lx\n", id, regs.uregs[15]);
                        ptrace(PTRACE_SETREGS, recv, NULL, &regs);
                    }
                    cont(recv, id);
                    break;
                }
            }
        }
    }
    
    /*
     * Interactive driver: prints this process's pid, then reads a target pid
     * from stdin.
     *
     *  - target 0: execute a `bkpt` instruction so an attached tracer sees a trap.
     *  - otherwise: attach to the target, wait for it to stop, enable event
     *    options, resume it and enter the debug() loop.
     *
     * The scanf() result is now checked; previously a non-numeric input left
     * `pid` uninitialised and it was then used as the attach target.
     */
    int main() {
        int pid = 0;
        int me = getpid();

        printf("Hello, I am %d\n", me);
        printf("pid:");
        fflush(stdout);  /* make sure the prompt is visible before blocking */
        if (scanf("%d", &pid) != 1) {
            fprintf(stderr, "expected a numeric pid\n");
            return EXIT_FAILURE;
        }

        if (pid == 0) {
            printf("bkpt asm\n");
            asm("bkpt");
        } else {
            attachTo(pid, me);
            printf("start waitpid\n");
            waitpid(pid, NULL, __WALL);
            printf("end waitpid\n");
            setOptions(pid, me);
            cont(pid, me);
            debug(me);
        }
        return 0;
    }
    Once you've compiled the above code, you simply run the binary on each console and enter the PID of another process to establish a 3-way circle.


    I am far from an expert on the kernel, but I did have a look at the ARM specific kernel implementation which left me puzzled. I couldn't find where/how/why this code does not work.
    Now I'm wondering if it's possible at all to make this work without a deadlock occurring? Does anyone have any experience with this, or can provide some clues/feedback?

    Thank you greatly for your time, attention and effort!
    Ilya

  2. #2
    Linux Guru Rubberman's Avatar
    Join Date
    Apr 2009
    Location
    I can be found either 40 miles west of Chicago, in Chicago, or in a galaxy far, far away.
    Posts
    14,038
    Why are you surprised you have a deadlock? You have multiple processes trying to get control of the same kernel resources (interrupts possibly) at the same time. Either you need to modify the kernel to allow this, or you need to change your code to handle such scenarios. That last part is usually the easier method to use. Kernel changes can have side-effects that are not good for the system as a whole. This is why we have mutexes and semaphores, to enable competing processes to "play nice" together.
    Sometimes, real fast is almost as good as real time.
    Just remember, Semper Gumbi - always be flexible!

  3. #3
    Quote Originally Posted by Rubberman View Post
    Why are you surprised you have a deadlock? You have multiple processes trying to get control of the same kernel resources (interrupts possibly) at the same time. Either you need to modify the kernel to allow this, or you need to change your code to handle such scenarios. That last part is usually the easier method to use. Kernel changes can have side-effects that are not good for the system as a whole. This is why we have mutexes and semaphores, to enable competing processes to "play nice" together.
    Thank you kindly for your response, I greatly appreciate your time and effort.

    It does make sense that a deadlock occurs, but it makes no sense that I am not able to find a single reason "why" (from Ptrace man page, linux kernel code, ...).
    Changing the kernel code is a no-go, the only option I am left with is doing it in C/C++ code. However, I am clueless how to make this work.

    The reason I am trying to make this work is that it is part of an anti-debugging technique. So it is definitely a requirement that both processes are each other's debuggers (thus tracing each other). Maybe mutexes/semaphores can help, but I don't see how.

  4. $spacer_open
    $spacer_close
  5. #4
    Linux Guru Rubberman's Avatar
    Join Date
    Apr 2009
    Location
    I can be found either 40 miles west of Chicago, in Chicago, or in a galaxy far, far away.
    Posts
    14,038
    Read up on semaphores. Mutexes are good for multi-threaded applications. Semaphores are good for multiple processes. One process gets the semaphore and the other waits on it to be released at which point it gets the semaphore (resource). Not hard. Read the man pages - man semaphore.h
    Sometimes, real fast is almost as good as real time.
    Just remember, Semper Gumbi - always be flexible!

  6. #5
    Linux Guru Rubberman's Avatar
    Join Date
    Apr 2009
    Location
    I can be found either 40 miles west of Chicago, in Chicago, or in a galaxy far, far away.
    Posts
    14,038
    FWIW, semaphores are C code, but you can easily use them in C++ code as well, as I have many times in the past.
    Sometimes, real fast is almost as good as real time.
    Just remember, Semper Gumbi - always be flexible!

  7. #6
    Quote Originally Posted by Rubberman View Post
    FWIW, semaphores are C code, but you can easily use them in C++ code as well, as I have many times in the past.
    Thanks again, I'll see what I can come up with and will report back my findings.
    Cheers!

  8. #7
    Okay, so I have added semaphores to my implementation, but the result is the same.

    Semaphores are used exclusively for mutual exclusion, right?
    Mutual exclusion is already implemented in my original solution (see my very first code snippet) --
    it is achieved by making one process wait in a while(! can_run); until the other process has attached and sets the can_run variable to true.
    I believe we can conclude that the deadlock is not due to a lack of mutual exclusion code.

    Here's the version with semaphores:
    Code:
    /* C standard headers */
    #include <errno.h>
    #include <inttypes.h>
    #include <setjmp.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Linux headers */
    #include <dirent.h>
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <sys/user.h>
    /* Architecture-specific headers */
    #include <asm/ptrace.h>
    #include <asm/unistd.h>
    
    #include <signal.h>
    #include <assert.h>
    
    typedef void fun_moved_from_context();
    // using namespace std;
    /*
     * Attach to process `pid` as its tracer using PTRACE_ATTACH and log the
     * outcome.
     *
     * pid: process to attach to.
     * id:  label of the calling process, printed as a log prefix.
     *
     * When ptrace() fails the reason is reported through perror(). errno is
     * captured immediately after the call because the printf() in between is
     * allowed to overwrite it.
     */
    void attachTo(pid_t pid, char* id) {
      long ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
      int saved_errno = errno;
      printf("\t%s\tattachTo: %ld\n", id, ret);
      if (ret == -1) {
        errno = saved_errno;
        perror("err: ");
      }
    }
    /*
     * Attach to process `pid` with PTRACE_SEIZE and log the outcome.
     *
     * pid: process to seize.
     * id:  label of the calling process, printed as a log prefix.
     *
     * ptrace() returns 0 on success and -1 on failure for this request, so the
     * sanity check must compare against 0; a "> 0" check can never pass.
     */
    void seizeTo(pid_t pid, char* id) {
      long ret = ptrace(PTRACE_SEIZE, pid, NULL, NULL);
      if (ret == -1) perror("seizeTo");
      assert(ret == 0);
      printf("\t%s\tseizeTo: %ld\n", id, ret);
    }
    /*
     * Issue PTRACE_DETACH for tracee `pid` and print the raw ptrace() result,
     * prefixed with the caller's label `id`.
     */
    void detachFrom(pid_t pid, char* id) {
      const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

      printf("\t%s\tdetachFrom: %ld\n", id, status);
    }
    /*
     * Enable fork/vfork/clone/exec/exit event reporting on tracee `pid` via
     * PTRACE_SETOPTIONS and print the raw ptrace() result, prefixed with the
     * caller's label `id`.
     */
    void setOptions(pid_t pid, char* id) {
      const long wanted = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
                          PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                          PTRACE_O_TRACEEXIT;
      const long status = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)wanted);

      printf("\t%s\tsetOptions: %ld\n", id, status);
    }
    /*
     * Write `data` into tracee `pid` at the address `can_run` using
     * PTRACE_POKEDATA and print the raw ptrace() result, prefixed with the
     * caller's label `id`.
     *
     * NOTE(review): ptrace(2) describes PTRACE_POKEDATA as copying a whole word;
     * confirm the object behind `can_run` is at least word sized.
     */
    void setVarData(pid_t pid, volatile bool* can_run, void* data, char* id) {
      void* remote_addr = (void*)can_run;
      const long status = ptrace(PTRACE_POKEDATA, pid, remote_addr, data);

      printf("\t%s\tsetVarData: %ld\n", id, status);
    }
    /*
     * Resume tracee `pid` with PTRACE_CONT (no signal is passed along) and print
     * the raw ptrace() result, prefixed with the caller's label `id`.
     */
    void cont(pid_t pid, char* id) {
      const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

      printf("\t%s\tcont: %ld\n", id, status);
    }
    /*
     * Issue PTRACE_INTERRUPT for tracee `pid` and print the raw ptrace() result,
     * prefixed with the caller's label `id`.
     */
    void interrupt(pid_t pid, char* id) {
      const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

      printf("\t%s\tinterrupt: %ld\n", id, status);
    }
    
    
    #include "semaphore.h"
    #define SEM_NAME "/mysem"
    
    /*
     * Semaphore variant of the mutual-tracing proof of concept: parent (A) and
     * child (B) try to become each other's tracer.
     *
     * B creates the named semaphore, attaches to A, pokes A's `can_runA` flag and
     * its own pid into A's memory, and resumes A. A then opens the semaphore,
     * attaches to B, pokes `can_runB`, and resumes B.
     *
     * The semaphore is only opened and closed; nothing here waits on or posts
     * it, so it does not order the two processes.
     *
     * Fixes over the previous version:
     *  - a failed fork() returned -1 and fell into the "process B" branch.
     *  - the handshake variables are now `long`: PTRACE_POKEDATA stores a whole
     *    word, which overruns a 1-byte bool and may clobber its neighbours.
     *  - integer and pointer arguments to setVarData() are cast explicitly.
     *  - the semaphore name is unlinked once both processes hold a handle, so it
     *    no longer outlives the program on the success path.
     *
     * NOTE(review): the hang reported after A attaches is plausibly caused by
     * the SIGCHLD that announces B's stop. A is itself traced by B, so
     * delivering that signal would put A into a signal-delivery-stop that only
     * the (now stopped) B could resume. Confirm against ptrace(2).
     */
    int main() {
      volatile long can_runA = 0, can_runB = 0;
      volatile long procB = 0;
      pid_t procA = getpid();

      pid_t child = fork();
      if (child == -1) {
        perror("fork");
        return EXIT_FAILURE;
      }

      if (child > 0) {  // process A
        while (!can_runA) {
          printf("\tA\twaiting to continue...\n");
          sleep(1);
        }

        sem_t *sem = sem_open(SEM_NAME, 0); /* Open a preexisting semaphore. */
        if (sem == SEM_FAILED) {
            perror("sem_open(3) error");
            exit(EXIT_FAILURE);
        }
        /* B created the semaphore before setting can_runA, so both processes
         * now hold a handle and the name itself is no longer needed. */
        sem_unlink(SEM_NAME);

        /* procB was written into this process by B through PTRACE_POKEDATA. */
        attachTo((pid_t)procB, "A");
        waitpid((pid_t)procB, NULL, __WALL);
        setOptions((pid_t)procB, "A");
        setVarData((pid_t)procB, (volatile bool*)&can_runB, (void*)1L, "A");

        if (sem_close(sem) < 0) {
            perror("sem_close(3) failed");
            /* We ignore possible sem_unlink(3) errors here */
            sem_unlink(SEM_NAME);
            exit(EXIT_FAILURE);
        }

        cont((pid_t)procB, "A");
        printf("\tA\tfinished\n");
      } else {  // process B

        sem_t *sem = sem_open(SEM_NAME, O_CREAT, 0644, 3); /* Initial value is 3. */
        if (sem == SEM_FAILED) {
            perror("sem_open(3) error");
            exit(EXIT_FAILURE);
        }

        procB = getpid();
        attachTo(procA, "B");
        waitpid(procA, NULL, __WALL);
        setOptions(procA, "B");
        setVarData(procA, (volatile bool*)&can_runA, (void*)1L, "B");
        setVarData(procA, (volatile bool*)&procB, (void*)procB, "B");

        if (sem_close(sem) < 0) {
            perror("sem_close(3) failed");
            /* We ignore possible sem_unlink(3) errors here */
            sem_unlink(SEM_NAME);
            exit(EXIT_FAILURE);
        }

        cont(procA, "B");
        while (!can_runB) {
          printf("\tB\twaiting to continue...\n");
          sleep(1);
        }
        printf("\tB\tfinished\n");
      }
      return 0;
    }
    /*
      A    waiting to continue... 
      B    attachTo: 0 
      B    setOptions: 0 
      B    setVarData: 0 
      B    setVarData: 0 
      B    cont: 0 
      B    waiting to continue... 
    */
    Any other suggestions/ideas I can try?

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •