Page MenuHomeFreeBSD

Fix old semaphore interface race
ClosedPublic

Authored by firk_cantconnect.ru on Aug 20 2022, 1:23 AM.
Tags
None
Referenced Files
Unknown Object (File)
Mon, Nov 11, 10:43 AM
Unknown Object (File)
Fri, Nov 1, 3:34 PM
Unknown Object (File)
Fri, Nov 1, 3:34 PM
Unknown Object (File)
Fri, Nov 1, 3:34 PM
Unknown Object (File)
Fri, Nov 1, 3:34 PM
Unknown Object (File)
Fri, Nov 1, 3:34 PM
Unknown Object (File)
Fri, Nov 1, 9:05 AM
Unknown Object (File)
Fri, Oct 18, 8:33 AM
Subscribers

Details

Summary

The bug was introduced in r349951 and r350478, which were also MFCed to 12.x as r351789.

This is about freebsd10-compat semaphore kernel interface.

Previously, any unexpected case would simply lead to exiting the syscall, leaving
the decision to the userspace sem wrapper (which handled it correctly).

After these patches, a non-zero sem->_has_waiters leads to going straight
to umtxq_sleep() without checking whether sem->_count is non-zero. A non-zero
sem->_has_waiters may occur spuriously because of the same code: it doesn't
clear this flag after cancelling the sleep due to a non-zero sem->_count.

So:

  1. make sem_wait() and sem_post() race:
    1. sem_wait() check _count in userspace, sees it is zero, calls kernel -> do_sem_wait()
    2. sem_post() sets _count to 1 and calls do_sem_wake() which does nothing because waiter-thread still not in queue
    3. do_sem_wait() sets _has_waiters=1, then sees _count==1 and exits
    4. userspace sem_wait() decrements _count back to 0; _has_waiters is left non-zero
  1. make them race again:
    1. sem_wait() check _count in userspace, sees it is zero, calls kernel -> do_sem_wait()
    2. sem_post() sets _count to 1 again, not touching _has_waiters, and calls do_sem_wake() which does nothing again, because waiter-thread not in queue
    3. do_sem_wait() fails to set _has_waiters 0->1 because it is already non-zero, and then goes to umtxq_sleep()
    4. umtxq_sleep() has no chance to wake up without another sem_post(), despite the fact that _count==1

Maybe I am missing something, but I see no reason for using _has_waiters at
all. If we are required to maintain it (are we?), let's do so, but let it be a
write-only field. do_sem_wait() may check only _count to make the proper
decision, and it will be independent of any spurious values in _has_waiters.

Test Plan

Source code using the library semaphore.h interface (should be compiled under a freebsd10 world to expose the bug).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <time.h>

/* Timestamps compared in main(): a progress line is printed whenever time() returns a new value. */
static time_t last, now;
/* Handle of the thread created in main() that runs tf(). */
static pthread_t t;
/* Semaphore under test: posted by SEM_post(), waited on by SEM_wait(). */
static sem_t s;
/* count: waits completed by tf(); postcount: times SEM_post() saw a value <= 0 and went on to post. */
static volatile unsigned long count, postcount;
/* Argument to usleep() before each wait in tf(); taken from argv[1], 0 skips the sleep. */
static unsigned int delay;
/* Semaphore value last read by sem_getvalue() in SEM_post(); printed as "lastvalue". */
static int sv;

/*
 * Post the semaphore only when its current value is <= 0.
 *
 * The value read by sem_getvalue() is stored in the global sv. Each time
 * the value is <= 0, postcount is bumped and sem_post() is called. Any
 * failure of sem_getvalue() or sem_post() is reported and aborts.
 */
static void SEM_post(void) {
  if (sem_getvalue(&s, &sv) < 0) {
    fprintf(stderr, "sem_getvalue() error %d", errno);
    abort();
  }

  /* Semaphore already has a positive value: nothing to post. */
  if (sv > 0)
    return;

  postcount++;
  if (sem_post(&s) < 0) {
    fprintf(stderr, "sem_post() error %d", errno);
    abort();
  }
}

/*
 * Wait on the semaphore, retrying for as long as sem_wait() fails with
 * EINTR. Any other failure is reported and aborts the program.
 */
static void SEM_wait(void) {
  for (;;) {
    int err;

    if (sem_wait(&s) >= 0)
      return;

    err = errno;
    if (err == EINTR)
      continue;

    fprintf(stderr, "sem_wait() error %d", err);
    abort();
  }
}

/*
 * Waiter thread body: forever (optionally sleep for `delay`), wait on the
 * semaphore, then increment the global `count`. The argument is unused.
 */
static void * tf(void *arg) {
  for (;;) {
    if (delay != 0)
      usleep(delay);
    SEM_wait();
    count++;
  }
  return NULL;
}

/*
 * Reproducer driver: initialise the semaphore, start the waiter thread,
 * then call SEM_post() in a tight loop. Whenever time() returns a new
 * value, print the total wait count, the delta since the previous report,
 * the post count and the last observed semaphore value.
 *
 * argv[1], when present, is parsed with atoi() and stored in `delay`.
 * Returns -1 if sem_init() or pthread_create() fails; otherwise loops forever.
 */
int main(int argc, char **argv) {
  unsigned long delta, lastc, newc;
  int e;

  if (argc >= 2)
    delay = atoi(argv[1]);

  if (sem_init(&s, 0, 0) < 0) {
    fprintf(stderr, "sem_init error %d (%s)\n", errno, strerror(errno));
    return -1;
  }

  /* pthread_create() reports failure through its return value. */
  e = pthread_create(&t, NULL, &tf, NULL);
  if (e != 0) {
    fprintf(stderr, "pthread_create error %d (%s)\n", e, strerror(e));
    return -1;
  }

  time(&last);
  now = last;
  lastc = count;

  for (;;) {
    SEM_post();
    time(&now);
    if (now == last)
      continue;

    /* The clock value changed: emit one progress line. */
    last = now;
    newc = count;
    delta = newc - lastc;
    lastc = newc;
    fprintf(stderr, "count=%lu delta=%lu postcount=%lu lastvalue=%d\n", lastc, delta, postcount, sv);
  }
  return 0;
}

OR: using the kernel interface _umtx_op() directly, may be compiled on modern freebsd versions:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <time.h>
#include <machine/atomic.h>
#include <sys/umtx.h>

/* Timestamps compared in main(): a progress line is printed whenever time() returns a new value. */
static time_t last, now;
/* Handle of the thread created in main() that runs tf(). */
static pthread_t t;
/* Kernel-interface semaphore under test; zeroed in main(), its _count field is updated atomically by SEM_post()/SEM_wait(). */
static struct _usem us;
/* count: waits completed by tf(); postcount: posts performed by SEM_post(). */
static volatile unsigned long count, postcount;
/* Argument to usleep() before each wait in tf(); taken from argv[1], 0 skips the sleep. */
static unsigned int delay;
/* Snapshot of us._count taken at the start of SEM_post(); printed as "lastvalue". */
static int sv;

/*
 * Post the semaphore through the raw _umtx_op() interface, but only when
 * the snapshot of us._count taken on entry is <= 0.
 *
 * With MORE_POST defined, the count is incremented with a compare-and-set
 * retry loop whatever its current value. Otherwise a single 0 -> 1
 * compare-and-set is attempted; if it fails, the postcount increment is
 * undone and no wake is issued.
 */
static void SEM_post(void) {
  /* Local scratch value; shadows the file-scope `count` and is only used under MORE_POST. */
  u_int count;
  /* Record the observed count in the global sv (reported by main()). */
  sv = (int)us._count;
  /* Count the post optimistically; return early if the semaphore was already positive. */
  if(sv<=0) postcount++; else return;
#ifdef MORE_POST
  /* Re-read and retry until the increment is applied. */
  do { count=us._count; } while(!atomic_cmpset_rel_int(&us._count, count, count+1));
#else
  /* The count was no longer 0: revert the accounting and skip the wake. */
  if(!atomic_cmpset_rel_int(&us._count, 0, 1)) { postcount--; return; }
#endif
  /* Always issue the wake request after a successful increment; its result is deliberately ignored. */
  (void)_umtx_op(&us, UMTX_OP_SEM_WAKE, 0, NULL, NULL);
}

/*
 * Wait on the semaphore through the raw _umtx_op() interface.
 *
 * Returns once us._count has been decremented by a successful
 * compare-and-set. While the count is not positive, UMTX_OP_SEM_WAIT is
 * issued and the decrement is retried afterwards. A failed wait with
 * errno == EINTR restarts the whole procedure; any other failure is
 * reported and aborts.
 */
static void SEM_wait(void) {
  int e, val, retval;
again:
  retval = 0;
  while(1) {
    /* Userspace fast path: try to take a unit while the count is positive. */
    while((val=us._count)>0) {
      if(atomic_cmpset_acq_int(&us._count,val,val-1)) return;
    }
    /* The previous _umtx_op() returned non-zero; the decrement was retried once above, now handle it. */
    if(retval) break;
    /* Count not positive: ask the kernel to wait on the semaphore. */
    retval = _umtx_op(&us, UMTX_OP_SEM_WAIT, 0, NULL, NULL);
  }
  /* EINTR restarts from the top; any other errno is fatal. */
  if(retval<0) { if((e=errno)!=EINTR) { fprintf(stderr,"sem_wait() error %d", e); abort(); } else goto again; }
  /* Only reachable if _umtx_op() returned a positive value -- presumably never happens; confirm against _umtx_op(2). */
  assert(!"unreach");
}

/*
 * Waiter thread body: forever (optionally sleep for `delay`), wait on the
 * semaphore, then increment the global `count`. The argument is unused.
 */
static void * tf(void *arg) {
  for (;;) {
    if (delay != 0)
      usleep(delay);
    SEM_wait();
    count++;
  }
  return NULL;
}

/*
 * Reproducer driver for the raw _umtx_op() variant: zero the _usem
 * structure, start the waiter thread, then call SEM_post() in a tight
 * loop. Whenever time() returns a new value, print the total wait count,
 * the delta since the previous report, the post count and the last
 * observed semaphore count.
 *
 * argv[1], when present, is parsed with atoi() and stored in `delay`.
 * Returns -1 if pthread_create() fails; otherwise loops forever.
 */
int main(int argc, char **argv) {
  unsigned long delta, lastc, newc;
  int e;

  if (argc >= 2)
    delay = atoi(argv[1]);

  bzero(&us, sizeof(us));

  /* pthread_create() reports failure through its return value. */
  e = pthread_create(&t, NULL, &tf, NULL);
  if (e != 0) {
    fprintf(stderr, "pthread_create error %d (%s)\n", e, strerror(e));
    return -1;
  }

  time(&last);
  now = last;
  lastc = count;

  for (;;) {
    SEM_post();
    time(&now);
    if (now == last)
      continue;

    /* The clock value changed: emit one progress line. */
    last = now;
    newc = count;
    delta = newc - lastc;
    lastc = newc;
    fprintf(stderr, "count=%lu delta=%lu postcount=%lu lastvalue=%d\n", lastc, delta, postcount, sv);
  }
  return 0;
}

Just compile it with -lpthread and run it. It will get stuck after a few seconds when the bug is unpatched.

Diff Detail

Repository
rG FreeBSD src repository
Lint
Lint Not Applicable
Unit
Tests Not Applicable

Event Timeline

sys/kern/kern_umtx.c
3558

You unconditionally clear _has_waiters there. But we do not know if we are the only waiter. In other words, I suspect that this clearing is fine for a single waiter, but racy for more than one, even if rv == 0, which implies that we set _has_waiters.

3560

This looks like a good change on its own, just returning EFAULT in case any usermode access failed, after removing from queue, makes logic easier to follow.

3568

The MPASS() is tautological, but I do not object.

avoid possible race when clearing _has_waiters

sys/kern/kern_umtx.c
3558

Moved this earlier to do under the same busy state which was during setting it to 1, so no other 0 -> 1 transition attempts could be made in between.

It is still possible that we return here due to count!=0 but it becomes 0 while we are returning to userspace; userspace will then call do_sem_wait() again and we will set _has_waiters to 1 again, whereas it would be -slightly- better to keep _has_waiters set until userspace is done decrementing _count. But that is not possible with a separate _has_waiters field without additional userspace locks anyway (and it seems that the new 11.x sem interface was added because of this).

Could you please mail me git format-patch with the change, proper commit log, and author?

sys/kern/kern_umtx.c
3553

{} are not needed.

3558

Yes, it was the reason for sem2, and the reason for removal of this interface once.

This revision is now accepted and ready to land.Aug 26 2022, 7:22 AM
This revision now requires review to proceed.Aug 26 2022, 7:56 AM
In D36272#825614, @kib wrote:

Could you please mail me git format-patch with the change, proper commit log, and author?

done

This revision was not accepted when it landed; it landed in state Needs Review.Aug 26 2022, 5:36 PM
This revision was automatically updated to reflect the committed changes.

Forgot to mention in previous responses.

The implementation can be properly fixed, at least on architectures that offer 64-bit atomics, with the assumption that client-allocated struct usem is 64-bit aligned. For instance, all 64bit machines fit, you would use casueword() to atomically update both count and has_waiters. For armv7 and any non-ancient i386, this can be done as well, but we do not expose 64bit atomic for userspace access, so it is more work.

with the assumption that client-allocated struct usem is 64-bit aligned

Having struct _usem 4-byte offset from struct sem_t, this leads to "align at 4 byte of 8" requirement for sem_t, which is weird.

Also, maybe it could be fixed in the past, but for now we have legacy freebsd10-compiled applications with already inefficient implementation (calling sem_wake() always on sem_post()). Older (pre-r233913) code which checks _has_waiters before calling sem_wake() is racy with possible semaphore deallocating by sem-waiting thread, but not with kernel _has_waiters logic.