Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1919

kernel-2.6.18-238.el5.src.rpm

From: Dave Anderson <anderson@redhat.com>
Date: Wed, 22 Sep 2010 18:12:35 -0400
Subject: [misc] fix race in pid generation causing immediate reuse
Message-id: <463014829.76231285179155773.JavaMail.root@zmail05.collab.prod.int.phx2.redhat.com>
Patchwork-id: 28341
O-Subject: [RHEL5.6 PATCH] fix race in pid generation that causes pids to be
	reused immediately
Bugzilla: 634850
RH-Acked-by: Rik van Riel <riel@redhat.com>
RH-Acked-by: Oleg Nesterov <oleg@redhat.com>
RH-Acked-by: Amerigo Wang <amwang@redhat.com>

Bug 634850 - [5.5] a race in pid generation that causes pids to be reused immediately.
https://bugzilla.redhat.com/show_bug.cgi?id=634850

Backport of this upstream commit:

  commit 5fdee8c4a5e1800489ce61963208f8cc55e42ea1
  Author: Salman <sqazi@google.com>
  Date:   Tue Aug 10 18:03:16 2010 -0700

  pids: fix a race in pid generation that causes pids to be reused immediately

  A program that repeatedly forks and waits is susceptible to having the
  same pid repeated, especially when it competes with another instance of
  the same program.  This is really bad for bash implementation.
  Furthermore, many shell scripts assume that pid numbers will not be used
  for some length of time.

  Race Description:

  A                                    B

  // pid == offset == n                // pid == offset == n + 1
  test_and_set_bit(offset, map->page)
                                       test_and_set_bit(offset, map->page);
                                       pid_ns->last_pid = pid;
  pid_ns->last_pid = pid;
                                       // pid == n + 1 is freed (wait())

                                       // Next fork()...
                                       last = pid_ns->last_pid; // == n
                                       pid = last + 1;

  Code to reproduce it (Running multiple instances is more effective):

  #include <errno.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <unistd.h>
  #include <stdio.h>
  #include <stdlib.h>

  // The distance mod 32768 between two pids, where the first pid is expected
  // to be smaller than the second.
  int PidDistance(pid_t first, pid_t second) {
    return (second + 32768 - first) % 32768;
  }

  int main(int argc, char* argv[]) {
    int failed = 0;
    pid_t last_pid = 0;
    int i;
    printf("%d\n", sizeof(pid_t));
    for (i = 0; i < 10000000; ++i) {
      if (i % 32786 == 0)
        printf("Iter: %d\n", i/32768);
      int child_exit_code = i % 256;
      pid_t pid = fork();
      if (pid == -1) {
        fprintf(stderr, "fork failed, iteration %d, errno=%d", i, errno);
        exit(1);
      }
      if (pid == 0) {
        // Child
        exit(child_exit_code);
      } else {
        // Parent
        if (i > 0) {
          int distance = PidDistance(last_pid, pid);
          if (distance == 0 || distance > 30000) {
            fprintf(stderr,
                    "Unexpected pid sequence: previous fork: pid=%d, "
                    "current fork: pid=%d for iteration=%d.\n",
                    last_pid, pid, i);
            failed = 1;
          }
        }
        last_pid = pid;
        int status;
        int reaped = wait(&status);
        if (reaped != pid) {
          fprintf(stderr,
                  "Wait return value: expected pid=%d, "
                  "got %d, iteration %d\n",
                  pid, reaped, i);
          failed = 1;
        } else if (WEXITSTATUS(status) != child_exit_code) {
          fprintf(stderr,
                  "Unexpected exit status %x, iteration %d\n",
                  WEXITSTATUS(status), i);
          failed = 1;
        }
      }
    }
    exit(failed);
  }

  Thanks to Ted Tso for the key ideas of this implementation.

  Signed-off-by: Salman Qazi <sqazi@google.com>
  Cc: Ingo Molnar <mingo@elte.hu>
  Cc: Theodore Ts'o <tytso@mit.edu>
  Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
  Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
  Cc: "Eric W. Biederman" <ebiederm@xmission.com>
  Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Test status:

  Without the patch, running two instances of the reproducer program
  above results in pid reuse instances such as these:

  ...
  Unexpected pid sequence: previous fork: pid=3505, current fork: pid=3505 for iteration=589711.
  Iter: 18
  Iter: 19
  Iter: 20
  Iter: 21
  Unexpected pid sequence: previous fork: pid=1292, current fork: pid=1292 for iteration=691389.
  Iter: 22
  Unexpected pid sequence: previous fork: pid=21760, current fork: pid=21760 for iteration=736160.
  Iter: 23
  Unexpected pid sequence: previous fork: pid=24289, current fork: pid=24289 for iteration=754394.
  ...

  With the patch applied, no pid reuse instances occur.

Brew build:

  http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2775489

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/kernel/pid.c b/kernel/pid.c
index 4611f42..e22e056 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -86,6 +86,43 @@ static fastcall void free_pidmap(int pid)
 	atomic_inc(&map->nr_free);
 }
 
+/*
+ * If we started walking pids at 'base', is 'a' seen before 'b'?
+ */
+static int pid_before(int base, int a, int b)
+{
+	/*
+	 * This is the same as saying
+	 *
+	 * (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT
+	 * and that mapping orders 'a' and 'b' with respect to 'base'.
+	 */
+	return (unsigned)(a - base) < (unsigned)(b - base);
+}
+
+/*
+ * We might be racing with someone else trying to set last_pid.
+ * We want the winner to have the "later" value, because if the
+ * "earlier" value prevails, then a pid may get reused immediately.
+ *
+ * Since pids rollover, it is not sufficient to just pick the bigger
+ * value.  We have to consider where we started counting from.
+ *
+ * 'base' is the value of last_pid that we observed when
+ * we started looking for a pid.
+ *
+ * 'pid' is the pid that we eventually found.
+ */
+static void set_last_pid(int *last, int base, int pid)
+{
+	int prev;
+	int last_write = base;
+	do {
+		prev = last_write;
+		last_write = cmpxchg(last, prev, pid);
+	} while ((prev != last_write) && (pid_before(base, last_write, pid)));
+}
+
 static int alloc_pidmap(void)
 {
 	int i, offset, max_scan, pid, last = last_pid;
@@ -117,7 +154,7 @@ static int alloc_pidmap(void)
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
 					atomic_dec(&map->nr_free);
-					last_pid = pid;
+					set_last_pid(&last_pid, last, pid);
 					return pid;
 				}
 				offset = find_next_offset(map, offset);