/*
 * Copyright (c) 2015
 *	The President and Fellows of Harvard College.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * bigfork - concurrent VM test that behaves somewhat better than
 * parallelvm.
 *
 * This test is a mixture of forktest and parallelvm: it does nested
 * forks like forktest, and aimless matrix operations like parallelvm;
 * the goal is to serve as a performance benchmark more than as a
 * stress test (though it can be that too) and in particular to
 * exhibit less timing variance than parallelvm does. The variance is
 * still fairly high, but the variance of parallelvm is horrific.
 */

/*
 * NOTE(review): waitpid() and the W* status macros are assumed to be
 * reachable through <unistd.h> on the target system; a stock POSIX
 * host additionally needs <sys/types.h> and <sys/wait.h>. Confirm
 * against the target's headers.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <err.h>

#define BRANCHES 6

/*
 * 6 branches gives 64 procs at the final stage, and we want this to
 * use about 4M. So each proc's memory load should be about 1/16M or
 * 64K. Which is 16384 ints, or four 64x64 matrixes.
 */
#define DIM 64

/* The four DIM x DIM working matrices, stored row-major. */
static int m1[DIM * DIM], m2[DIM * DIM], m3[DIM * DIM], m4[DIM * DIM];

/* Expected value of trace() after each successive call to grind(). */
static const int right[BRANCHES] = {
	536763422,
	478946723,
	375722852,
	369910585,
	328220902,
	62977821,
};

/* Failures seen by this process, plus those reported by its children. */
static unsigned failures;

/*
 * Seed the random number generator with a fixed value and fill m1
 * with values in the range -5..5, so every run starts from the same
 * matrix.
 */
static void
init(void)
{
	unsigned i, j;

	srandom(73771);
	for (i = 0; i < DIM; i++) {
		for (j = 0; j < DIM; j++) {
			m1[i * DIM + j] = random() % 11 - 5;
		}
	}
}

/*
 * x = a + b, elementwise.
 *
 * The sum is formed in unsigned arithmetic and converted back to int:
 * the matrix values overflow int after the first grind(), and signed
 * overflow is undefined behavior, whereas unsigned arithmetic wraps.
 * The stored bit pattern is the two's complement wrapped result.
 */
static void
add(int *x, const int *a, const int *b)
{
	unsigned i, j;

	for (i = 0; i < DIM; i++) {
		for (j = 0; j < DIM; j++) {
			x[i * DIM + j] = (int)((unsigned)a[i * DIM + j] +
					       (unsigned)b[i * DIM + j]);
		}
	}
}

/*
 * x = a * b (matrix product). x must not overlap a or b.
 *
 * Products and sums are accumulated in unsigned arithmetic for the
 * same reason as in add(): the values are expected to wrap.
 */
static void
mul(int *x, const int *a, const int *b)
{
	unsigned i, j, k;
	unsigned acc;

	for (i = 0; i < DIM; i++) {
		for (j = 0; j < DIM; j++) {
			acc = 0;
			for (k = 0; k < DIM; k++) {
				acc += (unsigned)a[i * DIM + k] *
				       (unsigned)b[k * DIM + j];
			}
			x[i * DIM + j] = (int)acc;
		}
	}
}

/*
 * x = a / b, elementwise, using signed integer division.
 */
static void
scale(int *x, const int *a, int b)
{
	unsigned i, j;

	for (i = 0; i < DIM; i++) {
		for (j = 0; j < DIM; j++) {
			x[i * DIM + j] = a[i * DIM + j] / b;
		}
	}
}

/*
 * One round of busywork that reads and writes all four matrices.
 */
static void
grind(void)
{
	/*
	 * compute: m2 = m1*m1, m3 = m2+m1, m4 = m3*m3, m1 = m4 / 2
	 */
	mul(m2, m1, m1);
	add(m3, m2, m1);
	mul(m4, m3, m3);
	scale(m1, m4, 2);
}

/*
 * Return the sum of the diagonal of m1, folded into the range
 * 0..0x1fffffff, as a checksum of the current matrix state.
 */
static int
trace(void)
{
	unsigned i;
	unsigned sum = 0;
	int val;

	/* Sum in unsigned so that overflow wraps instead of being UB. */
	for (i = 0; i < DIM; i++) {
		sum += (unsigned)m1[i * DIM + i];
	}
	val = (int)sum;

	while (val < 0) {
		val += 0x20000000;
	}
	return val % 0x20000000;
}

/*
 * fork() wrapper: prints a warning if the fork fails. Returns what
 * fork() returned; the caller records it and dowait() accounts for
 * a failed fork later.
 */
static pid_t
dofork(void)
{
	pid_t pid;

	pid = fork();
	if (pid < 0) {
		warn("fork");
	}
	return pid;
}

/*
 * Settle up for one entry of the pids[] array:
 *
 *   pid < 0   the fork at that stage failed; count one failure.
 *   pid == 0  this process is the child of that stage; exit, passing
 *             the accumulated failure count up as the exit status.
 *   pid > 0   wait for that child and add its failure count to ours.
 *
 * A child that cannot be waited for, or that was killed by a signal,
 * never got to report its count, so each of those is counted as one
 * failure rather than being silently treated as success.
 *
 * NOTE(review): on hosts where only the low 8 bits of the exit status
 * are preserved, counts above 255 will wrap.
 */
static void
dowait(pid_t pid)
{
	int status;

	if (pid < 0) {
		failures++;
		return;
	}
	if (pid == 0) {
		exit(failures);
	}

	if (waitpid(pid, &status, 0) < 0) {
		warn("waitpid(%d)", (int)pid);
		failures++;
	}
	else if (WIFSIGNALED(status)) {
		warnx("pid %d: signal %d", (int)pid, WTERMSIG(status));
		failures++;
	}
	else if (WEXITSTATUS(status) > 0) {
		failures += WEXITSTATUS(status);
	}
}

/*
 * Run the test: fork BRANCHES times, with every process (parents and
 * children alike) continuing into the next stage, so the number of
 * processes doubles at each stage. After each fork every process
 * runs grind() and compares trace() against the expected value for
 * that stage.
 *
 * "me" identifies the process in the messages: bit i is set if this
 * process was the child of the fork at stage i.
 */
static void
dotest(void)
{
	unsigned i, me;
	pid_t pids[BRANCHES];
	int t;
	char msg[128];

	me = 0;
	for (i = 0; i < BRANCHES; i++) {
		pids[i] = dofork();
		if (pids[i] == 0) {
			me += 1U << i;
		}
		grind();
		t = trace();
		if (t == right[i]) {
			snprintf(msg, sizeof(msg),
				 "Stage %u #%u done: %d\n", i, me, t);
		}
		else {
			snprintf(msg, sizeof(msg),
				 "Stage %u #%u FAILED: got %d, expected %d\n",
				 i, me, t, right[i]);
			failures++;
		}
		/*
		 * Each message is formatted into a local buffer and
		 * emitted with a single write(); presumably so output
		 * from concurrent processes is not split up and nothing
		 * is left in a stdio buffer across fork().
		 */
		(void)write(STDOUT_FILENO, msg, strlen(msg));
	}

	/*
	 * Walk the stages newest-first. A child first waits for the
	 * children it forked itself (the later stages), then reaches
	 * its own stage, where pids[i] == 0 makes dowait() exit. Only
	 * the original process gets through the whole loop.
	 */
	for (i = BRANCHES; i-- > 0;) {
		dowait(pids[i]);
	}

	if (failures > 0) {
		printf("%u failures.\n", failures);
	}
	else {
		printf("Done.\n");
	}
}

int
main(void)
{
	init();
	dotest();
	return 0;
}