Return to BSD News archive
Path: sserve!manuel!munnari.oz.au!uunet!mcsun!sun4nl!tuegate.tue.nl!svin02!wzv!gvr.win.tue.nl From: guido@gvr.win.tue.nl (Guido van Rooij) Newsgroups: comp.unix.bsd Subject: kern_execve Message-ID: <3493@wzv.win.tue.nl> Date: 30 Jun 92 15:19:13 GMT Sender: news@wzv.win.tue.nl Organization: Eindhoven University of Technology, The Netherlands Lines: 486 Hello all... Since there is no sign yet of 386bsd 0.1 and because no one seems to know what a reasonable kern_execve looks like nowadays, I decided to post mine. My machine seems stable now (however, it sometimes hangs because of a full proc table). It contains quite a few patches, and to be honest... I don't have the original one anymore. (But I guess posting the patches would mean an even bigger article :-)) Most of the changes are obvious, and therefore they aren't commented. If you want to know why a particular patch was made, or if I missed an essential one (not very likely though), then please mail me. -Guido ------8<------------------------------------------------------------- /* * Copyright (c) 1992 William Jolitz. All rights reserved. * Written by William Jolitz 1/92 * * Redistribution and use in source and binary forms are freely permitted * provided that the above copyright notice and attribution and date of work * and this paragraph are duplicated in all such forms. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * This procedure implements a minimal program execution facility for * 386BSD. It interfaces to the BSD kernel as the execve system call. * Significant limitations and lack of compatibility with POSIX are * present with this version, to make its basic operation more clear. 
* */ #include "param.h" #include "systm.h" #include "proc.h" #include "mount.h" #include "namei.h" #include "vnode.h" #include "file.h" #include "exec.h" #include "stat.h" #include "wait.h" #include "signalvar.h" #include "mman.h" #include "vm/vm.h" #include "vm/vm_param.h" #include "vm/vm_map.h" #include "vm/vm_kern.h" #include "machine/reg.h" static char rcsid[] = "$Header: /usr/bill/working/sys/kern/RCS/kern_execve.c,v 1.3 92/01/21 21:29:13 william Exp $"; /* * Bill's first-cut execve() system call. Puts hair on your chest. */ /* ARGSUSED */ execve(p, uap, retval) struct proc *p; register struct args { char *fname; char **argp; char **envp; } *uap; int *retval; { register struct nameidata *ndp; int rv, amt; struct nameidata nd; struct exec hdr; char **kargbuf, **kargbufp, *kstringbuf, *kstringbufp; char **org, **vectp, *ep; int needsenv, limitonargs; u_int stringlen; int addr, size; int argc; char *cp; struct vmspace *vs; int tsize, dsize, bsize, cnt, foff; int indir = 0; int newcred; register struct pcred *pcred = p->p_cred; register struct ucred *cred ; struct vnode *vp; struct vattr vattr; uid_t uid; gid_t gid; char *shellname; union { char ex_shell[MAXINTERP]; /* #! and interpreter name */ struct exec ex_exec; } exdata; /* * Step 1. Lookup filename to see if we have something to execute. */ ndp = &nd; ndp->ni_dirp = uap->fname; ndp->ni_segflg = UIO_USERSPACE; again: ndp->ni_nameiop = LOOKUP | LOCKLEAF | FOLLOW; /* is it there? */ if (rv = namei(ndp, p)) return (rv); vp = ndp->ni_vp; cred = pcred->pc_ucred; uid = cred->cr_uid; /* get orginal uid/gid */ gid = cred->cr_gid; if (rv = VOP_GETATTR(vp, &vattr, cred, p)) goto exec_fail; if (vp->v_mount->mnt_flag & MNT_NOEXEC) { /* no exec on fs ? 
*/ rv = EACCES; goto exec_fail; } if (indir == 0 && (p->p_flag & STRC) == 0 && (vp->v_mount->mnt_flag & MNT_NOSUID) == 0) { if (vattr.va_mode & VSUID) { /* check for SUID */ uid = vattr.va_uid; newcred = 1; } if (vattr.va_mode & VSGID) { /* check for SGID */ gid = vattr.va_gid; newcred = 1; } } /* is it a regular file? */ if (vp->v_type != VREG) { rv = EACCES; goto exec_fail; } /* is it executable? this is relevant for root only. */ /* we are second-guessing the xxx_getattr(),xxx_access() routines. */ if ((vattr.va_mode & (0111)) == 0) { rv = EACCES; goto exec_fail; } /* Can user access it? (always succeeds for root) */ if (rv = VOP_ACCESS(vp, VEXEC, cred, p)) goto exec_fail; /* * Step 2. Does the file contain a format we can * understand and execute */ /* * Read in first few bytes of file for segment sizes, magic number: * ZMAGIC = demand paged RO text * Also an ASCII line beginning with #! is * the file name of a ``shell'' and arguments may be prepended * to the argument list if given here. */ exdata.ex_shell[0] = '\0'; /* for zero length files */ rv = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata, sizeof (exdata), (off_t)0, UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &amt, p); if (rv) goto exec_fail; #ifndef lint if (amt > sizeof(exdata) - sizeof(exdata.ex_exec) && exdata.ex_shell[0] != '#') { rv = ENOEXEC; goto exec_fail; } #endif /* that we recognize? 
*/ rv = ENOEXEC; if ((int)exdata.ex_exec.a_magic == ZMAGIC) { /* sanity check "ain't not such thing as a sanity clause" -groucho */ if ( /*exdata.ex_exec.a_text == 0 || */ exdata.ex_exec.a_text > MAXTSIZ || exdata.ex_exec.a_text % NBPG || exdata.ex_exec.a_text > vattr.va_size) goto exec_fail; if ( exdata.ex_exec.a_data == 0 || exdata.ex_exec.a_data > DFLDSIZ || exdata.ex_exec.a_data > vattr.va_size || exdata.ex_exec.a_data + exdata.ex_exec.a_text > vattr.va_size) goto exec_fail; if (exdata.ex_exec.a_bss > MAXDSIZ) goto exec_fail; if (exdata.ex_exec.a_text + exdata.ex_exec.a_data + exdata.ex_exec.a_bss > MAXTSIZ + MAXDSIZ) goto exec_fail; } else { if (exdata.ex_shell[0] != '#' || exdata.ex_shell[1] != '!' || indir) { rv = ENOEXEC; goto exec_fail; } for (cp = &exdata.ex_shell[2];; ++cp) { if (cp >= &exdata.ex_shell[MAXINTERP]) { rv = ENOEXEC; goto exec_fail; } if (*cp == '\n') { *cp = '\0'; break; } if (*cp == '\t') *cp = ' '; } cp = &exdata.ex_shell[2]; /* get shell interpreter name */ while (*cp == ' ') cp++; shellname = ndp->ni_dirp = cp; while (*cp && *cp != ' ') cp++; if (*cp) *cp++ = '\0'; indir = 1; /* indicate this is a script file */ vput(vp); /* find shell interpreter */ ndp->ni_segflg = UIO_SYSSPACE; goto again; #if 0 ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF; if (rv = namei(ndp, p)) return (rv); vp = ndp->ni_vp; if (rv = VOP_GETATTR(vp, &vattr, cred, p)) goto exec_fail; uid = cred->cr_uid; /* shell scripts can't be setuid */ gid = cred->cr_gid; #endif } /* * Step 3. File and header are valid. Now, dig out the strings * out of the old process image. */ /* assumption: most execve's have less than 256 arguments, with a * total of string storage space not exceeding 2K. It is more * frequent that when this fails, string space falls short first * (e.g. as when a large termcap environment variable is present). * It is infrequent when more than 256 arguments are used that take * up less than 2K of space (e.g. args average more than 8 chars). 
* * What we give up in this implementation is a dense encoding of * the data structure in the receiving program's address space. * This means that there is plenty of wasted space (up to 6KB) * as the price we pay for a fast, single pass algorithm. * * Our alternative would be to accumulate strings and pointers * in the first pass, then, knowing the sizes and number of the * strings, pack them neatly and tightly togeither in the second * pass. This means two copies of the strings, and string copying * is much of the cost of exec. */ /* allocate string buffer and arg buffer */ org = kargbuf = (char **) kmem_alloc_wait(exec_map, NCARGS + PAGE_SIZE); kstringbuf = kstringbufp = ((char *)kargbuf) + NBPG/2; kargbuf += NBPG/(4*sizeof(int)); kargbufp = kargbuf; /* for each envp, copy in string */ limitonargs = NCARGS; cnt = 0; /* first, do (shell name if any then) args */ needsenv = 1; if (indir) { ep = shellname ; twice: if (ep) { /* did we outgrow initial argbuf, if so, die */ if (kargbufp >= (char **)kstringbuf) { rv = E2BIG; goto exec_fail; } if (rv = copyinstr(ep, kstringbufp, limitonargs, &stringlen)) goto exec_fail; *kargbufp = (char *)(kstringbufp - kstringbuf + USRSTACK + NBPG/2); kargbufp++; cnt++; kstringbufp += stringlen; limitonargs -= stringlen + sizeof(long); } if (indir) { indir = 0; ep = uap->fname; /* orginal executable is 1st arguement with scripts */ goto twice; } if (vectp = uap->argp) vectp++; /* manually doing the first arguement with scripts */ } else vectp = uap->argp; /* normal executable */ do_env_as_well: if (vectp == 0) goto dont_bother; do { /* did we outgrow initial argbuf, if so, die */ if (kargbufp >= (char **)kstringbuf) goto exec_fail; /* get an string pointer */ ep = (char *)fuword(vectp++); if (ep == (char *)-1) { rv = EFAULT; goto exec_fail; } /* if not null pointer, copy in string */ if (ep) { if (rv = copyinstr(ep, kstringbufp, limitonargs, &stringlen)) goto exec_fail; /* assume that strings usually all fit in last page */ *kargbufp = 
(char *)(kstringbufp - kstringbuf + USRSTACK + NBPG/2); kargbufp++; cnt++; kstringbufp += stringlen; limitonargs -= stringlen + sizeof(long); } else { *kargbufp++ = 0; limitonargs -= sizeof(long); break; } } while (limitonargs > 0); dont_bother: if (limitonargs <= 0) { rv = E2BIG; goto exec_fail; } if (needsenv) { argc = cnt; vectp = uap->envp; needsenv = 0; goto do_env_as_well; } kargbuf[-1] = (char *)argc; /* * Step 4. Build the new processes image. */ /* At this point, we are committed -- destroy old executable */ /* * This seems to work for the moment, should really be using `copyin' * Besides, `namei' already did this, should have a way of culling * this out of nameidata */ cp = uap->fname + strlen(uap->fname) - 1; while (*cp != '/' && cp > uap->fname) cp--; strncpy(p->p_comm, cp+1, MAXCOMLEN); vs = p->p_vmspace; addr = 0; size = USRSTACK - addr; /* blow away all address space */ rv = vm_deallocate(&vs->vm_map, addr, size, FALSE); /* At this point we release vfork'ed parent, before we blow it */ if (p->p_flag & SPPWAIT) { p->p_flag &= ~(SPPWAIT); wakeup((caddr_t)p->p_pptr); } if (rv) goto exec_abort; /* build a new address space */ addr = 0; if (exdata.ex_exec.a_text == 0) { /* screwball mode */ foff = tsize = 0; exdata.ex_exec.a_data += exdata.ex_exec.a_text; } else { tsize = roundup(exdata.ex_exec.a_text, NBPG); foff = NBPG; } dsize = roundup(exdata.ex_exec.a_data, NBPG); bsize = roundup(exdata.ex_exec.a_bss + dsize, NBPG); bsize -= dsize; /* map text & data*/ rv = vm_mmap(&vs->vm_map, &addr, tsize+dsize, VM_PROT_ALL, MAP_FILE|MAP_COPY|MAP_FIXED, (caddr_t)ndp->ni_vp, foff); if (rv) goto exec_abort; /* r/w data, ro text */ if (tsize) { addr = 0; rv = vm_protect(&vs->vm_map, addr, tsize, FALSE, VM_PROT_READ|VM_PROT_EXECUTE); if (rv) goto exec_abort; } /* create anonymous memory region for bss */ addr = dsize + tsize; rv = vm_allocate(&vs->vm_map, &addr, bsize, FALSE); if (rv) goto exec_abort; /* create anonymous memory region for stack */ addr = USRSTACK - 
MAXSSIZ; rv = vm_allocate(&vs->vm_map, &addr, MAXSSIZ, FALSE); if (rv) goto exec_abort; /* * Step 5. Prepare process for execution. */ /* touchup process information */ vs->vm_tsize = tsize/NBPG; /* text size (pages) XXX */ vs->vm_dsize = (dsize+bsize)/NBPG; /* data size (pages) XXX */ vs->vm_ssize = MAXSSIZ/NBPG; /* stack size (pages) */ vs->vm_taddr = 0; /* user virtual address of text XXX */ vs->vm_daddr = (caddr_t)tsize; /* user virtual address of data XXX */ /* user VA at max stack growth */ vs->vm_maxsaddr = (caddr_t)(USRSTACK - MAXSSIZ); /* everything fits in a single page, no fixups, no more work */ /* (groan) due to bug in vm_map_copy, can't remap. copy for now. */ size = kstringbufp + 1 - kstringbuf; size = roundup(size + NBPG/2, NBPG); for (; kargbuf < kargbufp; kargbuf++) if (*kargbuf) *kargbuf -= size; rv = copyout((caddr_t)org, (caddr_t)USRSTACK - size, size); if(rv) goto exec_abort; /* close files on exec, fixup signals */ fdcloseexec(p); execsigs(p); p->p_regs[SP] = USRSTACK - size + NBPG/4 - 4; vs->vm_ssize = size / NBPG; /* stack size (pages) */ setregs(p, exdata.ex_exec.a_entry); kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE); vput(ndp->ni_vp); if(newcred) { p->p_ucred = crcopy(p->p_ucred); newcred = 0; } p->p_ucred->cr_uid = p->p_cred->p_svuid = uid; p->p_ucred->cr_gid = p->p_cred->p_svgid = gid; /* Notify debugger that interesting things are abound */ if (p->p_flag & STRC) psignal(p, SIGTRAP); return (0); exec_fail: vput(ndp->ni_vp); return(rv); exec_abort: /* untested and probably bogus */ kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE); vput(ndp->ni_vp); exit(p, W_EXITCODE(0, SIGABRT)); return(0); }