Return to BSD News archive
Path: sserve!manuel!munnari.oz.au!uunet!mcsun!sun4nl!tuegate.tue.nl!svin02!wzv!gvr.win.tue.nl
From: guido@gvr.win.tue.nl (Guido van Rooij)
Newsgroups: comp.unix.bsd
Subject: kern_execve
Message-ID: <3493@wzv.win.tue.nl>
Date: 30 Jun 92 15:19:13 GMT
Sender: news@wzv.win.tue.nl
Organization: Eindhoven University of Technology, The Netherlands
Lines: 486
Hello all... Since there is no sign yet of 386bsd 0.1 and because no one
seems to know what a reasonable kern_execve looks like nowadays, I decided
to post mine. My machine seems stable now (however, it sometimes hangs
because of a full proc table). It contains quite a few patches, and
to be honest... I don't have the original one anymore. (But I guess
posting the patches would mean an even bigger article :-))
Most of the changes are obvious, and therefore they aren't commented.
If you want to know why a particular patch was done, or if I missed an
essential one (not very likely though), then please mail me.
-Guido
------8<-------------------------------------------------------------
/*
* Copyright (c) 1992 William Jolitz. All rights reserved.
* Written by William Jolitz 1/92
*
* Redistribution and use in source and binary forms are freely permitted
* provided that the above copyright notice and attribution and date of work
* and this paragraph are duplicated in all such forms.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* This procedure implements a minimal program execution facility for
* 386BSD. It interfaces to the BSD kernel as the execve system call.
* Significant limitations and lack of compatibility with POSIX are
* present with this version, to make its basic operation more clear.
*
*/
#include "param.h"
#include "systm.h"
#include "proc.h"
#include "mount.h"
#include "namei.h"
#include "vnode.h"
#include "file.h"
#include "exec.h"
#include "stat.h"
#include "wait.h"
#include "signalvar.h"
#include "mman.h"
#include "vm/vm.h"
#include "vm/vm_param.h"
#include "vm/vm_map.h"
#include "vm/vm_kern.h"
#include "machine/reg.h"
/* RCS identification string for this file (expanded $Header$ keyword). */
static char rcsid[] = "$Header: /usr/bill/working/sys/kern/RCS/kern_execve.c,v 1.3 92/01/21 21:29:13 william Exp $";
/*
 * Bill's first-cut execve() system call. Puts hair on your chest.
 *
 * Replaces the image of process `p' with the program named by uap->fname:
 *
 *   1. look the file up and check that it may be executed;
 *   2. read its header: either a ZMAGIC a.out image, or a "#!" script,
 *      in which case the lookup is restarted once on the interpreter;
 *   3. copy the argument and environment strings into a kernel buffer;
 *   4. throw away the old address space and map text, data, bss, stack;
 *   5. copy the strings to the top of the new stack and fix up files,
 *      signals, registers and credentials.
 *
 * p		process doing the exec
 * uap		system call arguments: fname, argp and envp are fetched
 *		from user space
 * retval	not used
 *
 * Returns 0 on success, or an error number when the exec fails before the
 * old image has been destroyed.  A failure after that point makes the
 * process exit with SIGABRT.
 */
/* ARGSUSED */
execve(p, uap, retval)
	struct proc *p;
	register struct args {
		char	*fname;
		char	**argp;
		char	**envp;
	} *uap;
	int *retval;
{
	register struct nameidata *ndp;
	int rv, amt;
	struct nameidata nd;
	char **kargbuf, **kargbufp, *kstringbuf, *kstringbufp;
	char **org = 0;		/* start of arg buffer, 0 until allocated */
	char **vectp, *ep;
	int needsenv, limitonargs;
	u_int stringlen;
	int addr, size;
	int argc;
	char *cp;
	struct vmspace *vs;
	int tsize, dsize, bsize, cnt, foff;
	int indir = 0;		/* set once a "#!" script has been seen */
	int newcred = 0;	/* set when a set-id bit was honoured */
	register struct pcred *pcred = p->p_cred;
	register struct ucred *cred ;
	struct vnode *vp;
	struct vattr vattr;
	uid_t uid;
	gid_t gid;
	char *shellname;
	char interp[MAXINTERP];	/* private copy of the interpreter name */
	union {
		char	ex_shell[MAXINTERP];	/* #! and interpreter name */
		struct	exec ex_exec;
	} exdata;

	/*
	 * Step 1. Lookup filename to see if we have something to execute.
	 */
	ndp = &nd;
	ndp->ni_dirp = uap->fname;
	ndp->ni_segflg = UIO_USERSPACE;
again:
	ndp->ni_nameiop = LOOKUP | LOCKLEAF | FOLLOW;

	/* is it there? */
	if (rv = namei(ndp, p))
		return (rv);
	vp = ndp->ni_vp;
	cred = pcred->pc_ucred;
	uid = cred->cr_uid;	/* get original uid/gid */
	gid = cred->cr_gid;
	if (rv = VOP_GETATTR(vp, &vattr, cred, p))
		goto exec_fail;
	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {	/* no exec on fs ? */
		rv = EACCES;
		goto exec_fail;
	}

	/*
	 * Set-id bits are honoured only for the file named by the caller
	 * (indir == 0).  On the second pass, for a script interpreter,
	 * uid/gid have just been reset from the current credentials above,
	 * so a set-id script does not change ids.
	 */
	if (indir == 0 && (p->p_flag & STRC) == 0 &&
	    (vp->v_mount->mnt_flag & MNT_NOSUID) == 0) {
		if (vattr.va_mode & VSUID) {	/* check for SUID */
			uid = vattr.va_uid;
			newcred = 1;
		}
		if (vattr.va_mode & VSGID) {	/* check for SGID */
			gid = vattr.va_gid;
			newcred = 1;
		}
	}

	/* is it a regular file? */
	if (vp->v_type != VREG) {
		rv = EACCES;
		goto exec_fail;
	}

	/* is it executable? this is relevant for root only. */
	/* we are second-guessing the xxx_getattr(),xxx_access() routines. */
	if ((vattr.va_mode & (0111)) == 0) {
		rv = EACCES;
		goto exec_fail;
	}

	/* Can user access it? (always succeeds for root) */
	if (rv = VOP_ACCESS(vp, VEXEC, cred, p))
		goto exec_fail;

	/*
	 * Step 2. Does the file contain a format we can
	 * understand and execute
	 */

	/*
	 * Read in first few bytes of file for segment sizes, magic number:
	 *	ZMAGIC = demand paged RO text
	 * Also an ASCII line beginning with #! is
	 * the file name of a ``shell'' and arguments may be prepended
	 * to the argument list if given here.
	 */
	exdata.ex_shell[0] = '\0';	/* for zero length files */
	rv = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata, sizeof (exdata),
	    (off_t)0, UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &amt, p);
	if (rv)
		goto exec_fail;
#ifndef lint
	if (amt > sizeof(exdata) - sizeof(exdata.ex_exec) &&
	    exdata.ex_shell[0] != '#') {
		rv = ENOEXEC;
		goto exec_fail;
	}
#endif

	/* that we recognize? */
	rv = ENOEXEC;		/* result of every failed header check below */
	if ((int)exdata.ex_exec.a_magic == ZMAGIC) {
		/* sanity check "ain't not such thing as a sanity clause" -groucho */
		if ( /*exdata.ex_exec.a_text == 0 || */
		    exdata.ex_exec.a_text > MAXTSIZ ||
		    exdata.ex_exec.a_text % NBPG ||
		    exdata.ex_exec.a_text > vattr.va_size)
			goto exec_fail;
		if ( exdata.ex_exec.a_data == 0 ||
		    exdata.ex_exec.a_data > DFLDSIZ ||
		    exdata.ex_exec.a_data > vattr.va_size ||
		    exdata.ex_exec.a_data + exdata.ex_exec.a_text > vattr.va_size)
			goto exec_fail;
		if (exdata.ex_exec.a_bss > MAXDSIZ)
			goto exec_fail;
		if (exdata.ex_exec.a_text + exdata.ex_exec.a_data +
		    exdata.ex_exec.a_bss > MAXTSIZ + MAXDSIZ)
			goto exec_fail;
	} else {
		char *src, *dst;

		/* only "#!" scripts qualify, and only one level deep */
		if (exdata.ex_shell[0] != '#' ||
		    exdata.ex_shell[1] != '!' ||
		    indir) {
			rv = ENOEXEC;
			goto exec_fail;
		}

		/* terminate the first line, turning tabs into blanks */
		for (cp = &exdata.ex_shell[2];; ++cp) {
			if (cp >= &exdata.ex_shell[MAXINTERP]) {
				rv = ENOEXEC;
				goto exec_fail;
			}
			if (*cp == '\n') {
				*cp = '\0';
				break;
			}
			if (*cp == '\t')
				*cp = ' ';
		}

		/* get shell interpreter name: first blank-delimited word */
		cp = &exdata.ex_shell[2];
		while (*cp == ' ')
			cp++;
		src = cp;
		while (*cp && *cp != ' ')
			cp++;
		if (*cp)
			*cp++ = '\0';

		/*
		 * Keep the name in a buffer of its own: exdata is read into
		 * again on the next pass, and the name is still needed in
		 * step 3 as the first argument.  The line was terminated
		 * inside ex_shell above, so the name fits in interp[].
		 */
		dst = interp;
		while ((*dst++ = *src++) != '\0')
			;
		shellname = ndp->ni_dirp = interp;

		indir = 1;		/* indicate this is a script file */
		vput(vp);		/* find shell interpreter */
		ndp->ni_segflg = UIO_SYSSPACE;
		goto again;
	}

	/*
	 * Step 3. File and header are valid. Now, dig out the strings
	 * out of the old process image.
	 */

	/* assumption: most execve's have less than 256 arguments, with a
	 * total of string storage space not exceeding 2K. It is more
	 * frequent that when this fails, string space falls short first
	 * (e.g. as when a large termcap environment variable is present).
	 * It is infrequent when more than 256 arguments are used that take
	 * up less than 2K of space (e.g. args average more than 8 chars).
	 *
	 * What we give up in this implementation is a dense encoding of
	 * the data structure in the receiving program's address space.
	 * This means that there is plenty of wasted space (up to 6KB)
	 * as the price we pay for a fast, single pass algorithm.
	 *
	 * Our alternative would be to accumulate strings and pointers
	 * in the first pass, then, knowing the sizes and number of the
	 * strings, pack them neatly and tightly together in the second
	 * pass. This means two copies of the strings, and string copying
	 * is much of the cost of exec.
	 */

	/*
	 * allocate string buffer and arg buffer; from here on every failure
	 * path has to give the buffer back (exec_fail and exec_abort do).
	 */
	org = kargbuf = (char **) kmem_alloc_wait(exec_map, NCARGS + PAGE_SIZE);
	kstringbuf = kstringbufp = ((char *)kargbuf) + NBPG/2;
	kargbuf += NBPG/(4*sizeof(int));
	kargbufp = kargbuf;

	/* for each envp, copy in string */
	limitonargs = NCARGS;
	cnt = 0;

	/* first, do (shell name if any then) args */
	needsenv = 1;
	if (indir) {
		ep = shellname ;
twice:
		if (ep) {
			/* did we outgrow initial argbuf, if so, die */
			if (kargbufp >= (char **)kstringbuf) {
				rv = E2BIG;
				goto exec_fail;
			}
			if (rv = copyinstr(ep, kstringbufp, limitonargs,
			    &stringlen))
				goto exec_fail;
			*kargbufp = (char *)(kstringbufp - kstringbuf
			    + USRSTACK + NBPG/2);
			kargbufp++;
			cnt++;
			kstringbufp += stringlen;
			limitonargs -= stringlen + sizeof(long);
		}
		if (indir) {
			indir = 0;
			ep = uap->fname;	/* original executable is 1st
						   argument with scripts */
			goto twice;
		}
		if (vectp = uap->argp)
			vectp++;		/* manually doing the first
						   argument with scripts */
	}
	else
		vectp = uap->argp;		/* normal executable */

do_env_as_well:
	if (vectp == 0)
		goto dont_bother;
	do {
		/* did we outgrow initial argbuf, if so, die */
		if (kargbufp >= (char **)kstringbuf) {
			/* rv may still be 0 here; report the overflow */
			rv = E2BIG;
			goto exec_fail;
		}

		/* get a string pointer */
		ep = (char *)fuword(vectp++);
		if (ep == (char *)-1) {
			rv = EFAULT;
			goto exec_fail;
		}

		/* if not null pointer, copy in string */
		if (ep) {
			if (rv = copyinstr(ep, kstringbufp, limitonargs,
			    &stringlen))
				goto exec_fail;
			/* assume that strings usually all fit in last page */
			*kargbufp = (char *)(kstringbufp - kstringbuf
			    + USRSTACK + NBPG/2);
			kargbufp++;
			cnt++;
			kstringbufp += stringlen;
			limitonargs -= stringlen + sizeof(long);
		} else {
			/* null pointer ends this vector */
			*kargbufp++ = 0;
			limitonargs -= sizeof(long);
			break;
		}
	} while (limitonargs > 0);

dont_bother:
	if (limitonargs <= 0) {
		rv = E2BIG;
		goto exec_fail;
	}

	/* the first pass did the arguments; go round once more for envp */
	if (needsenv) {
		argc = cnt;
		vectp = uap->envp;
		needsenv = 0;
		goto do_env_as_well;
	}
	kargbuf[-1] = (char *)argc;	/* argc sits just below the pointers */

	/*
	 * Step 4. Build the new processes image.
	 */

	/* At this point, we are committed -- destroy old executable */

	/*
	 * This seems to work for the moment, should really be using `copyin'
	 * Besides, `namei' already did this, should have a way of culling
	 * this out of nameidata
	 */
	cp = uap->fname + strlen(uap->fname) - 1;
	while (*cp != '/' && cp > uap->fname)
		cp--;
	/* skip the slash we stopped on; a name without one is used whole */
	if (*cp == '/')
		cp++;
	strncpy(p->p_comm, cp, MAXCOMLEN);

	vs = p->p_vmspace;
	addr = 0;
	size = USRSTACK - addr;

	/* blow away all address space */
	rv = vm_deallocate(&vs->vm_map, addr, size, FALSE);

	/* At this point we release vfork'ed parent, before we blow it */
	if (p->p_flag & SPPWAIT) {
		p->p_flag &= ~(SPPWAIT);
		wakeup((caddr_t)p->p_pptr);
	}
	if (rv)
		goto exec_abort;

	/* build a new address space */
	addr = 0;
	if (exdata.ex_exec.a_text == 0) {
		/* screwball mode */
		foff = tsize = 0;
		exdata.ex_exec.a_data += exdata.ex_exec.a_text;
	} else {
		tsize = roundup(exdata.ex_exec.a_text, NBPG);
		foff = NBPG;
	}
	dsize = roundup(exdata.ex_exec.a_data, NBPG);
	bsize = roundup(exdata.ex_exec.a_bss + dsize, NBPG);
	bsize -= dsize;

	/* map text & data*/
	rv = vm_mmap(&vs->vm_map, &addr, tsize+dsize, VM_PROT_ALL,
	    MAP_FILE|MAP_COPY|MAP_FIXED, (caddr_t)ndp->ni_vp, foff);
	if (rv)
		goto exec_abort;

	/* r/w data, ro text */
	if (tsize) {
		addr = 0;
		rv = vm_protect(&vs->vm_map, addr, tsize, FALSE,
		    VM_PROT_READ|VM_PROT_EXECUTE);
		if (rv)
			goto exec_abort;
	}

	/* create anonymous memory region for bss */
	addr = dsize + tsize;
	rv = vm_allocate(&vs->vm_map, &addr, bsize, FALSE);
	if (rv)
		goto exec_abort;

	/* create anonymous memory region for stack */
	addr = USRSTACK - MAXSSIZ;
	rv = vm_allocate(&vs->vm_map, &addr, MAXSSIZ, FALSE);
	if (rv)
		goto exec_abort;

	/*
	 * Step 5. Prepare process for execution.
	 */

	/* touchup process information */
	vs->vm_tsize = tsize/NBPG;		/* text size (pages) XXX */
	vs->vm_dsize = (dsize+bsize)/NBPG;	/* data size (pages) XXX */
	vs->vm_ssize = MAXSSIZ/NBPG;		/* stack size (pages) */
	vs->vm_taddr = 0;	/* user virtual address of text XXX */
	vs->vm_daddr = (caddr_t)tsize;	/* user virtual address of data XXX */
	/* user VA at max stack growth */
	vs->vm_maxsaddr = (caddr_t)(USRSTACK - MAXSSIZ);

	/* everything fits in a single page, no fixups, no more work */
	/* (groan) due to bug in vm_map_copy, can't remap. copy for now. */
	size = kstringbufp + 1 - kstringbuf;
	size = roundup(size + NBPG/2, NBPG);

	/*
	 * The pointers were built as if the buffer started at USRSTACK;
	 * it is copied out to USRSTACK - size, so move each one down.
	 */
	for (; kargbuf < kargbufp; kargbuf++)
		if (*kargbuf)
			*kargbuf -= size;
	rv = copyout((caddr_t)org, (caddr_t)USRSTACK - size, size);
	if(rv)
		goto exec_abort;

	/* close files on exec, fixup signals */
	fdcloseexec(p);
	execsigs(p);

	p->p_regs[SP] = USRSTACK - size + NBPG/4 - 4;
	vs->vm_ssize = size / NBPG;	/* stack size (pages) */
	setregs(p, exdata.ex_exec.a_entry);
	kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE);
	vput(ndp->ni_vp);

	/* take a copy of the credentials before changing the ids in them */
	if(newcred) {
		p->p_ucred = crcopy(p->p_ucred);
		newcred = 0;
	}
	p->p_ucred->cr_uid = p->p_cred->p_svuid = uid;
	p->p_ucred->cr_gid = p->p_cred->p_svgid = gid;

	/* Notify debugger that interesting things are abound */
	if (p->p_flag & STRC)
		psignal(p, SIGTRAP);
	return (0);

exec_fail:
	/* old image is still intact: release what we hold, return error */
	if (org)
		kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE);
	vput(ndp->ni_vp);
	return(rv);

exec_abort:
	/* untested and probably bogus */
	kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE);
	vput(ndp->ni_vp);
	exit(p, W_EXITCODE(0, SIGABRT));
	return(0);
}