*BSD News Article 1761


Return to BSD News archive

Path: sserve!manuel!munnari.oz.au!uunet!mcsun!sun4nl!tuegate.tue.nl!svin02!wzv!gvr.win.tue.nl
From: guido@gvr.win.tue.nl (Guido van Rooij)
Newsgroups: comp.unix.bsd
Subject: kern_execve
Message-ID: <3493@wzv.win.tue.nl>
Date: 30 Jun 92 15:19:13 GMT
Sender: news@wzv.win.tue.nl
Organization: Eindhoven University of Technology, The Netherlands
Lines: 486

Hello all... Since there is no sign yet of 386bsd 0.1, and because no one
seems to know what a reasonable kern_execve looks like nowadays, I decided
to post mine. My machine seems stable now (however, it sometimes hangs
because of a full proc table). It contains quite a few patches, and
to be honest... I don't have the original one anymore. (But I guess
posting the patches would mean an even bigger article :-))
Most of the changes are obvious, and therefore they aren't commented.
If you want to know why a particular patch was made, or if I missed an
essential one (not very likely though), then please mail me.

-Guido
------8<-------------------------------------------------------------

/*
 * Copyright (c) 1992 William Jolitz. All rights reserved.
 * Written by William Jolitz 1/92
 *
 * Redistribution and use in source and binary forms are freely permitted
 * provided that the above copyright notice and attribution and date of work
 * and this paragraph are duplicated in all such forms.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * This procedure implements a minimal program execution facility for
 * 386BSD. It interfaces to the BSD kernel as the execve system call.
 * Significant limitations and lack of compatiblity with POSIX are
 * present with this version, to make its basic operation more clear.
 *
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "mount.h"
#include "namei.h"
#include "vnode.h"
#include "file.h"
#include "exec.h"
#include "stat.h"
#include "wait.h"
#include "signalvar.h"
#include "mman.h"

#include "vm/vm.h"
#include "vm/vm_param.h"
#include "vm/vm_map.h"
#include "vm/vm_kern.h"

#include "machine/reg.h"

static char rcsid[] = "$Header: /usr/bill/working/sys/kern/RCS/kern_execve.c,v 1.3 92/01/21 21:29:13 william Exp $";

/*
 * Bill's first-cut execve() system call. Puts hair on your chest.
 *
 * Replaces the image of process `p' with the program named by uap->fname.
 *
 *	p	the calling process
 *	uap	system call arguments:
 *		  fname	user-space path of the file to execute
 *		  argp	user-space argument vector (may be null)
 *		  envp	user-space environment vector (may be null)
 *	retval	unused
 *
 * Returns 0 on success, or an errno value (the result of namei(),
 * VOP_GETATTR(), VOP_ACCESS(), vn_rdwr(), copyinstr(), or one of EACCES,
 * ENOEXEC, E2BIG, EFAULT) when the exec is rejected before the old image
 * has been touched.  Once the old address space has been torn down
 * (Step 4 onward) any failure goes to exec_abort, which calls exit()
 * on the process with SIGABRT.
 *
 * The work is done in five steps, marked in the body:
 *	1. look up the file and check type/permissions/set-id bits
 *	2. read the header: ZMAGIC a.out, or a "#!" interpreter line
 *	   (in which case the lookup is redone once for the interpreter)
 *	3. copy the argument and environment strings into a kernel buffer
 *	4. discard the old address space and map text/data/bss/stack
 *	5. copy the strings out to the new stack, set registers and creds
 */

/* ARGSUSED */
execve(p, uap, retval)
	struct proc *p;
	register struct args {
		char	*fname;
		char	**argp;
		char	**envp;
	} *uap;
	int *retval;
{
	register struct nameidata *ndp;
	int rv, amt;
	struct nameidata nd;
	char **kargbuf, **kargbufp, *kstringbuf, *kstringbufp;
	char **org, **vectp, *ep;
	int needsenv, limitonargs;
	u_int stringlen;
	int addr, size;
	int argc;
	char *cp;
	struct vmspace *vs;
	int tsize, dsize, bsize, cnt, foff;

	int indir = 0;		/* set once a "#!" script has been seen */
	int newcred = 0;	/* set when a set-uid/set-gid bit applies */
	register struct pcred *pcred = p->p_cred;
	register struct ucred *cred ;
	struct vnode *vp;
	struct vattr vattr;
	uid_t uid;
	gid_t gid;
	char *shellname;
	union {
		char	ex_shell[MAXINTERP];	/* #! and interpreter name */
		struct	exec ex_exec;
	} exdata;

	/*
	 * Step 1. Lookup filename to see if we have something to execute.
	 */
	ndp = &nd;
	ndp->ni_dirp = uap->fname;
	ndp->ni_segflg = UIO_USERSPACE;

	/*
	 * No argument buffer yet; exec_fail uses this to decide whether
	 * there is anything to give back to exec_map.
	 */
	org = 0;

again:
	ndp->ni_nameiop = LOOKUP | LOCKLEAF | FOLLOW;

	/* is it there? */
	if (rv = namei(ndp, p))
		return (rv);

	vp = ndp->ni_vp;
	cred = pcred->pc_ucred;
	uid = cred->cr_uid;		/* get original uid/gid */
	gid = cred->cr_gid;

	if (rv = VOP_GETATTR(vp, &vattr, cred, p))
		goto exec_fail;

	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {	/* no exec on fs ? */
		rv = EACCES;
		goto exec_fail;
	}

	/*
	 * Set-id bits are honoured only for a directly executed file
	 * (not for the interpreter of a script), only when the process
	 * does not have STRC set, and only when the file system is not
	 * mounted MNT_NOSUID.
	 */
	if (indir == 0 && (p->p_flag & STRC) == 0 &&
			(vp->v_mount->mnt_flag & MNT_NOSUID) == 0) {
		if (vattr.va_mode & VSUID) {	/* check for SUID */
			uid = vattr.va_uid;
			newcred = 1;
		}
		if (vattr.va_mode & VSGID) {	/* check for SGID */
			gid = vattr.va_gid;
			newcred = 1;
		}
	}

	/* is it a regular file? */
	if (vp->v_type != VREG) {
		rv = EACCES;
		goto exec_fail;
	}

	/* is it executable? this is relevant for root only. */
	/* we are second-guessing the xxx_getattr(),xxx_access() routines. */
	if ((vattr.va_mode & (0111)) == 0) {
		rv = EACCES;
		goto exec_fail;
	}

	/* Can user access it? (always succeeds for root) */
	if (rv = VOP_ACCESS(vp, VEXEC, cred, p))
		goto exec_fail;


	/*
	 * Step 2. Does the file contain a format we can
	 * understand and execute
	 */
	/*
	 * Read in first few bytes of file for segment sizes, magic number:
	 *	ZMAGIC = demand paged RO text
	 * Also an ASCII line beginning with #! is
	 * the file name of a ``shell'' and arguments may be prepended
	 * to the argument list if given here.
	 */
	exdata.ex_shell[0] = '\0';	/* for zero length files */

	rv = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata, sizeof (exdata),
	    (off_t)0, UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &amt, p);
	if (rv)
		goto exec_fail;
#ifndef lint
	/*
	 * `amt' is the residual count: a file too short to hold a full
	 * exec header can only be a script.
	 */
	if (amt > sizeof(exdata) - sizeof(exdata.ex_exec) &&
	    exdata.ex_shell[0] != '#') {
		rv = ENOEXEC;
		goto exec_fail;
	}
#endif
	/* that we recognize? */
	rv = ENOEXEC;
	if ((int)exdata.ex_exec.a_magic == ZMAGIC) {

		/* sanity check  "ain't not such thing as a sanity clause" -groucho */
		if (	/*exdata.ex_exec.a_text == 0 || */
			exdata.ex_exec.a_text > MAXTSIZ ||
			exdata.ex_exec.a_text % NBPG ||
			exdata.ex_exec.a_text > vattr.va_size)
			goto exec_fail;

		if (	exdata.ex_exec.a_data == 0 ||
			exdata.ex_exec.a_data > DFLDSIZ ||
			exdata.ex_exec.a_data > vattr.va_size ||
			exdata.ex_exec.a_data + exdata.ex_exec.a_text > vattr.va_size)
			goto exec_fail;

		if (exdata.ex_exec.a_bss > MAXDSIZ)
			goto exec_fail;

		if (exdata.ex_exec.a_text + exdata.ex_exec.a_data +
				exdata.ex_exec.a_bss > MAXTSIZ + MAXDSIZ)
			goto exec_fail;

	} else {

		/* only one level of "#!" indirection is permitted */
		if (exdata.ex_shell[0] != '#' ||
		    exdata.ex_shell[1] != '!' ||
		    indir) {
			rv = ENOEXEC;
			goto exec_fail;
		}
		/* terminate the line at the newline, turning tabs into blanks */
		for (cp = &exdata.ex_shell[2];; ++cp) {
			if (cp >= &exdata.ex_shell[MAXINTERP]) {
				rv = ENOEXEC;
				goto exec_fail;
			}
			if (*cp == '\n') {
				*cp = '\0';
				break;
			}
			if (*cp == '\t')
				*cp = ' ';
		}
		cp = &exdata.ex_shell[2];	/* get shell interpreter name */
		while (*cp == ' ')
			cp++;
		shellname = ndp->ni_dirp = cp;
		while (*cp && *cp != ' ')
			cp++;
		if (*cp)
			*cp++ = '\0';

		/*
		 * Redo the lookup for the interpreter.  The name now lives
		 * in kernel space, and uid/gid are reset at `again', so a
		 * script never picks up set-id credentials.
		 */
		indir = 1;	/* indicate this is a script file */
		vput(vp);	/* find shell interpreter */
		ndp->ni_segflg = UIO_SYSSPACE;
		goto again;
	}

	/*
	 * Step 3.  File and header are valid. Now, dig out the strings
	 * out of the old process image.
	 */

	/* assumption: most execve's have less than 256 arguments, with a
	 * total of string storage space not exceeding 2K. It is more
	 * frequent that when this fails, string space falls short first
	 * (e.g. as when a large termcap environment variable is present).
	 * It is infrequent when more than 256 arguments are used that take
	 * up less than 2K of space (e.g. args average more than 8 chars).
	 *
	 * What we give up in this implementation is a dense encoding of
	 * the data structure in the receiving program's address space.
	 * This means that there is plenty of wasted space (up to 6KB)
	 * as the price we pay for a fast, single pass algorithm.
	 *
	 * Our alternative would be to accumulate strings and pointers
	 * in the first pass, then, knowing the sizes and number of the
	 * strings, pack them neatly and tightly together in the second
	 * pass. This means two copies of the strings, and string copying
	 * is much of the cost of exec.
	 */

	/*
	 * allocate string buffer and arg buffer: the pointer vector starts
	 * NBPG/4 bytes into the buffer, the strings NBPG/2 bytes in.
	 */
	org = kargbuf = (char **) kmem_alloc_wait(exec_map, NCARGS + PAGE_SIZE);
	kstringbuf = kstringbufp = ((char *)kargbuf) + NBPG/2;
	kargbuf += NBPG/(4*sizeof(int));
	kargbufp = kargbuf;
				/* for each envp, copy in string */
	limitonargs = NCARGS;
	cnt = 0;

	/* first, do (shell name if any then) args */
	needsenv = 1;
	if (indir)  {
		ep = shellname ;
twice:
	    if (ep) {
		/* did we outgrow initial argbuf, if so, die */
		if (kargbufp >= (char **)kstringbuf) {
			rv = E2BIG;
			goto exec_fail;
		}

		if (rv = copyinstr(ep, kstringbufp, limitonargs,
			&stringlen)) goto exec_fail;
		*kargbufp = (char *)(kstringbufp - kstringbuf
			+ USRSTACK + NBPG/2);
		kargbufp++;
		cnt++;
		kstringbufp += stringlen;
		limitonargs -= stringlen + sizeof(long);
	    }

		if (indir) {
			indir = 0;
			ep = uap->fname;	/* original executable is 1st
						   argument with scripts */
			goto twice;
		}
		if (vectp = uap->argp) vectp++;	/* manually doing the first
						   argument with scripts */
	}
	else
		vectp = uap->argp;	/* normal executable */

do_env_as_well:
	if (vectp == 0) goto dont_bother;
	do {
		/*
		 * did we outgrow initial argbuf, if so, die.  rv must be
		 * set here: it still holds 0 from the last successful
		 * copyinstr(), which would report the failed exec as a
		 * success.
		 */
		if (kargbufp >= (char **)kstringbuf) {
			rv = E2BIG;
			goto exec_fail;
		}

		/* get an string pointer */
		ep = (char *)fuword(vectp++);
		if (ep == (char *)-1) {
			rv = EFAULT;
			goto exec_fail;
		}

		/* if not null pointer, copy in string */
		if (ep) {
			if (rv = copyinstr(ep, kstringbufp, limitonargs,
				&stringlen)) goto exec_fail;
			/* assume that strings usually all fit in last page */
			*kargbufp = (char *)(kstringbufp - kstringbuf
				+ USRSTACK + NBPG/2);
			kargbufp++;
			cnt++;
			kstringbufp += stringlen;
			limitonargs -= stringlen + sizeof(long);
		} else {
			*kargbufp++ = 0;
			limitonargs -= sizeof(long);
			break;
		}
	} while (limitonargs > 0);

dont_bother:
	if (limitonargs <= 0) {
		rv = E2BIG;
		goto exec_fail;
	}

	/* first time through was argv; remember argc and go round for envp */
	if (needsenv) {
		argc = cnt;
		vectp = uap->envp;
		needsenv = 0;
		goto do_env_as_well;
	}

	/* argc sits in the slot just below the argument pointer vector */
	kargbuf[-1] = (char *)argc;

	/*
	 * Step 4. Build the new processes image.
	 */

	/* At this point, we are committed -- destroy old executable */

	/*
	 * This seems to work for the moment, should really be using `copyin'
	 * Besides, `namei' already did this, should have a way of culling
	 * this out of nameidata
	 *
	 * Record the last path component as the command name.  Scan back
	 * from the terminating NUL so that a name without any '/' is kept
	 * whole and an empty name is never read before its start.
	 */
	cp = uap->fname + strlen(uap->fname);
	while (cp > uap->fname && cp[-1] != '/') cp--;
	strncpy(p->p_comm, cp, MAXCOMLEN);

	vs = p->p_vmspace;
	addr = 0;
	size = USRSTACK - addr;
	/* blow away all address space */
	rv = vm_deallocate(&vs->vm_map, addr, size, FALSE);

	/* At this point we release vfork'ed parent, before we blow it */
	if (p->p_flag & SPPWAIT) {
		p->p_flag &= ~(SPPWAIT);
		wakeup((caddr_t)p->p_pptr);
	}
	if (rv)
		goto exec_abort;

	/* build a new address space */
	addr = 0;
	if (exdata.ex_exec.a_text == 0) {
		/* screwball mode */
		foff = tsize = 0;
		exdata.ex_exec.a_data += exdata.ex_exec.a_text;
	} else {
		tsize = roundup(exdata.ex_exec.a_text, NBPG);
		foff = NBPG;
	}
	dsize = roundup(exdata.ex_exec.a_data, NBPG);
	bsize = roundup(exdata.ex_exec.a_bss + dsize, NBPG);
	bsize -= dsize;

	/* map text & data*/
	rv = vm_mmap(&vs->vm_map, &addr, tsize+dsize, VM_PROT_ALL,
	MAP_FILE|MAP_COPY|MAP_FIXED, (caddr_t)ndp->ni_vp, foff);
	if (rv)
		goto exec_abort;

	/* r/w data, ro text */
	if (tsize) {
		addr = 0;
		rv = vm_protect(&vs->vm_map, addr, tsize, FALSE, VM_PROT_READ|VM_PROT_EXECUTE);
		if (rv)
			goto exec_abort;
	}

	/* create anonymous memory region for bss */
	addr = dsize + tsize;
	rv = vm_allocate(&vs->vm_map, &addr, bsize, FALSE);
	if (rv)
		goto exec_abort;

	/* create anonymous memory region for stack */
	addr = USRSTACK - MAXSSIZ;
	rv = vm_allocate(&vs->vm_map, &addr, MAXSSIZ, FALSE);
	if (rv)
		goto exec_abort;

	/*
	 * Step 5. Prepare process for execution.
	 */

	/* touchup process information */
	vs->vm_tsize = tsize/NBPG;		/* text size (pages) XXX */
	vs->vm_dsize = (dsize+bsize)/NBPG;	/* data size (pages) XXX */
	vs->vm_ssize = MAXSSIZ/NBPG;	/* stack size (pages) */
	vs->vm_taddr = 0;	/* user virtual address of text XXX */
	vs->vm_daddr = (caddr_t)tsize;	/* user virtual address of data XXX */
	/* user VA at max stack growth */
	vs->vm_maxsaddr = (caddr_t)(USRSTACK - MAXSSIZ);

	/* everything fits in a single page, no fixups, no more work */
	/* (groan) due to bug in vm_map_copy, can't remap. copy for now. */
	size = kstringbufp + 1 - kstringbuf;
	size = roundup(size + NBPG/2, NBPG);
	/*
	 * The string pointers were computed relative to USRSTACK; the
	 * buffer is copied out `size' bytes below it, so shift each one.
	 */
	for (; kargbuf < kargbufp; kargbuf++)
		if (*kargbuf)
			*kargbuf -= size;
	rv = copyout((caddr_t)org, (caddr_t)USRSTACK - size, size);
	if(rv)
		goto exec_abort;

	/* close files on exec, fixup signals */
	fdcloseexec(p);
	execsigs(p);

	/* initial stack pointer addresses the argc slot (kargbuf[-1]) */
	p->p_regs[SP] = USRSTACK - size + NBPG/4 - 4;
	vs->vm_ssize = size / NBPG;	/* stack size (pages) */
	setregs(p, exdata.ex_exec.a_entry);
	kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE);
	vput(ndp->ni_vp);

	/* get a private copy of the credentials before changing ids */
	if(newcred) {
		p->p_ucred = crcopy(p->p_ucred);
		newcred = 0;
	}
	p->p_ucred->cr_uid = p->p_cred->p_svuid = uid;
	p->p_ucred->cr_gid = p->p_cred->p_svgid = gid;

	/* Notify debugger that interesting things are abound */
	if (p->p_flag & STRC)
		psignal(p, SIGTRAP);

	return (0);

exec_fail:
	/*
	 * Failure before the old image was touched: hand back the argument
	 * buffer if Step 3 got as far as allocating it, then drop the vnode.
	 */
	if (org)
		kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE);
	vput(ndp->ni_vp);
	return(rv);

exec_abort:
	/* untested and probably bogus */
	kmem_free_wakeup(exec_map, org, NCARGS + PAGE_SIZE);
	vput(ndp->ni_vp);
	exit(p, W_EXITCODE(0, SIGABRT));
	return(0);

}