*BSD News Article 9228

Received: by minnie.vk1xwt.ampr.org with NNTP
	id AA5415 ; Thu, 24 Dec 92 04:01:25 EST
Xref: sserve comp.unix.bsd:9285 comp.lang.perl:12862 alt.sources:4812
Newsgroups: comp.unix.bsd,comp.lang.perl,alt.sources
Path: sserve!manuel.anu.edu.au!munnari.oz.au!spool.mu.edu!cass.ma02.bull.com!melb.bull.oz.au!zen.void.oz.au!sjg
From: sjg@zen.void.oz.au (Simon J. Gerraty)
Subject: Re: Easy way to create unix man pages?
Message-ID: <1992Dec22.082736.21124@zen.void.oz.au>
Keywords: unix manual hypertext
Organization: zen programming...
References: <1992Dec17.165257.9439@oakhill.sps.mot.com> <1992Dec18.040514.11824@netcom.com> <id.FBYV.EU5@ferranti.com>
Date: Tue, 22 Dec 1992 08:27:36 GMT
Lines: 578

Want easy to write man pages?
Want to have them in your source?
Have you got perl installed?  If you don't, this might tempt you :-)

[I have posted this before, but it appears upstream news problems
dropped it on the floor.]

cmt2doc.pl can generate pretty decent troff source from (almost)
plain text comments in your source.  The comment at the start of
cmt2doc.pl IS the man page for cmt2doc.  Just try:

	cmt2doc.pl -pm cmt2doc.pl | groff -Tps -man | lpr -Plaser

or whatever works for you.

#!/bin/sh
# This is a shell archive.
# remove everything above the "#!/bin/sh" line
# and feed to /bin/sh
# Use -c option to overwrite existing files
#
# Contents: 
#	cmt2doc.pl
#
# packed by: <sjg@zen> on Fri Jun  5 22:40:38 EST 1992
#
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f cmt2doc.pl -a "${1}" != "-c" ; then
  echo shar: Will not over-write existing file \"cmt2doc.pl\"
else
  echo shar: Extracting \"cmt2doc.pl\" \(13166 characters\)
  sed 's/^X//' >cmt2doc.pl << '!EOF'
X#!/usr/bin/perl --
Xeval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
X  if $running_under_some_shell;
X#
X# NAME:
X#	cmt2doc - extract documentation from source
X#
X# SYNOPSIS:
X#	cmt2doc [-pamit][-e "oext"][-S "secn"][-D "secd"][-O "org"]
X#		[-L "lang"][-C "cmt"][-E "ecmt"] "file"
X#
X# DESCRIPTION:
X#	This 'Perl' script extracts documentation from comments	in
X#	source files.  It allows manual pages to be written in ``plain
X#	text'' in source files where they are most likely to be updated
X#	when the source code is.
X#
X#	'cmt2doc' extracts the documentation as either clean text or as
X#	input suitable for 'troff'(1) and friends.   The results in
X#	either case are usually quite adequate.  Try the following
X#	commands:
X#.nf
X#
X#		'perl cmt2doc.pl -p cmt2doc.pl | more'
X#		'perl cmt2doc.pl -pm cmt2doc.pl | nroff -man | more'
X#.fi
X#
X#	'cmt2doc' can usually work out for itself how to extract the
X#	text from a comment.  It looks for the regular expression 
X#	'.* NAME:$' which it treats as the start of a manual page, and
X#	uses whatever is found before 'NAME' as the characters to
X#	remove from the start of each line. 
X#
X#	Typographical conventions:
X#.nf
X#		Words like \'this word\' will be type-set in 'bold'.
X#		Words like \"this word\" will be set in "italics".
X#		Words like ``this quote'' will not be touched.
X#.fi
X#
X#	It is possible to put 'troff' commands at the start of an
X#	otherwise blank line.  Indeed they are sometimes needed such as
X#	when setting out examples.  They will be stripped if not
X#	generating 'troff' output.  
X#
X#	'cmt2doc' understands the format required for most manual page
X#	sections and attempts to set them appropriately.
X#
X# OPTIONS:
X#	-p	print to stdout.  By default documentation for
X#		"file" will be printed to a file in the current
X#		directory of the same name but with an extension
X#		that represents the format (.doc,.man,.tex). 
X#
X#	-a	print all documentation, not just the top level.
X#
X#	-m	Output for 'troff -man'.
X#
X#	-i	Output for texinfo (not yet implemented).
X#
X#	-t	``Plain text'' strip single quotes.  Leave double quotes
X#		alone though.
X#
X#	-e "oext"
X#		Use "oext" as the extension for the output file.
X#
X#	-S "secn"
X#		Tell [nt]roff which section the man page belongs in.
X#		Default is 'L'.
X#
X#	-L "lang"
X#		Select default values for "cmt" and "ecmt" based on
X#		"lang" ('c','c++','lisp').  Most shell like languages
X#		such as 'perl' and 'sh' are easily handled by the
X#		defaults.
X#
X#	-D "secd"
X#		Use "secd" as the section description.
X#
X#	-C "cmt"
X#		Assume the comment lines start with "cmt".  Otherwise
X#		we attempt to work it out either based on the file
X#		extension (.c,.h,.cc etc) or from the comment itself.
X#
X#	-E "ecmt"
X#		The comment ends when we see "ecmt" otherwise the first
X#		line that does not start with "cmt".
X#
X#	-O "org"
X#		Use "org" as the organization identifier (printed bottom
X#		left of each page).
X#
X#	Some options only apply to certain output modes.
X#
X# FILES:
X#	/usr/bin/perl		The perl interpreter.  This entry
X#				is really just to show how 'cmt2doc'
X#				handles the 'FILES' section.
X#	/local/bin/cmt2doc.pl	This script. "ditto".
X#
X# BUGS:
X#	It probably does not handle nested quotes correctly.
X#	Lines starting with a \'.\' are in trouble.
X#	For good results it is hard to avoid using 'troff' commands,
X#	particularly '.nf' and '.fi'.
X#	
X#	Handling of '.TH' seems to vary with different man macro sets.
X#	You may have to hack 'man_init' to get good results.
X#
X
X#
X# RCSid:
X#	$Id: cmt2doc.pl,v 1.7 1992/06/05 12:39:07 sjg Exp $
X#
X#	@(#)Copyright (c) 1992, Simon J. Gerraty
X#
X#	This file is provided in the hope that it will
X#	be of use.  There is absolutely NO WARRANTY.
X#	Permission to copy, redistribute or otherwise
X#	use this file is hereby granted provided that 
X#	the above copyright notice and this notice are
X#	left intact. 
X#      
X#	Please send copies of changes and bug-fixes to:
X#	sjg@zen.void.oz.au
X#
X
X#
X# Start-up: establish defaults, parse the command line and select the
X# set of output hooks that matches the requested output format.
X#
X
X# Name we were invoked as, without any leading directory.
X$Myname=$0;
X$Myname=~ s#^.*/([^/]*)$#$1#;
X
X# some defaults
X# The $do_* variables hold the NAMES of the subs that the main loop
X# calls (as &$do_xxx) at each stage; the defaults give plain text.
X$do_init='txt_init';		# start of a documentation block
X$do_fini='noop';		# end of a documentation block
X$do_sec='txt_sec';		# a "SECTION:" heading line
X$do_para='noop';		# first line of a paragraph
X$do_line='txt_line';		# every ordinary text line
X
X$man_secn='L';			# local commands
X$oext='.doc';			# extension for the output file
X$Debug = 0;
X$start_para='';			# last paragraph macro chosen by man_para
X$indent=0;			# current .RS nesting depth (see man_indent)
X$defPD='.8v';			# normal inter-paragraph distance for .PD
X
X$date=&get_date;
X$org='FreeWare';		# be sure to set this!
X
Xrequire 'getopts.pl';
X# Call Getopts with '&'.  The older "do Getopts(...)" call form is a
X# syntax error from perl 5.20 onwards; '&' works in every perl version.
X&Getopts('dpamite:L:S:C:E:D:O:');
X
X$org=$opt_O if defined($opt_O);
X$cmt=$opt_C if defined($opt_C);
X$ecmt=$opt_E if defined($opt_E);
X# redefine the necessary functions
Xif (defined($opt_m)) {	# [tn]roff -man
X  $oext = '.man';
X  $do_init='man_init';
X  $do_para='man_para';
X  $do_sec='man_sec';
X  $do_line='man_line';
X} elsif (defined($opt_i)) {	# texinfo
X  # NOTE(review): no texi_* subs are defined in this script as posted,
X  # which matches the "not yet implemented" note for -i in the manual.
X  $oext = '.tex';
X  $do_init='texi_init';
X  $do_fini='texi_fini';
X  $do_sec='texi_sec';
X  $do_line='texi_line';
X}
X$man_secn=$opt_S if defined($opt_S);
X# An explicit -D wins; otherwise derive the section description from
X# the section name.
Xif (defined($opt_D)) {
X  $man_secd=$opt_D;
X} else {
X  $man_secd=&lookup_mansec($man_secn);
X}
X$oext=$opt_e if defined($opt_e);
X$Debug = 1 if defined($opt_d);
X$Lang=$opt_L if defined($opt_L);
X
X
X$indoc=0;			# non-zero while inside a documentation block
X$in_para = 0;			# non-zero while inside a paragraph
X
X#
X# Main loop: process each file named on the command line.
X#
X# For every file we
X#   1. choose the comment syntax: -C/-E, then -L or the file extension,
X#      and failing all of those the text found in front of " NAME:",
X#   2. unless -p was given, redirect STDOUT to <basename><oext>,
X#   3. feed the comment block that starts at a " NAME:" line through
X#      the output hooks selected at start-up.
X# Without -a only the first documentation block of a file is extracted.
X#
XFILE: foreach $file (@ARGV) {
X  print STDERR "doing $file\n" if $Debug > 0;
X  $name="./$file";
X  $name=~s#^.*/([^/]*)$#$1#;
X  $ext=$name;
X  $ext=~s/.*(\.[^.]*)$/$1/;
X
X  # Every file starts again from the command line settings, so that
X  # nothing detected for (or left over from) the previous file leaks
X  # into this one.
X  $indoc = 0;
X  $in_para = 0;
X  undef($cmt);
X  undef($ecmt);
X  $cmt = $opt_C if defined($opt_C);
X  $ecmt = $opt_E if defined($opt_E);
X
X  if (!defined($opt_L)) {
X    # no -L: guess the language from this file's extension
X    undef($Lang);
X    $Lang='c' if ($ext =~ m/\.[ch]$/);
X    $Lang='c++' if ($ext =~ m/\.(cc|C|H)$/);
X    $Lang='lisp' if ($ext =~ m/\.el$/);
X  }
X  if (defined($Lang)) {
X    # $cmt and $ecmt are regular expressions anchored at start of line
X    if ($Lang eq 'c') {
X      $cmt = '[/ ]\*';
X      $ecmt = ' *\*/';
X    } elsif ($Lang eq 'c++') {
X      $cmt = '(//|[/ ]\*)';
X      $ecmt = ' *\*/';
X    } elsif ($Lang eq 'lisp') {
X      $cmt = ';+';
X    }
X  }
X  if (!defined($opt_p)) {
X    $ofile = $name;		# we've already stripped dirname
X    $ofile =~ s#\.[^/.]+$##;
X    $ofile .= $oext;
X    print STDERR "Output to $ofile\n" if $Debug > 0;
X    open(STDOUT, "> $ofile") || die "can't redirect STDOUT: $!\n";
X  }
X  if (!open(F, "< $file")) {
X    print STDERR "can't open $file: $!\n";
X    next FILE;
X  }
X  LINE: while (<F>) {
X    # only remove a newline; a final line without one keeps its last
X    # character
X    chop if (m/\n$/);
X    if ($indoc == 0 && m/ NAME:$/) {
X      if (!defined($cmt)) {
X        # Whatever precedes " NAME:" is the comment leader.  It is used
X        # as a regular expression below, so quote its metacharacters;
X        # otherwise a leader such as " *" or "/*" matches every line.
X        $cmt = $_;
X        $cmt =~ s/^(.*) NAME.*/$1/;
X        $cmt =~ s/(\W)/\\$1/g;
X      }
X      $indoc = 1;
X      $in_para = 0;
X      &$do_init;
X    }
X    next if ($indoc == 0);
X    # we are inside doc section
X    if ($_ !~ m@^$cmt@ || (defined($ecmt) && $_ =~ m@^$ecmt@)) {
X      # first line that is not part of the comment: the block is over
X      $indoc = 0;
X      &$do_fini;
X      if (defined($opt_a)) {
X        next LINE;
X      } else {
X        close(F);
X        next FILE;
X      }
X    }
X    # drop the comment leader and at most one following blank
X    s@^$cmt ?@@;
X    $needout = 1;		# the hooks clear this to suppress the line
X    if (m/^[A-Z][A-Za-z _-]+:$/) {
X      &$do_sec;
X    } elsif (m/^[ \t]*$/) {
X      # a blank line ends the paragraph; it is not echoed in -man mode
X      $in_para = 0;
X      if (defined($opt_m)) {
X        $needout = 0;
X      }
X    } else {
X      if ($in_para == 0) {
X        $in_para = 1;
X        &$do_para;
X      }
X      &$do_line;
X    }
X    print "$_\n" if ($needout > 0);
X  }
X  close(F);
X}
Xexit 0;
X
X# Do-nothing hook, used for every stage at which an output format has
X# no work to do (for plain text: $do_fini and $do_para).
Xsub noop {}
X
X# Print the plain text page header: the upper-cased page name with its
X# section, e.g. "CMT2DOC(L)", at both ends of a 65 column line with the
X# section description centred between them.
X# Reads $name, $man_secn and $man_secd; sets $llength and $nm.
Xsub txt_init {
X  local($stop, $col);
X
X  $llength = 65;
X
X  # page name: file name without its extension, in upper case
X  ($nm = $name) =~ s/\.[^.]*$//;
X  $nm =~ tr/a-z/A-Z/;
X  $nm = "$nm($man_secn)";
X
X  print "\n$nm";
X  $col = length($nm);
X
X  # blank fill up to the column where the centred description starts
X  $stop = int(($llength - length($man_secd))/ 2);
X  if ($col < $stop) {
X    print ' ' x ($stop - $col);
X    $col = $stop;
X  }
X  print "$man_secd";
X  $col += length($man_secd);
X
X  # blank fill so that the second copy of the name ends the line
X  $stop = $llength - length($nm);
X  if ($col < $stop) {
X    print ' ' x ($stop - $col);
X  }
X  print "$nm\n\n\n";
X}
X
X# Section heading in plain text mode: lose the trailing ':' (and any
X# blanks in front of the name) from the heading in $_, remember the
X# result as the current section in $sec and end the current paragraph.
Xsub txt_sec {
X  s/ *([A-Z][A-Za-z _-]*):/$1/;
X  $sec = $_;
X  $in_para = 0;
X}
X
X# Ordinary text line in plain text mode (the line is in $_).
X# A line that starts with a [tn]roff request is suppressed by clearing
X# $needout.  With -t, 'word' is reduced to word.  Finally the escapes
X# \\ \' and \" are turned into plain \ ' and ".
Xsub txt_line {
X  if (m/^\.\w+/) {
X    $needout = 0;		# strip nroff commands
X  }
X  if (defined($opt_t)) {
X    # Two steps, so that ``quotes'' and \'word\' are left alone:
X    # first a quoted word at the start of the line, then any quoted
X    # word whose opening quote does not follow a quote or backslash.
X    s/^'([^']+)'/$1/g;
X    s/([^'\\])'([^']+)'/$1$2/g;
X  }
X  s/\\(['"\\])/$1/g;
X}
X
X
X# Start of a manual page in -man mode: print a troff comment saying
X# where the page came from, the .TH title line and the default .PD.
X# Reads $file, $date, $Myname, $name, $man_secn, $man_secd, $org and
X# $defPD; sets $nm to the upper-cased page name.
Xsub man_init {
X  print ".\\\" extracted from $file $date by $Myname\n";
X
X  # page name: file name without its extension, in upper case
X  ($nm = $name) =~ s/\.[^.]*$//;
X  $nm =~ tr/a-z/A-Z/;
X
X  # some tmac.an macros don't support $org HP-UX for example.
X  # But most do.  Just comment out setting of $org above.
X  if (!defined($org)) {
X    print ".TH $nm $man_secn \"$date\" \"$man_secd\"\n";
X  } else {
X    print ".TH $nm $man_secn \"$date\" \"$org\" \"$man_secd\"\n";
X  }
X
X  # just to be sure
X  print ".PD $defPD\n";
X}
X
X# Section heading in -man mode (the heading line is in $_).
X# Closes whatever the previous section left open, records the new
X# section name in $sec and prints the .SH request for it.  The heading
X# line itself is suppressed by clearing $needout.
Xsub man_sec {
X  # undo what the previous section may have left in effect:
X  # indentation, no-fill mode and the tight FILES paragraph spacing
X  &man_indent(0);
X  print ".fi\n" if ($start_para eq '.nf');
X  print ".PD $defPD\n" if ($sec eq 'FILES');
X
X  # new section name: the heading without its trailing ':'
X  ($sec = $_) =~ s/ *([A-Z][A-Za-z _-]*):/$1/;
X
X  # every heading but NAME is preceded by an empty line
X  print "\n" unless ($sec eq 'NAME');
X
X  # a name that contains a blank has to be quoted for .SH
X  if ($sec !~ m/ /) {
X    print ".SH $sec\n";
X  } else {
X    print ".SH \"$sec\"\n";
X  }
X
X  # little or no gap between paragraphs in FILES,
X  # so it looks like it should.
X  print ".PD .1v\n" if ($sec eq 'FILES');
X
X  $needout = 0;
X  $in_para = 0;
X}
X
X# Start of a paragraph in -man mode (its first line is in $_).
X# Picks the paragraph request for the current section, remembers it in
X# $start_para and prints it; a tab-indented paragraph that is not a
X# .TP item also gets its indentation level adjusted.
X# This gets a little messy.
Xsub man_para {
X  if (m/^\.\w+/) {
X    # a [tn]roff command, next line is start of para
X    $in_para=0;
X    return;
X  }
X
X  # The order of the tests matters, and the .PP case looks at the
X  # request that was chosen for the PREVIOUS paragraph.
X  if ($sec =~ m/DESCRIPTION|OPTIONS/ && m/^[ \t]*-/) {
X    $start_para = '.TP';		# an option: tagged paragraph
X  }
X  elsif ($sec eq 'FILES') {
X    $start_para = '.TP 30';		# file name, wide tag
X  }
X  elsif ($sec =~ m/NAME|SYNOPSIS/) {
X    $start_para = '.nf';		# keep the line breaks as written
X  }
X  elsif ($start_para =~ m/\.TP/) {
X    $start_para = '.PP';		# plain text following a tagged item
X  }
X  else {
X    $start_para = '';
X  }
X
X  if ($needout > 0) {
X    print $start_para, "\n";
X  }
X
X  # handle indented paras
X  &man_indent(-1) if (m/^\t/ && $start_para !~ m/\.TP/);
X}
X
X
X# Ordinary text line in -man mode (the line is in $_ and is edited in
X# place; the caller prints it afterwards unless $needout was cleared).
X# we have to do more than we would like here, to
X# set 'bold' and "italics" but not to harm \'words\'
X# \"words\" and ``quotes''.
Xsub man_line {
X  # man_para will have been called once already
X  # so first time in after a new para, $in_para==1.
X  # in here we can set it to other values to indicate
X  # a need to force a new para, or adjust indentation.
X  #   2: the previous line started a tagged item (an option or a FILES
X  #      entry), so the next such line must start a new paragraph;
X  #   3: the previous line was a [tn]roff command, so the indentation
X  #      is re-evaluated from this line.
X  if ($in_para == 3) {
X    &man_indent(-1);
X    $in_para=1;
X  }
X  if (m/^\.\w+/) {
X    # a [tn]roff command: leave the line exactly as it is
X    $in_para=3;
X    return;
X  }
X  # leading white space is not wanted from here on
X  s/^[ \t]*//;
X  if ($sec eq 'FILES') {
X    # we assume file descriptions are formatted
X    # filename\tdescription
X    if (m/^[^\t]+\t+[^\t]+/) {
X      if ($in_para == 2) {
X        &man_para;
X      }
X      $in_para = 2;
X      s/^[ \t]*//;
X      # file name and description go on separate lines
X      s/^([^\t]+)\t+([^\t]+)/\1\n\2/;
X    }
X  } elsif ($sec =~ m/DESCRIPTION|OPTIONS/ && m/^[ \t]*-/) {
X    # a line that starts with '-' is taken to be an option
X    if ($in_para == 2) {
X      &man_para;
X    }
X    $in_para = 2;
X    s/^[ \t]*//;
X    s/\t/ /g;
X    # format options correctly
X    # put the option word in single quotes unless the line already
X    # starts with a quote (the quotes become bold further down) ...
X    s/^([^'" ]+)/'\1'/ if (m/^[^'"]/);
X    # ... and break the line after the option, or after the option
X    # and its "argument", when other text follows on the same line
X    s/^('[^']+' *"[^"]+") *([^'" ])/\1\n\2/;
X    s/^('[^']+') *([^'" ])/\1\n\2/;
X  }
X  s/\t/ /g;
X  if ($sec eq 'SYNOPSIS') {
X    # quote the leading word and, on lines that contain a '[',
X    # every -option as well
X    s/^(\w+)/'\1'/;
X    s/(-\w+)/'\1'/g if (m/\[/);
X  }
X  # a '-' that follows a blank or a quote is written as \-
X  s/([ '"])-/\1\\-/g;
X  s/^"([^"]+)"/\\fI\1\\fR/g;	# "italic"
X  # avoid \"word\"
X  s/([^\\])"([^"]*[^\\])"/\1\\fI\2\\fR/g;	# "italic"
X  # a bit of trickery to avoid ``quotes'' and \'word\'.
X  s/^'([^']+)'/\\fB\1\\fR/g;	# 'bold'
X  s/([^'\\])'([^']+)'/\1\\fB\2\\fR/g;	# 'bold'
X  # now make \['"] into just ' or "
X  s/\\(['"])/\1/g;
X}
X
X# Bring the indentation of the -man output to a given level by printing
X# as many .RS or .RE requests as it takes; $indent holds the current
X# level.  A negative argument means "work the level out from the line
X# in $_": one level less than the number of tabs at its start.
Xsub man_indent {
X  local($want) = @_;
X  local($lead, @cells);
X
X  if ($want < 0) {
X    # Reduce a copy of the line to its leading tabs and split that on
X    # tabs: n tabs give n+1 (empty) fields, at most 10 of them.
X    ($lead = $_) =~ s/^(\t+)[^\t].*/$1/;
X    @cells = split(/\t/, $lead, 10);
X    $want = $#cells - 1;
X  }
X
X  # still negative: there is nothing to go by, leave the level alone
X  return if ($want < 0);
X
X  for (; $indent < $want; $indent++) {
X    print ".RS\n";
X  }
X  for (; $indent > $want; $indent--) {
X    print ".RE\n";
X  }
X}
X
X
X# Return the description of manual section $n (such as '1', '3S' or
X# 'L'), or the 'default' description for a section that is not in the
X# table.  The table is built by init_secd on first use and kept in %s.
Xsub lookup_mansec {
X  local($n) = @_;
X  local($d);
X
X  # Test the hash itself rather than defined(%s): defined() on a hash
X  # is a fatal error in perl 5.22 and later, and "no entries yet" is
X  # exactly the condition we are interested in.
X  %s = &init_secd unless %s;
X
X  $d = $s{$n};
X  $d = $s{'default'} if (!defined($d));
X  $d;
X}
X
X# Build and return (as a list of key/value pairs) the table that maps
X# a manual section name to the description shown in the page header.
X# The entry 'default' is for sections that are not listed.
Xsub init_secd {
X  local(%s);
X
X  %s = (
X    'default', 'MISC. REFERENCE MANUAL PAGES',
X    '1',  'USER COMMANDS ',
X    '1C', 'USER COMMANDS',
X    '1G', 'USER COMMANDS',
X    '1S', 'USER COMMANDS',
X    '1V', 'USER COMMANDS ',
X    '2',  'SYSTEM CALLS',
X    '2V', 'SYSTEM CALLS',
X    '3',  'C LIBRARY FUNCTIONS',
X    '3C', 'COMPATIBILITY FUNCTIONS',
X    '3F', 'FORTRAN LIBRARY ROUTINES',
X    '3K', 'KERNEL VM LIBRARY FUNCTIONS',
X    '3L', 'LIGHTWEIGHT PROCESSES LIBRARY',
X    '3M', 'MATHEMATICAL LIBRARY',
X    '3N', 'NETWORK FUNCTIONS',
X    '3R', 'RPC SERVICES LIBRARY',
X    '3S', 'STANDARD I/O FUNCTIONS',
X    '3V', 'C LIBRARY FUNCTIONS',
X    '3X', 'MISCELLANEOUS LIBRARY FUNCTIONS',
X    '4',  'DEVICES AND NETWORK INTERFACES',
X    '4F', 'PROTOCOL FAMILIES',
X    '4I', 'DEVICES AND NETWORK INTERFACES',
X    '4M', 'DEVICES AND NETWORK INTERFACES',
X    '4N', 'DEVICES AND NETWORK INTERFACES',
X    '4P', 'PROTOCOLS',
X    '4S', 'DEVICES AND NETWORK INTERFACES',
X    '4V', 'DEVICES AND NETWORK INTERFACES',
X    '5',  'FILE FORMATS',
X    '5V', 'FILE FORMATS',
X    '6',  'GAMES AND DEMOS',
X    '7',  'ENVIRONMENTS, TABLES, AND TROFF MACROS',
X    '7V', 'ENVIRONMENTS, TABLES, AND TROFF MACROS',
X    '8',  'MAINTENANCE COMMANDS',
X    '8C', 'MAINTENANCE COMMANDS',
X    '8S', 'MAINTENANCE COMMANDS',
X    '8V', 'MAINTENANCE COMMANDS',
X    'L',  'LOCAL COMMANDS'
X  );
X  %s;
X}
X
X# Return today's date (local time) as "D Month YYYY", for example
X# "5 June 1992".
Xsub get_date {
X  local(@months) = ('January','February','March','April','May',
X                    'June','July','August','September','October',
X                    'November','December');
X  # Keep the time fields local to this sub: $sec in particular is also
X  # the name of the global that holds the current manual section.
X  local($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$idst) =
X    localtime(time);
X
X  # localtime gives the year as the number of years since 1900 (92 for
X  # 1992, 100 for 2000), so add 1900 rather than prefixing a century.
X  $year += 1900;
X
X  "$mday $months[$mon] $year";
X}
!EOF
  if test 13166 -ne `wc -c < cmt2doc.pl`; then
    echo shar: \"cmt2doc.pl\" unpacked with wrong size!
  fi
fi
exit 0
-- 
Simon J. Gerraty        <sjg@zen.void.oz.au>

#include <disclaimer>   /* imagine something _very_ witty here */