#!/bin/sh

#
#	.txt2html.sh -	Process a story-format text file and
#			copy it out to a .html file.
#
#		Author:	Jack C Lipton,  liptonsoup1951@yahoo.com
#
#		Note:	This is fairly naive and doesn't do much
#			formatting;  It's mostly there to provide
#			a wrapper that *isn't* just a <PRE> </PRE>
#			file.
#
#		Note:	Because of the placement of the awk scripts,
#			it's not a good idea to run this as root.
#			Capische?
#

#
#	Scratch files used while the two awk programs are assembled.
#
#	They are created with mktemp(1) rather than from the process id,
#	so another user of the shared temporary directory cannot guess
#	the names or plant a file/symlink there ahead of time.
#
#		AWK1	awk script for "normal" stories
#		AWK2	awk script for poetry (retains lines)
#		TMP	temporary file holding the shared END block
#

AWK1=
AWK2=
TMP=

AWK1=$(mktemp "${TMPDIR:-/tmp}/txt2htmlA1.XXXXXX") &&
AWK2=$(mktemp "${TMPDIR:-/tmp}/txt2htmlA2.XXXXXX") &&
TMP=$(mktemp "${TMPDIR:-/tmp}/txt2htmlT.XXXXXX") || {
	echo "$0: cannot create temporary files" >&2
	rm -f ${AWK1:+"$AWK1"} ${AWK2:+"$AWK2"}
	exit 1
}

#
#	Remove the scratch files on every exit path.  The signal trap
#	just calls exit so that the exit trap (0) does the real work.
#

trap 'rm -f "$AWK1" "$AWK2" "$TMP"' 0
trap 'exit 1' 1 2 15

AUTHNAME="Jack C Lipton"	# Author's name for Copyright section


#
#	This awk fragment is the preface of the actual script and
#	is used to process the headings, which will generate the
#	initial HEAD and top of the BODY
#

#
#	NOTE: the here-document delimiter below is unquoted, so the
#	shell rewrites the text before awk ever sees it:  \$ becomes $,
#	\\" becomes \", and the backquoted date command is replaced by
#	its output once, at the moment the awk script is written out
#	(not once per converted story).
#

cat				>"$AWK1"		<<AWK1SCRIPT
BEGIN	{
		SHSrdr =	1;	# non-zero while the header lines are being read
		skips =		0;	# blank lines seen since the last paragraph break
		crap =		2;	# body lines the rules appended later will discard

		headings =	0;	# number of header lines saved in SHSheader[]
	}
/^$/	{	#
		#	The headers are assumed to start on the first
		#	line, so the first empty line marks their end.
		#	That is the moment the HTML HEAD and the top
		#	of the BODY are written from the values
		#	collected by the header rules further down.
		#

		if ( SHSrdr != 0 ) {	# establish structure
			printf( "<HTML>\n<HEAD>\n\n");
			printf( "<TITLE> %s </TITLE>\n\n", title);

			printf( "<!--ADULTSONLY-->\n");

			printf( "<META NAME=\\"title\\" CONTENT=\\"%s\\" />\n", title);
			printf( "<META NAME=\\"part\\" CONTENT=\\"%s\\" />\n", part);
			printf( "<META NAME=\\"author\\" CONTENT=\\"%s\\" />\n", author);
			printf( "<META NAME=\\"keywords\\" CONTENT=\\"%s\\" />\n", codes);
			printf( "<META NAME=\\"date\\" CONTENT=\\"%s\\" />\n", "`date`");
			printf( "<META NAME=\\"universe\\" CONTENT=\\"%s\\" />\n", universe);
			printf( "<META NAME=\\"summary\\" CONTENT=\\"%s\\" />\n", summary);
			printf( "<META NAME=\\"revision\\" CONTENT=\\"%s\\" />\n", revision);

			printf( "</HEAD>");
			printf( "<BODY>\n\n");

			printf( "<H1 ALIGN=\\"CENTER\\">\n");
			printf( "%s\n", title);

			if ( part != "" ) {
				printf( "(part %s)\n", part);
			}

			printf( "</H1>\n");
			printf( "<CENTER>\n");

			if ( codes != "" ) {
				printf( "<B>codes:</B> %s\t<BR>\n", codes);
			}

			printf( "by <A HREF=\\"mailto:liptonsoup1951@yahoo.com\\">%s</A><BR>\n", author);
			printf( "</CENTER>\n");
			printf( "<BR><BR>\n");

			SHSrdr =	0;	# headers are finished
			next;
		}

		skips++;	# in the body a blank line requests a paragraph break
		next;
	}
(SHSrdr!=0) {	#
		#	Keep every header line verbatim; the END
		#	block replays them at the bottom of the page.
		#

		SHSheader[++headings] =		\$0;
	}
#
#	Header fields.  Each rule joins fields 2..NF with single
#	spaces.  The Author rule stops at the first field that starts
#	with a "<", so a trailing <address> is left out of the name.
#
(SHSrdr!=0) && \$1=="Author:"	{
		author =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			if ( substr( \$f, 1, 1) == "<" ) {
				break;
			}

			author =	author " " \$f;
		}
	}
(SHSrdr!=0) && \$1=="Title:"	{
		title =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			title =		title " " \$f;
		}
	}
(SHSrdr!=0) && \$1=="Part:"	{
		part =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			part =		part " " \$f;
		}
	}
(SHSrdr!=0) && \$1=="Universe:"	{
		universe =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			universe =	universe " " \$f;
		}
	}
(SHSrdr!=0) && \$1=="Summary:"	{
		summary =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			summary =	summary " " \$f;
		}
	}
(SHSrdr!=0) && \$1=="Keywords:"	{
		codes =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			codes =		codes " " \$f;
		}
	}
(SHSrdr!=0) && \$1=="Revision:"	{
		revision =	\$2;
		for ( f = 3 ; f <= NF ; f++ ) {
			revision =	revision " " \$f;
		}
	}
(SHSrdr!=0)	{ next; }	# header lines never reach the body rules
#
#	Recognize my "Fini" line so that it can be centered.  The word
#	must follow a blank or tab and be followed by a blank, a tab or
#	the end of the line.  (Inside a bracket expression a dollar
#	sign is an ordinary character, so the end-of-line case has to
#	be spelled as an alternative.)
#
/[ \t]Fini([ \t]|\$)/	{
		printf( "<CENTER>\n")
		printf( "<B>%s</B>\n", \$0);
		printf( "</CENTER>\n")

		next;
	}
AWK1SCRIPT


#
#	Make a copy to the second awk2 script, used to process
#	poetry (which wants to retain line breaks).  I could've
#	just embedded it in a PRE-formatted text block but that'd
#	be *way* too lazy
#

cat	"$AWK1"					>"$AWK2"


#
#	Normal story line processing.  Knock yourself out.
#

cat				>>"$AWK1"		<<AWK1SCRIPT
#
#	My files usually have a couple of centered headings for
#	the .txt format (which don't make any sense for HTML).
#	Any line that starts with two tabs is dropped, and while
#	the "crap" counter is still positive it is counted down.
#	This is specific to me, so it shouldn't be considered
#	generic.
#
/^\t\t/	{ if ( crap > 0 ) { crap--; } ; next; }
#
#	Extra commentary is marked by a lone "-" as the first
#	field; it is shown as an italicized line of its own.
#	The rule finishes with "next" so the same line is not
#	printed a second time by the main rule below.  As before
#	it still uses up one of the initial "crap" skips.
#
\$1=="-"		{
		if ( crap > 0 ) {
			crap--;
		}

		printf( "<I>%s</I><BR>\n", \$0);
		next;
	}
#
#	Main processing of story lines:
#
	{
		if ( crap > 0 ) {	# still discarding the leading lines
			crap--;
			next;
		}


		#
		#	One or more blank lines were seen since the
		#	last text line: start a new paragraph.
		#

		if ( skips != 0 ) {
			printf( "<P>\n");
			skips =		0;
		}

		print;		# pass the line through directly
	}
AWK1SCRIPT


#
#	2nd AWK script forces per-line format...
#

cat				>>"$AWK2"		<<AWK2SCRIPT
#
#	My files usually have a couple of centered headings for
#	the .txt format (which don't make any sense for HTML).
#	Any line that starts with two tabs is dropped, and while
#	the "crap" counter is still positive it is counted down.
#	This is specific to me, so it shouldn't be considered
#	generic.
#
/^\t\t/	{ if ( crap > 0 ) { crap--; } ; next; }
#
#	Extra commentary is marked by a lone "-" as the first
#	field; it is shown as an italicized line of its own.
#	The rule finishes with "next" so the same line is not
#	printed a second time by the main rule below.  As before
#	it still uses up one of the initial "crap" skips.
#
\$1=="-"		{
		if ( crap > 0 ) {
			crap--;
		}

		printf( "<I>%s</I><BR>\n", \$0);
		next;
	}
#
#	Main processing of poem lines:
#
	{
		if ( crap > 0 ) {	# still discarding the leading lines
			crap--;
			next;
		}


		#
		#	One or more blank lines were seen since the
		#	last text line: start a new paragraph.
		#

		if ( skips != 0 ) {
			printf( "<P>\n");
			skips =		0;
		}


		#
		#	Poetry keeps its line structure, so every
		#	line is closed with an explicit break.
		#

		printf( "%s<BR>\n", \$0);
	}
AWK2SCRIPT


#
#	Ensure proper closure of the text file...
#

#
#	The END block is written to the scratch file first and then
#	appended to both awk scripts.  The scratch file is truncated
#	(">", not ">>") so that anything already sitting at that path
#	cannot leak into the generated programs.
#
#	The here-document is unquoted: the shell substitutes the value
#	of AUTHNAME while the text is written.  It is handed to printf
#	as a %s argument so a "%" in the name is harmless; a double
#	quote or backslash in the name would still break the awk
#	string literal.
#

cat				>"$TMP"		<<CLOSURE
END	{
		#
		#	Close the HTML file "properly": copyright
		#	notice, then the saved header lines inside
		#	a PRE block, then the closing tags.
		#

		printf( "\n<BR> <BR>\n<HR SIZE=5>\n<BR>\n");
		printf( "<B>Copyright (c) 2002 %s</B>;\n", "$AUTHNAME");
		printf( "Derivatives are allowed (and encouraged) but\n");
		printf( "should reference the Title/Author of\n");
		printf( "this source material in some way.\n");
		printf( "\n<BR> <BR>\n<HR SIZE=5>\n<BR>\n<PRE>\n");

		for ( i = 1 ; i <= headings ; i++ ) {
			printf( "%s\n", SHSheader[i]);
		}

		printf( "</PRE>\n");
		printf( "\n\n</BODY>\n</HTML>\n");
	}
CLOSURE


cat		"$TMP"				>>"$AWK1"
cat		"$TMP"				>>"$AWK2"



#
#	Using the scripts above, process each file as it's presented
#

#
#	html_name FILE
#
#	Print the name of the .html file that belongs to FILE:  the
#	final extension of the last path component is replaced by
#	".html".  Dots in directory names are left alone, a name
#	without an extension simply gets ".html" appended, and a
#	leading dot (hidden file) is not treated as an extension.
#

html_name() {
	case	${1##*/}	in
	    ?*.*)	printf '%s.html\n' "${1%.*}"
			;;
	    *)		printf '%s.html\n' "$1"
			;;
	esac
}


SCRIPT=$AWK1			# set default script

#
#	Arguments are handled left to right:  -1 and -2 switch the
#	awk script used for the files that follow, anything else is
#	taken as an input file.  "$@" is quoted so that names with
#	blanks stay in one piece.
#

for	F	in	"$@"
    do

	case	$F	in

	    -1)		SCRIPT=$AWK1	# "normal" story format
			;;

	    -2)		SCRIPT=$AWK2	# "poem" format w/ line breaks
			;;

	    *)		: process a file
			WEBNAME=$(html_name "$F")

			if [ ! -r "$F" ]
			    then
				echo "$0: cannot read $F" >&2
				continue
			fi

			#
			#	An input that already ends in .html
			#	would be truncated by the redirection
			#	before awk could read it.
			#

			if [ "$WEBNAME" = "$F" ]
			    then
				echo "$0: $F would overwrite itself, skipped" >&2
				continue
			fi

			awk	-f "$SCRIPT"	"$F"	>"$WEBNAME"
			;;
	    esac

    done


#
#	All done here, clean up the scripts...
#

# Remove the two generated awk programs and the scratch file.  The names
# are quoted so the cleanup still works if a path contains whitespace.
rm -f	"$AWK1"	"$AWK2"	"$TMP"

exit	0