#!/bin/sh
#
# .txt2html.sh - Process a story-format text file and
# copy it out to a .html file.
#
# Author: Jack C Lipton, liptonsoup1951@yahoo.com
#
# Note: This is fairly naive and doesn't do much
# formatting; It's mostly there to provide
# a wrapper that *isn't* just a <PRE> </PRE>
# file.
#
# Note: Because of the placement of the awk scripts,
# it's not a good idea to run this as root.
# Capische?
#
#
# Scratch files.  They are created with mktemp(1) rather than with a
# guessable /tmp/name.PID path, so another user cannot pre-create (or
# symlink) them, and they always start out empty.  They are removed
# again whenever the script exits, including on HUP/INT/TERM.
#
AWK1=$(mktemp /tmp/txt2htmlA1.XXXXXX) || exit 1   # awk script for "normal" stories
AWK2=$(mktemp /tmp/txt2htmlA2.XXXXXX) || {        # awk script for poetry (retains lines)
    rm -f -- "$AWK1"
    exit 1
}
TMP=$(mktemp /tmp/txt2htmlT.XXXXXX) || {          # temporary file ...
    rm -f -- "$AWK1" "$AWK2"
    exit 1
}
AUTHNAME="Jack C Lipton"                          # Author's name for Copyright section
trap 'rm -f -- "$AWK1" "$AWK2" "$TMP"' EXIT
trap 'exit 1' HUP INT TERM
#
# This awk fragment is the preface of the actual script and
# is used to process the headings, which will generate the
# initial HEAD and top of the BODY.
#
# The here-document is unquoted on purpose: the shell fills in
# the current date while the awk script is being written.  That
# is why every awk dollar sign and every embedded double quote
# below carries a backslash.
#
cat >"$AWK1" <<AWK1SCRIPT
BEGIN {
    SHSrdr = 1;     # non-zero while the header lines are being read
    skips = 0;      # non-zero once a blank body line has been seen
    crap = 2;       # leading body lines that are still to be dropped
}
/^\$/ {
    #
    # Assuming the headers start on the first line, the first
    # blank line tells us they are done.  That is the moment
    # the HTML header gets built.
    #
    if ( SHSrdr != 0 ) {    # establish structure
        printf( "<HTML>\n<HEAD>\n\n");
        printf( "<TITLE> %s </TITLE>\n\n", title);
        printf( "<!--ADULTSONLY-->\n");
        printf( "<META NAME=\\"title\\" CONTENT=\\"%s\\" />\n", title);
        printf( "<META NAME=\\"part\\" CONTENT=\\"%s\\" />\n", part);
        printf( "<META NAME=\\"author\\" CONTENT=\\"%s\\" />\n", author);
        printf( "<META NAME=\\"keywords\\" CONTENT=\\"%s\\" />\n", codes);
        printf( "<META NAME=\\"date\\" CONTENT=\\"%s\\" />\n", "$(date)");
        printf( "<META NAME=\\"universe\\" CONTENT=\\"%s\\" />\n", universe);
        printf( "<META NAME=\\"summary\\" CONTENT=\\"%s\\" />\n", summary);
        printf( "<META NAME=\\"revision\\" CONTENT=\\"%s\\" />\n", revision);
        printf( "</HEAD>");
        printf( "<BODY>\n\n");
        printf( "<H1 ALIGN=\\"CENTER\\">\n");
        printf( "%s\n", title);
        if ( part != "" ) {
            printf( "(part %s)\n", part);
        }
        printf( "</H1>\n");
        printf( "<CENTER>\n");
        if ( codes != "" ) {
            printf( "<B>codes:</B> %s\t<BR>\n", codes);
        }
        printf( "by <A HREF=\\"mailto:liptonsoup1951@yahoo.com\\">%s</A><BR>\n", author);
        printf( "</CENTER>\n");
        printf( "<BR><BR>\n");
        SHSrdr = 0;
        next;
    }
    skips++;        # blank lines delimit paragraphs
    next;
}
(SHSrdr!=0) {
    #
    # Capture each header line verbatim so that it can be
    # appended at the end of the page...
    #
    SHSheader[++headings] = \$0;
    #
    # The value of a header is every field after the keyword,
    # joined back together with single blanks.  For the Author
    # line we stop at the first field that opens with "<" so
    # the mail address stays out of the displayed name.
    #
    value = \$2;
    for ( f = 3 ; f <= NF ; f++ ) {
        if ( \$1 == "Author:" && substr( \$f, 1, 1) == "<" ) {
            break;
        }
        value = value " " \$f;
    }
    if ( \$1 == "Author:" )   { author = value; }
    if ( \$1 == "Title:" )    { title = value; }
    if ( \$1 == "Part:" )     { part = value; }
    if ( \$1 == "Universe:" ) { universe = value; }
    if ( \$1 == "Summary:" )  { summary = value; }
    if ( \$1 == "Keywords:" ) { codes = value; }
    if ( \$1 == "Revision:" ) { revision = value; }
    next;           # header lines never reach the body rules
}
#
# Recognize my "Fini" line so that it can be centered.  The word
# has to follow a blank and be followed by a blank or by the end
# of the line.  (Inside a bracket expression the dollar sign is
# only a literal character, so the end of line needs its own
# alternative.)
#
/[ ]Fini([\$ ]|\$)/ {
    printf( "<CENTER>\n");
    printf( "<B>%s</B>\n", \$0);
    printf( "</CENTER>\n");
    next;
}
AWK1SCRIPT
#
# Make a copy to the second awk2 script, used to process
# poetry (which wants to retain line breaks). I could've
# just embedded it in a PRE-formatted text block but that'd
# be *way* too lazy
#
# Snapshot the preface for the poetry script before the story-only
# rules are appended to the first script.
cat "$AWK1" >"$AWK2"
#
# Normal story line processing.  The here-document is unquoted, so
# awk dollar signs are written with a backslash in front of them.
#
cat >>"$AWK1" <<AWK1SCRIPT
#
# My files usually have a couple of centered headings for
# the .txt format (which don't make any sense for HTML)
# so this section about "crap" smokes it.  Every line that
# starts with a blank is dropped.  This is specific to me,
# so it shouldn't be considered generic.
#
/^ / { if ( crap > 0 ) { crap--; } ; next; }
#
# I've inserted some extra commentary with a single "-" in
# front of it; such a line is displayed italicized.  The rule
# ends with "next" so the same line is not ALSO passed through
# by the main rule below (it used to come out twice).  It still
# counts against the leading-line counter, and a pending
# paragraph break is emitted in front of it.
#
\$1=="-" {
    if ( crap > 0 ) {
        crap--;
    } else if ( skips != 0 ) {
        printf( "<P>\n");
        skips = 0;
    }
    printf( "<I>%s</I><BR>\n", \$0);
    next;
}
#
# Main processing of story lines:
#
{
    if ( crap > 0 ) {       # still inside the leading lines to skip
        crap--;
        next;
    }
    #
    # A blank line between paragraphs is handy
    # so I can insert paragraph breaks...
    #
    if ( skips != 0 ) {
        printf( "<P>\n");
        skips = 0;
    }
    print;                  # pass the line through directly
}
AWK1SCRIPT
#
# 2nd AWK script forces per-line format...
#
# Append the poetry rules.  The here-document is unquoted, so awk
# dollar signs are written with a backslash in front of them.
cat >>"$AWK2" <<AWK2SCRIPT
#
# My files usually have a couple of centered headings for
# the .txt format (which don't make any sense for HTML)
# so this section about "crap" smokes it.  Every line that
# starts with a blank is dropped.  This is specific to me,
# so it shouldn't be considered generic.
#
/^ / { if ( crap > 0 ) { crap--; } ; next; }
#
# I've inserted some extra commentary with a single "-" in
# front of it; such a line is displayed italicized.  The rule
# ends with "next" so the same line is not ALSO emitted by the
# per-line rule below (it used to come out twice).  It still
# counts against the leading-line counter, and a pending
# paragraph break is emitted in front of it.
#
\$1=="-" {
    if ( crap > 0 ) {
        crap--;
    } else if ( skips != 0 ) {
        printf( "<P>\n");
        skips = 0;
    }
    printf( "<I>%s</I><BR>\n", \$0);
    next;
}
#
# Per-line processing: every remaining line keeps its own break.
#
{
    if ( crap > 0 ) {       # still inside the leading lines to skip
        crap--;
        next;
    }
    #
    # A blank line between paragraphs is handy
    # so I can insert paragraph breaks...
    #
    if ( skips != 0 ) {
        printf( "<P>\n");
        skips = 0;
    }
    #
    # We make sure we close the line
    #
    printf( "%s<BR>\n", \$0);
}
AWK2SCRIPT
#
# Ensure proper closure of the text file...
#
# The trailer is written with ">" (truncate), not ">>": whatever a
# scratch file of the same name may already contain must not end up
# in front of the END rule.  The here-document is unquoted so the
# shell can fill in the author's name.
cat >"$TMP" <<CLOSURE
END {
    #
    # Close the HTML file "properly"
    #
    printf( "\n<BR> <BR>\n<HR SIZE=5>\n<BR>\n");
    #
    # The name is handed over as an argument instead of being
    # spliced into the format, so a percent sign in it is harmless.
    #
    printf( "<B>Copyright (c) 2002 %s</B>;\n", "$AUTHNAME");
    printf( "Derivatives are allowed (and encouraged) but\n");
    printf( "should reference the Title/Author of\n");
    printf( "this source material in some way.\n");
    #
    # Replay the header lines captured while reading the file.
    #
    printf( "\n<BR> <BR>\n<HR SIZE=5>\n<BR>\n<PRE>\n");
    for ( i = 1 ; i <= headings ; i++ ) {
        printf( "%s\n", SHSheader[i]);
    }
    printf( "</PRE>\n");
    printf( "\n\n</BODY>\n</HTML>\n");
}
CLOSURE
# Both generated scripts get the same trailer.
cat "$TMP" >>"$AWK1"
cat "$TMP" >>"$AWK2"
#
# Using the scripts above, process each file as it's presented
#
#
# txt2html_corename PATH
#   Print PATH with the last extension of its final component removed.
#   Dots in directory names ("./", "dir.d/") and a leading dot of a
#   hidden file are left alone; a name without extension is printed
#   unchanged.
#
txt2html_corename() {
    case ${1##*/} in
    ?*.*)   printf '%s\n' "${1%.*}" ;;
    *)      printf '%s\n' "$1" ;;
    esac
}

SCRIPT=$AWK1    # set default script
# "$@" is quoted so file names with blanks or glob characters survive.
for F in "$@"
do
    case $F in
    -1) SCRIPT=$AWK1    # "normal" story format
        ;;
    -2) SCRIPT=$AWK2    # "poem" format w/ line breaks
        ;;
    *)  # anything else is a file to process
        if [ ! -r "$F" ] || [ -d "$F" ]; then
            printf '%s: cannot read %s, skipped\n' "$0" "$F" >&2
            continue
        fi
        CORENAME=$(txt2html_corename "$F")
        WEBNAME=$CORENAME.html
        # The redirection truncates its target before awk reads
        # anything, so never convert a file onto itself.
        if [ "$WEBNAME" = "$F" ]; then
            printf '%s: %s would overwrite itself, skipped\n' "$0" "$F" >&2
            continue
        fi
        awk -f "$SCRIPT" "$F" >"$WEBNAME"
        ;;
    esac
done
#
# All done here, clean up the scripts...
#
# Remove the generated awk scripts and the trailer scratch file.
# The names are quoted and preceded by "--" so that an unusual
# path can neither be split into words nor be taken for an option.
rm -f -- "$AWK1" "$AWK2" "$TMP"
exit 0