#!/usr/bin/perl use strict; # | # | OMHPP # | # | The Omino HTML Preprocessor # | # | Like many "simple" projects, this one, too, is born of the # | traditional hubris: # | # | I can do something simple but sufficient for a traditionally complex task. # | # | The task at hand is to support a small manually-maintained website where the # | client can make changes to the content. The design goal is to give them a # | simplified markup language that can be edited in a normal text editor. # | # | The approach taken here is also traditional -- templates. # | The templates are of exactly two types: flat files and roster files. # | This combined with the ability to ::include:: other files gives a # | moderately flexible toolset for building simple sites. # | # | The omhpp is intended to be used in either of # | two ways: # | way one: As a server-side CGI script, processing templates on-demand # | way two: As a build-time preprocessor, where you might run "make" # | to construct your site. # | # | Expected file suffixes: # | # | foofoo.hpp -- a top level linkable page # | foofoo.hpp_include -- a file for inclusion to a linkable page # | foofoo.hpp_roster -- a file with roster (table) elements # | # | Default file when running as CGI: index.hpp # | # | Directives and Markup # | # | # a comment line starts with a hash # | # | ::include any_filename:: # | includes a file, presumed relative to the current file. # | # | [[any_url and some descriptive text]] # | becomes an link. If the url ends with .hpp or .hpp*, then # | the generated link includes the preprocessor invocation as well # | # | Simple Markup Shortcuts # | *any text to be bolded* may have spaces before and after # | _any text to be italicized_ must have spaces before and after # | # | # | Roster Structure # | # | A "roster" is a list of elements, each formatted similarly. # | A roster file is divided into four distinct sections. Each # | section is separated by a line containing five (or more) # | equals signs. The last section contains all the elements. # | # | We proceed by example: # | # | This is the header of the roster. # |

Cast Of Characters

# | # | =============== # | # this part is repeated for each element # | # | # | # optional field # | # | =============== # |
*::name::* *::description::*
# | That was the table.
# | =============== # | # now the elements of the roster... # | name:Nigel # | description: a detective # | -------- # | name:Samantha # | description: a woman with a mysterious past # | -------- # | name:Nigel # | description: Samantha's brother, who is also named Nigel # | -------- # | name:Fred # | description: A bartender # | # end of file # | Roster markup: blah blah *this is bold* blah # | hah hah _this is italic_ blah # | this that click [[here_or_there.com/that here]] to see what I mean # | (dont include http:// in the URL) my $gDebugBlab = 0; my $TOOL = $0; # who am i? my $COLON_HOLDER = "---f23_COLON---"; my $BRACE_HOLDER = "---f23_BRACE---"; my $STARR_HOLDER = "---f23_STAR---"; sub errorBox { my $e; $e .= "
\n";
    $e .= "Error $TOOL\n";
    while(@_)
    {
        my $x = shift;
        $e .= $x;
    }
    $e .= "\n
\n"; return $e; } sub fail { my $e = errorBox(@_); print $e; #die; } my %gArgs; my $gIsCGI = defined($ENV{QUERY_STRING}); my $gScriptName = $ENV{SCRIPT_NAME}; # if cgi-ing sub fetchArguments() { foreach my $arg (@ARGV) { dPrint("arg: $arg"); if($arg =~ /^--(.*?)=(.*)$/) { $gArgs{$1} = $2; } } my $cgiQueryString = $ENV{QUERY_STRING}; my @cgiArgv = split(/&/,$cgiQueryString); foreach my $arg (@cgiArgv) { if($arg =~ /^(.*?)=(.*)$/) { $gArgs{$1} = $2; $gIsCGI = 1; } } $gDebugBlab = getArg("debug",$gDebugBlab); } sub getArg($$) { my ($arg,$dflt) = (@_); my $result = $gArgs{$arg}; if($result eq "") { $result = $dflt; } return $result; } # +--------------------- # | fetch the contents of a file # | if the file is named "-" then # | fetch stdin # | sub readFile($) { my ($fileName) = (@_); my $fileContents = ""; dPrint("readFile: $fileName"); my $fh; if($fileName eq "-") { $fh = *STDIN; } else { dPrint ("readFile: opening $fileName"); if(! -f $fileName) { return errorBox("cannot open $fileName"); } open F,$fileName || fail("cannot open $fileName"); $fh = *F; } while(my $a = <$fh>) { if($a !~ /^\#.*/) # throw out comments { $fileContents .= $a; } } dPrint ("readFile: read $fileName to get -- $fileContents"); return $fileContents; } # +------ # | For debugging sub dPrint(@) { return unless $gDebugBlab; print "\n"; } # +------ # | For debugging sub printArray($@) { return unless $gDebugBlab; print "\n\n=========================\n"; my $msg = shift; my $count = scalar(@_); print "$msg: $count\n"; my $i = 0; foreach my $e (@_) { print "$i. $e\n"; $i++; } print "\n"; } # +------ # | For debugging sub printHash($\%) { return unless $gDebugBlab; print "\n\n=========================\n"; my $msg = shift; my $hashRef = shift; my $count = scalar(%$hashRef); print "$msg: $count\n"; my $i = 0; foreach my $e (sort(keys(%$hashRef))) { my $v = $$hashRef{$e}; print "$i. $e : $v \n"; $i++; } } my $rosterFieldNamePattern = "[a-zA-Z0-9-_]+"; my $cr = "[\n\r]"; # +----------------- # | Given a roster section, return a # | hash of the fields # | Rosters look like: # | # | fieldname: stuff # | fieldname: stuff # | stuff stuff stuff # | fieldname: stuff # | sub processRosterSection($) { my $rosterSection = shift; my %sectionValues; while($rosterSection =~ /^\s*($rosterFieldNamePattern):(.*)$/s) { my $field = $1; my $value; my $restOfSection = $2; # # Is there another field after this one? if($restOfSection =~ /^(.*?)\n($rosterFieldNamePattern:.*)$/s) { $value = $1; $rosterSection = $2; # the rest of it } else { $value = $restOfSection; # all of it $rosterSection = ""; } $sectionValues{$field} = $value; } return \%sectionValues; } # +------------------ # | Given the last part of a roster file, # | break up the roster elements and return # | an array of roster-field hashes. # | # | Roster elements look like: # | # | ------- # | field: stuff # | # | ------- # | # another section # | field: stuff # | sub processRoster($) { my $rosterSection = shift; my @result; my @rosterSections = split(/${cr}-{5,}${cr}/,$rosterSection); printArray("roster sections: ",@rosterSections); foreach my $rosterSection (@rosterSections) { my $rosterSectionFields = processRosterSection($rosterSection); printHash("roster section",%$rosterSectionFields); push(@result,$rosterSectionFields); } return @result; } # +----------------- # | given a directive and the current stack # | of replacement mappings, perform it. # | it may be a file insertion, or a table, &c &c sub xxxprocessDirective($;@) { my $directiveRaw = shift; my @replacements = @_; my $result = ""; dPrint("processDirective: $directiveRaw"); my @directiveParts = split(/ +/,$directiveRaw); my $directive = shift(@directiveParts); if($directive eq "include") { my $includeFile = shift(@directiveParts); $result = processFile($includeFile,@replacements); } elsif($directive eq "href") { my $linkTo = shift(@directiveParts); if($gIsCGI) { $result = "$gScriptName?src=$linkTo"; } else { # strip trailing .hpp, if that's the suffix if($linkTo =~ /^(.*)\.hpp$/) { $linkTo = $1; } $result = "$linkTo.html"; } } else { $result = errorBox("

Unknown directive ::${directiveRaw}::"); } return $result; } # +------------------- # | Given a block of text and # | optionally one or more hash fields, # | do all the supported expansions, including # | file- and table- inclusions. # | sub doIncludes($$;@) { my $fileName = shift; my $sourceText = shift; my @replacements = @_; my $resultText = ""; while($sourceText =~ /^(.*?)::include ([^\n\r]+?)::(.*)$/s) { $resultText .= $1; my $includeFileName = $2; $sourceText = $3; dPrint("doIncludes, includeFileName=\"$includeFileName\""); $includeFileName = makePathRelative($fileName,$includeFileName); my $includeFileContents = readFile($includeFileName); $includeFileContents = processFile($includeFileName,@replacements); $resultText .= $includeFileContents; } # add on the tail... $resultText .= $sourceText; dPrint("doIncludes: leaving with $resultText"); return $resultText; } sub makePathRelative($$) { my ($ownerPath,$includePath) = (@_); # if ownerpath is, say foo/bar.hpp, and includepath is baz.hpp, # then result is foo/baz.hpp. # # if ownerpath is foo/bar.hpp, and includepath is /baz.hpp, # result is /baz.hpp. if($includePath =~ /^\/.*$/) { return $includePath; } my $prefix = ""; if($ownerPath =~ /^(.*\/).*?$/) { $prefix = $1; } my $result = $prefix . $includePath; return $result; } # +------------------ # | given a file name and optional stack # | of replacements, process it. # | decide whether it is flat or a roster # | sub processFile($;@) { my $fileName = shift; my @replacements = @_; my $result = ""; dPrint("processFile: $fileName"); my $fileContents = readFile($fileName); # allow \: to be a non-interpreted colon $fileContents =~ s/\\:/$COLON_HOLDER/gs; $fileContents =~ s/\\\[/$BRACE_HOLDER/gs; $fileContents =~ s/\\\*/$STARR_HOLDER/gs; $fileContents = doMarkupShortcuts($fileName,$fileContents); if(length($fileContents) == 0) { dPrint("processFile: empty or missing $fileName"); $result = errorBox("empty or missing file: \"$fileName\""); dPrint("processFile: result is $result"); } elsif($fileName =~ /^.*\.hpp_roster$/) { $result = processRosterFile($fileName,$fileContents,@replacements); } else { $result = processFlatFile($fileName,$fileContents,@replacements); } # restore escaped characters, sans escape $result =~ s/$COLON_HOLDER/:/gs; $result =~ s/$BRACE_HOLDER/[/gs; $result =~ s/$STARR_HOLDER/*/gs; return $result; } # +----------------- # | given a filename and an optional stack of # | replacement hashes, read it in, perform # | all substitutions and directives, and # | return the result # | sub processFlatFile($$;@) { my $fileName = shift; my $fileContents = shift; my @replacements = @_; my $result = doIncludes($fileName,$fileContents,@replacements); return $result; } sub processRosterFile($$;@) { my $fileName = shift; my $fileContents = shift; my @replacements = @_; # # The file is in four parts # 1. template for top # 2. template for repeated middle # 3. template for end # 4. roster elements # # split on =========='s. # my @fileParts = split(/${cr}={5,}${cr}/,$fileContents); printArray("file parts: ",@fileParts); my @rosterSections = processRoster($fileParts[3]); my $result = ""; $result .= $fileParts[0]; # TOP of output # repeat for each roster-row foreach my $rosterSectionFields (@rosterSections) { my $html = $fileParts[1]; # html is now in tempate form... foreach my $field (sort(keys(%$rosterSectionFields))) { my $value = $$rosterSectionFields{$field}; my @valueSplit = split(/(${cr}){2,}/,$value); $value = ""; for(my $ii = 0; $ii <= $#valueSplit; $ii++) { my $v = $valueSplit[$ii]; next if $v =~ /^\s*$/; my $pp = $ii == $#valueSplit ? "" : "

"; #cr-cr = small gap? $pp = ""; # DISABLE the double-line p-breaking. $value .= "$v$pp\n"; } $html =~ s/::${field}::/$value/gs; # remove conditionals for this field: $html =~ s/\?\?${field}\?\?//gs; } # # Clean up any stray unfound substitutions # $html =~ s/::[a-z-]+:://gs; # # Do the standard markup shortcuts, like # *bold* and [[URL linktext]] # $html = doMarkupShortcuts($fileName,$html); # # Remove unsatisfied conditionals # $html =~ s/\?\?[a-z-]+\?\?.*?[\n\r]//gs; # and keep it. $result .= $html; } $result .= $fileParts[2]; # BOTTOM of output return $result; } # +---------------------------- # | Perform any handy quote-fixing, *bolding*, and so on. sub doMarkupShortcuts($$) { my ($fileName,$input) = (@_); my $src = $input; my $srcHtmlized = ""; while($src =~ /^(.*?)\[\[([^ ]+?)\s(.+?)\]\](.*)$/s) { dPrint("htmlizing src=$src"); my $textBefore = $1; my $url = $2; my $urlText = $3; $src = $4; # IF its an hpp reference, handle specially # to retain links # TODO: flag to change it into html-ref, if using # as a build-time preprocessor # leading backslash means, treat hpp literally, dont cgi it my $okToEscape = 1; if($url =~ /^\\(.*)$/) { $url = $1; $okToEscape = 0; } if($url =~ /^.+\?.+$/) # dont modify ?queries { $okToEscape = 0; } $url = makePathRelative($fileName,$url); if($okToEscape && $url =~ /(.*)\.hpp/) { $url = "$TOOL?src=$url"; } $srcHtmlized .= $textBefore . "
$urlText"; } $srcHtmlized .= $src; $src = $srcHtmlized; $srcHtmlized = ""; # done. ###$src =~ s/\[\[([^ ]+?)\s(.+?)\]\]/$2<\/a>/gs; # start by protecting things inside of , # so they don't get formatted my @chunks; while($src =~ /^(.*?)(<.*?>)(.*)$/s) { my $openText = $1; my $markupElement = $2; $src = $3; dPrint "openText:$openText"; dPrint "markupElement:$markupElement"; push(@chunks,$openText); push(@chunks,$markupElement); } dPrint "src:$src"; push(@chunks,$src); my $result = ""; # now do each chunk, leaving markup elements intact foreach my $value (@chunks) { dPrint "chunks: value = $value"; if($value =~ /^<.*>$/) { # markup chunk? just append it... $result .= $value; next; } dPrint "a: $value"; # # Bold things in two *asterisks* on same line # italicized things in _two underscores_ on the same line # $value =~ s/\*([^\n]+?)\*/$1<\/b>/gs; $value =~ s/ \_([^\n]+?)\_ / $1<\/i> /gs; dPrint "b: $value"; # # General quoting (what about attributes, oops) # # 2007.03.06 dvb I decided: I dont like quoting # it interferes too much with stylesheets, javascript, &c. #$value =~ s/(\s)\"/$1“/gs; #$value =~ s/\"/”/gs; #$value =~ s/([a-zA-Z])\'s/$1’s/gs; # special chars, too #$value =~ s/\xd5/’/gs; $result .= $value; } dPrint("doMarkupShortcuts $input returns $result"); return $result; } my $passCount = 0; my $failCount = 0; sub assertEq($$$) { my($msg,$v1,$v2) = (@_); if(!($v1 eq $v2)) { print("ERROR mismatch $msg: \nwant $v1\ngot $v2\n"); $failCount++; } else { $passCount++; } } sub assertRelativePath($$$) { my ($path,$includePath,$expectedResultPath) = (@_); my $result = makePathRelative($path,$includePath); assertEq("relative path expected $expectedResultPath got $result",$expectedResultPath,$result); } # miscellaneous runtime tests, invoke with "TEST" as the argument sub tests() { $gDebugBlab = 1; my $value = "this *is* a test"; my $result = doMarkupShortcuts("",$value); assertEq("bold asterisks","this is a test",$result); $result = doMarkupShortcuts("","what _about_ it?"); assertEq("underlines ","what about it?",$result); $result = doMarkupShortcuts("","
my link: [[http://omino.com omino]].
"); assertEq("href ","
my link: omino.
",$result); $result = doMarkupShortcuts("","
my link: [[http://omino.com omino]].
"); assertEq("href & attributes ","
my link: omino.
",$result); $result = doMarkupShortcuts("","[[foo.hpp go]]"); assertEq("hpp href","go",$result); $result = doMarkupShortcuts("here/doc.hpp","[[deeper/foo.hpp go]]"); assertEq("hpp href","go",$result); # check use of leading \ on url to prevent cgi-ifying $result = doMarkupShortcuts("here/doc.hpp","[[\\deeper/foo.hpp go]]"); assertEq("escaped hpp href","go",$result); # works for multiple lines? $value = < what about *this* ? out EOP ; my $expected = < what about this ? out EOP ; $result = doMarkupShortcuts("",$value); assertEq("line breaks",$expected,$result); # relative path tests assertRelativePath("here.hpp","subdir/there.hpp","subdir/there.hpp"); assertRelativePath("subdir/here.hpp","subdir2/there.hpp","subdir/subdir2/there.hpp"); assertRelativePath("subdir/here.hpp","/subdir2/there.hpp","/subdir2/there.hpp"); # REPORT the results. print "$passCount tests pass\n"; print "$failCount tests fail\n"; } sub main(@) { $TOOL = $0; # who am i? $TOOL = $1 if($TOOL =~ /^.*\/(.*)?$/); # just the name, url is relative to here. if( $_[0] eq "TEST") { tests(); exit(0); } print "content-type: text/html\n\n" if $gIsCGI; fetchArguments(); my $sourceFileName = getArg("src","index.hpp"); if($sourceFileName =~ /^.*\/$/) # ends with slash? { $sourceFileName .= "index.hpp"; } my $result = processFile($sourceFileName); print $result; print "\n"; } main(@ARGV); #eof