From bcbf441e09fb502cf64924ff2530fa144bdf52c5 Mon Sep 17 00:00:00 2001 From: Andreas Brachold Date: Mon, 13 Aug 2007 18:41:27 +0000 Subject: * Move files to trunk --- lib/HTML/TextToHTML.pm | 5266 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 5266 insertions(+) create mode 100644 lib/HTML/TextToHTML.pm (limited to 'lib/HTML') diff --git a/lib/HTML/TextToHTML.pm b/lib/HTML/TextToHTML.pm new file mode 100644 index 0000000..82f3dc2 --- /dev/null +++ b/lib/HTML/TextToHTML.pm @@ -0,0 +1,5266 @@ +package HTML::TextToHTML; +use 5.006_001; +use strict; +#------------------------------------------------------------------------ + +=head1 NAME + +HTML::TextToHTML - convert plain text file to HTML. + +=head1 VERSION + +This describes version B<2.42> of HTML::TextToHTML. + +=cut + +our $VERSION = '2.42'; + +=head1 SYNOPSIS + + From the command line: + + txt2html I + + From Scripts: + + use HTML::TextToHTML; + + # create a new object + my $conv = new HTML::TextToHTML(); + + # convert a file + $conv->txt2html(infile=>[$text_file], + outfile=>$html_file, + title=>"Wonderful Things", + mail=>1, + ]); + + # reset arguments + $conv->args(infile=>[], mail=>0); + + # convert a string + $newstring = $conv->process_chunk($mystring) + +=head1 DESCRIPTION + +HTML::TextToHTML converts plain text files to HTML. The txt2html script +uses this module to do the same from the command-line. + +It supports headings, tables, lists, simple character markup, and +hyperlinking, and is highly customizable. It recognizes some of the +apparent structure of the source document (mostly whitespace and +typographic layout), and attempts to mark that structure explicitly +using HTML. The purpose for this tool is to provide an easier way of +converting existing text documents to HTML format, giving something nicer +than just whapping the text into a big PRE block. + +=head2 History + +The original txt2html script was written by Seth Golub (see +http://www.aigeek.com/txt2html/), and converted to a perl module by +Kathryn Andersen (see http://www.katspace.com/tools/text_to_html/) and +made into a sourceforge project by Sun Tong (see +http://sourceforge.net/projects/txt2html/). Earlier versions of the +HTML::TextToHTML module called the included script texthyper so as not +to clash with the original txt2html script, but now the projects have +all been merged. + +=head1 REQUIRES + +HTML::TextToHTML requires Perl 5.6.1 or later. + +For installation, it needs: + + Module::Build + +The txt2html script needs: + + Getopt::Long + Getopt::ArgvFile + Pod::Usage + File::Basename + +For testing, it also needs: + + Test::More + +For debugging, it also needs: + + Data::Dumper + +=head1 INSTALLATION + +Make sure you have the dependencies installed first! +(see REQUIRES above) + +Some of those modules come standard with more recent versions of perl, +but I thought I'd mention them anyway, just in case you may not have +them. + +If you don't know how to install these, try using the CPAN module, an +easy way of auto-installing modules from the Comprehensive Perl Archive +Network, where the above modules reside. +Do "perldoc perlmodinstall" or "perldoc CPAN" for more information. + +To install this module type the following: + + perl Build.PL + ./Build + ./Build test + ./Build install + +Or, if you're on a platform (like DOS or Windows) that doesn't like the +"./" notation, you can do this: + + perl Build.PL + perl Build + perl Build test + perl Build install + +In order to install somewhere other than the default, such as +in a directory under your home directory, like "/home/fred/perl" +go + + perl Build.PL --install_base /home/fred/perl + +as the first step instead. + +This will install the files underneath /home/fred/perl. + +You will then need to make sure that you alter the PERL5LIB variable to +find the modules, and the PATH variable to find the script. + +Therefore you will need to change: +your path, to include /home/fred/perl/script (where the script will be) + + PATH=/home/fred/perl/script:${PATH} + +the PERL5LIB variable to add /home/fred/perl/lib + + PERL5LIB=/home/fred/perl/lib:${PERL5LIB} + +Note that the system links dictionary will be installed as +"/home/fred/perl/share/txt2html/txt2html.dict" + +If you want to install in a temporary install directory (such as +if you are building a package) then instead of going + + perl Build install + +go + + perl Build install destdir=/my/temp/dir + +and it will be installed there, with a directory structure under +/my/temp/dir the same as it would be if it were installed plain. +Note that this is NOT the same as setting --install_base, because +certain things are done at build-time which use the install_base info. + +See "perldoc perlrun" for more information on PERL5LIB, and +see "perldoc Module::Build" for more information on +installation options. + +=head1 OPTIONS + +All arguments can be set when the object is created, and further options +can be set when calling the actual txt2html method. Arguments +to methods can take either a hash of arguments, or a reference to an +array. Note that the reference-to-array method is depricated and is only +retained for backwards compatibility. + +Note that all option-names must match exactly -- no abbreviations are +allowed. + +The arguments get treated differently depending on whether they are +given in a hash or a reference to an array. When the arguments are +in a hash, the argument-keys are expected to have values matching +those required for that argument -- whether that be a boolean, a string, +a reference to an array or a reference to a hash. These will replace +any value for that argument that might have been there before. + +When the arguments are in a reference to an array, it is treated +somewhat as if it were a command-line: option names are expected to +start with '--' or '-', boolean options are set to true as soon as the +option is given (no value is expected to follow), boolean options with +the word "no" prepended set the option to false, string options are +expected to have a string value following, and those options which are +internally arrays or hashes are treated as cumulative; that is, the +value following the --option is added to the current set for that +option, to add more, one just repeats the --option with the next value, +and in order to reset that option to empty, the special value of "CLEAR" +must be added to the list. + +NOTE: the reference-to-an-array usage is DEPRECATED and will be removed +in the future. + +=over + +=item append_file + + append_file=>I + +If you want something appended by default, put the filename here. +The appended text will not be processed at all, so make sure it's +plain text or decent HTML. i.e. do not have things like: + Mary Andersen Ekitty@example.comE +but instead, have: + Mary Andersen <kitty@example.com> + +(default: nothing) + +=item append_head + + append_head=>I + +If you want something appended to the head by default, put the filename here. +The appended text will not be processed at all, so make sure it's +plain text or decent HTML. i.e. do not have things like: + Mary Andersen Ekitty@example.comE +but instead, have: + Mary Andersen <kitty@example.com> + +(default: nothing) + +=item body_deco + + body_deco=>I + +Body decoration string: a string to be added to the BODY tag so that +one can set attributes to the BODY (such as class, style, bgcolor etc) +For example, "class='withimage'". + +=item bold_delimiter + + bold_delimiter=>I + +This defines what character (or string) is taken to be the delimiter of +text which is to be interpreted as bold (that is, to be given a STRONG +tag). If this is empty, then no bolding of text will be done. +(default: #) + +=item bullets + + bullets=>I + +This defines what single characters are taken to be "bullet" characters +for unordered lists. Note that because this is used as a character +class, if you use '-' it must come first. +(default:-=o*\267) + +=item bullets_ordered + + bullets_ordered=>I + +This defines what single characters are taken to be "bullet" placeholder +characters for ordered lists. Ordered lists are normally marked by +a number or letter followed by '.' or ')' or ']' or ':'. If an ordered +bullet is used, then it simply indicates that this is an ordered list, +without giving explicit numbers. + +Note that because this is used as a character class, if you use '-' it +must come first. +(default:nothing) + +=item caps_tag + + caps_tag=>I + +Tag to put around all-caps lines +(default: STRONG) +If an empty tag is given, then no tag will be put around all-caps lines. + +=item custom_heading_regexp + + custom_heading_regexp=>I + +Add a regexp for headings. Header levels are assigned by regexp +in order seen When a line matches a custom header regexp, it is tagged as +a header. If it's the first time that particular regexp has matched, +the next available header level is associated with it and applied to +the line. Any later matches of that regexp will use the same header level. +Therefore, if you want to match numbered header lines, you could use +something like this: + + -H '^ *\d+\. \w+' -H '^ *\d+\.\d+\. \w+' -H '^ *\d+\.\d+\.\d+\. \w+' + +Then lines like + + " 1. Examples " + " 1.1. Things" + and " 4.2.5. Cold Fusion" + +Would be marked as H1, H2, and H3 (assuming they were found in that +order, and that no other header styles were encountered). +If you prefer that the first one specified always be H1, the second +always be H2, the third H3, etc, then use the -EH/--explicit-headings +option. + +This is a multi-valued option. + +(default: none) + +=item debug + + debug=>1 + +Enable copious script debugging output (don't bother, this is for the +developer) (default: false) + +=item default_link_dict + + default_link_dict=>I + +The name of the default "user" link dictionary. +(default: "$ENV{'HOME'}/.txt2html.dict" -- this is the same as for +the txt2html script. If there is no $ENV{HOME} then it is just '.txt2html.dict') + +=item demoronize + + demoronize=>1 + +Convert Microsoft-generated character codes that are non-ISO codes into +something more reasonable. +(default:true) + +=item dict_debug + + dict_debug=>I + +Debug mode for link dictionaries Bitwise-Or what you want to see: + 1: The parsing of the dictionary + 2: The code that will make the links + 4: When each rule matches something + 8: When each tag is created + +(default: 0) + +=item doctype + + doctype=>I + +This gets put in the DOCTYPE field at the top of the document, unless it's +empty. (default : "-//W3C//DTD HTML 3.2 Final//EN") +If --xhtml is true, the contents of this is ignored, unless it's +empty, in which case no DOCTYPE declaration is output. + +=item eight_bit_clean + + eight_bit_clean=>1 + +disable Latin-1 character entity naming +(default: false) + +=item escape_HTML_chars + + escape_HTML_chars=>1 + +turn & E E into & > < +(default: true) + +=item explicit_headings + + explicit_headings=>1 + +Don't try to find any headings except the ones specified in the +--custom_heading_regexp option. +Also, the custom headings will not be assigned levels in the order they +are encountered in the document, but in the order they are specified on +the command line. +(default: false) + +=item extract + + extract=>1 + +Extract Mode; don't put HTML headers or footers on the result, just +the plain HTML (thus making the result suitable for inserting into +another document (or as part of the output of a CGI script). +(default: false) + +=item hrule_min + + hrule_min=>I + +Min number of ---s for an HRule. +(default: 4) + +=item indent_width + + indent_width=>I + +Indents this many spaces for each level of a list. +(default: 2) + +=item indent_par_break + + indent_par_break=>1 + +Treat paragraphs marked solely by indents as breaks with indents. +That is, instead of taking a three-space indent as a new paragraph, +put in a
and three non-breaking spaces instead. +(see also --preserve_indent) +(default: false) + +=item infile + + infile=>\@my_files + infile=>['chapter1.txt', 'chapter2.txt'] + "--infile", "chapter1.txt", "--infile", "chapter2.txt" + +The name of the input file(s). When the arguments are given as a hash, +this expects a reference to an array of filenames. When the arguments +are given as a reference to an array, then the "--infile" option must +be repeated for each new file added to the list. If you want to reset +the list to be empty, give the special value of "CLEAR". + +The special filename '-' designates STDIN. + +See also L and L. + +(default:-) + +=item inhandle + + inhandle=>\@my_handles + inhandle=>[\*MYINHANDLE, \*STDIN] + +An array of input filehandles; use this instead of +L or L to use a filehandle or filehandles +as input. + +=item instring + + instring=>\@my_strings + instring=>[$string1, $string2] + +An array of input strings; use this instead of +L or L to use a string or strings +as input. + +=item italic_delimiter + + italic_delimiter=>I + +This defines what character (or string) is taken to be the delimiter of +text which is to be interpreted as italic (that is, to be given a EM +tag). If this is empty, no italicising of text will be done. +(default: *) + +=item links_dictionaries + + links_dictionaries=>\@my_link_dicts + links_dictionaries=>['url_links.dict', 'format_links.dict'] + "--links_dictionaries", "url_links.dict", "--links_dictionaries", "format_links.dict" + +File(s) to use as a link-dictionary. There can be more than one of +these. These are in addition to the Global Link Dictionary and the User +Link Dictionary. When the arguments are given as a hash, this expects a +reference to an array of filenames. When the arguments are given as a +reference to an array, then the "--links_dictionaries" option must be +repeated for each new file added to the list. If you want to reset the +list to be empty, give the special value of "CLEAR". + +=item link_only + + link_only=>1 + +Do no escaping or marking up at all, except for processing the links +dictionary file and applying it. This is useful if you want to use +the linking feature on an HTML document. If the HTML is a +complete document (includes HTML,HEAD,BODY tags, etc) then you'll +probably want to use the --extract option also. +(default: false) + +=item lower_case_tags + + lower_case_tags=>1 + +Force all tags to be in lower-case. + +=item mailmode + + mailmode=>1 + +Deal with mail headers & quoted text. The mail header paragraph is +given the class 'mail_header', and mail-quoted text is given the class +'quote_mail'. +(default: false) + +=item make_anchors + + make_anchors=>0 + +Should we try to make anchors in headings? +(default: true) + +=item make_links + + make_links=>0 + +Should we try to build links? If this is false, then the links +dictionaries are not consulted and only structural text-to-HTML +conversion is done. (default: true) + +=item make_tables + + make_tables=>1 + +Should we try to build tables? If true, spots tables and marks them up +appropriately. See L for information on how tables +should be formatted. + +This overrides the detection of lists; if something looks like a table, +it is taken as a table, and list-checking is not done for that +paragraph. + +(default: false) + +=item min_caps_length + + min_caps_length=>I + +min sequential CAPS for an all-caps line +(default: 3) + +=item outfile + + outfile=>I + +The name of the output file. If it is "-" then the output goes +to Standard Output. +(default: - ) + +=item outhandle + +The output filehandle; if this is given then the output goes +to this filehandle instead of to the file given in L. + +=item par_indent + + par_indent=>I + +Minumum number of spaces indented in first lines of paragraphs. + Only used when there's no blank line +preceding the new paragraph. +(default: 2) + +=item preformat_trigger_lines + + preformat_trigger_lines=>I + +How many lines of preformatted-looking text are needed to switch to
+          <= 0 : Preformat entire document
+             1 : one line triggers
+          >= 2 : two lines trigger
+
+(default: 2)
+
+=item endpreformat_trigger_lines
+
+    endpreformat_trigger_lines=>I
+
+How many lines of unpreformatted-looking text are needed to switch from 
+           <= 0 : Never preformat within document
+              1 : one line triggers
+           >= 2 : two lines trigger
+(default: 2)
+
+NOTE for preformat_trigger_lines and endpreformat_trigger_lines:
+A zero takes precedence.  If one is zero, the other is ignored.
+If both are zero, entire document is preformatted.
+
+=item preformat_start_marker
+
+    preformat_start_marker=>I
+
+What flags the start of a preformatted section if --use_preformat_marker
+is true.
+
+(default: "^(:?(:?<)|<)PRE(:?(:?>)|>)\$")
+
+=item preformat_end_marker
+
+    preformat_end_marker=>I
+
+What flags the end of a preformatted section if --use_preformat_marker
+is true.
+
+(default: "^(:?(:?<)|<)/PRE(:?(:?>)|>)\$")
+
+=item preformat_whitespace_min
+
+    preformat_whitespace_min=>I
+
+Minimum number of consecutive whitespace characters to trigger
+normal preformatting. 
+NOTE: Tabs are expanded to spaces before this check is made.
+That means if B is 8 and this is 5, then one tab may be
+expanded to 8 spaces, which is enough to trigger preformatting.
+(default: 5)
+
+=item prepend_file
+
+    prepend_file=>I
+
+If you want something prepended to the processed body text, put the
+filename here.  The prepended text will not be processed at all, so make
+sure it's plain text or decent HTML.
+
+(default: nothing)
+
+=item preserve_indent
+
+    preserve_indent=>1
+
+Preserve the first-line indentation of paragraphs marked with indents
+by replacing the spaces of the first line with non-breaking spaces.
+(default: false)
+
+=item short_line_length
+
+    short_line_length=>I
+
+Lines this short (or shorter) must be intentionally broken and are kept
+that short.
+(default: 40)
+
+=item style_url
+
+    style_url=>I
+
+This gives the URL of a stylesheet; a LINK tag will be added to the
+output.
+
+=item tab_width
+
+    tab_width=>I
+
+How many spaces equal a tab?
+(default: 8)
+
+=item table_type
+    
+    table_type=>{ ALIGN=>0, PGSQL=>0, BORDER=>1, DELIM=>0 }
+
+This determines which types of tables will be recognised when "make_tables"
+is true.  The possible types are ALIGN, PGSQL, BORDER and DELIM.
+(default: all types are true)
+
+=item title
+
+    title=>I
+
+You can specify a title.  Otherwise it will use a blank one.
+(default: nothing)
+
+=item titlefirst
+
+    titlefirst=>1
+
+Use the first non-blank line as the title.
+
+=item underline_length_tolerance
+
+    underline_length_tolerance=>I<n>
+
+How much longer or shorter can underlines be and still be underlines?
+(default: 1)
+
+=item underline_offset_tolerance
+
+    underline_offset_tolerance=>I<n>
+
+How far offset can underlines be and still be underlines?
+(default: 1)
+
+=item unhyphenation
+
+    unhyphenation=>0
+
+Enables unhyphenation of text.
+(default: true)
+
+=item use_mosaic_header
+
+    use_mosaic_header=>1
+
+Use this option if you want to force the heading styles to match what Mosaic
+outputs.  (Underlined with "***"s is H1,
+with "==="s is H2, with "+++" is H3, with "---" is H4, with "~~~" is H5
+and with "..." is H6)
+This was the behavior of txt2html up to version 1.10.
+(default: false)
+
+=item use_preformat_marker
+
+    use_preformat_marker=>1
+
+Turn on preformatting when encountering "<PRE>" on a line by itself, and turn
+it off when there's a line containing only "</PRE>".
+When such preformatted text is detected, the PRE tag will be given the
+class 'quote_explicit'.
+(default: off)
+
+=item xhtml
+
+    xhtml=>1
+
+Try to make the output conform to the XHTML standard, including
+closing all open tags and marking empty tags correctly.  This
+turns on --lower_case_tags and overrides the --doctype option.
+Note that if you add a header or a footer file, it is up to you
+to make it conform; the header/footer isn't touched by this.
+Likewise, if you make link-dictionary entries that break XHTML,
+then this won't fix them, except to the degree of putting all tags
+into lower-case.
+
+=back
+
+=head1 METHODS
+
+=cut
+
+#------------------------------------------------------------------------
+
+require Exporter;
+use Data::Dumper;
+
+our $PROG = 'HTML::TextToHTML';
+
+#------------------------------------------------------------------------
+use constant TEXT_TO_HTML => "TEXT_TO_HTML";
+
+########################################
+# Definitions  (Don't change these)
+#
+
+# These are just constants I use for making bit vectors to keep track
+# of what modes I'm in and what actions I've taken on the current and
+# previous lines.
+
+our $NONE         = 0;
+our $LIST         = 1;
+our $HRULE        = 2;
+our $PAR          = 4;
+our $PRE          = 8;
+our $END          = 16;
+our $BREAK        = 32;
+our $HEADER       = 64;
+our $MAILHEADER   = 128;
+our $MAILQUOTE    = 256;
+our $CAPS         = 512;
+our $LINK         = 1024;
+our $PRE_EXPLICIT = 2048;
+our $TABLE        = 4096;
+our $IND_BREAK    = 8192;
+our $LIST_START   = 16384;
+our $LIST_ITEM    = 32768;
+
+# Constants for Link-processing
+# bit-vectors for what to do with a particular link-dictionary entry
+our $LINK_NOCASE    = 1;
+our $LINK_EVAL      = 2;
+our $LINK_HTML      = 4;
+our $LINK_ONCE      = 8;
+our $LINK_SECT_ONCE = 16;
+
+# Constants for Ordered Lists and Unordered Lists.
+# And Definition Lists.
+# I use this in the list stack to keep track of what's what.
+
+our $OL = 1;
+our $UL = 2;
+our $DL = 3;
+
+# Constants for table types
+our $TAB_ALIGN  = 1;
+our $TAB_PGSQL  = 2;
+our $TAB_BORDER = 3;
+our $TAB_DELIM  = 4;
+
+# Character entity names
+# characters to replace with entities
+our %char_entities = (
+    "\241", "¡",  "\242", "¢",   "\243", "£",
+    "\244", "¤", "\245", "¥",    "\246", "¦",
+    "\247", "§",   "\250", "¨",    "\251", "©",
+    "\252", "ª",   "\253", "«",  "\254", "¬",
+    "\255", "­",    "\256", "®",    "\257", "&hibar;",
+    "\260", "°",    "\261", "±", "\262", "²",
+    "\263", "³",   "\264", "´",  "\265", "µ",
+    "\266", "¶",   "\270", "¸",  "\271", "¹",
+    "\272", "º",   "\273", "»",  "\274", "¼",
+    "\275", "½", "\276", "¾", "\277", "¿",
+    "\300", "À", "\301", "Á", "\302", "Â",
+    "\303", "Ã", "\304", "Ä",   "\305", "Å",
+    "\306", "Æ",  "\307", "Ç", "\310", "È",
+    "\311", "É", "\312", "Ê",  "\313", "Ë",
+    "\314", "Ì", "\315", "Í", "\316", "Î",
+    "\317", "Ï",   "\320", "Ð",    "\321", "Ñ",
+    "\322", "Ò", "\323", "Ó", "\324", "Ô",
+    "\325", "Õ", "\326", "Ö",   "\327", "×",
+    "\330", "Ø", "\331", "Ù", "\332", "Ú",
+    "\333", "Û",  "\334", "Ü",   "\335", "Ý",
+    "\336", "Þ",  "\337", "ß",  "\340", "à",
+    "\341", "á", "\342", "â",  "\343", "ã",
+    "\344", "ä",   "\345", "å",  "\346", "æ",
+    "\347", "ç", "\350", "è", "\351", "é",
+    "\352", "ê",  "\353", "ë",   "\354", "ì",
+    "\355", "í", "\356", "î",  "\357", "ï",
+    "\360", "ð",    "\361", "ñ", "\362", "ò",
+    "\363", "ó", "\364", "ô",  "\365", "õ",
+    "\366", "ö",   "\367", "÷", "\370", "ø",
+    "\371", "ù", "\372", "ú", "\373", "û",
+    "\374", "ü",   "\375", "ý", "\376", "þ",
+    "\377", "ÿ",   "\267", "·",
+);
+
+# alignments for tables
+our @alignments    = ('', '', ' ALIGN="RIGHT"', ' ALIGN="CENTER"');
+our @lc_alignments = ('', '', ' align="right"', ' align="center"');
+our @xhtml_alignments =
+  ('', '', ' style="text-align: right;"', ' style="text-align: center;"');
+
+#---------------------------------------------------------------#
+# Object interface
+#---------------------------------------------------------------#
+
+=head2 new
+
+    $conv = new HTML::TextToHTML()
+
+    $conv = new HTML::TextToHTML(titlefirst=>1,
+	...
+    );
+
+Create a new object with new.  If one argument is given, it is assumed
+to be a reference to an array of arguments.  If more than one argument
+is given, it is assumed to be a hash of arguments.  These arguments will
+be used in invocations of other methods.
+
+See L</OPTIONS> for the possible values of the arguments.
+
+=cut
+
+sub new
+{
+    my $invocant = shift;
+    my $self     = {};
+
+    my $class = ref($invocant) || $invocant;    # Object or class name
+    init_our_data($self);
+
+    # bless self
+    bless($self, $class);
+
+    $self->args(@_);
+
+    return $self;
+}    # new
+
+=head2 args
+
+    $conv->args(short_line_length=>60,
+	titlefirst=>1,
+	....
+    );
+
+Updates the current arguments/options of the HTML::TextToHTML object.
+Takes either a hash, or a reference to an array of arguments, which will
+be used in invocations of other methods.
+See L</OPTIONS> for the possible values of the arguments.
+
+NOTE: the reference-to-an-array usage is DEPRECATED and will be removed
+in the future.
+
+=cut
+
+sub args
+{
+    my $self      = shift;
+    my %args      = ();
+    my @arg_array = ();
+    if (   @_
+        && @_ == 1
+        && ref $_[0] eq 'ARRAY')
+    {
+        # this is a reference to an array -- use the old style args
+        my $aref = shift;
+        @arg_array = @{$aref};
+    }
+    elsif (@_)
+    {
+        %args = @_;
+    }
+
+    if (%args)
+    {
+        if ($self->{debug})
+        {
+            print STDERR "========args(hash)========\n";
+            print STDERR Dumper(%args);
+        }
+        foreach my $arg (keys %args)
+        {
+            if (defined $args{$arg})
+            {
+                if ($arg =~ /^-/)
+                {
+                    $arg =~ s/^-//;    # get rid of first dash
+                    $arg =~ s/^-//;    # get rid of possible second dash
+                }
+                if ($self->{debug})
+                {
+                    print STDERR "--", $arg;
+                }
+                $self->{$arg} = $args{$arg};
+                if ($self->{debug})
+                {
+                    print STDERR " ", $args{$arg}, "\n";
+                }
+            }
+        }
+    }
+    elsif (@arg_array)
+    {
+        if ($self->{debug})
+        {
+            print STDERR "========args(array)========\n";
+            print STDERR Dumper(@arg_array);
+        }
+        # the arg array may have filenames at the end of it,
+        # so don't consume them
+        my $look_at_args = 1;
+        while (@arg_array && $look_at_args)
+        {
+            my $arg = shift @arg_array;
+            # check for arguments which are bools,
+            # and thus have no companion value
+            if ($arg =~ /^-/)
+            {
+                $arg =~ s/^-//;    # get rid of first dash
+                $arg =~ s/^-//;    # get rid of possible second dash
+                if ($self->{debug})
+                {
+                    print STDERR "--", $arg;
+                }
+                if (   $arg eq 'debug'
+                    || $arg eq 'demoronize'
+                    || $arg eq 'eight_bit_clean'
+                    || $arg eq 'escape_HTML_chars'
+                    || $arg eq 'explicit_headings'
+                    || $arg eq 'extract'
+                    || $arg eq 'link_only'
+                    || $arg eq 'lower_case_tags'
+                    || $arg eq 'mailmode'
+                    || $arg eq 'make_anchors'
+                    || $arg eq 'make_links'
+                    || $arg eq 'make_tables'
+                    || $arg eq 'preserve_indent'
+                    || $arg eq 'titlefirst'
+                    || $arg eq 'unhyphenation'
+                    || $arg eq 'use_mosaic_header'
+                    || $arg eq 'use_preformat_marker'
+                    || $arg eq 'verbose'
+                    || $arg eq 'xhtml')
+                {
+                    $self->{$arg} = 1;
+                    if ($self->{debug})
+                    {
+                        print STDERR "=true\n";
+                    }
+                }
+                elsif ($arg eq 'nodebug'
+                    || $arg eq 'nodemoronize'
+                    || $arg eq 'noeight_bit_clean'
+                    || $arg eq 'noescape_HTML_chars'
+                    || $arg eq 'noexplicit_headings'
+                    || $arg eq 'noextract'
+                    || $arg eq 'nolink_only'
+                    || $arg eq 'nolower_case_tags'
+                    || $arg eq 'nomailmode'
+                    || $arg eq 'nomake_anchors'
+                    || $arg eq 'nomake_links'
+                    || $arg eq 'nomake_tables'
+                    || $arg eq 'nopreserve_indent'
+                    || $arg eq 'notitlefirst'
+                    || $arg eq 'nounhyphenation'
+                    || $arg eq 'nouse_mosaic_header'
+                    || $arg eq 'nouse_preformat_marker'
+                    || $arg eq 'noverbose'
+                    || $arg eq 'noxhtml')
+                {
+                    $arg =~ s/^no//;
+                    $self->{$arg} = 0;
+                    if ($self->{debug})
+                    {
+                        print STDERR " $arg=false\n";
+                    }
+                }
+                else
+                {
+                    my $val = shift @arg_array;
+                    if ($self->{debug})
+                    {
+                        print STDERR "=", $val, "\n";
+                    }
+                    # check the types
+                    if (defined $arg && defined $val)
+                    {
+                        if (   $arg eq 'infile'
+                            || $arg eq 'custom_heading_regexp'
+                            || $arg eq 'links_dictionaries')
+                        {    # arrays
+                            if ($val eq 'CLEAR')
+                            {
+                                $self->{$arg} = [];
+                            }
+                            else
+                            {
+                                push @{$self->{$arg}}, $val;
+                            }
+                        }
+                        elsif ($arg eq 'file')
+                        {    # alternate for 'infile'
+                            if ($val eq 'CLEAR')
+                            {
+                                $self->{infile} = [];
+                            }
+                            else
+                            {
+                                push @{$self->{infile}}, $val;
+                            }
+                        }
+                        elsif ($arg eq 'table_type')
+                        {
+                            # hash
+                            if ($val eq 'CLEAR')
+                            {
+                                $self->{$arg} = {};
+                            }
+                            else
+                            {
+                                my ($f1, $v1) = split(/=/, $val, 2);
+                                $self->{$arg}->{$f1} = $v1;
+                            }
+                        }
+                        else
+                        {
+                            $self->{$arg} = $val;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                # if an option don't start with - then we've
+                # come to the end of the options
+                $look_at_args = 0;
+            }
+        }
+    }
+    if ($self->{debug})
+    {
+        print STDERR Dumper($self);
+    }
+
+    return 1;
+}    # args
+
+=head2 process_chunk
+
+$newstring = $conv->process_chunk($mystring);
+
+Convert a string to a HTML fragment.  This assumes that this string is
+at the least, a single paragraph, but it can contain more than that.
+This returns the processed string.  If you want to pass arguments to
+alter the behaviour of this conversion, you need to do that earlier,
+either when you create the object, or with the L</args> method.
+
+    $newstring = $conv->process_chunk($mystring,
+			    close_tags=>0);
+
+If there are open tags (such as lists) in the input string,
+process_chunk will now automatically close them, unless you specify not
+to, with the close_tags option.
+
+    $newstring = $conv->process_chunk($mystring,
+			    is_fragment=>1);
+
+If you want this string to be treated as a fragment, and not assumed to
+be a paragraph, set is_fragment to true.  If there is more than one
+paragraph in the string (ie it contains blank lines) then this option
+will be ignored.
+
+=cut
+
+sub process_chunk ($$;%)
+{
+    my $self  = shift;
+    my $chunk = shift;
+    my %args  = (
+        close_tags  => 1,
+        is_fragment => 0,
+        @_
+    );
+
+    my $ret_str = '';
+    my @paras   = split(/\r?\n\r?\n/, $chunk);
+    my $ind     = 0;
+    if (@paras == 1)    # just one paragraph
+    {
+        $ret_str .= $self->process_para(
+            $chunk,
+            close_tags  => $args{close_tags},
+            is_fragment => $args{is_fragment}
+        );
+    }
+    else
+    {
+        my $ind = 0;
+        foreach my $para (@paras)
+        {
+            # if the paragraph doesn't end with a newline, add one
+            $para .= "\n" if ($para !~ /\n$/);
+            if ($ind == @paras - 1)    # last one
+            {
+                $ret_str .= $self->process_para(
+                    $para,
+                    close_tags  => $args{close_tags},
+                    is_fragment => 0
+                );
+            }
+            else
+            {
+                $ret_str .= $self->process_para(
+                    $para,
+                    close_tags  => 0,
+                    is_fragment => 0
+                );
+            }
+            $ind++;
+        }
+    }
+    $ret_str;
+}    # process_chunk
+
+=head2 process_para
+
+$newstring = $conv->process_para($mystring);
+
+Convert a string to a HTML fragment.  This assumes that this string is
+at the most a single paragraph, with no blank lines in it.  If you don't
+know whether your string will contain blank lines or not, use the
+L</process_chunk> method instead.
+
+This returns the processed string.  If you want to pass arguments to
+alter the behaviour of this conversion, you need to do that earlier,
+either when you create the object, or with the L</args> method.
+
+    $newstring = $conv->process_para($mystring,
+			    close_tags=>0);
+
+If there are open tags (such as lists) in the input string, process_para
+will now automatically close them, unless you specify not to, with the
+close_tags option.
+
+    $newstring = $conv->process_para($mystring,
+			    is_fragment=>1);
+
+If you want this string to be treated as a fragment, and not assumed to be
+a paragraph, set is_fragment to true.
+
+=cut
+
+sub process_para ($$;%)
+{
+    my $self = shift;
+    my $para = shift;
+    my %args = (
+        close_tags  => 1,
+        is_fragment => 0,
+        @_
+    );
+
+    # if this is an external call, do certain initializations
+    $self->do_init_call();
+
+    my $para_action = $NONE;
+
+    # tables and mailheaders don't carry over from one para to the next
+    if ($self->{__mode} & $TABLE)
+    {
+        $self->{__mode} ^= $TABLE;
+    }
+    if ($self->{__mode} & $MAILHEADER)
+    {
+        $self->{__mode} ^= $MAILHEADER;
+    }
+
+    # convert Microsoft character codes into sensible characters
+    if ($self->{demoronize})
+    {
+        demoronize_char($para);
+    }
+
+    # if we are not just linking, we are discerning structure
+    if (!$self->{link_only})
+    {
+
+        # Chop trailing whitespace and DOS CRs
+        $para =~ s/[ \011]*\015$//;
+        # Chop leading whitespace and DOS CRs
+        $para =~ s/^[ \011]*\015//;
+        $para =~ s/\r//g;             # remove any stray carriage returns
+
+        my @done_lines = ();          # lines which have been processed
+
+        # The PRE_EXPLICIT structure can carry over from one
+        # paragraph to the next, but it is ended with the
+        # explicit end-tag designated for it.
+        # Therefore we can shortcut for this by checking
+        # for the end of the PRE_EXPLICIT and chomping off
+        # the preformatted string part of this para before
+        # we have to split it into lines.
+        # Note that after this check, we could *still* be
+        # in PRE_EXPLICIT mode.
+        if ($self->{__mode} & $PRE_EXPLICIT)
+        {
+            my $pre_str =
+              $self->split_end_explicit_preformat(para_ref => \$para);
+            if ($pre_str)
+            {
+                push @done_lines, $pre_str;
+            }
+        }
+
+        if (defined $para && $para ne "")
+        {
+            #
+            # Now we split the paragraph into lines
+            #
+            my $para_len         = length($para);
+            my @para_lines       = split(/^/, $para);
+            my @para_line_len    = ();
+            my @para_line_indent = ();
+            my @para_line_action = ();
+            my $i                = 0;
+            foreach my $line (@para_lines)
+            {
+                # Change all tabs to spaces
+                while ($line =~ /\011/)
+                {
+                    my $tw = $self->{tab_width};
+                    $line =~ s/\011/" " x ($tw - (length($`) % $tw))/e;
+                }
+                push @para_line_len, length($line);
+                if ($line =~ /^\s*$/)
+                {
+                    # if the line is blank, use the previous indent
+                    # if there is one
+                    push @para_line_indent,
+                      ($i == 0 ? 0 : $para_line_indent[$i - 1]);
+                }
+                else
+                {
+                    # count the number of leading spaces
+                    my ($ws) = $line =~ /^( *)[^ ]/;
+                    push @para_line_indent, length($ws);
+                }
+                push @para_line_action, $NONE;
+                $i++;
+            }
+
+            # There are two more structures which carry over from one
+            # paragraph to the next: LIST, PRE
+            # There are also certain things which will immediately end
+            # multi-paragraph LIST and PRE, if found at the start
+            # of a paragraph:
+            # A list will be ended by
+            # TABLE, MAILHEADER, HEADER, custom-header
+            # A PRE will be ended by
+            # TABLE, MAILHEADER and non-pre text
+
+            my $is_table         = 0;
+            my $table_type       = 0;
+            my $is_mailheader    = 0;
+            my $is_header        = 0;
+            my $is_custom_header = 0;
+            if (@{$self->{custom_heading_regexp}})
+            {
+                $is_custom_header =
+                  $self->is_custom_heading(line => $para_lines[0]);
+            }
+            if (   $self->{make_tables}
+                && @para_lines > 1)
+            {
+                $table_type = $self->get_table_type(
+                    rows_ref => \@para_lines,
+                    para_len => $para_len
+                );
+                $is_table = ($table_type != 0);
+            }
+            if (   !$self->{explicit_headings}
+                && @para_lines > 1
+                && !$is_table)
+            {
+                $is_header = $self->is_heading(
+                    line_ref => \$para_lines[0],
+                    next_ref => \$para_lines[1]
+                );
+            }
+            # Note that it is concievable that someone has
+            # partially disabled mailmode by making a custom header
+            # which matches the start of mail.
+            # This is stupid, but allowable, so we check.
+            if (   $self->{mailmode}
+                && !$is_table
+                && !$is_custom_header)
+            {
+                $is_mailheader = $self->is_mailheader(rows_ref => \@para_lines);
+            }
+
+            # end the list if we can end it
+            if (
+                ($self->{__mode} & $LIST)
+                && (   $is_table
+                    || $is_mailheader
+                    || $is_header
+                    || $is_custom_header)
+              )
+            {
+                my $list_end = '';
+                my $action   = 0;
+                $self->endlist(
+                    num_lists       => $self->{__listnum},
+                    prev_ref        => \$list_end,
+                    line_action_ref => \$action
+                );
+                push @done_lines, $list_end;
+                $self->{__prev_para_action} |= $END;
+            }
+
+            # end the PRE if we can end it
+            if (
+                   ($self->{__mode} & $PRE)
+                && !($self->{__mode} & $PRE_EXPLICIT)
+                && (   $is_table
+                    || $is_mailheader
+                    || !$self->is_preformatted($para_lines[0]))
+                && ($self->{preformat_trigger_lines} != 0)
+              )
+            {
+                my $pre_end = '';
+                my $tag     = $self->close_tag('PRE');
+                $pre_end = "${tag}\n";
+                $self->{__mode} ^= ($PRE & $self->{__mode});
+                push @done_lines, $pre_end;
+                $self->{__prev_para_action} |= $END;
+            }
+
+            # The PRE and PRE_EXPLICIT structure can carry over
+            # from one paragraph to the next, but because we don't
+            # want trailing newlines, such newlines would have been
+            # gotten rid of in the previous call.  However, with
+            # a preformatted text, we do want the blank lines in it
+            # to be preserved, so let's add a blank line in here.
+            if ($self->{__mode} & $PRE)
+            {
+                push @done_lines, "\n";
+            }
+
+            # Now, we do certain things which are only found at the
+            # start of a paragraph:
+            # HEADER, custom-header, TABLE and MAILHEADER
+            # These could concievably eat the rest of the paragraph.
+
+            if ($is_custom_header)
+            {
+                # custom header eats the first line
+                my $header = shift @para_lines;
+                shift @para_line_len;
+                shift @para_line_indent;
+                shift @para_line_action;
+                $self->custom_heading(line_ref => \$header);
+                push @done_lines, $header;
+                $self->{__prev_para_action} |= $HEADER;
+            }
+            elsif ($is_header)
+            {
+                # normal header eats the first two lines
+                my $header = shift @para_lines;
+                shift @para_line_len;
+                shift @para_line_indent;
+                shift @para_line_action;
+                my $underline = shift @para_lines;
+                shift @para_line_len;
+                shift @para_line_indent;
+                shift @para_line_action;
+                $self->heading(
+                    line_ref => \$header,
+                    next_ref => \$underline
+                );
+                push @done_lines, $header;
+                $self->{__prev_para_action} |= $HEADER;
+            }
+
+            # do the table stuff on the array of lines
+            if ($self->{make_tables} && $is_table)
+            {
+                if (
+                    $self->tablestuff(
+                        table_type => $table_type,
+                        rows_ref   => \@para_lines,
+                        para_len   => $para_len
+                    )
+                  )
+                {
+                    # this has used up all the lines
+                    push @done_lines, @para_lines;
+                    @para_lines = ();
+                }
+            }
+
+            # check of this para is a mail-header
+            if (   $is_mailheader
+                && !($self->{__mode} & $TABLE)
+                && @para_lines)
+            {
+                $self->mailheader(rows_ref => \@para_lines);
+                # this has used up all the lines
+                push @done_lines, @para_lines;
+                @para_lines = ();
+            }
+
+            #
+            # Now go through the paragraph lines one at a time
+            # Note that we won't have TABLE, MAILHEADER, HEADER modes
+            # because they would have eaten the lines
+            #
+            my $prev        = '';
+            my $prev_action = $self->{__prev_para_action};
+            for (my $i = 0; $i < @para_lines; $i++)
+            {
+                my $prev_ref;
+                my $prev_action_ref;
+                my $prev_line_indent;
+                my $prev_line_len;
+                if ($i == 0)
+                {
+                    $prev_ref         = \$prev;
+                    $prev_action_ref  = \$prev_action;
+                    $prev_line_indent = 0;
+                    $prev_line_len    = 0;
+                }
+                else
+                {
+                    $prev_ref         = \$para_lines[$i - 1];
+                    $prev_action_ref  = \$para_line_action[$i - 1];
+                    $prev_line_indent = $para_line_indent[$i - 1];
+                    $prev_line_len    = $para_line_len[$i - 1];
+                }
+                my $next_ref;
+                if ($i == $#para_lines)
+                {
+                    $next_ref = undef;
+                }
+                else
+                {
+                    $next_ref = \$para_lines[$i + 1];
+                }
+
+                $para_lines[$i] = escape($para_lines[$i])
+                  if ($self->{escape_HTML_chars});
+
+                if ($self->{mailmode}
+                    && !($self->{__mode} & ($PRE_EXPLICIT)))
+                {
+                    $self->mailquote(
+                        line_ref        => \$para_lines[$i],
+                        line_action_ref => \$para_line_action[$i],
+                        prev_ref        => $prev_ref,
+                        prev_action_ref => $prev_action_ref,
+                        next_ref        => $next_ref
+                    );
+                }
+
+                if (   ($self->{__mode} & $PRE)
+                    && ($self->{preformat_trigger_lines} != 0))
+                {
+                    $self->endpreformat(
+                        para_lines_ref  => \@para_lines,
+                        para_action_ref => \@para_line_action,
+                        ind             => $i,
+                        prev_ref        => $prev_ref
+                    );
+                }
+
+                if (!($self->{__mode} & $PRE))
+                {
+                    $self->hrule(
+                        para_lines_ref  => \@para_lines,
+                        para_action_ref => \@para_line_action,
+                        ind             => $i
+                    );
+                }
+                if (!($self->{__mode} & ($PRE))
+                    && ($para_lines[$i] !~ /^\s*$/))
+                {
+                    $self->liststuff(
+                        para_lines_ref       => \@para_lines,
+                        para_action_ref      => \@para_line_action,
+                        para_line_indent_ref => \@para_line_indent,
+                        ind                  => $i,
+                        prev_ref             => $prev_ref
+                    );
+                }
+                if (   !($para_line_action[$i] & ($HEADER | $LIST))
+                    && !($self->{__mode} & ($LIST | $PRE))
+                    && $self->{__preformat_enabled})
+                {
+                    $self->preformat(
+                        mode_ref        => \$self->{__mode},
+                        line_ref        => \$para_lines[$i],
+                        line_action_ref => \$para_line_action[$i],
+                        prev_ref        => $prev_ref,
+                        next_ref        => $next_ref,
+                        prev_action_ref => $prev_action_ref
+                    );
+                }
+                if (!($self->{__mode} & ($PRE)))
+                {
+                    $self->paragraph(
+                        line_ref        => \$para_lines[$i],
+                        line_action_ref => \$para_line_action[$i],
+                        prev_ref        => $prev_ref,
+                        prev_action_ref => $prev_action_ref,
+                        line_indent     => $para_line_indent[$i],
+                        prev_indent     => $prev_line_indent,
+                        is_fragment     => $args{is_fragment},
+                        ind             => $i,
+                    );
+                }
+                if (!($self->{__mode} & ($PRE | $LIST)))
+                {
+                    $self->shortline(
+                        line_ref        => \$para_lines[$i],
+                        line_action_ref => \$para_line_action[$i],
+                        prev_ref        => $prev_ref,
+                        prev_action_ref => $prev_action_ref,
+                        prev_line_len   => $prev_line_len
+                    );
+                }
+                if (!($self->{__mode} & ($PRE)))
+                {
+                    $self->caps(
+                        line_ref        => \$para_lines[$i],
+                        line_action_ref => \$para_line_action[$i]
+                    );
+                }
+
+                # put the "prev" line in front of the first line
+                $para_lines[$i] = $prev . $para_lines[$i]
+                  if ($i == 0 && ($prev !~ /^\s*$/));
+            }
+
+            # para action is the action of the last line of the para
+            $para_action = $para_line_action[$#para_line_action];
+            $para_action = $NONE if (!defined $para_action);
+
+            # push them on the done lines
+            push @done_lines, @para_lines;
+            @para_lines = ();
+
+        }
+        # now put the para back together as one string
+        $para = join('', @done_lines);
+
+        # if this is a paragraph, and we are in XHTML mode,
+        # close an open paragraph.
+        if ($self->{xhtml})
+        {
+            my $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}];
+            if (defined $open_tag && $open_tag eq 'P')
+            {
+                $para .= $self->close_tag('P');
+            }
+        }
+
+        if (
+            $self->{unhyphenation}
+
+            # ends in hyphen & next line starts w/letters
+            && ($para =~ /[^\W\d_]\-\n\s*[^\W\d_]/s) && !(
+                $self->{__mode} &
+                ($PRE | $HEADER | $MAILHEADER | $TABLE | $BREAK)
+            )
+          )
+        {
+            $self->unhyphenate_para(\$para);
+        }
+        # chop trailing newlines for continuing lists and PRE
+        if (   $self->{__mode} & $LIST
+            || $self->{__mode} & $PRE)
+        {
+            $para =~ s/\n$//g;
+        }
+    }
+
+    # apply links and bold/italic formatting
+    if ($para !~ /^\s*$/)
+    {
+        $self->apply_links(
+            para_ref        => \$para,
+            para_action_ref => \$para_action
+        );
+    }
+
+    # close any open lists if required to
+    if (   $args{close_tags}
+        && $self->{__mode} & $LIST)    # End all lists
+    {
+        $self->endlist(
+            num_lists       => $self->{__listnum},
+            prev_ref        => \$para,
+            line_action_ref => \$para_action
+        );
+    }
+    # close any open tags
+    if ($args{close_tags} && $self->{xhtml})
+    {
+        while (@{$self->{__tags}})
+        {
+            $para .= $self->close_tag('');
+        }
+    }
+
+    # convert remaining Microsoft character codes into sensible HTML
+    if ($self->{demoronize})
+    {
+        $para = demoronize_code($para);
+    }
+    # All the matching and formatting is done.  Now we can
+    # replace non-ASCII characters with character entities.
+    if (!$self->{eight_bit_clean})
+    {
+        my @chars = split(//, $para);
+        foreach $_ (@chars)
+        {
+            $_ = $char_entities{$_} if defined($char_entities{$_});
+        }
+        $para = join('', @chars);
+    }
+
+    $self->{__prev_para_action} = $para_action;
+
+    return $para;
+}    # process_para
+
+=head2 txt2html
+
+    $conv->txt2html(%args);
+
+Convert a text file to HTML.  Takes a hash of arguments, or a reference
+to an array of arguments to customize the conversion; (this includes
+saying what file to convert!) See L</OPTIONS> for the possible values of
+the arguments.  Arguments which have already been set with B<new> or
+B<args> will remain as they are, unless they are overridden.
+
+=cut
+
+sub txt2html ($;$)
+{
+    my $self = shift;
+
+    if (@_)
+    {
+        $self->args(@_);
+    }
+
+    $self->do_init_call();
+
+    my $outhandle;
+    my $outhandle_needs_closing;
+
+    # set up the output
+    if ($self->{outhandle})
+    {
+        $outhandle               = $self->{outhandle};
+        $outhandle_needs_closing = 1;
+    }
+    elsif ($self->{outfile} eq "-")
+    {
+        $outhandle               = *STDOUT;
+        $outhandle_needs_closing = 0;
+    }
+    else
+    {
+        open($outhandle, "> " . $self->{outfile})
+          || die "Error: unable to open ", $self->{outfile}, ": $!\n";
+        $outhandle_needs_closing = 1;
+    }
+
+    # slurp up a paragraph at a time, a file at a time
+    local $/ = "";
+    my $para        = '';
+    my $count       = 0;
+    my $print_count = 0;
+    my @sources     = ();
+    my $source_type;
+    if ($self->{infile} and @{$self->{infile}})
+    {
+        @sources     = @{$self->{infile}};
+        $source_type = 'file';
+    }
+    elsif ($self->{inhandle} and @{$self->{inhandle}})
+    {
+        @sources     = @{$self->{inhandle}};
+        $source_type = 'filehandle';
+    }
+    elsif ($self->{instring} and @{$self->{instring}})
+    {
+        @sources     = @{$self->{instring}};
+        $source_type = 'string';
+    }
+    my $inhandle;
+    my $inhandle_needs_closing = 0;
+    foreach my $source (@sources)
+    {
+        $inhandle = undef;
+        if ($source_type eq 'file')
+        {
+            if (!$source or $source eq '-')
+            {
+                $inhandle               = *STDIN;
+                $inhandle_needs_closing = 0;
+            }
+            else
+            {
+                if (-f $source && open($inhandle, $source))
+                {
+                    $inhandle_needs_closing = 1;
+                }
+                else    # error
+                {
+                    warn "Could not open $source\n";
+                    next;
+                }
+            }
+        }
+        elsif ($source_type eq 'filehandle')
+        {
+            $inhandle               = $source;
+            $inhandle_needs_closing = 1;
+        }
+        if ($source_type eq 'string')
+        {
+            # process the string
+            $para = $_;
+            $para =~ s/\n$//;    # trim the endline
+            if ($count == 0)
+            {
+                $self->do_file_start($outhandle, $para);
+            }
+            $self->{__done_with_sect_link} = [];
+            $para = $self->process_chunk($para, close_tags => 0);
+            print $outhandle $para, "\n";
+            $print_count++;
+            $count++;
+        }
+        else                     # file or filehandle
+        {
+            while (<$inhandle>)
+            {
+                $para = $_;
+                $para =~ s/\n$//;    # trim the endline
+                if ($count == 0)
+                {
+                    $self->do_file_start($outhandle, $para);
+                }
+                $self->{__done_with_sect_link} = [];
+                $para = $self->process_chunk($para, close_tags => 0);
+                print $outhandle $para, "\n";
+                $print_count++;
+                $count++;
+            }
+            if ($inhandle_needs_closing)
+            {
+                close($inhandle);
+            }
+        }
+    }    # for each file
+
+    $self->{__prev} = "";
+    if ($self->{__mode} & $LIST)    # End all lists
+    {
+        $self->endlist(
+            num_lists       => $self->{__listnum},
+            prev_ref        => \$self->{__prev},
+            line_action_ref => \$self->{__line_action}
+        );
+    }
+    print $outhandle $self->{__prev};
+
+    # end open preformats
+    if ($self->{__mode} & $PRE)
+    {
+        my $tag = $self->close_tag('PRE');
+        print $outhandle $tag;
+    }
+
+    # close all open tags
+    if (   $self->{xhtml}
+        && !$self->{extract}
+        && @{$self->{__tags}})
+    {
+        if ($self->{dict_debug} & 8)
+        {
+            print STDERR "closing all tags at end\n";
+        }
+        # close any open tags (until we get to the body)
+        my $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}];
+        while (@{$self->{__tags}}
+            && $open_tag ne 'BODY'
+            && $open_tag ne 'HTML')
+        {
+            print $outhandle $self->close_tag('');
+            $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}];
+        }
+        print $outhandle "\n";
+    }
+
+    if ($self->{append_file})
+    {
+        if (-r $self->{append_file})
+        {
+            open(APPEND, $self->{append_file});
+            while (<APPEND>)
+            {
+                print $outhandle $_;
+                $print_count++;
+            }
+            close(APPEND);
+        }
+        else
+        {
+            print STDERR "Can't find or read file ", $self->{append_file},
+              " to append.\n";
+        }
+    }
+
+    # print the closing tags (if we have printed stuff at all)
+    if ($print_count && !$self->{extract})
+    {
+        print $outhandle $self->close_tag('BODY'), "\n";
+        print $outhandle $self->close_tag('HTML'), "\n";
+    }
+    if ($outhandle_needs_closing)
+    {
+        close($outhandle);
+    }
+    return 1;
+}
+
+#---------------------------------------------------------------#
+# Init-related subroutines
+
+#--------------------------------#
+# Name: init_our_data
+# Args:
+#   $self
+sub init_our_data ($)
+{
+    my $self = shift;
+
+    $self->{debug} = 0;
+
+    #
+    # All the options, in alphabetical order
+    #
+    $self->{append_file}           = '';
+    $self->{append_head}           = '';
+    $self->{body_deco}             = '';
+    $self->{bullets}               = '-=o*\267';
+    $self->{bullets_ordered}       = '';
+    $self->{bold_delimiter}        = '#';
+    $self->{caps_tag}              = 'STRONG';
+    $self->{custom_heading_regexp} = [];
+    $self->{default_link_dict}     =
+      ($ENV{HOME} ? "$ENV{HOME}/.txt2html.dict" : '.txt2html.dict');
+    $self->{dict_debug}                 = 0;
+    $self->{doctype}                    = "-//W3C//DTD HTML 3.2 Final//EN";
+    $self->{demoronize}                 = 1;
+    $self->{eight_bit_clean}            = 0;
+    $self->{escape_HTML_chars}          = 1;
+    $self->{explicit_headings}          = 0;
+    $self->{extract}                    = 0;
+    $self->{hrule_min}                  = 4;
+    $self->{indent_width}               = 2;
+    $self->{indent_par_break}           = 0;
+    $self->{infile}                     = [];
+    $self->{inhandle}                   = [];
+    $self->{instring}                   = [];
+    $self->{italic_delimiter}           = '*';
+    $self->{links_dictionaries}         = [];
+    $self->{link_only}                  = 0;
+    $self->{lower_case_tags}            = 0;
+    $self->{mailmode}                   = 0;
+    $self->{make_anchors}               = 1;
+    $self->{make_links}                 = 1;
+    $self->{make_tables}                = 0;
+    $self->{min_caps_length}            = 3;
+    $self->{outfile}                    = '-';
+    $self->{par_indent}                 = 2;
+    $self->{preformat_trigger_lines}    = 2;
+    $self->{endpreformat_trigger_lines} = 2;
+    $self->{preformat_start_marker}     = "^(:?(:?<)|<)PRE(:?(:?>)|>)\$";
+    $self->{preformat_end_marker}       = "^(:?(:?<)|<)/PRE(:?(:?>)|>)\$";
+    $self->{preformat_whitespace_min}   = 5;
+    $self->{prepend_file}               = '';
+    $self->{preserve_indent}            = 0;
+    $self->{short_line_length}          = 40;
+    $self->{style_url}                  = '';
+    $self->{tab_width}                  = 8;
+    $self->{table_type}                 = {
+        ALIGN  => 1,
+        PGSQL  => 1,
+        BORDER => 1,
+        DELIM  => 1,
+    };
+    $self->{title}                      = '';
+    $self->{titlefirst}                 = 0;
+    $self->{underline_length_tolerance} = 1;
+    $self->{underline_offset_tolerance} = 1;
+    $self->{unhyphenation}              = 1;
+    $self->{use_mosaic_header}          = 0;
+    $self->{use_preformat_marker}       = 0;
+    $self->{xhtml}                      = 0;
+
+    # accumulation variables
+    $self->{__file}               = "";    # Current file being processed
+    $self->{__heading_styles}     = {};
+    $self->{__num_heading_styles} = 0;
+    $self->{__links_table}        = {};
+    $self->{__links_table_order}  = [];
+    $self->{__search_patterns}    = [];
+    $self->{__repl_code}          = [];
+    $self->{__prev_para_action}   = 0;
+    $self->{__non_header_anchor}  = 0;
+    $self->{__mode}               = 0;
+    $self->{__listnum}            = 0;
+    $self->{__list_nice_indent}   = "";
+    $self->{__list_indent}        = [];
+
+    $self->{__call_init_done} = 0;
+
+    #
+    # The global links data
+    #
+    # This is stored in the DATA handle, after the __DATA__ at
+    # the end of this file; but because the test scripts (and possibly
+    # other scripts) don't just create one instance of this object,
+    # we have to remember the position of the DATA handle
+    # and reset it after we've read from it, just in case
+    # we have to read from it again.
+    # This also means that we don't close it, either.  Hope that doesn't
+    # cause a problem...
+    #
+    my $curpos = tell(DATA);    # remember the __DATA__ position
+    my @lines  = ();
+    while (<DATA>)
+    {
+        # skip lines that start with '#'
+        next if /^\#/;
+        # skip lines that end with unescaped ':'
+        next if /^.*[^\\]:\s*$/;
+        push @lines, $_;
+    }
+    # reset the data handle to the start, just in case
+    seek(DATA, $curpos, 0);
+    $self->{__global_links_data} = join('', @lines);
+}    # init_our_data
+
+#---------------------------------------------------------------#
+# txt2html-related subroutines
+
+#--------------------------------#
+# Name: deal_with_options
+#   do extra processing related to particular options
+# Args:
+#   $self
+sub deal_with_options ($)
+{
+    my $self = shift;
+
+    if ($self->{links_dictionaries})
+    {
+        # only put into the links dictionaries files which are readable
+        my @dict_files = @{$self->{links_dictionaries}};
+        $self->args(links_dictionaries => []);
+
+        foreach my $ld (@dict_files)
+        {
+            if (-r $ld)
+            {
+                $self->{'make_links'} = 1;
+                $self->args(['--links_dictionaries', $ld]);
+            }
+            else
+            {
+                print STDERR "Can't find or read link-file $ld\n";
+            }
+        }
+    }
+    if (!$self->{make_links})
+    {
+        $self->{'links_dictionaries'} = 0;
+    }
+    if ($self->{append_file})
+    {
+        if (!-r $self->{append_file})
+        {
+            print STDERR "Can't find or read ", $self->{append_file}, "\n";
+            $self->{append_file} = '';
+        }
+    }
+    if ($self->{prepend_file})
+    {
+        if (!-r $self->{prepend_file})
+        {
+            print STDERR "Can't find or read ", $self->{prepend_file}, "\n";
+            $self->{'prepend_file'} = '';
+        }
+    }
+    if ($self->{append_head})
+    {
+        if (!-r $self->{append_head})
+        {
+            print STDERR "Can't find or read ", $self->{append_head}, "\n";
+            $self->{'append_head'} = '';
+        }
+    }
+
+    if (!$self->{outfile})
+    {
+        $self->{'outfile'} = "-";
+    }
+
+    $self->{'preformat_trigger_lines'} = 0
+      if ($self->{preformat_trigger_lines} < 0);
+    $self->{'preformat_trigger_lines'} = 2
+      if ($self->{preformat_trigger_lines} > 2);
+
+    $self->{'endpreformat_trigger_lines'} = 1
+      if ($self->{preformat_trigger_lines} == 0);
+    $self->{'endpreformat_trigger_lines'} = 0
+      if ($self->{endpreformat_trigger_lines} < 0);
+    $self->{'endpreformat_trigger_lines'} = 2
+      if ($self->{endpreformat_trigger_lines} > 2);
+
+    $self->{__preformat_enabled} =
+      (($self->{endpreformat_trigger_lines} != 0)
+          || $self->{use_preformat_marker});
+
+    if ($self->{use_mosaic_header})
+    {
+        my $num_heading_styles = 0;
+        my %heading_styles     = ();
+        $heading_styles{"*"}          = ++$num_heading_styles;
+        $heading_styles{"="}          = ++$num_heading_styles;
+        $heading_styles{"+"}          = ++$num_heading_styles;
+        $heading_styles{"-"}          = ++$num_heading_styles;
+        $heading_styles{"~"}          = ++$num_heading_styles;
+        $heading_styles{"."}          = ++$num_heading_styles;
+        $self->{__heading_styles}     = \%heading_styles;
+        $self->{__num_heading_styles} = $num_heading_styles;
+    }
+    # XHTML implies lower case
+    $self->{'lower_case_tags'} = 1 if ($self->{xhtml});
+}
+
+sub escape ($)
+{
+    my ($text) = @_;
+    $text =~ s/&/&/g;
+    $text =~ s/>/>/g;
+    $text =~ s/</</g;
+    return $text;
+}
+
+#     Added by Alan Jackson, alan at ajackson dot org, and based
+#     on the demoronize script by John Walker, http://www.fourmilab.ch/
+# Convert Microsoft character entities into characters.
+sub demoronize_char($)
+{
+    my $s = shift;
+    #   Map strategically incompatible non-ISO characters in the
+    #   range 0x82 -- 0x9F into plausible substitutes where
+    #   possible.
+
+    $s =~ s/\x82/,/g;
+    $s =~ s/\x84/,,/g;
+    $s =~ s/\x85/.../g;
+
+    $s =~ s/\x88/^/g;
+
+    $s =~ s/\x8B/</g;
+    $s =~ s/\x8C/Oe/g;
+
+    $s =~ s/\x91/`/g;
+    $s =~ s/\x92/'/g;
+    $s =~ s/\x93/"/g;
+    $s =~ s/\x94/"/g;
+    $s =~ s/\x95/*/g;
+    $s =~ s/\x96/-/g;
+    $s =~ s/\x97/--/g;
+
+    $s =~ s/\x9B/>/g;
+    $s =~ s/\x9C/oe/g;
+
+    return $s;
+}
+
+# convert Microsoft character entities into HTML code
+sub demoronize_code($)
+{
+    my $s = shift;
+    #   Map strategically incompatible non-ISO characters in the
+    #   range 0x82 -- 0x9F into plausible substitutes where
+    #   possible.
+
+    $s =~ s-\x83-<em>f</em>-g;
+
+    $s =~ s-\x98-<sup>~</sup>-g;
+    $s =~ s-\x99-<sup>TM</sup>-g;
+
+    return $s;
+}
+
+# output the tag wanted (add the <> and the / if necessary)
+# - output in lower or upper case
+# - do tag-related processing
+# options:
+#   tag_type=>'start' | tag_type=>'end' | tag_type=>'empty'
+#   (default start)
+#   inside_tag=>string (default empty)
+sub get_tag ($$;%)
+{
+    my $self   = shift;
+    my $in_tag = shift;
+    my %args   = (
+        tag_type   => 'start',
+        inside_tag => '',
+        @_
+    );
+    my $inside_tag = $args{inside_tag};
+
+    my $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}];
+    if (!defined $open_tag)
+    {
+        $open_tag = '';
+    }
+    # close any open tags that need closing
+    # Note that we only have to check for the structural tags we make,
+    # not every possible HTML tag
+    my $tag_prefix = '';
+    if ($self->{xhtml})
+    {
+        if (    $open_tag eq 'P'
+            and $in_tag eq 'P'
+            and $args{tag_type} ne 'end')
+        {
+            $tag_prefix = $self->close_tag('P');
+        }
+        elsif ( $open_tag eq 'P'
+            and $in_tag =~ /^(HR|UL|OL|DL|PRE|TABLE|H)/)
+        {
+            $tag_prefix = $self->close_tag('P');
+        }
+        elsif ( $open_tag eq 'LI'
+            and $in_tag eq 'LI'
+            and $args{tag_type} ne 'end')
+        {
+            # close a LI before the next LI
+            $tag_prefix = $self->close_tag('LI');
+        }
+        elsif ( $open_tag eq 'LI'
+            and $in_tag =~ /^(UL|OL)$/
+            and $args{tag_type} eq 'end')
+        {
+            # close the LI before the list closes
+            $tag_prefix = $self->close_tag('LI');
+        }
+        elsif ( $open_tag eq 'DT'
+            and $in_tag eq 'DD'
+            and $args{tag_type} ne 'end')
+        {
+            # close a DT before the next DD
+            $tag_prefix = $self->close_tag('DT');
+        }
+        elsif ( $open_tag eq 'DD'
+            and $in_tag eq 'DT'
+            and $args{tag_type} ne 'end')
+        {
+            # close a DD before the next DT
+            $tag_prefix = $self->close_tag('DD');
+        }
+        elsif ( $open_tag eq 'DD'
+            and $in_tag         eq 'DL'
+            and $args{tag_type} eq 'end')
+        {
+            # close the DD before the list closes
+            $tag_prefix = $self->close_tag('DD');
+        }
+    }
+
+    my $out_tag = $in_tag;
+    if ($args{tag_type} eq 'end')
+    {
+        $out_tag = $self->close_tag($in_tag);
+    }
+    else
+    {
+        if ($self->{lower_case_tags})
+        {
+            $out_tag =~ tr/A-Z/a-z/;
+        }
+        else    # upper case
+        {
+            $out_tag =~ tr/a-z/A-Z/;
+        }
+        if ($args{tag_type} eq 'empty')
+        {
+            if ($self->{xhtml})
+            {
+                $out_tag = "<${out_tag}${inside_tag}/>";
+            }
+            else
+            {
+                $out_tag = "<${out_tag}${inside_tag}>";
+            }
+        }
+        else
+        {
+            push @{$self->{__tags}}, $in_tag;
+            $out_tag = "<${out_tag}${inside_tag}>";
+        }
+    }
+    $out_tag = $tag_prefix . $out_tag if $tag_prefix;
+    if ($self->{dict_debug} & 8)
+    {
+        print STDERR
+          "open_tag = '${open_tag}', in_tag = '${in_tag}', tag_type = ",
+          $args{tag_type},
+          ", inside_tag = '${inside_tag}', out_tag = '$out_tag'\n";
+    }
+
+    return $out_tag;
+}    # get_tag
+
+# close the open tag
+sub close_tag ($$)
+{
+    my $self   = shift;
+    my $in_tag = shift;
+
+    my $open_tag = pop @{$self->{__tags}};
+    $in_tag ||= $open_tag;
+    # put the open tag back on the stack if the in-tag is not the same
+    if (defined $open_tag && $open_tag ne $in_tag)
+    {
+        push @{$self->{__tags}}, $open_tag;
+    }
+    my $out_tag = $in_tag;
+    if ($self->{lower_case_tags})
+    {
+        $out_tag =~ tr/A-Z/a-z/;
+    }
+    else    # upper case
+    {
+        $out_tag =~ tr/a-z/A-Z/;
+    }
+    $out_tag = "<\/${out_tag}>";
+    if ($self->{dict_debug} & 8)
+    {
+        print STDERR
+"close_tag: open_tag = '${open_tag}', in_tag = '${in_tag}', out_tag = '$out_tag'\n";
+    }
+
+    return $out_tag;
+}
+
+sub hrule ($%)
+{
+    my $self = shift;
+    my %args = (
+        para_lines_ref  => undef,
+        para_action_ref => undef,
+        ind             => 0,
+        @_
+    );
+    my $para_lines_ref  = $args{para_lines_ref};
+    my $para_action_ref = $args{para_action_ref};
+    my $ind             = $args{ind};
+
+    my $hrmin = $self->{hrule_min};
+    if ($para_lines_ref->[$ind] =~ /^\s*([-_~=\*]\s*){$hrmin,}$/)
+    {
+        my $tag = $self->get_tag("HR", tag_type => 'empty');
+        $para_lines_ref->[$ind] = "$tag\n";
+        $para_action_ref->[$ind] |= $HRULE;
+    }
+    elsif ($para_lines_ref->[$ind] =~ /\014/)
+    {
+        # Linefeeds become horizontal rules
+        $para_action_ref->[$ind] |= $HRULE;
+        my $tag = $self->get_tag("HR", tag_type => 'empty');
+        $para_lines_ref->[$ind] =~ s/\014/\n${tag}\n/g;
+    }
+}
+
+sub shortline ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref        => undef,
+        line_action_ref => undef,
+        prev_ref        => undef,
+        prev_action_ref => undef,
+        prev_line_len   => 0,
+        @_
+    );
+    my $mode_ref        = $args{mode_ref};
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+    my $prev_ref        = $args{prev_ref};
+    my $prev_action_ref = $args{prev_action_ref};
+    my $prev_line_len   = $args{prev_line_len};
+
+    # Short lines should be broken even on list item lines iff the
+    # following line is more text.  I haven't figured out how to do
+    # that yet.  For now, I'll just not break on short lines in lists.
+    # (sorry)
+
+    my $tag = $self->get_tag('BR', tag_type => 'empty');
+    if (
+           ${$line_ref} !~ /^\s*$/
+        && ${$prev_ref} !~ /^\s*$/
+        && ($prev_line_len < $self->{short_line_length})
+        && !(
+            ${$line_action_ref} &
+            ($END | $HEADER | $HRULE | $LIST | $IND_BREAK | $PAR)
+        )
+        && !(${$prev_action_ref} & ($HEADER | $HRULE | $BREAK | $IND_BREAK))
+      )
+    {
+        ${$prev_ref} .= $tag . chop(${$prev_ref});
+        ${$prev_action_ref} |= $BREAK;
+    }
+}
+
+sub is_mailheader ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+
+    # a mail header is assumed to be the whole
+    # paragraph which starts with a From , From: or Newsgroups: line
+
+    if ($rows_ref->[0] =~ /^(From:?)|(Newsgroups:) /)
+    {
+        return 1;
+    }
+    return 0;
+
+}    # is_mailheader
+
+sub mailheader ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+
+    # a mail header is assumed to be the whole
+    # paragraph which starts with a From: or Newsgroups: line
+    my $tag  = '';
+    my @rows = @{$rows_ref};
+
+    if ($self->is_mailheader(%args))
+    {
+        $self->{__mode} |= $MAILHEADER;
+        if ($self->{escape_HTML_chars})
+        {
+            $rows[0] = escape($rows[0]);
+        }
+        $self->anchor_mail(\$rows[0]);
+        chomp ${rows}[0];
+        $tag = $self->get_tag('P', inside_tag => " class='mail_header'");
+        my $tag2 = $self->get_tag('BR', tag_type => 'empty');
+        $rows[0] =
+          join('', "<!-- New Message -->\n", $tag, $rows[0], $tag2, "\n");
+        # now put breaks on the rest of the paragraph
+        # apart from the last line
+        for (my $rn = 1; $rn < @rows; $rn++)
+        {
+            if ($self->{escape_HTML_chars})
+            {
+                $rows[$rn] = escape($rows[$rn]);
+            }
+            if ($rn != (@rows - 1))
+            {
+                $tag = $self->get_tag('BR', tag_type => 'empty');
+                chomp $rows[$rn];
+                $rows[$rn] =~ s/$/${tag}\n/;
+            }
+        }
+    }
+    @{$rows_ref} = @rows;
+
+}    # mailheader
+
+sub mailquote ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref        => undef,
+        line_action_ref => undef,
+        prev_ref        => undef,
+        prev_action_ref => undef,
+        next_ref        => undef,
+        @_
+    );
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+    my $prev_ref        = $args{prev_ref};
+    my $prev_action_ref = $args{prev_action_ref};
+    my $next_ref        = $args{next_ref};
+
+    my $tag = '';
+    if (
+        (
+            (${$line_ref} =~ /^\w*>/)    # Handle "FF> Werewolves."
+            || (${$line_ref} =~ /^[\|:]/)
+        )                                  # Handle "[|:] There wolves."
+        && defined($next_ref) && (${$next_ref} !~ /^\s*$/)
+      )
+    {
+        $tag = $self->get_tag('BR', tag_type => 'empty');
+        ${$line_ref} =~ s/$/${tag}/;
+        ${$line_action_ref} |= ($BREAK | $MAILQUOTE);
+        if (!(${$prev_action_ref} & ($BREAK | $MAILQUOTE)))
+        {
+            $tag = $self->get_tag('P', inside_tag => " class='quote_mail'");
+            ${$prev_ref} .= $tag;
+            ${$line_action_ref} |= $PAR;
+        }
+    }
+}
+
+# Subtracts modes listed in $mask from $vector.
+sub subtract_modes ($$)
+{
+    my ($vector, $mask) = @_;
+    return ($vector | $mask) - $mask;
+}
+
+sub paragraph ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref        => undef,
+        line_action_ref => undef,
+        prev_ref        => undef,
+        prev_action_ref => undef,
+        line_indent     => 0,
+        prev_indent     => 0,
+        is_fragment     => 0,
+        ind             => 0,
+        @_
+    );
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+    my $prev_ref        = $args{prev_ref};
+    my $prev_action_ref = $args{prev_action_ref};
+    my $line_indent     = $args{line_indent};
+    my $prev_indent     = $args{prev_indent};
+    my $is_fragment     = $args{is_fragment};
+    my $line_no         = $args{ind};
+
+    my $tag = '';
+    if (
+        ${$line_ref} !~ /^\s*$/
+        && !subtract_modes(
+            ${$line_action_ref}, $END | $MAILQUOTE | $CAPS | $BREAK
+        )
+        && (   ${$prev_ref} =~ /^\s*$/
+            || (${$line_action_ref} & $END)
+            || ($line_indent > $prev_indent + $self->{par_indent}))
+        && !($is_fragment && $line_no == 0)
+      )
+    {
+
+        if (   $self->{indent_par_break}
+            && ${$prev_ref} !~ /^\s*$/
+            && !(${$line_action_ref} & $END)
+            && ($line_indent > $prev_indent + $self->{par_indent}))
+        {
+            $tag = $self->get_tag('BR', tag_type => 'empty');
+            ${$prev_ref} .= $tag;
+            ${$prev_ref} .= " " x $line_indent;
+            ${$line_ref} =~ s/^ {$line_indent}//;
+            ${$prev_action_ref} |= $BREAK;
+            ${$line_action_ref} |= $IND_BREAK;
+        }
+        elsif ($self->{preserve_indent})
+        {
+            $tag = $self->get_tag('P');
+            ${$prev_ref} .= $tag;
+            ${$prev_ref} .= " " x $line_indent;
+            ${$line_ref} =~ s/^ {$line_indent}//;
+            ${$line_action_ref} |= $PAR;
+        }
+        else
+        {
+            $tag = $self->get_tag('P');
+            ${$prev_ref} .= $tag;
+            ${$line_action_ref} |= $PAR;
+        }
+    }
+    # detect also a continuing indentation at the same level
+    elsif ($self->{indent_par_break}
+        && !($self->{__mode} & ($PRE | $TABLE | $LIST))
+        && ${$prev_ref} !~ /^\s*$/
+        && !(${$line_action_ref} & $END)
+        && (${$prev_action_ref} & ($IND_BREAK | $PAR))
+        && !subtract_modes(${$line_action_ref}, $END | $MAILQUOTE | $CAPS)
+        && ($line_indent > $self->{par_indent})
+        && ($line_indent == $prev_indent))
+    {
+        $tag = $self->get_tag('BR', tag_type => 'empty');
+        ${$prev_ref} .= $tag;
+        ${$prev_ref} .= " " x $line_indent;
+        ${$line_ref} =~ s/^ {$line_indent}//;
+        ${$prev_action_ref} |= $BREAK;
+        ${$line_action_ref} |= $IND_BREAK;
+    }
+}
+
+sub listprefix ($$)
+{
+    my $self = shift;
+    my $line = shift;
+
+    my ($prefix, $number, $rawprefix, $term);
+
+    my $bullets         = $self->{bullets};
+    my $bullets_ordered = $self->{bullets_ordered};
+    my $number_match    = '(\d+|[^\W\d])';
+    if ($bullets_ordered)
+    {
+        $number_match = '(\d+|[a-zA-Z]|[' . "${bullets_ordered}])";
+    }
+    $self->{__number_match} = $number_match;
+    my $term_match = '(\w\w+)';
+    $self->{__term_match} = $term_match;
+    return (0, 0, 0, 0)
+      if ( !($line =~ /^\s*[${bullets}]\s+\S/)
+        && !($line =~ /^\s*${number_match}[\.\)\]:]\s+\S/)
+        && !($line =~ /^\s*${term_match}:$/));
+
+    ($term)   = $line =~ /^\s*${term_match}:$/;
+    ($number) = $line =~ /^\s*${number_match}\S\s+\S/;
+    $number = 0 unless defined($number);
+    if (   $bullets_ordered
+        && $number =~ /[${bullets_ordered}]/)
+    {
+        $number = 1;
+    }
+
+    # That slippery exception of "o" as a bullet
+    # (This ought to be determined using the context of what lists
+    #  we have in progress, but this will probably work well enough.)
+    if ($bullets =~ /o/ && $line =~ /^\s*o\s/)
+    {
+        $number = 0;
+    }
+
+    if ($term)
+    {
+        ($rawprefix) = $line =~ /^(\s*${term_match}.)$/;
+        $prefix = $rawprefix;
+        $prefix =~ s/${term_match}//;    # Take the term out
+    }
+    elsif ($number)
+    {
+        ($rawprefix) = $line =~ /^(\s*${number_match}.)/;
+        $prefix = $rawprefix;
+        $prefix =~ s/${number_match}//;    # Take the number out
+    }
+    else
+    {
+        ($rawprefix) = $line =~ /^(\s*[${bullets}].)/;
+        $prefix = $rawprefix;
+    }
+    ($prefix, $number, $rawprefix, $term);
+}    # listprefix
+
+sub startlist ($%)
+{
+    my $self = shift;
+    my %args = (
+        prefix          => '',
+        number          => 0,
+        rawprefix       => '',
+        term            => '',
+        para_lines_ref  => undef,
+        para_action_ref => undef,
+        ind             => 0,
+        prev_ref        => undef,
+        total_prefix    => '',
+        @_
+    );
+    my $prefix          = $args{prefix};
+    my $number          = $args{number};
+    my $rawprefix       = $args{rawprefix};
+    my $term            = $args{term};
+    my $para_lines_ref  = $args{para_lines_ref};
+    my $para_action_ref = $args{para_action_ref};
+    my $ind             = $args{ind};
+    my $prev_ref        = $args{prev_ref};
+
+    my $tag = '';
+    $self->{__listprefix}->[$self->{__listnum}] = $prefix;
+    if ($number)
+    {
+
+        # It doesn't start with 1,a,A.  Let's not screw with it.
+        if (($number ne "1") && ($number ne "a") && ($number ne "A"))
+        {
+            return 0;
+        }
+        $tag = $self->get_tag('OL');
+        ${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n");
+        $self->{__list}->[$self->{__listnum}] = $OL;
+    }
+    elsif ($term)
+    {
+        $tag = $self->get_tag('DL');
+        ${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n");
+        $self->{__list}->[$self->{__listnum}] = $DL;
+    }
+    else
+    {
+        $tag = $self->get_tag('UL');
+        ${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n");
+        $self->{__list}->[$self->{__listnum}] = $UL;
+    }
+
+    $self->{__list_indent}->[$self->{__listnum}] = length($args{total_prefix});
+    $self->{__listnum}++;
+    $self->{__list_nice_indent} =
+      " " x $self->{__listnum} x $self->{indent_width};
+    $para_action_ref->[$ind] |= $LIST;
+    $para_action_ref->[$ind] |= $LIST_START;
+    $self->{__mode}          |= $LIST;
+    1;
+}    # startlist
+
+# End N lists
+sub endlist ($%)
+{
+    my $self = shift;
+    my %args = (
+        num_lists       => 0,
+        prev_ref        => undef,
+        line_action_ref => undef,
+        @_
+    );
+    my $n               = $args{num_lists};
+    my $prev_ref        = $args{prev_ref};
+    my $line_action_ref = $args{line_action_ref};
+
+    my $tag = '';
+    for (; $n > 0; $n--, $self->{__listnum}--)
+    {
+        $self->{__list_nice_indent} =
+          " " x ($self->{__listnum} - 1) x $self->{indent_width};
+        if ($self->{__list}->[$self->{__listnum} - 1] == $UL)
+        {
+            $tag = $self->get_tag('UL', tag_type => 'end');
+            ${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n");
+            pop @{$self->{__list_indent}};
+        }
+        elsif ($self->{__list}->[$self->{__listnum} - 1] == $OL)
+        {
+            $tag = $self->get_tag('OL', tag_type => 'end');
+            ${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n");
+            pop @{$self->{__list_indent}};
+        }
+        elsif ($self->{__list}->[$self->{__listnum} - 1] == $DL)
+        {
+            $tag = $self->get_tag('DL', tag_type => 'end');
+            ${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n");
+            pop @{$self->{__list_indent}};
+        }
+        else
+        {
+            print STDERR "Encountered list of unknown type\n";
+        }
+    }
+    ${$line_action_ref} |= $END;
+    $self->{__mode} ^= $LIST if (!$self->{__listnum});
+}    # endlist
+
+sub continuelist ($%)
+{
+    my $self = shift;
+    my %args = (
+        para_lines_ref  => undef,
+        para_action_ref => undef,
+        ind             => 0,
+        term            => '',
+        @_
+    );
+    my $para_lines_ref  = $args{para_lines_ref};
+    my $para_action_ref = $args{para_action_ref};
+    my $ind             = $args{ind};
+    my $term            = $args{term};
+
+    my $list_indent = $self->{__list_nice_indent};
+    my $bullets     = $self->{bullets};
+    my $num_match   = $self->{__number_match};
+    my $term_match  = $self->{__term_match};
+    my $tag         = '';
+    if (   $self->{__list}->[$self->{__listnum} - 1] == $UL
+        && $para_lines_ref->[$ind] =~ /^\s*[${bullets}]\s*/)
+    {
+        $tag = $self->get_tag('LI');
+        $para_lines_ref->[$ind] =~ s/^\s*[${bullets}]\s*/${list_indent}${tag}/;
+        $para_action_ref->[$ind] |= $LIST_ITEM;
+    }
+    if ($self->{__list}->[$self->{__listnum} - 1] == $OL)
+    {
+        $tag = $self->get_tag('LI');
+        $para_lines_ref->[$ind] =~ s/^\s*${num_match}.\s*/${list_indent}${tag}/;
+        $para_action_ref->[$ind] |= $LIST_ITEM;
+    }
+    if (   $self->{__list}->[$self->{__listnum} - 1] == $DL
+        && $term)
+    {
+        $tag = $self->get_tag('DT');
+        my $tag2 = $self->get_tag('DT', tag_type => 'end');
+        $term =~ s/_/ /g;    # underscores are now spaces in the term
+        $para_lines_ref->[$ind] =~
+          s/^\s*${term_match}.$/${list_indent}${tag}${term}${tag2}/;
+        $tag = $self->get_tag('DD');
+        $para_lines_ref->[$ind] .= ${tag};
+        $para_action_ref->[$ind] |= $LIST_ITEM;
+    }
+    $para_action_ref->[$ind] |= $LIST;
+}    # continuelist
+
+sub liststuff ($%)
+{
+    my $self = shift;
+    my %args = (
+        para_lines_ref       => undef,
+        para_action_ref      => undef,
+        para_line_indent_ref => undef,
+        ind                  => 0,
+        prev_ref             => undef,
+        @_
+    );
+    my $para_lines_ref       = $args{para_lines_ref};
+    my $para_action_ref      = $args{para_action_ref};
+    my $para_line_indent_ref = $args{para_line_indent_ref};
+    my $ind                  = $args{ind};
+    my $prev_ref             = $args{prev_ref};
+
+    my $i;
+
+    my ($prefix, $number, $rawprefix, $term) =
+      $self->listprefix($para_lines_ref->[$ind]);
+
+    if (!$prefix)
+    {
+        # if the previous line is not blank
+        if ($ind > 0 && $para_lines_ref->[$ind - 1] !~ /^\s*$/)
+        {
+            # inside a list item
+            return;
+        }
+        # This might be a new paragraph within an existing list item;
+        # It will be the first line, and have the same indentation
+        # as the list's indentation.
+        if (   $ind == 0
+            && $self->{__listnum}
+            && $para_line_indent_ref->[$ind] ==
+            $self->{__list_indent}->[$self->{__listnum} - 1])
+        {
+            # start a paragraph
+            my $tag = $self->get_tag('P');
+            ${$prev_ref} .= $tag;
+            $para_action_ref->[$ind] |= $PAR;
+            return;
+        }
+        # This ain't no list.  We'll want to end all of them.
+        if ($self->{__listnum})
+        {
+            $self->endlist(
+                num_lists       => $self->{__listnum},
+                prev_ref        => $prev_ref,
+                line_action_ref => \$para_action_ref->[$ind]
+            );
+        }
+        return;
+    }
+
+    # If numbers with more than one digit grow to the left instead of
+    # to the right, the prefix will shrink and we'll fail to match the
+    # right list.  We need to account for this.
+    my $prefix_alternate;
+    if (length("" . $number) > 1)
+    {
+        $prefix_alternate = (" " x (length("" . $number) - 1)) . $prefix;
+    }
+
+    # Maybe we're going back up to a previous list
+    for (
+        $i = $self->{__listnum} - 1;
+        ($i >= 0) && ($prefix ne $self->{__listprefix}->[$i]);
+        $i--
+      )
+    {
+        if (length("" . $number) > 1)
+        {
+            last if $prefix_alternate eq $self->{__listprefix}->[$i];
+        }
+    }
+
+    my $islist;
+
+    # Measure the indent from where the text starts, not where the
+    # prefix starts.  This won't screw anything up, and if we don't do
+    # it, the next line might appear to be indented relative to this
+    # line, and get tagged as a new paragraph.
+    my $bullets         = $self->{bullets};
+    my $bullets_ordered = $self->{bullets_ordered};
+    my $term_match      = $self->{__term_match};
+    my ($total_prefix)  =
+      $para_lines_ref->[$ind] =~ /^(\s*[${bullets}${bullets_ordered}\w]+.\s*)/;
+    # a DL indent starts from the edge of the term, plus indent_width
+    if ($term)
+    {
+        ($total_prefix) = $para_lines_ref->[$ind] =~ /^(\s*)${term_match}.$/;
+        $total_prefix .= " " x $self->{indent_width};
+    }
+
+    # Of course, we only use it if it really turns out to be a list.
+
+    $islist = 1;
+    $i++;
+    if (($i > 0) && ($i != $self->{__listnum}))
+    {
+        $self->endlist(
+            num_lists       => $self->{__listnum} - $i,
+            prev_ref        => $prev_ref,
+            line_action_ref => \$para_action_ref->[$ind]
+        );
+        $islist = 0;
+    }
+    elsif (!$self->{__listnum} || ($i != $self->{__listnum}))
+    {
+        if (
+               ($para_line_indent_ref->[$ind] > 0)
+            || $ind == 0
+            || ($ind > 0 && ($para_lines_ref->[$ind - 1] =~ /^\s*$/))
+            || (   $ind > 0
+                && $para_action_ref->[$ind - 1] & ($BREAK | $HEADER | $CAPS))
+          )
+        {
+            $islist = $self->startlist(
+                prefix          => $prefix,
+                number          => $number,
+                rawprefix       => $rawprefix,
+                term            => $term,
+                para_lines_ref  => $para_lines_ref,
+                para_action_ref => $para_action_ref,
+                ind             => $ind,
+                prev_ref        => $prev_ref,
+                total_prefix    => $total_prefix
+            );
+        }
+        else
+        {
+
+            # We have something like this: "- foo" which usually
+            # turns out not to be a list.
+            return;
+        }
+    }
+
+    $self->continuelist(
+        para_lines_ref  => $para_lines_ref,
+        para_action_ref => $para_action_ref,
+        ind             => $ind,
+        term            => $term
+      )
+      if ($self->{__mode} & $LIST);
+    $para_line_indent_ref->[$ind] = length($total_prefix) if $islist;
+}    # liststuff
+
+# figure out the table type of this table, if any
+sub get_table_type ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $table_type = 0;
+    if (   $self->{table_type}->{DELIM}
+        && $self->is_delim_table(%args))
+    {
+        $table_type = $TAB_DELIM;
+    }
+    elsif ($self->{table_type}->{ALIGN}
+        && $self->is_aligned_table(%args))
+    {
+        $table_type = $TAB_ALIGN;
+    }
+    elsif ($self->{table_type}->{PGSQL}
+        && $self->is_pgsql_table(%args))
+    {
+        $table_type = $TAB_PGSQL;
+    }
+    elsif ($self->{table_type}->{BORDER}
+        && $self->is_border_table(%args))
+    {
+        $table_type = $TAB_BORDER;
+    }
+
+    return $table_type;
+}
+
+# check if the given paragraph-array is an aligned table
+sub is_aligned_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # TABLES: spot and mark up tables.  We combine the lines of the
+    # paragraph using the string bitwise or (|) operator, the result
+    # being in $spaces.  A character in $spaces is a space only if
+    # there was a space at that position in every line of the
+    # paragraph.  $space can be used to search for contiguous spaces
+    # that occur on all lines of the paragraph.  If this results in at
+    # least two columns, the paragraph is identified as a table.
+
+    # Note that this sub must be called before checking for preformatted
+    # lines because a table may well have whitespace to the left, in
+    # which case it must not be incorrectly recognised as a preformat.
+    my @rows = @{$rows_ref};
+    my @starts;
+    my $spaces = '';
+    my $max    = 0;
+    my $min    = $para_len;
+    foreach my $row (@rows)
+    {
+        ($spaces |= $row) =~ tr/ /\xff/c;
+        $min = length $row if length $row < $min;
+        $max = length $row if $max < length $row;
+    }
+    $spaces = substr $spaces, 0, $min;
+    push(@starts, 0) unless $spaces =~ /^ /;
+    while ($spaces =~ /((?:^| ) +)(?=[^ ])/g)
+    {
+        push @starts, pos($spaces);
+    }
+
+    if (2 <= @rows and 2 <= @starts)
+    {
+        return 1;
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+sub is_pgsql_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # a PGSQL table can start with an optional table-caption,
+    # then it has a row of column headings separated by |
+    # then it has a row of ------+-----
+    # then it has one or more rows of column values separated by |
+    # then it has a row-count (N rows)
+    # Thus it must have at least 4 rows.
+    if (@{$rows_ref} < 4)
+    {
+        return 0;
+    }
+
+    my @rows = @{$rows_ref};
+    if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/)    # possible caption
+    {
+        shift @rows;
+    }
+    if (@rows < 4)
+    {
+        return 0;
+    }
+    if ($rows[0] !~ /^\s*\w+\s+\|\s+/)                # Colname |
+    {
+        return 0;
+    }
+    if ($rows[1] !~ /^\s*[-]+[+][-]+/)                # ----+----
+    {
+        return 0;
+    }
+    if ($rows[2] !~ /^\s*[^|]*\s+\|\s+/)              # value |
+    {
+        return 0;
+    }
+    # check the last row for rowcount
+    if ($rows[$#rows] !~ /\(\d+\s+rows\)/)
+    {
+        return 0;
+    }
+
+    return 1;
+}
+
+sub is_border_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # a BORDER table can start with an optional table-caption,
+    # then it has a row of +------+-----+
+    # then it has a row of column headings separated by |
+    # then it has a row of +------+-----+
+    # then it has one or more rows of column values separated by |
+    # then it has a row of +------+-----+
+    # Thus it must have at least 5 rows.
+    # And note that it could be indented with spaces
+    if (@{$rows_ref} < 5)
+    {
+        return 0;
+    }
+
+    my @rows = @{$rows_ref};
+    if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/)    # possible caption
+    {
+        shift @rows;
+    }
+    if (@rows < 5)
+    {
+        return 0;
+    }
+    if ($rows[0] !~ /^\s*[+][-]+[+][-]+[+][-+]*$/)    # +----+----+
+    {
+        return 0;
+    }
+    if ($rows[1] !~ /^\s*\|\s*\w+\s+\|\s+.*\|$/)      # | Colname |
+    {
+        return 0;
+    }
+    if ($rows[2] !~ /^\s*[+][-]+[+][-]+[+][-+]*$/)    # +----+----+
+    {
+        return 0;
+    }
+    if ($rows[3] !~ /^\s*\|\s*[^|]*\s+\|\s+.*\|$/)    # | value |
+    {
+        return 0;
+    }
+    # check the last row for +------+------+
+    if ($rows[$#rows] !~ /^\s*[+][-]+[+][-]+[+][-+]*$/)    # +----+----+
+    {
+        return 0;
+    }
+
+    return 1;
+}    # is_border_table
+
+sub is_delim_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # a DELIM table can start with an optional table-caption,
+    # then it has at least two rows which start and end and are
+    # punctuated by a non-alphanumeric delimiter.
+    #
+    # | val1 | val2 |
+    # | val3 | val4 |
+    #
+    # And note that it could be indented with spaces
+    if (@{$rows_ref} < 2)
+    {
+        return 0;
+    }
+
+    my @rows = @{$rows_ref};
+    if ($rows[0] !~ /[^\w\s]/ && $rows[0] =~ /^\s*\w+/)    # possible caption
+    {
+        shift @rows;
+    }
+    if (@rows < 2)
+    {
+        return 0;
+    }
+    # figure out if the row starts with a possible delimiter
+    my $delim = '';
+    if ($rows[0] =~ /^\s*([^a-zA-Z0-9])/)
+    {
+        $delim = $1;
+        # have to get rid of ^ and []
+        $delim =~ s/\^//g;
+        $delim =~ s/\[//g;
+        $delim =~ s/\]//g;
+        if (!$delim)    # no delimiter after all
+        {
+            return 0;
+        }
+    }
+    else
+    {
+        return 0;
+    }
+    # There needs to be at least three delimiters in the row
+    my @all_delims = ($rows[0] =~ /[${delim}]/g);
+    my $total_num_delims = @all_delims;
+    if ($total_num_delims < 3)
+    {
+        return 0;
+    }
+    # All rows must start and end with the delimiter
+    # and have $total_num_delims number of them
+    foreach my $row (@rows)
+    {
+        if ($row !~ /^\s*[${delim}]/)
+        {
+            return 0;
+        }
+        if ($row !~ /[${delim}]\s*$/)
+        {
+            return 0;
+        }
+        @all_delims = ($row =~ /[${delim}]/g);
+        if (@all_delims != $total_num_delims)
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}    # is_delim_table
+
+sub tablestuff ($%)
+{
+    my $self = shift;
+    my %args = (
+        table_type => 0,
+        rows_ref   => undef,
+        para_len   => 0,
+        @_
+    );
+    my $table_type = $args{table_type};
+    if ($table_type eq $TAB_ALIGN)
+    {
+        return $self->make_aligned_table(%args);
+    }
+    if ($table_type eq $TAB_PGSQL)
+    {
+        return $self->make_pgsql_table(%args);
+    }
+    if ($table_type eq $TAB_BORDER)
+    {
+        return $self->make_border_table(%args);
+    }
+    if ($table_type eq $TAB_DELIM)
+    {
+        return $self->make_delim_table(%args);
+    }
+}    # tablestuff
+
+sub make_aligned_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # TABLES: spot and mark up tables.  We combine the lines of the
+    # paragraph using the string bitwise or (|) operator, the result
+    # being in $spaces.  A character in $spaces is a space only if
+    # there was a space at that position in every line of the
+    # paragraph.  $space can be used to search for contiguous spaces
+    # that occur on all lines of the paragraph.  If this results in at
+    # least two columns, the paragraph is identified as a table.
+
+    # Note that this sub must be called before checking for preformatted
+    # lines because a table may well have whitespace to the left, in
+    # which case it must not be incorrectly recognised as a preformat.
+    my @rows = @{$rows_ref};
+    my @starts;
+    my @ends;
+    my $spaces;
+    my $max = 0;
+    my $min = $para_len;
+    foreach my $row (@rows)
+    {
+        ($spaces |= $row) =~ tr/ /\xff/c;
+        $min = length $row if length $row < $min;
+        $max = length $row if $max < length $row;
+    }
+    $spaces = substr $spaces, 0, $min;
+    push(@starts, 0) unless $spaces =~ /^ /;
+    while ($spaces =~ /((?:^| ) +)(?=[^ ])/g)
+    {
+        push @ends,   pos($spaces) - length $1;
+        push @starts, pos($spaces);
+    }
+    shift(@ends) if $spaces =~ /^ /;
+    push(@ends, $max);
+
+    # Two or more rows and two or more columns indicate a table.
+    if (2 <= @rows and 2 <= @starts)
+    {
+        $self->{__mode} |= $TABLE;
+
+        # For each column, guess whether it should be left, centre or
+        # right aligned by examining all cells in that column for space
+        # to the left or the right.  A simple majority among those cells
+        # that actually have space to one side or another decides (if no
+        # alignment gets a majority, left alignment wins by default).
+        my @align;
+        my $cell = '';
+        foreach my $col (0 .. $#starts)
+        {
+            my @count = (0, 0, 0, 0);
+            foreach my $row (@rows)
+            {
+                my $width = $ends[$col] - $starts[$col];
+                $cell = substr $row, $starts[$col], $width;
+                ++$count[($cell =~ /^ / ? 2 : 0) +
+                  ($cell =~ / $/ || length($cell) < $width ? 1 : 0)];
+            }
+            $align[$col] = 0;
+            my $population = $count[1] + $count[2] + $count[3];
+            foreach (1 .. 3)
+            {
+                if ($count[$_] * 2 > $population)
+                {
+                    $align[$col] = $_;
+                    last;
+                }
+            }
+        }
+
+        foreach my $row (@rows)
+        {
+            $row = join '', $self->get_tag('TR'), (
+                map {
+                    $cell = substr $row, $starts[$_], $ends[$_] - $starts[$_];
+                    $cell =~ s/^ +//;
+                    $cell =~ s/ +$//;
+
+                    if ($self->{escape_HTML_chars})
+                    {
+                        $cell = escape($cell);
+                    }
+
+                    (
+                        $self->get_tag(
+                            'TD',
+                            inside_tag => (
+                                $self->{xhtml} ? $xhtml_alignments[$align[$_]]
+                                : (
+                                      $self->{lower_case_tags}
+                                    ? $lc_alignments[$align[$_]]
+                                    : $alignments[$align[$_]]
+                                )
+                            )
+                        ),
+                        $cell,
+                        $self->close_tag('TD')
+                    );
+                  } 0 .. $#starts
+              ),
+              $self->close_tag('TR');
+        }
+
+        # put the <TABLE> around the rows
+        my $tag;
+        if ($self->{xhtml})
+        {
+            $tag = $self->get_tag('TABLE', inside_tag => ' summary=""');
+        }
+        else
+        {
+            $tag = $self->get_tag('TABLE');
+        }
+        $rows[0] = join("\n", $tag, $rows[0]);
+        $tag = $self->close_tag('TABLE', tag_type => 'end');
+        $rows[$#rows] .= "\n${tag}";
+        @{$rows_ref} = @rows;
+        return 1;
+    }
+    else
+    {
+        return 0;
+    }
+}    # make_aligned_table
+
+sub make_pgsql_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # a PGSQL table can start with an optional table-caption,
+    # then it has a row of column headings separated by |
+    # then it has a row of ------+-----
+    # then it has one or more rows of column values separated by |
+    # then it has a row-count (N rows)
+    # Thus it must have at least 4 rows.
+    my @rows    = @{$rows_ref};
+    my $caption = '';
+    if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/)    # possible caption
+    {
+        $caption = shift @rows;
+    }
+    my @headings = split(/\s+\|\s+/, shift @rows);
+    # skip the ----+--- line
+    shift @rows;
+    # grab the N rows line
+    my $n_rows = pop @rows;
+
+    # now start making the table
+    my @tab_lines = ();
+    my $tag;
+    my $tag2;
+    if ($self->{xhtml})
+    {
+        $tag = $self->get_tag('TABLE', inside_tag => ' border="1" summary=""');
+    }
+    else
+    {
+        $tag = $self->get_tag('TABLE', inside_tag => ' border="1"');
+    }
+    push @tab_lines, "$tag\n";
+    if ($caption)
+    {
+        $caption =~ s/^\s+//;
+        $caption =~ s/\s+$//;
+        $tag     = $self->get_tag('CAPTION');
+        $tag2    = $self->close_tag('CAPTION');
+        $caption = join('', $tag, $caption, $tag2, "\n");
+        push @tab_lines, $caption;
+    }
+    # table header
+    my $thead = '';
+    $tag = $self->get_tag('THEAD');
+    $thead .= $tag;
+    $tag = $self->get_tag('TR');
+    $thead .= $tag;
+    foreach my $col (@headings)
+    {
+        $col =~ s/^\s+//;
+        $col =~ s/\s+$//;
+        $tag  = $self->get_tag('TH');
+        $tag2 = $self->close_tag('TH');
+        $thead .= join('', $tag, $col, $tag2);
+    }
+    $tag = $self->close_tag('TR');
+    $thead .= $tag;
+    $tag = $self->close_tag('THEAD');
+    $thead .= $tag;
+    push @tab_lines, "${thead}\n";
+    $tag = $self->get_tag('TBODY');
+    push @tab_lines, "$tag\n";
+
+    # each row
+    foreach my $row (@rows)
+    {
+        my $this_row = '';
+        $tag = $self->get_tag('TR');
+        $this_row .= $tag;
+        my @cols = split(/\|/, $row);
+        foreach my $cell (@cols)
+        {
+            $cell =~ s/^\s+//;
+            $cell =~ s/\s+$//;
+            if ($self->{escape_HTML_chars})
+            {
+                $cell = escape($cell);
+            }
+            if (!$cell)
+            {
+                $cell = ' ';
+            }
+            $tag  = $self->get_tag('TD');
+            $tag2 = $self->close_tag('TD');
+            $this_row .= join('', $tag, $cell, $tag2);
+        }
+        $tag = $self->close_tag('TR');
+        $this_row .= $tag;
+        push @tab_lines, "${this_row}\n";
+    }
+
+    # end the table
+    $tag = $self->close_tag('TBODY');
+    push @tab_lines, "$tag\n";
+    $tag = $self->get_tag('TABLE', tag_type => 'end');
+    push @tab_lines, "$tag\n";
+
+    # and add the N rows line
+    $tag = $self->get_tag('P');
+    push @tab_lines, "${tag}${n_rows}\n";
+    if ($self->{xhtml})
+    {
+        $tag = $self->get_tag('P', tag_type => 'end');
+        $tab_lines[$#tab_lines] =~ s/\n/${tag}\n/;
+    }
+
+    # replace the rows
+    @{$rows_ref} = @tab_lines;
+}    # make_pgsql_table
+
+sub make_border_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # a BORDER table can start with an optional table-caption,
+    # then it has a row of +------+-----+
+    # then it has a row of column headings separated by |
+    # then it has a row of +------+-----+
+    # then it has one or more rows of column values separated by |
+    # then it has a row of +------+-----+
+    my @rows    = @{$rows_ref};
+    my $caption = '';
+    if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/)    # possible caption
+    {
+        $caption = shift @rows;
+    }
+    # skip the +----+---+ line
+    shift @rows;
+    # get the head row and cut off the start and end |
+    my $head_row = shift @rows;
+    $head_row =~ s/^\s*\|//;
+    $head_row =~ s/\|$//;
+    my @headings = split(/\s+\|\s+/, $head_row);
+    # skip the +----+---+ line
+    shift @rows;
+    # skip the last +----+---+ line
+    pop @rows;
+
+    # now start making the table
+    my @tab_lines = ();
+    my $tag;
+    if ($self->{xhtml})
+    {
+        $tag = $self->get_tag('TABLE', inside_tag => ' border="1" summary=""');
+    }
+    else
+    {
+        $tag = $self->get_tag('TABLE', inside_tag => ' border="1"');
+    }
+    push @tab_lines, "$tag\n";
+    if ($caption)
+    {
+        $caption =~ s/^\s+//;
+        $caption =~ s/\s+$//;
+        $tag     = $self->get_tag('CAPTION');
+        $caption = $tag . $caption;
+        $tag     = $self->close_tag('CAPTION');
+        $caption .= $tag;
+        push @tab_lines, "$caption\n";
+    }
+    # table header
+    my $thead = '';
+    $tag = $self->get_tag('THEAD');
+    $thead .= $tag;
+    $tag = $self->get_tag('TR');
+    $thead .= $tag;
+    foreach my $col (@headings)
+    {
+        $col =~ s/^\s+//;
+        $col =~ s/\s+$//;
+        $tag = $self->get_tag('TH');
+        $thead .= $tag;
+        $thead .= $col;
+        $tag = $self->close_tag('TH');
+        $thead .= $tag;
+    }
+    $tag = $self->close_tag('TR');
+    $thead .= $tag;
+    $tag = $self->close_tag('THEAD');
+    $thead .= $tag;
+    push @tab_lines, "${thead}\n";
+    $tag = $self->get_tag('TBODY');
+    push @tab_lines, "$tag\n";
+
+    # each row
+    foreach my $row (@rows)
+    {
+        # cut off the start and end |
+        $row =~ s/^\s*\|//;
+        $row =~ s/\|$//;
+        my $this_row = '';
+        $tag = $self->get_tag('TR');
+        $this_row .= $tag;
+        my @cols = split(/\|/, $row);
+        foreach my $cell (@cols)
+        {
+            $cell =~ s/^\s+//;
+            $cell =~ s/\s+$//;
+            if ($self->{escape_HTML_chars})
+            {
+                $cell = escape($cell);
+            }
+            if (!$cell)
+            {
+                $cell = ' ';
+            }
+            $tag = $self->get_tag('TD');
+            $this_row .= $tag;
+            $this_row .= $cell;
+            $tag = $self->close_tag('TD');
+            $this_row .= $tag;
+        }
+        $tag = $self->close_tag('TR');
+        $this_row .= $tag;
+        push @tab_lines, "${this_row}\n";
+    }
+
+    # end the table
+    $tag = $self->close_tag('TBODY');
+    push @tab_lines, "$tag\n";
+    $tag = $self->get_tag('TABLE', tag_type => 'end');
+    push @tab_lines, "$tag\n";
+
+    # replace the rows
+    @{$rows_ref} = @tab_lines;
+}    # make_border_table
+
+sub make_delim_table ($%)
+{
+    my $self = shift;
+    my %args = (
+        rows_ref => undef,
+        para_len => 0,
+        @_
+    );
+    my $rows_ref = $args{rows_ref};
+    my $para_len = $args{para_len};
+
+    # a DELIM table can start with an optional table-caption,
+    # then it has at least two rows which start and end and are
+    # punctuated by a non-alphanumeric delimiter.
+    # A DELIM table has no table-header.
+    my @rows    = @{$rows_ref};
+    my $caption = '';
+    if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/)    # possible caption
+    {
+        $caption = shift @rows;
+    }
+    # figure out the delimiter
+    my $delim = '';
+    if ($rows[0] =~ /^\s*([^a-zA-Z0-9])/)
+    {
+        $delim = $1;
+    }
+    else
+    {
+        return 0;
+    }
+
+    # now start making the table
+    my @tab_lines = ();
+    my $tag;
+    if ($self->{xhtml})
+    {
+        $tag = $self->get_tag('TABLE', inside_tag => ' border="1" summary=""');
+    }
+    else
+    {
+        $tag = $self->get_tag('TABLE', inside_tag => ' border="1"');
+    }
+    push @tab_lines, "$tag\n";
+    if ($caption)
+    {
+        $caption =~ s/^\s+//;
+        $caption =~ s/\s+$//;
+        $tag     = $self->get_tag('CAPTION');
+        $caption = $tag . $caption;
+        $tag     = $self->close_tag('CAPTION');
+        $caption .= $tag;
+        push @tab_lines, "$caption\n";
+    }
+
+    # each row
+    foreach my $row (@rows)
+    {
+        # cut off the start and end delimiter
+        $row =~ s/^\s*[${delim}]//;
+        $row =~ s/[${delim}]$//;
+        my $this_row = '';
+        $tag = $self->get_tag('TR');
+        $this_row .= $tag;
+        my @cols = split(/[${delim}]/, $row);
+        foreach my $cell (@cols)
+        {
+            $cell =~ s/^\s+//;
+            $cell =~ s/\s+$//;
+            if ($self->{escape_HTML_chars})
+            {
+                $cell = escape($cell);
+            }
+            if (!$cell)
+            {
+                $cell = ' ';
+            }
+            $tag = $self->get_tag('TD');
+            $this_row .= $tag;
+            $this_row .= $cell;
+            $tag = $self->close_tag('TD');
+            $this_row .= $tag;
+        }
+        $tag = $self->close_tag('TR');
+        $this_row .= $tag;
+        push @tab_lines, "${this_row}\n";
+    }
+
+    # end the table
+    $tag = $self->get_tag('TABLE', tag_type => 'end');
+    push @tab_lines, "$tag\n";
+
+    # replace the rows
+    @{$rows_ref} = @tab_lines;
+}    # make_delim_table
+
+# Returns true if the passed string is considered to be preformatted
+sub is_preformatted ($$)
+{
+    my $self = shift;
+    my $line = shift;
+
+    my $pre_white_min = $self->{preformat_whitespace_min};
+    my $result        = (
+        ($line =~ /\s{$pre_white_min,}\S+/o)    # whitespaces
+          || ($line =~ /\.{$pre_white_min,}\S+/o)
+    );                                          # dots
+    return $result;
+}
+
+# modifies the given string,
+# and returns the front preformatted part
+sub split_end_explicit_preformat ($%)
+{
+    my $self = shift;
+    my %args = (
+        para_ref => undef,
+        @_
+    );
+    my $para_ref = $args{para_ref};
+
+    my $tag      = '';
+    my $pre_str  = '';
+    my $post_str = '';
+    if ($self->{__mode} & $PRE_EXPLICIT)
+    {
+        my $pe_mark = $self->{preformat_end_marker};
+        if (${para_ref} =~ /$pe_mark/io)
+        {
+            ($pre_str, $post_str) = split(/$pe_mark/, ${$para_ref}, 2);
+            if ($self->{escape_HTML_chars})
+            {
+                $pre_str = escape($pre_str);
+            }
+            $tag = $self->close_tag('PRE');
+            $pre_str .= "${tag}\n";
+            $self->{__mode} ^= (($PRE | $PRE_EXPLICIT) & $self->{__mode});
+        }
+        else    # no end -- the whole thing is preformatted
+        {
+            $pre_str = ${$para_ref};
+            if ($self->{escape_HTML_chars})
+            {
+                $pre_str = escape($pre_str);
+            }
+            ${$para_ref} = '';
+        }
+    }
+    return $pre_str;
+}    # split_end_explicit_preformat
+
+sub endpreformat ($%)
+{
+    my $self = shift;
+    my %args = (
+        para_lines_ref  => undef,
+        para_action_ref => undef,
+        ind             => 0,
+        prev_ref        => undef,
+        @_
+    );
+    my $para_lines_ref  = $args{para_lines_ref};
+    my $para_action_ref = $args{para_action_ref};
+    my $ind             = $args{ind};
+    my $prev_ref        = $args{prev_ref};
+
+    my $tag = '';
+    if ($self->{__mode} & $PRE_EXPLICIT)
+    {
+        my $pe_mark = $self->{preformat_end_marker};
+        if ($para_lines_ref->[$ind] =~ /$pe_mark/io)
+        {
+            if ($ind == 0)
+            {
+                $tag = $self->close_tag('PRE');
+                $para_lines_ref->[$ind] = "${tag}\n";
+            }
+            else
+            {
+                $tag = $self->close_tag('PRE');
+                $para_lines_ref->[$ind - 1] .= "${tag}\n";
+                $para_lines_ref->[$ind] = "";
+            }
+            $self->{__mode} ^= (($PRE | $PRE_EXPLICIT) & $self->{__mode});
+            $para_action_ref->[$ind] |= $END;
+        }
+        return;
+    }
+
+    if (
+        !$self->is_preformatted($para_lines_ref->[$ind])
+        && (
+            $self->{endpreformat_trigger_lines} == 1
+            || ($ind + 1 < @{$para_lines_ref}
+                && !$self->is_preformatted($para_lines_ref->[$ind + 1]))
+            || $ind + 1 >= @{$para_lines_ref}    # last line of para
+        )
+      )
+    {
+        if ($ind == 0)
+        {
+            $tag = $self->close_tag('PRE');
+            ${$prev_ref} = "${tag}\n";
+        }
+        else
+        {
+            $tag = $self->close_tag('PRE');
+            $para_lines_ref->[$ind - 1] .= "${tag}\n";
+        }
+        $self->{__mode} ^= ($PRE & $self->{__mode});
+        $para_action_ref->[$ind] |= $END;
+    }
+}    # endpreformat
+
+sub preformat ($%)
+{
+    my $self = shift;
+    my %args = (
+        mode_ref        => undef,
+        line_ref        => undef,
+        line_action_ref => undef,
+        prev_ref        => undef,
+        next_ref        => undef,
+        prev_action_ref => undef,
+        @_
+    );
+    my $mode_ref        = $args{mode_ref};
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+    my $prev_ref        = $args{prev_ref};
+    my $next_ref        = $args{next_ref};
+    my $prev_action_ref = $args{prev_action_ref};
+
+    my $tag = '';
+    if ($self->{use_preformat_marker})
+    {
+        my $pstart = $self->{preformat_start_marker};
+        if (${$line_ref} =~ /$pstart/io)
+        {
+            if (${$prev_ref} =~ s/<P>$//)
+            {
+                pop @{$self->{__tags}};
+            }
+            $tag =
+              $self->get_tag('PRE', inside_tag => " class='quote_explicit'");
+            ${$line_ref} = "${tag}\n";
+            ${$mode_ref}        |= $PRE | $PRE_EXPLICIT;
+            ${$line_action_ref} |= $PRE;
+            return;
+        }
+    }
+
+    if (
+           !(${$line_action_ref} & $MAILQUOTE)
+        && !(${$prev_action_ref} & $MAILQUOTE)
+        && (
+            $self->{preformat_trigger_lines} == 0
+            || (
+                $self->is_preformatted(${$line_ref})
+                && (
+                    $self->{preformat_trigger_lines} == 1
+                    || (defined $next_ref
+                        && $self->is_preformatted(${$next_ref}))
+                )
+            )
+        )
+      )
+    {
+        if (${$prev_ref} =~ s/<P>$//)
+        {
+            pop @{$self->{__tags}};
+        }
+        $tag = $self->get_tag('PRE');
+        ${$line_ref} =~ s/^/${tag}\n/;
+        ${$mode_ref}        |= $PRE;
+        ${$line_action_ref} |= $PRE;
+    }
+}    # preformat
+
+sub make_new_anchor ($$)
+{
+    my $self          = shift;
+    my $heading_level = shift;
+
+    my ($anchor, $i);
+
+    return sprintf("%d", $self->{__non_header_anchor}++) if (!$heading_level);
+
+    $anchor = "section";
+    $self->{__heading_count}->[$heading_level - 1]++;
+
+    # Reset lower order counters
+    for ($i = @{$self->{__heading_count}}; $i > $heading_level; $i--)
+    {
+        $self->{__heading_count}->[$i - 1] = 0;
+    }
+
+    for ($i = 0; $i < $heading_level; $i++)
+    {
+        $self->{__heading_count}->[$i] = 1
+          if !$self->{__heading_count}->[$i];    # In case they skip any
+        $anchor .= sprintf("_%d", $self->{__heading_count}->[$i]);
+    }
+    chomp($anchor);
+    $anchor;
+}    # make_new_anchor
+
+sub anchor_mail ($$)
+{
+    my $self     = shift;
+    my $line_ref = shift;
+
+    if ($self->{make_anchors})
+    {
+        my ($anchor) = $self->make_new_anchor(0);
+        if ($self->{lower_case_tags})
+        {
+            ${$line_ref} =~ s/([^ ]*)/<a name="$anchor">$1<\/a>/;
+        }
+        else
+        {
+            ${$line_ref} =~ s/([^ ]*)/<A NAME="$anchor">$1<\/A>/;
+        }
+    }
+}    # anchor_mail
+
+sub anchor_heading ($$$)
+{
+    my $self     = shift;
+    my $level    = shift;
+    my $line_ref = shift;
+
+    if ($self->{dict_debug} & 8)
+    {
+        print STDERR "anchor_heading: ", ${$line_ref}, "\n";
+    }
+    if ($self->{make_anchors})
+    {
+        my ($anchor) = $self->make_new_anchor($level);
+        if ($self->{lower_case_tags})
+        {
+            ${$line_ref} =~ s/(<h.>)(.*)(<\/h.>)/$1<a name="$anchor">$2<\/a>$3/;
+        }
+        else
+        {
+            ${$line_ref} =~ s/(<H.>)(.*)(<\/H.>)/$1<A NAME="$anchor">$2<\/A>$3/;
+        }
+    }
+    if ($self->{dict_debug} & 8)
+    {
+        print STDERR "anchor_heading(after): ", ${$line_ref}, "\n";
+    }
+}    # anchor_heading
+
+sub heading_level ($$)
+{
+    my $self = shift;
+
+    my ($style) = @_;
+    $self->{__heading_styles}->{$style} = ++$self->{__num_heading_styles}
+      if !$self->{__heading_styles}->{$style};
+    $self->{__heading_styles}->{$style};
+}    # heading_level
+
+sub is_ul_list_line ($%)
+{
+    my $self = shift;
+    my %args = (
+        line => undef,
+        @_
+    );
+    my $line = $args{line};
+
+    my ($prefix, $number, $rawprefix, $term) = $self->listprefix($line);
+    if ($prefix && !$number)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+sub is_heading ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref => undef,
+        next_ref => undef,
+        @_
+    );
+    my $line_ref = $args{line_ref};
+    my $next_ref = $args{next_ref};
+
+    if (   ${$line_ref} !~ /^\s*$/
+        && !$self->is_ul_list_line(line => ${$line_ref})
+        && defined $next_ref
+        && ${$next_ref} =~ /^\s*[-=*.~+]+\s*$/)
+    {
+        my ($hoffset, $heading) = ${$line_ref} =~ /^(\s*)(.+)$/;
+        $hoffset = "" unless defined($hoffset);
+        $heading = "" unless defined($heading);
+        # Unescape chars so we get an accurate length
+        $heading =~ s/&[^;]+;/X/g;
+        my ($uoffset, $underline) = ${$next_ref} =~ /^(\s*)(\S+)\s*$/;
+        $uoffset   = "" unless defined($uoffset);
+        $underline = "" unless defined($underline);
+        my ($lendiff, $offsetdiff);
+        $lendiff = length($heading) - length($underline);
+        $lendiff *= -1 if $lendiff < 0;
+
+        $offsetdiff = length($hoffset) - length($uoffset);
+        $offsetdiff *= -1 if $offsetdiff < 0;
+        if (   ($lendiff <= $self->{underline_length_tolerance})
+            || ($offsetdiff <= $self->{underline_offset_tolerance}))
+        {
+            return 1;
+        }
+    }
+
+    return 0;
+
+}    # is_heading
+
+# make a heading
+# assumes is_heading is true
+sub heading ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref => undef,
+        next_ref => undef,
+        @_
+    );
+    my $line_ref = $args{line_ref};
+    my $next_ref = $args{next_ref};
+
+    my ($hoffset, $heading) = ${$line_ref} =~ /^(\s*)(.+)$/;
+    $hoffset = "" unless defined($hoffset);
+    $heading = "" unless defined($heading);
+    $heading =~ s/&[^;]+;/X/g;    # Unescape chars so we get an accurate length
+    my ($uoffset, $underline) = ${$next_ref} =~ /^(\s*)(\S+)\s*$/;
+    $uoffset   = "" unless defined($uoffset);
+    $underline = "" unless defined($underline);
+
+    $underline = substr($underline, 0, 1);
+
+    # Call it a different style if the heading is in all caps.
+    $underline .= "C" if $self->iscaps(${$line_ref});
+    ${$next_ref} = " ";           # Eat the underline
+    $self->{__heading_level} = $self->heading_level($underline);
+    if ($self->{escape_HTML_chars})
+    {
+        ${$line_ref} = escape(${$line_ref});
+    }
+    $self->tagline("H" . $self->{__heading_level}, $line_ref);
+    $self->anchor_heading($self->{__heading_level}, $line_ref);
+}    # heading
+
+# check if the given line matches a custom heading
+sub is_custom_heading ($%)
+{
+    my $self = shift;
+    my %args = (
+        line => undef,
+        @_
+    );
+    my $line = $args{line};
+
+    foreach my $reg (@{$self->{custom_heading_regexp}})
+    {
+        return 1 if ($line =~ /$reg/);
+    }
+    return 0;
+}    # is_custom_heading
+
+sub custom_heading ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref => undef,
+        @_
+    );
+    my $line_ref = $args{line_ref};
+
+    my $level;
+    my $i = 0;
+    foreach my $reg (@{$self->{custom_heading_regexp}})
+    {
+        if (${$line_ref} =~ /$reg/)
+        {
+            if ($self->{explicit_headings})
+            {
+                $level = $i + 1;
+            }
+            else
+            {
+                $level = $self->heading_level("Cust" . $i);
+            }
+            if ($self->{escape_HTML_chars})
+            {
+                ${$line_ref} = escape(${$line_ref});
+            }
+            $self->tagline("H" . $level, $line_ref);
+            $self->anchor_heading($level, $line_ref);
+            last;
+        }
+        $i++;
+    }
+}    # custom_heading
+
+sub unhyphenate_para ($$)
+{
+    my $self     = shift;
+    my $para_ref = shift;
+
+    # Treating this whole paragraph as one string, look for
+    # 1 - whitespace
+    # 2 - a word (ending in a hyphen, followed by a newline)
+    # 3 - whitespace (starting on the next line)
+    # 4 - a word with its punctuation
+    # Substitute this with
+    # 1-whitespace 2-word 4-word newline 3-whitespace
+    # We preserve the 3-whitespace because we don't want to mess up
+    # our existing indentation.
+    ${$para_ref} =~
+      /(\s*)([^\W\d_]*)\-\n(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/s;
+    ${$para_ref} =~
+s/(\s*)([^\W\d_]*)\-\n(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/$1$2$4\n$3/gs;
+}    # unhyphenate_para
+
+sub tagline ($$$)
+{
+    my $self     = shift;
+    my $tag      = shift;
+    my $line_ref = shift;
+
+    chomp ${$line_ref};    # Drop newline
+    my $tag1 = $self->get_tag($tag);
+    my $tag2 = $self->close_tag($tag);
+    ${$line_ref} =~ s/^\s*(.*)$/${tag1}$1${tag2}\n/;
+}    # tagline
+
+sub iscaps
+{
+    my $self = shift;
+    local ($_) = @_;
+
+    my $min_caps_len = $self->{min_caps_length};
+
+    # This is ugly, but I don't know a better way to do it.
+    # (And, yes, I could use the literal characters instead of the
+    # numeric codes, but this keeps the script 8-bit clean, which will
+    # save someone a big headache when they transfer via ASCII ftp.
+/^[^a-z\341\343\344\352\353\354\363\370\337\373\375\342\345\347\350\355\357\364\365\376\371\377\340\346\351\360\356\361\362\366\372\374<]*[A-Z\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\330\331\332\333\334\335\336]{$min_caps_len,}[^a-z\341\343\344\352\353\354\363\370\337\373\375\342\345\347\350\355\357\364\365\376\371\377\340\346\351\360\356\361\362\366\372\374<]*$/;
+}    # iscaps
+
+sub caps
+{
+    my $self = shift;
+    my %args = (
+        line_ref        => undef,
+        line_action_ref => undef,
+        @_
+    );
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+
+    if (   $self->{caps_tag}
+        && $self->iscaps(${$line_ref}))
+    {
+        $self->tagline($self->{caps_tag}, $line_ref);
+        ${$line_action_ref} |= $CAPS;
+    }
+}    # caps
+
+sub do_delim
+{
+    my $self = shift;
+    my %args = (
+        line_ref        => undef,
+        line_action_ref => undef,
+        delim           => '*',
+        tag             => 'STRONG',
+        @_
+    );
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+    my $delim           = $args{delim};
+    my $tag             = $args{tag};
+
+    if ($delim eq '#')    # special treatment of # for the #num case
+    {
+        ${$line_ref} =~
+s/#([^0-9#](?![^#]*(?:<li>|<LI>|<P>|<p>))[^#]*[^# \t\n])#/<${tag}>$1<\/${tag}>/gs;
+        ${$line_ref} =~ s/\B#([a-zA-Z])#\B/<${tag}>$1<\/${tag}>/gs;
+    }
+    elsif ($delim eq '^')
+    {
+        ${$line_ref} =~
+s/\^((?![^^]*(?:<li>|<LI>|<p>|<P>))(\w|["'<>])[^^]*)\^/<${tag}>$1<\/${tag}>/gs;
+        ${$line_ref} =~ s/\B\^([a-zA-Z])\^\B/<${tag}>$1<\/${tag}>/gs;
+    }
+    elsif (length($delim) eq 1)    # one-character, general
+    {
+        ${$line_ref} =~
+s/(?<![${delim}])[${delim}](?![^${delim}]*(?:<li>|<LI>|<p>|<P>))((\w|["'<>])[^${delim}]*)[${delim}]/<${tag}>$1<\/${tag}>/gs;
+        ${$line_ref} =~
+          s/\B[${delim}]([a-zA-Z])[${delim}]\B/<${tag}>$1<\/${tag}>/gs;
+    }
+    else
+    {
+        ${$line_ref} =~
+s/(?<!${delim})${delim}((\w|["'])(\w|[-\s\.;:,!?"'])*[^\s])${delim}/<${tag}>$1<\/${tag}>/gs;
+        ${$line_ref} =~ s/${delim}]([a-zA-Z])${delim}/<${tag}>$1<\/${tag}>/gs;
+    }
+}    # do_delim
+
+# Convert very simple globs to regexps
+sub glob2regexp
+{
+    my ($glob) = @_;
+
+    # Escape funky chars
+    $glob =~ s/[^\w\[\]\*\?\|\\]/\\$&/g;
+    my ($regexp, $i, $len, $escaped) = ("", 0, length($glob), 0);
+
+    for (; $i < $len; $i++)
+    {
+        my $char = substr($glob, $i, 1);
+        if ($escaped)
+        {
+            $escaped = 0;
+            $regexp .= $char;
+            next;
+        }
+        if ($char eq "\\")
+        {
+            $escaped = 1;
+            next;
+            $regexp .= $char;
+        }
+        if ($char eq "?")
+        {
+            $regexp .= ".";
+            next;
+        }
+        if ($char eq "*")
+        {
+            $regexp .= ".*";
+            next;
+        }
+        $regexp .= $char;    # Normal character
+    }
+    join('', "\\b", $regexp, "\\b");
+}    # glob2regexp
+
+sub add_regexp_to_links_table ($$$$)
+{
+    my $self = shift;
+    my ($key, $URL, $switches) = @_;
+
+    # No sense adding a second one if it's already in there.
+    # It would never get used.
+    if (!$self->{__links_table}->{$key})
+    {
+
+        # Keep track of the order they were added so we can
+        # look for matches in the same order
+        push(@{$self->{__links_table_order}}, ($key));
+
+        $self->{__links_table}->{$key}        = $URL;      # Put it in The Table
+        $self->{__links_switch_table}->{$key} = $switches;
+        my $ind = @{$self->{__links_table_order}} - 1;
+        print STDERR " (", $ind,
+          ")\tKEY: $key\n\tVALUE: $URL\n\tSWITCHES: $switches\n\n"
+          if ($self->{dict_debug} & 1);
+    }
+    else
+    {
+        if ($self->{dict_debug} & 1)
+        {
+            print STDERR " Skipping entry.  Key already in table.\n";
+            print STDERR "\tKEY: $key\n\tVALUE: $URL\n\n";
+        }
+    }
+}    # add_regexp_to_links_table
+
+sub add_literal_to_links_table ($$$$)
+{
+    my $self = shift;
+    my ($key, $URL, $switches) = @_;
+
+    $key =~ s/(\W)/\\$1/g;    # Escape non-alphanumeric chars
+    $key = "\\b$key\\b";      # Make a regexp out of it
+    $self->add_regexp_to_links_table($key, $URL, $switches);
+}    # add_literal_to_links_table
+
+sub add_glob_to_links_table ($$$$)
+{
+    my $self = shift;
+    my ($key, $URL, $switches) = @_;
+
+    $self->add_regexp_to_links_table(glob2regexp($key), $URL, $switches);
+}    # add_glob_to_links_table
+
+# Parse the dictionary file.
+# (see also load_dictionary_links, for things that were stripped)
+sub parse_dict ($$$)
+{
+    my $self = shift;
+
+    my ($dictfile, $dict) = @_;
+
+    print STDERR "Parsing dictionary file $dictfile\n"
+      if ($self->{dict_debug} & 1);
+
+    if ($dict =~ /->\s*->/)
+    {
+        my $message = "Two consecutive '->'s found in $dictfile\n";
+        my $near;
+
+        # Print out any useful context so they can find it.
+        ($near) = $dict =~ /([\S ]*\s*->\s*->\s*\S*)/;
+        $message .= "\n$near\n" if $near =~ /\S/;
+        die $message;
+    }
+
+    my ($key, $URL, $switches, $options);
+    while ($dict =~ /\s*(.+)\s+\-+([iehos]+\-+)?\>\s*(.*\S+)\s*\n/ig)
+    {
+        $key      = $1;
+        $options  = $2;
+        $options  = "" unless defined($options);
+        $URL      = $3;
+        $switches = 0;
+        # Case insensitivity
+        $switches += $LINK_NOCASE if $options =~ /i/i;
+        # Evaluate as Perl code
+        $switches += $LINK_EVAL if $options =~ /e/i;
+        # provides HTML, not just URL
+        $switches += $LINK_HTML if $options =~ /h/i;
+        # Only do this link once
+        $switches += $LINK_ONCE if $options =~ /o/i;
+        # Only do this link once per section
+        $switches += $LINK_SECT_ONCE if $options =~ /s/i;
+
+        $key =~ s/\s*$//;    # Chop trailing whitespace
+
+        if ($key =~ m|^/|)   # Regexp
+        {
+            $key = substr($key, 1);
+            $key =~ s|/$||;    # Allow them to forget the closing /
+            $self->add_regexp_to_links_table($key, $URL, $switches);
+        }
+        elsif ($key =~ /^\|/)    # alternate regexp format
+        {
+            $key = substr($key, 1);
+            $key =~ s/\|$//;      # Allow them to forget the closing |
+            $key =~ s|/|\\/|g;    # Escape all slashes
+            $self->add_regexp_to_links_table($key, $URL, $switches);
+        }
+        elsif ($key =~ /\"/)
+        {
+            $key = substr($key, 1);
+            $key =~ s/\"$//;      # Allow them to forget the closing "
+            $self->add_literal_to_links_table($key, $URL, $switches);
+        }
+        else
+        {
+            $self->add_glob_to_links_table($key, $URL, $switches);
+        }
+    }
+
+}    # parse_dict
+
+sub setup_dict_checking ($)
+{
+    my $self = shift;
+
+    # now create the replace funcs and precomile the regexes
+    my ($URL, $switches, $options, $tag1, $tag2);
+    my ($href, $r_sw);
+    my @subs;
+    my $i = 0;
+    foreach my $pattern (@{$self->{__links_table_order}})
+    {
+        $switches = $self->{__links_switch_table}->{$pattern};
+
+        $href = $self->{__links_table}->{$pattern};
+
+        if (!($switches & $LINK_HTML))
+        {
+            $href =~ s#/#\\/#g;
+            $href = (
+                $self->{lower_case_tags}
+                ? join('', '<a href="', $href, '">$&<\\/a>')
+                : join('', '<A HREF="', $href, '">$&<\\/A>')
+            );
+        }
+        else
+        {
+            # change the uppercase tags to lower case
+            if ($self->{lower_case_tags})
+            {
+                $href =~ s#(</)([A-Z]*)(>)#${1}\L${2}${3}#g;
+                $href =~ s/(<)([A-Z]*)(>)/${1}\L${2}${3}/g;
+                # and the anchors
+                $href =~ s/(<)(A\s*HREF)([^>]*>)/$1\L$2$3/g;
+            }
+            $href =~ s#/#\\/#g;
+        }
+
+        $r_sw = "s";    # Options for replacing
+        $r_sw .= "i" if ($switches & $LINK_NOCASE);
+        $r_sw .= "e" if ($switches & $LINK_EVAL);
+
+        # Generate code for replacements.
+        # Create an anonymous subroutine for each replacement,
+        # and store its reference in an array.
+        # We need to do an "eval" to create these because we need to
+        # be able to treat the *contents* of the $href variable
+        # as if it were perl code, because sometimes the $href
+        # contains things which need to be evaluated, such as $& or $1,
+        # not just those cases where we have a "e" switch.
+        my $code = <<EOT;
+\$self->{__repl_code}->[$i] =
+sub {
+my \$al = shift;
+\$al =~ s/$pattern/$href/$r_sw;
+return \$al;
+};
+EOT
+        print STDERR $code if ($self->{dict_debug} & 2);
+        push @subs, $code;
+
+        # compile searching pattern
+        if ($switches & $LINK_NOCASE)    # i
+        {
+            $self->{__search_patterns}->[$i] = qr/$pattern/si;
+        }
+        else
+        {
+            $self->{__search_patterns}->[$i] = qr/$pattern/s;
+        }
+        $i++;
+    }
+    # now eval the replacements code string
+    my $codes = join('', @subs);
+    eval "$codes";
+}    # setup_dict_checking
+
+sub in_link_context ($$$)
+{
+    my $self = shift;
+    my ($match, $before) = @_;
+    return 1 if $match =~ m@</?A>@i;    # No links allowed inside match
+
+    my ($final_open, $final_close);
+    if ($self->{lower_case_tags})
+    {
+        $final_open  = rindex($before, "<a ") - $[;
+        $final_close = rindex($before, "</a>") - $[;
+    }
+    else
+    {
+        $final_open  = rindex($before, "<A ") - $[;
+        $final_close = rindex($before, "</A>") - $[;
+    }
+
+    return 1 if ($final_open >= 0)    # Link opened
+      && (
+        ($final_close < 0)            # and not closed    or
+        || ($final_open > $final_close)
+      );                              # one opened after last close
+
+    # Now check to see if we're inside a tag, matching a tag name,
+    # or attribute name or value
+    $final_open  = rindex($before, "<") - $[;
+    $final_close = rindex($before, ">") - $[;
+    ($final_open >= 0)                # Tag opened
+      && (
+        ($final_close < 0)            # and not closed    or
+        || ($final_open > $final_close)
+      );                              # one opened after last close
+}    # in_link_context
+
+# apply links and formatting to this paragraph
+sub apply_links ($%)
+{
+    my $self = shift;
+    my %args = (
+        para_ref        => undef,
+        para_action_ref => undef,
+        @_
+    );
+    my $para_ref        = $args{para_ref};
+    my $para_action_ref = $args{para_action_ref};
+
+    if ($self->{make_links}
+        && @{$self->{__links_table_order}})
+    {
+        $self->check_dictionary_links(
+            line_ref        => $para_ref,
+            line_action_ref => $para_action_ref
+        );
+    }
+    if ($self->{bold_delimiter})
+    {
+        my $tag = ($self->{lower_case_tags} ? 'strong' : 'STRONG');
+        $self->do_delim(
+            line_ref        => $para_ref,
+            line_action_ref => $para_action_ref,
+            delim           => $self->{bold_delimiter},
+            tag             => $tag
+        );
+    }
+    if ($self->{italic_delimiter})
+    {
+        my $tag = ($self->{lower_case_tags} ? 'em' : 'EM');
+        $self->do_delim(
+            line_ref        => $para_ref,
+            line_action_ref => $para_action_ref,
+            delim           => $self->{italic_delimiter},
+            tag             => $tag
+        );
+    }
+
+}    # apply_links
+
+# Check (and alter if need be) the bits in this line matching
+# the patterns in the link dictionary.
+sub check_dictionary_links ($%)
+{
+    my $self = shift;
+    my %args = (
+        line_ref        => undef,
+        line_action_ref => undef,
+        @_
+    );
+    my $line_ref        = $args{line_ref};
+    my $line_action_ref = $args{line_action_ref};
+
+    my ($switches, $options, $repl_func);
+    my ($linkme, $line_with_links);
+
+    # for each pattern, check and alter the line
+    my $i = 0;
+    foreach my $pattern (@{$self->{__links_table_order}})
+    {
+        $switches = $self->{__links_switch_table}->{$pattern};
+
+        # check the pattern
+        if ($switches & $LINK_ONCE)    # Do link only once
+        {
+            $line_with_links = '';
+            if (!$self->{__done_with_link}->[$i]
+                && ${$line_ref} =~ $self->{__search_patterns}->[$i])
+            {
+                $self->{__done_with_link}->[$i] = 1;
+                $line_with_links .= $`;
+                $linkme = $&;
+
+                ${$line_ref} = $';
+                if (!$self->in_link_context($linkme, $line_with_links))
+                {
+                    print STDERR "Link rule $i matches $linkme\n"
+                      if ($self->{dict_debug} & 4);
+
+                    # call the special subroutine already created to do
+                    # this replacement
+                    $repl_func = $self->{__repl_code}->[$i];
+                    $linkme    = &$repl_func($linkme);
+                }
+                $line_with_links .= $linkme;
+            }
+            ${$line_ref} = $line_with_links . ${$line_ref};
+        }
+        elsif ($switches & $LINK_SECT_ONCE)    # Do link only once per section
+        {
+            $line_with_links = '';
+            if (!$self->{__done_with_sect_link}->[$i]
+                && ${$line_ref} =~ $self->{__search_patterns}->[$i])
+            {
+                $self->{__done_with_sect_link}->[$i] = 1;
+                $line_with_links .= $`;
+                $linkme = $&;
+
+                ${$line_ref} = $';
+                if (!$self->in_link_context($linkme, $line_with_links))
+                {
+                    print STDERR "Link rule $i matches $linkme\n"
+                      if ($self->{dict_debug} & 4);
+
+                    # call the special subroutine already created to do
+                    # this replacement
+                    $repl_func = $self->{__repl_code}->[$i];
+                    $linkme    = &$repl_func($linkme);
+                }
+                $line_with_links .= $linkme;
+            }
+            ${$line_ref} = $line_with_links . ${$line_ref};
+        }
+        else
+        {
+            $line_with_links = '';
+            while (${$line_ref} =~ $self->{__search_patterns}->[$i])
+            {
+                $line_with_links .= $`;
+                $linkme = $&;
+
+                ${$line_ref} = $';
+                if (!$self->in_link_context($linkme, $line_with_links))
+                {
+                    print STDERR "Link rule $i matches $linkme\n"
+                      if ($self->{dict_debug} & 4);
+
+                    # call the special subroutine already created to do
+                    # this replacement
+                    $repl_func = $self->{__repl_code}->[$i];
+                    $linkme    = &$repl_func($linkme);
+                }
+                $line_with_links .= $linkme;
+            }
+            ${$line_ref} = $line_with_links . ${$line_ref};
+        }
+        $i++;
+    }
+    ${$line_action_ref} |= $LINK;
+}    # check_dictionary_links
+
+sub load_dictionary_links ($)
+{
+    my $self = shift;
+
+    @{$self->{__links_table_order}} = ();
+    %{$self->{__links_table}}       = ();
+
+    my $dict;
+    foreach $dict (@{$self->{links_dictionaries}})
+    {
+        next unless $dict;
+        open(DICT, "$dict") || die "Can't open Dictionary file $dict\n";
+
+        my @lines = ();
+        while (<DICT>)
+        {
+            # skip lines that start with '#'
+            next if /^\#/;
+            # skip lines that end with unescaped ':'
+            next if /^.*[^\\]:\s*$/;
+            push @lines, $_;
+        }
+        close(DICT);
+        my $contents = join('', @lines);
+        $self->parse_dict($dict, $contents);
+    }
+    # last of all, do the system dictionary, already read in from DATA
+    if ($self->{__global_links_data})
+    {
+        $self->parse_dict("DATA", $self->{__global_links_data});
+    }
+
+    $self->setup_dict_checking();
+}    # load_dictionary_links
+
+# do_file_start
+#    extra stuff needed for the beginning
+# Args:
+#   $self
+#   $para
+# Return:
+#   processed $para string
+sub do_file_start ($$$)
+{
+    my $self      = shift;
+    my $outhandle = shift;
+    my $para      = shift;
+
+    if (!$self->{extract})
+    {
+        my @para_lines = split(/\n/, $para);
+        my $first_line = $para_lines[0];
+
+        if ($self->{doctype})
+        {
+            if ($self->{xhtml})
+            {
+                print $outhandle
+'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
+                  "\n";
+                print $outhandle
+                  '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
+                  "\n";
+            }
+            else
+            {
+                print $outhandle '<!DOCTYPE HTML PUBLIC "', $self->{doctype},
+                  "\">\n";
+            }
+        }
+        print $outhandle $self->get_tag('HTML'), "\n";
+        print $outhandle $self->get_tag('HEAD'), "\n";
+
+        # if --titlefirst is set and --title isn't, use the first line
+        # as the title.
+        if ($self->{titlefirst} && !$self->{title})
+        {
+            my ($tit) = $first_line =~ /^ *(.*)/;    # grab first line
+            $tit =~ s/ *$//;                         # strip trailing whitespace
+            $tit = escape($tit) if $self->{escape_HTML_chars};
+            $self->{'title'} = $tit;
+        }
+        if (!$self->{title})
+        {
+            $self->{'title'} = "";
+        }
+        print $outhandle $self->get_tag('TITLE'), $self->{title},
+          $self->close_tag('TITLE'), "\n";
+
+        if ($self->{append_head})
+        {
+            open(APPEND, $self->{append_head})
+              || die "Failed to open ", $self->{append_head}, "\n";
+            while (<APPEND>)
+            {
+                print $outhandle $_;
+            }
+            close(APPEND);
+        }
+
+        if ($self->{lower_case_tags})
+        {
+            print $outhandle $self->get_tag(
+                'META',
+                tag_type   => 'empty',
+                inside_tag => " name=\"generator\" content=\"$PROG v$VERSION\""
+              ),
+              "\n";
+        }
+        else
+        {
+            print $outhandle $self->get_tag(
+                'META',
+                tag_type   => 'empty',
+                inside_tag => " NAME=\"generator\" CONTENT=\"$PROG v$VERSION\""
+              ),
+              "\n";
+        }
+        if ($self->{style_url})
+        {
+            my $style_url = $self->{style_url};
+            if ($self->{lower_case_tags})
+            {
+                print $outhandle $self->get_tag(
+                    'LINK',
+                    tag_type   => 'empty',
+                    inside_tag =>
+" rel=\"stylesheet\" type=\"text/css\" href=\"$style_url\""
+                  ),
+                  "\n";
+            }
+            else
+            {
+                print $outhandle $self->get_tag(
+                    'LINK',
+                    tag_type   => 'empty',
+                    inside_tag =>
+" REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"$style_url\""
+                  ),
+                  "\n";
+            }
+        }
+        print $outhandle $self->close_tag('HEAD'), "\n";
+        if ($self->{body_deco})
+        {
+            print $outhandle $self->get_tag('BODY',
+                inside_tag => $self->{body_deco}), "\n";
+        }
+        else
+        {
+            print $outhandle $self->get_tag('BODY'), "\n";
+        }
+    }
+
+    if ($self->{prepend_file})
+    {
+        if (-r $self->{prepend_file})
+        {
+            open(PREPEND, $self->{prepend_file});
+            while (<PREPEND>)
+            {
+                print $outhandle $_;
+            }
+            close(PREPEND);
+        }
+        else
+        {
+            print STDERR "Can't find or read file ", $self->{prepend_file},
+              " to prepend.\n";
+        }
+    }
+}    # do_file_start
+
+# do_init_call
+# certain things, like reading link dictionaries, need to be
+# done once
+sub do_init_call ($)
+{
+    my $self = shift;
+
+    if (!$self->{__call_init_done})
+    {
+        push(@{$self->{links_dictionaries}}, ($self->{default_link_dict}))
+          if ($self->{make_links} && (-f $self->{default_link_dict}));
+        $self->deal_with_options();
+        if ($self->{make_links})
+        {
+            $self->load_dictionary_links();
+        }
+
+        # various initializations
+        $self->{__non_header_anchor} = 0;
+        $self->{__mode}              = 0;
+        $self->{__listnum}           = 0;
+        $self->{__list_nice_indent}  = '';
+        $self->{__list_indent}       = [];
+        $self->{__tags}              = [];
+
+        $self->{__call_init_done} = 1;
+    }
+}    # do_init_call
+
+=head1 FILE FORMATS
+
+There are two files which are used which can affect the outcome of the
+conversion.  One is the link dictionary, which contains patterns (of how
+to recognise http links and other things) and how to convert them. The
+other is, naturally, the format of the input file itself.
+
+=head2 Link Dictionary
+
+A link dictionary file contains patterns to match, and what to convert
+them to.  It is called a "link" dictionary because it was intended to be
+something which defined what a href link was, but it can be used for
+more than that.  However, if you wish to define your own links, it is
+strongly advised to read up on regular expressions (regexes) because
+this relies heavily on them.
+
+The file consists of comments (which are lines starting with #)
+and blank lines, and link entries.
+Each entry consists of a regular expression, a -> separator (with
+optional flags), and a link "result".
+
+In the simplest case, with no flags, the regular expression
+defines the pattern to look for, and the result says what part
+of the regular expression is the actual link, and the link which
+is generated has the href as the link, and the whole matched pattern
+as the visible part of the link.  The first character of the regular
+expression is taken to be the separator for the regex, so one
+could either use the traditional / separator, or something else
+such as | (which can be helpful with URLs which are full of / characters).
+
+So, for example, an ftp URL might be defined as:
+
+    |ftp:[\w/\.:+\-]+|      -> $&
+
+This takes the whole pattern as the href, and the resultant link
+has the same thing in the href as in the contents of the anchor.
+
+But sometimes the href isn't the whole pattern.
+
+    /<URL:\s*(\S+?)\s*>/ --> $1
+
+With the above regex, a () grouping marks the first subexpression,
+which is represented as $1 (rather than $& the whole expression).
+This entry matches a URL which was marked explicity as a URL
+with the pattern <URL:foo>  (note the < is shown as the
+entity, not the actual character.  This is because by the
+time the links dictionary is checked, all such things have
+already been converted to their HTML entity forms, unless, of course,
+the escape_HTML_chars option was turned off)
+This would give us a link in the form
+<A HREF="foo"><URL:foo></A>
+
+B<The h flag>
+
+However, if we want more control over the way the link is constructed,
+we can construct it ourself.  If one gives the h flag, then the
+"result" part of the entry is taken not to contain the href part of
+the link, but the whole link.
+
+For example, the entry:
+
+    /<URL:\s*(\S+?)\s*>/ -h-> <A HREF="$1">$1</A>
+
+will take <URL:foo> and give us <A HREF="foo">foo</A>
+
+However, this is a very powerful mechanism, because it
+can be used to construct custom tags which aren't links at all.
+For example, to flag *italicised words* the following
+entry will surround the words with EM tags.
+
+    /\B\*([a-z][a-z -]*[a-z])\*\B/ -hi-> <EM>$1</EM>
+
+B<The i flag>
+
+This turns on ignore case in the pattern matching.
+
+B<The e flag>
+
+This turns on execute in the pattern substitution.  This really
+only makes sense if h is turned on too.  In that case, the "result"
+part of the entry is taken as perl code to be executed, and the
+result of that code is what replaces the pattern.
+
+B<The o flag>
+
+This marks the entry as a once-only link.  This will convert the
+first instance of a matching pattern, and ignore any others
+further on.
+
+For example, the following pattern will take the first mention
+of HTML::TextToHTML and convert it to a link to the module's home page.
+
+    "HTML::TextToHTML"  -io-> http://www.katspace.com/tools/text_to_html/
+
+=head2 Input File Format
+
+For the most part, this module tries to use intuitive conventions for
+determining the structure of the text input.  Unordered lists are
+marked by bullets; ordered lists are marked by numbers or letters;
+in either case, an increase in indentation marks a sub-list contained
+in the outer list.
+
+Headers (apart from custom headers) are distinguished by "underlines"
+underneath them; headers in all-capitals are distinguished from
+those in mixed case.  All headers, both normal and custom headers,
+are expected to start at the first line in a "paragraph".
+
+In other words, the following is a header:
+
+    I am Head Man
+    -------------
+
+But the following does not have a header:
+
+    I am not a head Man, man
+    I am Head Man
+    -------------
+
+Tables require a more rigid convention.  A table must be marked as a
+separate paragraph, that is, it must be surrounded by blank lines.
+Tables come in different types.  For a table to be parsed, its
+--table_type option must be on, and the --make_tables option must be true.
+
+B<ALIGN Table Type>
+
+Columns must be separated by two or more spaces (this prevents
+accidental incorrect recognition of a paragraph where interword spaces
+happen to line up).  If there are two or more rows in a paragraph and
+all rows share the same set of (two or more) columns, the paragraph is
+assumed to be a table.  For example
+
+    -e  File exists.
+    -z  File has zero size.
+    -s  File has nonzero size (returns size).
+
+becomes
+
+    <TABLE>
+    <TR><TD>-e</TD><TD>File exists.</TD></TR>
+    <TR><TD>-z</TD><TD>File has zero size.</TD></TR>
+    <TR><TD>-s</TD><TD>File has nonzero size (returns size).</TD></TR>
+    </TABLE>
+
+This guesses for each column whether it is intended to be left,
+centre or right aligned.
+
+B<BORDER Table Type>
+
+This table type has nice borders around it, and will be rendered
+with a border, like so:
+
+    +---------+---------+
+    | Column1 | Column2 |
+    +---------+---------+
+    | val1    | val2    |
+    | val3    | val3    |
+    +---------+---------+
+
+The above becomes
+
+    <TABLE border="1">
+    <THEAD><TR><TH>Column1</TH><TH>Column2</TH></TR></THEAD>
+    <TBODY>
+    <TR><TD>val1</TD><TD>val2</TD></TR>
+    <TR><TD>val3</TD><TD>val3</TD></TR>
+    </TBODY>
+    </TABLE>
+
+It can also have an optional caption at the start.
+
+         My Caption
+    +---------+---------+
+    | Column1 | Column2 |
+    +---------+---------+
+    | val1    | val2    |
+    | val3    | val3    |
+    +---------+---------+
+
+B<PGSQL Table Type>
+
+This format of table is what one gets from the output of a Postgresql
+query.
+
+     Column1 | Column2
+    ---------+---------
+     val1    | val2
+     val3    | val3
+    (2 rows)
+
+This can also have an optional caption at the start.
+This table is also rendered with a border and table-headers like
+the BORDER type.
+
+B<DELIM Table Type>
+
+This table type is delimited by non-alphanumeric characters, and has to
+have at least two rows and two columns before it's recognised as a table.
+
+This one is delimited by the '| character:
+
+    | val1  | val2  |
+    | val3  | val3  |
+
+But one can use almost any suitable character such as : # $ % + and so on.
+This is clever enough to figure out what you are using as the delimiter
+if you have your data set up like a table.  Note that the line has to
+both begin and end with the delimiter, as well as using it to separate
+values.
+
+This can also have an optional caption at the start.
+
+=head1 EXAMPLES
+
+    use HTML::TextToHTML;
+ 
+=head2 Create a new object
+
+    my $conv = new HTML::TextToHTML();
+
+    my $conv = new HTML::TextToHTML(title=>"Wonderful Things",
+			    default_link_dict=>$my_link_file,
+      );
+
+=head2 Add further arguments
+
+    $conv->args(short_line_length=>60,
+	       preformat_trigger_lines=>4,
+	       caps_tag=>"strong",
+      );
+
+=head2 Convert a file
+
+    $conv->txt2html(infile=>[$text_file],
+                     outfile=>$html_file,
+		     title=>"Wonderful Things",
+		     mail=>1
+      );
+
+=head2 Make a pipleline
+
+    open(IN, "ls |") or die "could not open!";
+    $conv->txt2html(inhandle=>[\*IN],
+                     outfile=>'-',
+      );
+
+=head1 NOTES
+
+=over
+
+=item *
+
+One cannot use "CLEAR" as a value for the cumulative arguments.
+
+=item *
+
+If the underline used to mark a header is off by more than 1, then 
+that part of the text will not be picked up as a header unless you
+change the value of --underline_length_tolerance and/or
+--underline_offset_tolerance.  People tend to forget this.
+
+=back
+
+=head1 BUGS
+
+Tell me about them.
+
+=head1 SEE ALSO
+
+perl
+L<txt2html>.
+Data::Dumper
+
+=head1 AUTHOR
+
+    Kathryn Andersen (RUBYKAT)
+    perlkat AT katspace dot com
+    http//www.katspace.com/
+
+based on txt2html by Seth Golub
+
+=head1 COPYRIGHT AND LICENCE
+
+Original txt2html script copyright (c) 1994-2000 Seth Golub <seth AT aigeek.com>
+
+Copyright (c) 2002-2005 by Kathryn Andersen
+
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
+#------------------------------------------------------------------------
+1;
+__DATA__
+#
+# Global links dictionary file for HTML::TextToHTML
+# http://www.katspace.com/tools/text_to_html
+# http://txt2html.sourceforge.net/
+# based on links dictionary for Seth Golub's txt2html
+# http://www.aigeek.com/txt2html/
+#
+# This dictionary contains some patterns for converting obvious URLs,
+# ftp sites, hostnames, email addresses and the like to hrefs.
+#
+# Original adapted from the html.pl package by Oscar Nierstrasz in
+# the Software Archive of the Software Composition Group
+# http://iamwww.unibe.ch/~scg/Src/
+#
+
+# Some people even like to mark the URL label explicitly <URL:foo:label>
+/<URL:([-\w\.\/:~_\@]+):([a-zA-Z0-9'() ]+)>/ -h-> <A HREF="$1">$2</A>
+
+# Some people like to mark URLs explicitly <URL:foo>
+/<URL:\s*(\S+?)\s*>/ -h-> <A HREF="$1">$1</A>
+
+#  <http://site>
+/<(http:\S+?)\s*>/ -h-> <<A HREF="$1">$1</A>>
+
+# Urls: <service>:<rest-of-url>
+
+|snews:[\w\.]+|        -> $&
+|news:[\w\.]+|         -> $&
+|nntp:[\w/\.:+\-]+|    -> $&
+|http:[\w/\.:\@+\-~\%#?=&;,]+[\w/]|  -> $&
+|shttp:[\w/\.:+\-~\%#?=&;,]+| -> $&
+|https:[\w/\.:+\-~\%#?=&;,]+| -> $&
+|file:[\w/\.:+\-]+|     -> $&
+|ftp:[\w/\.:+\-]+|      -> $&
+|wais:[\w/\.:+\-]+|     -> $&
+|gopher:[\w/\.:+\-]+|   -> $&
+|telnet:[\w/\@\.:+\-]+|   -> $&
+
+
+# catch some newsgroups to avoid confusion with sites:
+|([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)|  -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| 
+          -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
+|([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
+
+# FTP locations (with directory):
+# anonymous@<site>:<path>
+|(anonymous\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
+  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3
+
+# ftp@<site>:<path>
+|(ftp\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
+  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3
+
+# Email address
+|[a-zA-Z0-9_\+\-\.]+\@([a-zA-Z0-9][\w\.+\-]+\.[a-zA-Z]{2,})|
+  -> mailto:$&
+
+# <site>:<path>
+|([^\w\-/\.:\@>])([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
+  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3
+
+# NB: don't confuse an http server with a port number for
+# an FTP location!
+# internet number version: <internet-num>:<path>
+|([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)|
+  -h-> $1<A HREF="ftp://$2/$3">$2:$3</A>
+
+# telnet <site> <port>
+|telnet ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})\s+(\d{2,4})|
+  -h-> telnet <A HREF="telnet://$1:$3/">$1 $3</A>
+
+# ftp <site>
+|ftp ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})|
+  -h-> ftp <A HREF="ftp://$1/">$1</A>
+
+# host with "ftp" in the machine name
+|\b([a-zA-Z][\w])*ftp[\w]*(\.[\w+\-]+){2,}| -h-> ftp <A HREF="ftp://$&/">$&</A>
+
+# ftp.foo.net/blah/
+|ftp(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> ftp://$&
+
+# www.thehouse.org/txt2html/
+|www(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> http://$&
+
+# host with "www" in the machine name
+|\b([a-zA-Z][\w])*www[\w]*(\.[\w+\-]+){2,}| -> http://$&/
+
+# <site> <port>
+|([a-zA-Z][\w+\-]+\.[\w+\-]+\.[a-zA-Z]{2,})\s+(\d{2,4})|
+  -h-> <A HREF="telnet://$1:$2/">$1 $2</A>
+
+# just internet numbers with port:
+|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})|
+  -h-> $1<A HREF="telnet://$2:$3">$2 $3</A>
+
+# just internet numbers:
+|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})|
+  -h-> $1<A HREF="telnet://$2">$2</A>
+
+# RFCs
+/RFC ?(\d+)/ -i-> http://www.cis.ohio-state.edu/rfc/rfc$1.txt
+
+# This would turn "f^H_o^H_o^H_" into "<U>foo</U>".  Gross, isn't it?
+# Thanks to Mark O'Dell <emark@cns.caltech.edu> for fixing this. 
+#
+# /(.\\010_)+/ -he-> $tmp = $&;$tmp =~ s@\010_@@g;"<U>$tmp</U>"
+# /(_\\010.)+/ -he-> $tmp = $&;$tmp =~ s@_\010@@g;"<U>$tmp</U>"
+# /(.\^H_)+/ -he-> $tmp = $&;$tmp =~ s@\^H_@@g;"<U>$tmp</U>"
+# /(_\^H.)+/ -he-> $tmp = $&;$tmp =~ s@_\^H@@g;"<U>$tmp</U>"
+
+# Mark _underlined stuff_ as <U>underlined stuff</U>
+# Don't mistake variable names for underlines, and
+# take account of possible trailing punctuation
+/([ \t\n])_([a-z][a-z0-9 -]*[a-z])_([ \t\n\.;:,\!\?])/ -hi-> $1<U>$2</U>$3
+
+# Seth and his amazing conversion program    :-)
+
+"Seth Golub"  -io-> http://www.aigeek.com/
+"txt2html"    -io-> http://txt2html.sourceforge.net/
+
+# Kathryn and her amazing modules 8-)
+"Kathryn Andersen"  -io-> http://www.katspace.com/
+"HTML::TextToHTML"  -io-> http://www.katspace.com/tools/text_to_html/
+"hypertoc"          -io-> http://www.katspace.com/tools/hypertoc/
+"HTML::GenToc"      -io-> http://www.katspace.com/tools/hypertoc/
+
+# End of global dictionary
+
-- 
cgit v1.2.3