#!/usr/bin/perl 
#
# this tool parses the text version of the TIGER/Line technical manual
#
# $Id: parse-tech,v 1.23 2000/10/23 15:19:12 nemesis Exp $
#
# Copyright (C) 2000 Kees Cook
# kees@outflux.net, http://outflux.net/
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html

use Getopt::Std;

my $VERSION = "0.1";

# Usage([MESSAGE])
#
# Abort the program with the command-line help.  The optional MESSAGE
# (typically an "Error: ..." line) is appended after the option summary.
# Never returns: the help text is raised through die().
sub Usage {
	my ($message) = @_;

	die <<"END_USAGE";
Usage: $0 -[m/d] [-r] -t FILENAME [-c COUNTY] [-s RT]
version $VERSION

-h              help: you're reading it.  :)
-v		turn on verbose debugging
-m		generate MySQL table definitions
-d		generate MySQL data from TIGER/Line county data
-r		add initial 'DROP TABLE ...' commands when generating tables
-s RT		skip processing fields until you hit the 'RT' file (for -d)
-t FILENAME     use FILENAME to read the 'Record Type' info from
-c COUNTY	FIPS county number to read RT files from (e.g. '17031')

$message

END_USAGE
}
# Parse the command line into %opts.  An unknown switch or an explicit -h
# shows the help text and stops.
my $parsed_ok = getopts('hmdrvt:c:s:',\%opts);
Usage() if (!$parsed_ok || $opts{h});

# Sanity check options: each guard aborts through Usage() with its own message.

# At least one output mode has to be selected.
Usage("Error: You must have either '-m' or '-d' specified.")
	unless (defined($opts{m}) || defined($opts{d}));

# The technical manual is always required.
Usage("Error: You must use the -t option.")
	unless (defined($opts{t}));

# Data generation needs to know which county's files to read.
Usage("Error: you must specify a County code (-c) when using -d.")
	if (defined($opts{d}) && !defined($opts{c}));

# State shared by the manual-parsing loop and the data generator below.
my $running=0;	# used to sense where the Record Type appendix is
my $rt="";	# Record Type whose table is currently being defined
my $key="";	# first field of the current type other than rt/version
my $none=1;	# 1 until the first field of the current table has been seen
my $dec=8;	# decimals for the REAL lat/long type (currently commented out)

my $num=0;	# index of the next field within the current Record Type
my $rts=0;	# number of Record Types recorded in @rtlist

my %defs;	# $defs{RT}[n]{field|alpha|beg|len|null|justify}
my @rtlist;	# Record Types in the order they were first defined

# Three-argument open: the file name comes straight from the command line, so
# it must never be parsed for mode characters or have whitespace stripped.
open(TECH,'<',$opts{t}) || die "Cannot read technical manual '$opts{t}': $!\n";

# Pass 1: walk the technical manual and collect the field layout of every
# Record Type.  A layout table starts with a "Record Type X" heading whose
# next unindented line is a "Field ..." column header, and it ends at the
# first blank line; a repeated heading for the same type continues the
# current table.  Layouts are stored in %defs (and the types in @rtlist);
# with -m each table is also echoed as a CREATE TABLE statement.
warn "Parsing Technical Manual for Record Type definitions...\n" if ($opts{v});
while (my $line=<TECH>) {
	if ($line =~ /^Record Type (\S+)/) {
		my $got=$1;

		# make sure we're in the right section: skip indented/blank lines
		# and require the column header to follow
		my $check=<TECH>;
		while (defined($check) && $check =~ /^\s+/) {
			$check=<TECH>;
		}
		# the manual ended before any column header showed up
		last if (!defined($check));
		next if ($check !~ /^Field\s+/);

		$running=1;

		if ($rt ne $got) {
			warn "Record Type $got defined.\n" if ($opts{v});

			# close the previous table before opening the next one
			print ");\n" if ($opts{m} && $rt ne "");

			$rt=$got;
			$key="";
			$none=1;
			$num=0;

			push(@rtlist,$rt);
			$rts++;

			# IF EXISTS keeps a piped mysql session from aborting when
			# the table has not been created yet
			print "DROP TABLE IF EXISTS RT$rt;\n" if ($opts{m} && $opts{r});
			print "CREATE TABLE RT$rt (" if ($opts{m});
		}
		next;
	}

	next if (!$running);

	# empty line means end of section
	if ($line =~ /^\s+$/) {
		$running=0;
		next;
	}

	# empty start line means extended comment from previous line
	next if ($line =~ /^\s+/);

	# Drop the newline first so that the last row of a manual lacking a
	# trailing newline is still recognised as a field definition.
	chomp($line);

	# columns: name, nullable, justification, A/N, begin, end, length, text
	if ($line =~ /^(\S+)\s+(Yes|No\*?)\s+(L|R)\s+(A|N)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.*)$/) {
		my ($name,$nullable,$justify,$kind,$beg,$len)=($1,$2,$3,$4,$5,$7);
		my $field=lc($name);
		my $null=($nullable eq "Yes");
		my $alpha=($kind eq "A");

		# set as field (only consumed by the commented-out PRIMARY KEY line)
		$key=$field if ($field ne "rt" && $field ne "version" && $key eq "");
		my $tablenull=($null ? "" : "NOT ");

		my $type=($alpha ? "CHAR($len)" : "INT($len)");
#		$type="REAL($len,$dec)" if ($field =~ /(long|lat)\d*$/);

		# disallow mysql reserved words
		if ($field =~ /^(primary|key|index|unique|fulltext|constraint|check|source)$/i) {
			$field="T$field";
		}

		# every column after the first is preceded by a separator
		if ($none == 1) {
			$none=0;
		}
		else {
			print ", " if ($opts{m});
		}

		$defs{$rt}[$num]={
			'field'   => $field,
			'alpha'   => $alpha,
			'beg'     => $beg,
			'len'     => $len,
			'null'    => $null,
			'justify' => $justify,
		};

		my $primary="";
#		$primary="PRIMARY KEY " if ($field eq $key);
		print "${primary}$field $type ${tablenull}NULL" if ($opts{m});

		$num++;
	}
	else {
		warn "funny line: '$line'\n";
	}
}
close(TECH);

# Terminate the last CREATE TABLE statement before any data is written, so
# that combining -m and -d yields the table definitions followed by the
# INSERTs.  Nothing needs closing when no Record Type was ever defined.
print ");\n" if ($opts{m} && $rt ne "");

# Pass 2 (-d): turn each fixed-width county file TGR<county>.RT<type> into
# INSERT statements, slicing every record according to the layout in %defs.
if ($opts{d}) {
	# with -s, ignore Record Types until the requested one is reached
	my $skip=defined($opts{s});

	foreach my $type (@rtlist) {
		if ($skip) {
			# plain case-insensitive comparison: -s names a type and
			# must not be interpreted as a regular expression
			next if (lc($type) ne lc($opts{s}));
			$skip=0;
		}

		my $file="TGR$opts{c}.RT$type";
		my $fh;
		if (!open($fh,'<',$file)) {
			warn "Cannot read file: '$file'!\n";
			next;
		}
		warn "Reading '$file'...\n" if ($opts{v});

		# field layout of this Record Type, fetched once per file
		my @layout=@{ $defs{$type} || [] };

		if ($opts{v}) {
			my $fields=join(", ",map { $_->{'field'} } @layout);
			warn "Scanning for fields: '$fields'...\n";
		}

		my $count=0;
		while (my $line=<$fh>) {
			chomp($line);

			# progress indicator on the same terminal line
			if ($opts{v} && ($count % 100) == 0) {
				print STDERR "read: $count\r";
			}
			$count++;

			my @values;
			foreach my $def (@layout) {
				my $extract=substr($line,$def->{'beg'}-1,$def->{'len'});
				# a short record yields undef for columns past its end
				$extract="" if (!defined($extract));

				# grab specials, if any
				if ($extract =~ /^\s*$/ && $def->{'null'}) {
					$extract="NULL";
				}
				elsif ($def->{'alpha'}) {
					# escape backslashes as well as quotes; a lone
					# trailing backslash would otherwise swallow the
					# closing quote of the SQL string
					$extract=~s/([\\'])/\\$1/g;
					# strip the padding on the side opposite to the
					# field's justification
					if ($def->{'justify'} eq "L") {
						$extract=~s/\s+$//;
					}
					elsif ($def->{'justify'} eq "R") {
						$extract=~s/^\s+//;
					}
					$extract="'$extract'";
				}
				push(@values,$extract);
			}
			print "INSERT INTO RT$type values (".join(", ",@values).");\n";
		}
		close($fh);
	}

	# still skipping here means the -s Record Type never showed up
	warn "Record Type '$opts{s}' not found; no data generated.\n" if ($skip);
}

# leave mysql cleanly for a pipe...
print "QUIT\n";

