#!/usr/local/bin/perl
#=====================================================
# Japanese Probabilistic Dependency Analyzer
# Computational Linguistics Laboratory
# Nara Institute of Science and Technology
# Copyright (C) 1997 Masakazu Fujio and Yuji Matsumoto
#=====================================================
#---------------------------------------------
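# Baseline analyzer (the original Japanese description on this line is
# mis-encoded in this copy; it appears to say that only one information
# source is used -- TODO recover the original text)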
#---------------------------------------------
# ---- Defaults -------------------------------------------------------
# $base   : statistics file to load (changed with -l <file>)
# $min    : threshold compared against the remaining-phrase count in the
#           distance model (changed with -m <n>)
# $k_mode : distance factor on/off    (-d turns it off)
# $i_mode : category factor on/off    (-i turns it off)
# $t_mode : punctuation factor on/off (-t turns it off)
# $r_mode : relation factor on/off; it already defaults to 0 and -r also
#           assigns 0, so this factor can never be enabled from the command
#           line.  NOTE(review): confirm that this is intentional.
# $lexcat : initialised here; not read anywhere in the visible code.
($base, $min) = ("Hstat", 10);
($k_mode, $i_mode, $t_mode, $r_mode) = (1, 1, 1, 0);
$lexcat = 0;

# ---- Option parsing -------------------------------------------------
# Consume leading arguments that start with "-".  Letters may be bundled
# ("-dit"); -l and -m take their value from the following argument.  Any
# unknown letter prints the usage text and exits.
while (defined($ARGV[0]) && $ARGV[0] =~ /^-/) {
    $_ = shift;
    s/^-//;
    # Peel one option letter at a time until nothing but blanks remains.
    until (/^\s*$/) {
        if    (s/^l//) { $base   = shift; }
        elsif (s/^m//) { $min    = shift; }
        elsif (s/^d//) { $k_mode = 0; }
        elsif (s/^i//) { $i_mode = 0; }
        elsif (s/^t//) { $t_mode = 0; }
        elsif (s/^r//) { $r_mode = 0; }
        else           { &usage; }
    }
}

# Load the statistics table into %hd.  Each line is "<key> <value>"; the
# split is at the LAST space on the line, so keys may themselves contain
# spaces.
# The three-argument open keeps characters in the file name (leading "<",
# ">", "|", surrounding blanks) from being interpreted as an open mode.
open(HD, '<', $base) || die "Can't open $base: $!\n";
while (<HD>) {
    # Skip lines without a space separator instead of re-storing the
    # captures left over from the previous successful match.
    next unless /^(.*) (.*)$/;
    $hd{$1} = $2;
}
close(HD);

# Main pass.
#
# Input (files named on the command line, or STDIN): one record per line
# with nine whitespace-separated fields
#     head hinsi rel moto ten maru k1 k2 bunsetu
# followed by a line "EOS" at the end of every sentence.  The flag fields
# are matched as "t:<digit>", "m:<digit>", "k1:<digit>" and "k2:<digit>".
#
# Output (STDOUT): every record is echoed as it is read; at "EOS" the
# script prints "EOS", one "<from> <to> <score>" line per candidate pair,
# a fixed "<last-1> <last> 1" line and finally "EOK".
#
# All variables are package globals on purpose: `category` reads $fm, $to,
# @rel, @hinsi, %hd and $saitei and writes $stat1 through them.
print STDERR "ok $k_mode:$i_mode:$t_mode\n";

$saitei = 0.0001;   # floor value used when a count is missing or zero
$i = 0;             # slot for the next record of the current sentence
while (<>) {
    if (/^EOS$/) {
        print "EOS\n";
        $i--;       # $i is now the index of the last record

        # A sentence with one record has no pair to score.  A sentence with
        # no record at all (e.g. two consecutive EOS lines) is treated the
        # same way instead of emitting a bogus "-2 -1 1" line.
        if ($i <= 0) {
            $i = 0;
            next;
        }

        for $fm (0..$i-2) {
            # Total count of this relation; fall back to the relation with
            # everything before its first "/" removed.
            $relcnt = $hd{"$rel[$fm]"};
            if (!$relcnt) {
                $rel[$fm] =~ s/^[^\/]*\//\//;
                print STDERR "$rel[$fm]\n";
                $relcnt = $hd{"$rel[$fm]"};
            }
            if (!$relcnt) { print STDERR "relcnt error! $rel[$fm]\n"; }

            # Candidate heads.  $ks is a running balance of the k1/k2 flags
            # (presumably opening/closing brackets -- TODO confirm): records
            # where the balance is back at zero are candidates; if the
            # balance goes negative, every record up to that point is a
            # candidate and the scan stops.
            $#kouho = -1;
            $ks = 0;
            for $m ($fm+1..$i) {
                ($k1) = ($k1[$m] =~ /^k1:(\d)$/);
                ($k2) = ($k2[$m] =~ /^k2:(\d)$/);
                $kt = $k1 - $k2;
                if    ($kt > 0) { $ks++; }
                elsif ($kt < 0) { $ks--; }
                if ($ks < 0) {
                    @kouho = ($fm+1..$m);
                    last;
                }
                push(@kouho, $m) if ($ks == 0);
            }
            ($t1) = ($ten[$fm] =~ /^t:(\d)$/);
            $kd = $i - $fm;                     # records remaining after $fm
            if (!@kouho) { @kouho = ($i); }     # default: the last record

            for $to (@kouho) {
                $stat1 = 0;
                $stat2 = 0;
                $stat3 = 0;
                $stat4 = 0;

                # $tn: number of records strictly between $fm and $to whose
                # ten field is 't:1'; $p: 1 if there is at least one.
                $tn = 0;
                $p = 0;
                for ($fm+1..$to-1) {
                    if ($ten[$_] eq 't:1') {
                        $p = 1;
                        $tn++;
                    }
                }

                # List assignment so that the captured digit is stored.  A
                # scalar assignment would store the match result (always 1
                # for "t:0"/"m:0"), which made the "...00" branch below
                # unreachable.
                ($t2) = ($ten[$to]  =~ /^t:(\d)$/);
                ($m2) = ($maru[$to] =~ /^m:(\d)$/);

                # Factor 1: category statistics (see `category`).
                if ($i_mode) {
                    &category;
                } else {
                    $stat1 = 1;
                }

                # Factor 2: distance statistics.
                if ($k_mode) {
                    $distcnt = 0;
                    $dist = $to - $fm;

                    if ($relcnt) {
                        if ($kd > $min) {
                            # Pool the counts for $min+1 .. $kd+50 remaining
                            # records.  The key uses $dist, matching the key
                            # layout of the short case below (the variable
                            # previously interpolated here was never set).
                            # NOTE(review): $distcnt accumulates the running
                            # sum on every iteration; kept as is -- confirm
                            # that this weighting is intended.
                            $dtmp = 0;
                            for ($k = $min+1; $k <= $kd+50; $k++) {
                                $dtmp += $hd{"d$rel[$fm]$k$dist$t1$tn"};
                                $distcnt += $dtmp/($kd+50-$min);
                            }
                        } else {
                            $distcnt = $hd{"d$rel[$fm]$kd$dist$t1$tn"};
                        }
                        $stat2 = $distcnt/$relcnt || $saitei;
                    } else {
                        $stat2 = $saitei;
                    }
                } else {
                    $stat2 = 1;
                }

                # Factor 3: punctuation statistics.  A set ten flag and a set
                # maru flag on the candidate are not distinguished: both the
                # "01" and the "10" counts are summed.
                # NOTE(review): unlike the other factors this one has no
                # $saitei floor when the count is zero.
                if ($t_mode) {
                    $tencnt = 0;
                    if ($t2 || $m2) {
                        $tencnt = $hd{"p$rel[$fm]${t1}01$p"}
                                + $hd{"p$rel[$fm]${t1}10$p"};
                    } else {
                        $tencnt = $hd{"p$rel[$fm]${t1}00$p"};
                    }

                    if ($relcnt) {
                        $stat3 = $tencnt/$relcnt;
                    } else {
                        $stat3 = $saitei;
                    }
                } else {
                    $stat3 = 1;
                }

                # Factor 4: relation/moto statistics (only when $r_mode).
                if ($r_mode) {
                    $rrcnt = $hd{"$rel[$fm]_$moto[$to]"};
                    if ($relcnt) {
                        $stat4 = $rrcnt/$relcnt;
                    } else {
                        $stat4 = $saitei;
                    }
                    $stat4 = $saitei if (!$stat4);
                } else {
                    $stat4 = 1;
                }

                $stat = $stat1 * $stat2 * $stat3 * $stat4;
                print "$fm $to $stat\n";
            }
        }

        # The record before the last one always gets the last record as its
        # only candidate, with score 1.
        $tmp = $i - 1;
        print "$tmp $i 1\n";

        # Reset the per-sentence state.
        $i = 0;
        $#head = -1;
        $#hinsi = -1;
        $#rel = -1;
        $#moto = -1;
        $#ten = -1;
        $#maru = -1;
        $#k1 = -1;
        $#k2 = -1;
        $#bunsetu = -1;
        $#lrel = -1;
        print "EOK\n";
    } else {
        # Split the record into its nine fields and echo it.
        ($head[$i], $hinsi[$i], $rel[$i], $moto[$i], $ten[$i], $maru[$i], $k1[$i], $k2[$i], $bunsetu[$i])
            = /^(\S*)\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S*)\s+(\S*)\s+(.*)$/;
        print "$bunsetu[$i]:$head[$i] $hinsi[$i] $rel[$i] $moto[$i] $hinsi[$i]\n";
        if (!$head[$i] || !$hinsi[$i] || !$rel[$i] || !$moto[$i]) {
            print STDERR "ɤ줫°ʤǤ\n";
        }

        # Keep the full relation string in @lrel, then reduce $rel[$i] to
        # its last "-"-delimited component; everything before it is $rest.
        $lrel[$i] = $rel[$i];
        ($rest, $rel[$i]) = ($rel[$i] =~ /^(.*)-([^-]*)-$/);

        # Re-derive moto as the first "x/y" pair: from $rest (after its
        # leading "-") when there is one, otherwise from the relation.
        if ($rest) {
            ($moto[$i]) = ($rest =~ /^-([^-\/]*\/[^-\/]*)/);
        } else {
            ($moto[$i]) = ($rel[$i] =~ /^([^\/]*\/[^\/]*)/);
        }
        $moto[$i] =~ s/\/$//;
        $i++;
    }
}

# category -- compute the category factor for the current ($fm, $to) pair.
#
# Reads the globals %hd, @rel, @hinsi, $fm, $to and $saitei.
# Sets the globals:
#   $lrcnt  count stored under "<rel>_<hinsi of $fm>"
#   $lcnt   count stored under "<rel>_<hinsi of $fm>-<hinsi of $to>"
#   $stat1  $lcnt / $lrcnt, or $saitei when either count is missing or zero
sub category {
    my $from_key = "$rel[$fm]_$hinsi[$fm]";

    $lrcnt = $hd{$from_key};
    $lcnt  = $hd{"$from_key-$hinsi[$to]"};

    $stat1 = ($lrcnt == 0 || $lcnt == 0) ? $saitei : $lcnt / $lrcnt;
}

# usage -- print the command-line help text on STDOUT and terminate the
# script with exit status 1.  Takes no arguments and never returns.
# The option descriptions are mis-encoded Japanese in this copy of the file
# and are reproduced unchanged.
sub usage {
    my @help = (
        'Usage: baseline [ -dictr -l <patfilename> ]',
        '',
        '    -c              þѤ',
        '    -d              Υʤ',
        '    -i              ̣ʤ',
        '    -t              ʤ',
        '    -r              طʤ',
        '',
        '    -l  <filename>  ץǡե',
    );

    print join("\n", @help) . "\n";

    exit 1;
}
