#!/usr/local/bin/perl
#=====================================================
# Japanese Probabilistic Dependency Analyzer
# (Segmenter)
# Computational Linguistics Laboratory
# Nara Institute of Science and Technology
# Copyright (C) 1997 Masakazu Fujio and Yuji Matsumoto
#=====================================================
# Directory that holds the "auto", "key" and "zoku" tables; -D overrides it.
# (A previous default, now disabled, was the invoking user's home directory:
#  '/home/'.(getpwuid($<))[0].)
$HOME = ".";

# Option defaults.
$debug_mode = $midasi_on = $punc_on = $O_mode = 0;

# Consume every leading "-..." argument from @ARGV.  The leading dash is
# stripped from a copy in $_, and the letters of the cluster are then eaten
# one option at a time until nothing but white space is left.
#   -D <dir>  table directory (the rest of the cluster is ignored)
#   -d        debug mode
#   -r1 <n>   value for $midasi_on, taken from the next argument
#   -r2       sets $punc_on
#   -b -m -v -e   output modes 1, 2, 3 and 4
# Anything else prints the usage text and exits.
while ($_ = $ARGV[0], s/^-// && shift) {
    until (/^\s*$/) {
        if    (s/^D//)  { $HOME = shift; last; }
        elsif (s/^d//)  { $debug_mode = 1; }
        elsif (s/^r1//) { $midasi_on = shift; }
        elsif (s/^r2//) { $punc_on = 1; }
        elsif (s/^b//)  { $O_mode = 1; }
        elsif (s/^m//)  { $O_mode = 2; }
        elsif (s/^v//)  { $O_mode = 3; }
        elsif (s/^e//)  { $O_mode = 4; }
        else            { &usage; last; }
    }
}

# Paths of the three lookup tables inside the data directory.
$auto = "$HOME/auto";
$keymap = "$HOME/key";
$feature = "$HOME/zoku";

# Open all three files before loading anything so that a missing table is
# reported immediately.  The explicit "<" keeps a directory name given with
# -D from being interpreted as an open mode.
open(DFA, "< $auto") || die "Can't open $auto:$!\n";
open(KEY, "< $keymap") || die "Can't open $keymap:$!\n";
open(ZOK, "< $feature") || die "Can't open $feature:$!\n";

# Every table line has the form "key>value"; the key extends up to the LAST
# '>' on the line.  Lines that do not match are skipped: $1 and $2 are only
# meaningful after a successful match, and would otherwise still hold the
# captures of the previous line (or be undefined on the first line).

# %dfa: automaton table (transitions, final-state flags, pattern ids).
while (<DFA>) {
    next unless /^(.*)>(.*)$/;
    $dfa{$1} = $2;
}
close(DFA);

# %smap: maps a "base,pos,form" symbol string to the automaton input code.
while (<KEY>) {
    next unless /^(.*)>(.*)$/;
    $smap{$1} = $2;
}
close(KEY);

# %zoku: feature table ("head...", "rel..." and "kr..." entries).
while (<ZOK>) {
    next unless /^(.*)>(.*)$/;
    $zoku{$1} = $2;
}
close(ZOK);

# Main loop: read JUMAN output line by line (one morpheme per line, "EOS"
# closing each sentence) and feed the morphemes to the automaton.
#   $j    number of morphemes stored for the current sentence
#   $i    index of the morpheme the automaton looks at next ($i may lag
#         behind $j after a segment boundary, because the morpheme that
#         broke the segment is looked at again as the start of the next one)
#   @set  (first index of the open segment, index of its last accepted end)
#   @ebz  segment lines buffered for the current sentence in output mode 4
#   $line sentence counter used in diagnostics
&reset;
@set = (0, -1);
$i = $j = $bid = 0;
$line = 0;
while (<>) {
    if (/^EOS$/) {
        # End of sentence: let the automaton catch up with the stored
        # morphemes, then show it one position past the end ($i == $j) so
        # that the pending segment is written out.
        while ($i <= $j) {
            $at_end = ($i == $j);
            &alphabet;
            &transition;
            last if ($O_mode == 4 && $shift_error);
            # Stop after the end position has been handled, unless the last
            # segment was cut at an earlier accepting position; in that case
            # the morphemes after the cut still have to be segmented.
            last if ($at_end && $i >= $j);
        }
        if ($O_mode == 4) {
            if ($shift_error) {
                # Mode 4 drops a sentence with a shift error entirely
                # (neither its segments nor its EOS are printed).
                &resetEOS;
                $bid = 0;
                $#ebz = -1;
                $line++;
                next;
            }
            for (@ebz) { print "$_\n"; }
            $bid = 0;
            $#ebz = -1;
        }
        print "EOS\n";
        $line++;
        &resetEOS;
        next;
    }
    if ($O_mode == 4 && $shift_error) {
        # A shift error occurred inside the sentence: discard input up to
        # and including its EOS line.
        $saw_eos = 0;
        while (<>) {
            if (/^EOS$/) { $saw_eos = 1; last; }
        }
        $line++;
        &resetEOS;
        $bid = 0;
        $#ebz = -1;
        # Input ended without EOS: leave the loop instead of calling <>
        # again, which after end-of-input would start reading STDIN.
        last unless $saw_eos;
        next;
    }
    &getjuman;
    &alphabet;
    &transition;
}

# Input that ended without a final EOS: process the morphemes still queued.
while ($i < $j) {
    &alphabet;
    &transition;
}

$debug_mode && &check_ram;

# %smap, %zoku and %dfa are ordinary in-memory hashes filled from text
# files (they are never dbmopen'ed), so there is nothing to dbmclose.

exit;

######################################################################
######################################################################
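# Take one automaton transition and check whether it reaches a final state.
# Every accepting position is remembered as a candidate end of the segment,
# so that segments follow the longest-match principle.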
# progress: try to advance the automaton by one input symbol.
#
# Reads:  $state, $code, %dfa, $i, @hinsi, $punc_on and the running
#         counters $t1head_stack, $t2head_stack, $tpat_id, $tk1, $tk2.
# Writes: $state, and - whenever the new state is marked final - a snapshot
#         describing the segment that ends at token $i: $pat_id, $head_stack,
#         $rhead_stack, $hpat_id, $rel_id, $touten, $kuten, $kakko1,
#         $kakko2 and $set[1].
# Result: the new state (the caller treats it as true) when
#         $dfa{"$state,$code"} holds a transition, otherwise 0.
#
# NOTE(review): the string literals compared against $hinsi[...] below were
# Japanese part-of-speech names; they were lost to an encoding problem and
# now read as ''.  Restore them from the original EUC-JP source - TODO.
sub progress {
    if ($tmp = $dfa{"$state,$code"}) {
	# The target state is marked final: remember this position as the
	# latest possible end of the current segment.
	if ($dfa{"$tmp,final"}) {
	    # Pattern id recorded in the table for this transition.
	    $pat_id = $dfa{"ID$state,$code"};
#	    print STDERR "==$pat_id\n";
	    # Freeze the head-word counters collected by transition().
	    $head_stack = $t1head_stack;
	    $rhead_stack = $t2head_stack;
	    $hpat_id = $tpat_id;
	    # $rel_id starts at the current token and is moved backwards over
	    # tokens whose part of speech equals one of the (damaged) literals;
	    # with $punc_on set only one such literal is skipped.
	    # NOTE(review): the backward scan has no lower bound on $rel_id.
	    $rel_id = $i;
	    if ($punc_on) {
		while ($hinsi[$rel_id] eq '') {
		    $rel_id--;
		}
	    } else {
		while (($hinsi[$rel_id] eq '')
		       || ($hinsi[$rel_id] eq '')
		       || ($hinsi[$rel_id] eq '')) {
		    $rel_id--;
		}
	    }

	    # Flags derived from the part of speech of the current token
	    # (printed later as "t:" and "m:").
	    $touten = ($hinsi[$i] eq '') ? 1 : 0;
	    $kuten = ($hinsi[$i] eq '') ? 1 : 0;
	    # Flags accumulated by transition() (printed as "k1:" and "k2:").
	    $kakko1 = $tk1;
	    $kakko2 = $tk2;
	    
            # Latest accepted end of the current segment.
            $set[1] = $i;
	}
	$state = $tmp;
    } else { 0; }
}

# Per-token bookkeeping around one state transition
# transition: process token $i - update the head-word and relation
# bookkeeping, advance the automaton through progress(), and when no
# transition exists write out the pending segment and restart behind it.
#
# Reads:  $i, $state, $code, @kihon, @hinsi, %zoku, %dfa, @set.
# Writes: @headlist, $t1head_stack, $t2head_stack, $tpat_id, $rel_start,
#         $tk1, $tk2, $ch, $i, $set[0] (plus whatever progress(),
#         bunsetuout() and reset() change).
#
# NOTE(review): the two literals compared with $hinsi[$i] for $tk1/$tk2 are
# damaged by an encoding problem; they feed the "k1:"/"k2:" output flags.
sub transition {
    # Head-word candidate?  The entry "head<base>/<pos>" in %zoku is
    # preferred, with "head<pos>" as the fallback.
    $tmp = defined($zoku{"head$kihon[$i]/$hinsi[$i]"})
	? $zoku{"head$kihon[$i]/$hinsi[$i]"} : $zoku{"head$hinsi[$i]"};
    if ($tmp) {
	push(@headlist, $i);
	$t1head_stack++;
	# An "H<state>,<code>" entry in %dfa is counted separately and its
	# value kept as the tentative head pattern id.
	if ($dfa{"H$state,$code"}) {
	    $t2head_stack++;
	    $tpat_id = $dfa{"H$state,$code"};
	}
    }
    
    # Relation part: remember the first token of the segment that has a
    # "rel..." entry in %zoku (same lookup order as for heads).
    if ($rel_start < 0) {
	$tmp = defined($zoku{"rel$kihon[$i]/$hinsi[$i]"})
	    ? $zoku{"rel$kihon[$i]/$hinsi[$i]"} : $zoku{"rel$hinsi[$i]"};
	$tmp && ($rel_start = $i);
    }
    ($hinsi[$i] eq '̳') && ($tk1 = 1);
    ($hinsi[$i] eq '') && ($tk2 = 1);
	    
    # Advance the automaton by this token.
    $ch = &progress;

    # No transition: the segment ends at its last accepted position
    # $set[1].  Emit it and restart with the token right after it.
    if (!$ch) {
	&bunsetuout;
	&reset;
	$i = $set[1] + 1;
	$set[0] = $i;
    } else { $i++; }
}

# bunsetuout: describe and output the segment $set[0]..$set[1].
#
# Three things are computed from the snapshot taken by progress():
#   $head / $hinsi / $form  head word, its part of speech and its
#                           conjugation form ('*' when unknown)
#   $rel                    relation string built from the tokens
#                           $rel_start..$rel_id (shape depends on $midasi_on)
# and the segment is then written according to $O_mode:
#   0  surface forms only
#   1  summary, then midasi/hinsi/ktype/kform per token
#   2  summary, then the surface forms
#   3  summary, then midasi/yomi/kihon/hinsi/ktype/kform per token
#   4  like mode 1, but stored in @ebz (index $bid) instead of printed
# When no accepting position was recorded ($set[0] > $set[1]) nothing is
# output and errorcheck() reports a shift error instead.
#
# NOTE(review): the literals compared with $hinsi[...] and the values
# assigned to $head in the special-case chain below were Japanese strings;
# they were lost to an encoding problem and now read as empty strings.
# NOTE(review): $form is not assigned when the pattern has its own
# "head<pat_id>" entry, so the value of the previous segment is printed.
sub bunsetuout {
    local($summary);

    # ---- head word -------------------------------------------------------
    $head = "";
    $hinsi = '*';
    if ($tmp = $zoku{"head$pat_id"}) {
        # The accepted pattern carries a fixed head of its own.
        $head = $tmp;
    } else {
        if ($hpat_id =~ /$pat_id/) {
            # The head pattern recorded by transition() belongs to the
            # accepted pattern: leave out the heads counted for it.
            $head_stack -= $rhead_stack;
            $hinsi = $hinsi[$headlist[$head_stack]];
            $form = $kform[$headlist[$head_stack]] || "*";
            $headid = $head_stack ? $headlist[$head_stack-1] : $i;
        } else {
            # Last head candidate seen, or the current token if none.
            $headid = $head_stack ? $headlist[$head_stack-1] : $i;
            $hinsi = $hinsi[$headid];
            $form = $kform[$headid] || "*";
        }
        # Look the head token up in %zoku: "head<base>/<pos>" first, then
        # "head<pos>".  Both halves of the test use the head token's own
        # base form.
        $tmp = defined($zoku{"head$kihon[$headid]/$hinsi[$headid]"})
            ? $zoku{"head$kihon[$headid]/$hinsi[$headid]"} : $zoku{"head$hinsi[$headid]"};
        if ($tmp) {
            if ($hinsi[$headid] eq '') { $head = ''; }
            elsif ($hinsi[$headid] eq '') { $head = ''; }
            elsif ($hinsi[$headid] eq '') { $head = ''; }
            else { $head = $kihon[$headid]; }
        } else {
            $head = ($headid eq '*') ? '*' : $kihon[$headid];
        }
    }
    if (!$head) { $head = '*'; }

    # ---- relation string -------------------------------------------------
    $rel = "";
    if ($rel_start > 0 && $rel_start < $rel_id) {
        # Several tokens: one "-..." element per token.
        for ($rel_start..$rel_id) {
            next if ($hinsi[$_] eq "");
            if ($midasi_on == 2) {
                $rel = "$rel-$midasi[$_]";
            } else {
                $tmp = defined($zoku{"rel$kihon[$_]/$hinsi[$_]"})
                    ? $zoku{"rel$kihon[$_]/$hinsi[$_]"} : $zoku{"rel$hinsi[$_]"};
                if ($tmp) {
                    if ($midasi_on == 1) {
                        $rel = "$rel-$kihon[$_]/$midasi[$_]/$hinsi[$_]";
                    } else {
                        $rel = "$rel-$kihon[$_]/$hinsi[$_]";
                    }
                } else {
                    $rel = "$rel-/$hinsi[$_]";
                }
            }
        }
        if ($midasi_on != 2) {
            # Close with the "kr" entry of the last token's conjugation form.
            $rel = "$rel/$zoku{'kr'.$kform[$rel_id]}-";
        }
    } else {
        # Single token at $rel_id.
        $tmp = defined($zoku{"rel$kihon[$rel_id]/$hinsi[$rel_id]"})
            ? $zoku{"rel$kihon[$rel_id]/$hinsi[$rel_id]"} : $zoku{"rel$hinsi[$rel_id]"};
        if ($tmp) {
            if ($midasi_on == 2) {
                $rel = $midasi[$rel_id];
            } elsif ($midasi_on == 1) {
                $rel = "-$kihon[$rel_id]/$midasi[$rel_id]/$hinsi[$rel_id]/$zoku{'kr'.$kform[$rel_id]}-";
            } else {
                $rel = "-$kihon[$rel_id]/$hinsi[$rel_id]/$zoku{'kr'.$kform[$rel_id]}-";
            }
        } else {
            $rel = "-/$hinsi[$rel_id]/$zoku{'kr'.$kform[$rel_id]}-";
        }
    }

    # ---- output ----------------------------------------------------------
    if ($set[0] > $set[1]) {
        &errorcheck;
    } else {
        # Summary shared by output modes 1 to 4.
        $summary = "$head $hinsi $rel $form t:$touten m:$kuten k1:$kakko1 k2:$kakko2 ";
        if ($O_mode == 0) {
            for ($i=$set[0];$i<=$set[1];$i++) {
                print  "$midasi[$i]";
            }
            print "\n";
        } elsif ($O_mode == 1) {
            print $summary;
            print " ";
            for ($i=$set[0];$i<=$set[1];$i++) {
                print  "$midasi[$i]/$hinsi[$i]/$ktype[$i]/$kform[$i] ";
            }
            print "\n";
        } elsif ($O_mode == 2) {
            print $summary;
            for ($i=$set[0];$i<=$set[1];$i++) {
                print  "$midasi[$i]";
            }
            print "\n";
        } elsif ($O_mode == 3) {
            print $summary;
            print " ";
            for ($i=$set[0];$i<=$set[1];$i++) {
                print  "$midasi[$i]/$yomi[$i]/$kihon[$i]/$hinsi[$i]/$ktype[$i]/$kform[$i] ";
            }
            print "\n";
        } elsif ($O_mode == 4) {
            # Buffered: the main loop prints @ebz only for sentences
            # without a shift error.
            $ebz[$bid] = $summary . " ";
            for ($i=$set[0];$i<=$set[1];$i++) {
                $ebz[$bid] .= "$midasi[$i]/$hinsi[$i]/$ktype[$i]/$kform[$i] ";
            }
            $bid++;
        }
    }
}

# Split the JUMAN line held in $_ into the per-token arrays
# getjuman: store the JUMAN output line held in $_ as token number $j and
# advance $j.
#
# The line is split on white space; the fields go to
#   $midasi[$j]  field 0        $yomi[$j]   field 1
#   $kihon[$j]   field 2        $hinsi[$j]  field 5, or field 3 if 5 is '*'
#   $ktype[$j]   field 7        $kform[$j]  field 9
# (presumably surface form, reading, base form, part of speech, conjugation
# type and conjugation form - confirm against the JUMAN output format).
sub getjuman {
    # Drops the last character of the line (expected to be the newline).
    # NOTE(review): chop removes it unconditionally, also on a final line
    # that has no newline.
    chop;
    @juman = split(" ", $_);

    $midasi[$j] = $juman[0];
    $yomi[$j] = $juman[1];
    $kihon[$j] = $juman[2];
    $hinsi[$j] =  ($juman[5] ne '*') ? $juman[5] : $juman[3];
    # A '*' in the conjugation fields is stored as the empty string.
    $ktype[$j] = ($juman[7] ne '*') ? $juman[7] : "";
    $kform[$j] = ($juman[9] ne '*') ? $juman[9] : "";
    
    # One part-of-speech name is rewritten to another one.
    # NOTE(review): both literals are damaged by an encoding problem.
    ($hinsi[$j] = '̾') if ($hinsi[$j] eq '̤');
    ++$j;
}

# Build the automaton input symbol from the JUMAN analysis of token $i
# alphabet: translate token $i into the automaton input code $code.
#
# Two symbol strings are built and left in the globals $char0 and $char1:
#   $char0  "<base form>,<part of speech>,<conjugation form>"
#   $char1  ",<part of speech>,<conjugation form>"   (base form left empty)
# $code is the %smap entry of $char0 when that entry is true, otherwise the
# %smap entry of $char1 (which may be undefined).
sub alphabet {
    $char0 = join(',', $kihon[$i], $hinsi[$i], $kform[$i]);
    $char1 = join(',', '', $hinsi[$i], $kform[$i]);

    $code = $smap{$char0} || $smap{$char1};
}
 
# Debugging aid
# check_ram: debugging dump of the feature table.
#
# Prints an empty line followed by one "<key>     <value>" line per entry
# of %zoku, in the hash's own iteration order.  The globals $key and $val
# serve as the iteration variables.  (%smap and %dfa are not dumped.)
sub check_ram {
    print "\n";
    print join('     ', $key, $val), "\n"
        while (($key, $val) = each %zoku);
}

# Called when a state transition fails before any segment end was recorded
# errorcheck: report a shift error on STDERR.
#
# Called by bunsetuout() when the automaton got stuck without having
# recorded an accepting position ($set[0] > $set[1]).  It
#   - sets $set[1] to $i, so that the caller resumes after token $i,
#   - prints the sentence counter, the input line number and the token
#     index, the tokens $i-1 and $i, the automaton state and code, and the
#     segment summary computed so far,
#   - sets $shift_error to 1.
#
# NOTE(review): the literals compared with $hinsi[$i] are damaged by an
# encoding problem.
sub errorcheck {
    $set[1] = $i;
#a    print STDERR "\n#$midasi[$i-3]/$yomi[$i-3]/$hinsi[$i-3]/$ktype[$i-3]/$kform[$i-3]#\n";
#    print STDERR "#$midasi[$i-2]/$yomi[$i-2]/$hinsi[$i-2]/$ktype[$i-2]/$kform[$i-2]#\n";
    print STDERR "S=$line,L=$.,W=$i: $midasi[$i-1]/$yomi[$i-1]/$hinsi[$i-1]/$ktype[$i-1]/$kform[$i-1] $midasi[$i]/$yomi[$i]/$hinsi[$i]/$ktype[$i]/$kform[$i] *** Shift error <state:$state code:$code> ";

    # Flags recomputed from the offending token alone.
    $touten = ($hinsi[$i] eq '') ? 1 : 0;
    $kuten = ($hinsi[$i] eq '') ? 1 : 0;
    $kakko1 = ($hinsi[$i] eq '̳') ? 1 : 0;
    $kakko2 = ($hinsi[$i] eq '') ? 1 : 0;

    if (!$head||$head eq " ") { $head = '*'; }
    print STDERR "$head $hinsi $rel $form t:$touten m:$kuten k1:$kakko1 k2:$kakko2";
    # Optional listing of the tokens from the segment start up to $i.
    # NOTE(review): $bunsetu_mode is not assigned anywhere in this file.
    if ($bunsetu_mode) {
 	print STDERR "  [";
 	for ($k=$set[0];$k<=$i;$k++) {
 	    print STDERR "($midasi[$k],$hinsi[$k],$ktype[$k],$kform[$k]) ";
 	}
 	print STDERR "]";
    }
    print STDERR "\n";
    $shift_error = 1;
}

# reset: bring the per-segment state back to its initial values.
#
# Clears the flags, head-word counters and lists collected while a segment
# is being read, and puts the automaton into state 0.  This sub shares its
# name with Perl's builtin reset, so it has to be called as &reset.
sub reset {
    # Flags copied into "k1:" / "k2:" when a segment is accepted.
    $tk1 = $tk2 = 0;

    # Head-word bookkeeping.
    $t1head_stack = $t2head_stack = 0;
    $rhead_stack = $head_stack = 0;
    $headid = $tpat_id = $hpat_id = '*';
    @headlist = ();

    # Relation bookkeeping.
    @rel = ();
    $rel_start = -1;
    $rel_id = 0;

    # Automaton.
    $pat_id = -1;
    $state = 0;
}

# resetEOS: prepare for the next sentence.
#
# Performs the per-segment reset and in addition empties every per-token
# array filled by getjuman (including @yomi, so that readings of a previous,
# longer sentence cannot show up in later diagnostics), restores @set to
# "no segment yet", clears the shift-error flag and rewinds the token
# indices $i and $j.
sub resetEOS {
    &reset;
    $#midasi = -1;
    $#yomi = -1;
    $#kihon = -1;
    $#hinsi = -1;
    $#ktype = -1;
    $#kform = -1;
    @set = (0, -1);
    $last = 1;
    $shift_error = 0;
    $i=$j=$state=0;
}

# usage: print the command-line summary on standard output and terminate
# the program with exit status 1.  Called by the option parser for any
# option it does not know.
#
# NOTE(review): the option descriptions in the text below were Japanese and
# are damaged by an encoding problem.  The synopsis also does not mention
# the -m, -e, -r1, -r2 and -d options accepted by the parser.
sub usage {
    print <<"END";
Usage: ram [ -bhv -D <directory name> ]

    -b  ʸ
    -m  °ʸ
    -v  Ĺ
    -d  ǥХå⡼
END

    exit 1;
}
