#!/usr/bin/perl 
# By Jon Dehdari, 2006, originally for ArabicLG, adapted for Shereen's thesis
# converts aramorph.pl output to something that Arabic teachers will use
# replaces non-portable buck2kirk.sh
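# original command-line usage: cat file | ./aramorph_fast.pl | ./buck2kirk.pl | more
# this version runs as a CGI script (its form posts to buck2kirk.cgi) and invokes ./aramorph_fast.pl itself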

use strict;
use CGI qw(:standard);
### Request setup: cap the POST size, parse the request, and copy every CGI
### parameter into %input.  The remaining lexicals are state shared with the
### output-processing loop further down the script.

# Must be set before the CGI object is created so that it applies to this
# request (CGI.pm documents the unit as bytes).
$CGI::POST_MAX = 50000;

my $cgi = CGI->new;    # direct method call instead of indirect-object "new CGI"

my %input;             # parameter name => value (first value if repeated)
my $counter = 0;       # number of radio-button groups emitted so far
my $newword;           # true until the first radio button of a group is printed

for my $key ( $cgi->param() ) {
    # Scalar context on purpose: one value per parameter name.
    $input{$key} = scalar $cgi->param($key);
}

# Most recent LOOK-UP WORD / SOLUTION captured from the analyser output.
my ( $romanized_word, $romanized_solution );

### Emit the CGI response header and open the HTML page and its form.
### The page is declared and served as windows-1256.

# Header block; the blank line terminates the headers.
print "Content-type: text/html; charset=windows-1256\n\n";

# The last field of a DOCTYPE public identifier names the language of the DTD
# itself ("EN"), not of the page; the page language is given by lang="ar".
print qq{ \n<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html lang="ar">
<head><title>Arabic Text Analysis</title>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1256">
</head>\n<body>\n
<form action="buck2kirk.cgi" method="post">
};
#<style>
#   body \{ text-align\:right \}
#   textarea \{ text-align\:right \}
#<\/style>

### prints all the cgi input, for development
#for my $key ( keys %input ) {
# print $key, ' = ', $input{$key}, "<br>\n";
#}

### Main request handling.  Three stages, chosen by which parameters arrived:
###   1. text_from set     -> analyse the text, print one radio group per word
###   2. words_checked set -> echo the submitted choices (development output)
###   3. neither           -> print the empty input form
###
### aramorph_fast.pl is slow to start, so all of the user's input lines are
### given to it in a single run; its output is then split into lines and
### processed one line at a time.
###
### Security: the analyser is started WITHOUT a shell, so the submitted text
### is only ever data on its standard input.  The old keyword scrub
### (rm|mv|ln|...) is gone: it could not stop shell injection and it deleted
### legitimate romanized words such as "ln".

# Escape a value for use as HTML text or inside a double-quoted attribute.
# Undefined input is treated as the empty string.
my $escape_html = sub {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
};

if ( $input{text_from} ) {    # User has already input text into box

    # Only hand the analyser an input type that the form actually offers;
    # anything else falls back to the form's default.
    my %known_input_type = map { $_ => 1 } qw(roman utf8 cp1256);
    my $input_type = $input{input_type};
    $input_type = 'cp1256'
        unless defined $input_type && $known_input_type{$input_type};

    # Run: text | nice ./aramorph_fast.pl -i TYPE 2>/dev/null
    # A forked child feeds the text to the analyser while this process reads
    # the analyser's output, so neither side can block on a full pipe.
    # (Perl flushes pending output before forking, so the header printed
    # earlier is not duplicated.)
    my $out = '';
    my $pid = open( my $from_morph, '-|' );
    if ( !defined $pid ) {
        print "<p>Unable to run the morphological analyser.</p>\n";
    }
    elsif ( $pid == 0 ) {
        # Child: its STDOUT is the pipe read by the parent, and the analyser
        # inherits it.  Analyser diagnostics are discarded, as before.
        open( STDERR, '>', '/dev/null' );
        my $started = open( my $to_morph, '|-',
            'nice', './aramorph_fast.pl', '-i', $input_type );
        exit 1 unless $started;
        print {$to_morph} $input{text_from}, "\n";
        close $to_morph;
        exit 0;
    }
    else {
        local $/;    # slurp the whole analyser output
        $out = <$from_morph>;
        $out = '' unless defined $out;
        close $from_morph;
    }

    for my $line ( split /\n/, $out ) {

        # --- classify the analyser line and capture what we need from it ---
        if ( $line =~ m/SOLUTION \d+:/ ) {
            # Keep only the bracketed romanized solution.
            $line =~ s/.*\[(.*?)\].*/$1/g;
            $romanized_solution = $1;
        }
        elsif ( $line =~ m/NEWLINE/ || $line =~ m/GLOSS/ ) {
            # Deal with this stuff later
        }
        elsif ( $line =~ m/NOT FOUND$/ ) {
            # There might be a bug in Perl 5.8: the "a" must be present in
            # second term of s///
            $line =~ s/Comment:\s+(\S+?)\s+NOT FOUND/a${1}/g;
            $romanized_solution = $1;
        }
        elsif ( $line =~ s/^INPUT STRING: (.*)/<br\/>\n$1 &nbsp; / ) {
            # Line now starts with a script-generated <br/>.
        }
        elsif ( $line =~ s/^LOOK-UP WORD: (\S*)// ) {
            $romanized_word = $1;
        }
        else {
            $line = '';    # anything else is not displayed
        }

        # --- tidy the text for display ---
        $line =~ s/\(null\)//g;
        $line =~ s/_(\d+)/  &nbsp;&nbsp;($1)/g;    # eg. word_1 --> word  (1)
        $line =~ tr/{/</;
        $line =~ tr/\n/ /;
        $line =~ s/<(?!br)/&lt;/g;    # protects "<" in html, keeps our own <br/>
        $line =~ s/\s+/ /g;           # removes excessive spaces
        #$line =~ s/[aiuo~]//g;       # devocalize

        # Only lines that still contain a Latin letter are shown.
        next unless $line =~ m/[a-zA-Z]/;

        if ( $line =~ m/NEWLINE/ ) {
            print "<hr/><br/>\n";
        }
        elsif ( $line =~ s/^ *\(GLOSS\): (.*)/ &nbsp;&nbsp; $1/ ) {
            print " $line";
        }
        elsif ( $line =~ m/[\x81-\xFA]/ ) {    # it's win1256 Arabic script text
            print "<br/>\n$line";
            $newword = 1;
        }
        else {
            $line =~ tr/o//d;    # remove sukuns

            # Auto-select the first radio button of each word.
            my $checked = '';
            if ($newword) {
                $counter++;
                $checked = ' checked ';
                $newword = 0;
            }

            # Buckwalter romanization uses & < > and similar characters, so
            # attribute values must be escaped; the browser decodes them
            # again before submitting the form.
            my $name  = "${counter}_" . $escape_html->($romanized_word);
            my $value = $escape_html->($romanized_solution);
            print qq{<br/>\n<input type="radio" name="$name" value="$value"$checked>$line };
        }
    }    # for each analyser output line

    print '<input type="hidden" name="words_checked" value="true">';
}    # if $input{text_from}

elsif ( $input{words_checked} ) {    # User has already checked radio buttons of word senses

    ### prints all the cgi input, for development
    my @keys = do {
        # Keys look like "3_word"; only the leading number matters for order.
        no warnings 'numeric';
        sort { $a <=> $b or $a cmp $b } keys %input;
    };
    for my $key (@keys) {
        # Submitted data is untrusted, so escape it before echoing it back.
        print $escape_html->($key), ' = ', $escape_html->( $input{$key} ), "<br>\n";
    }
}

else {    # No input from user, yet
    print <<EOF;
Input Text:<br/>
<select name="input_type">
	<option value="roman">Romanized Buckwalter</option>
	<option value="utf8">UTF-8</option>
<!--	<option value="0">Unicode Decimal</option> -->
	<option value="cp1256" selected="selected">Windows 1256</option>
<!--	<option value="7">ISO 8859-6</option> -->
<!--	<option value="8">ArabTeX</option> -->
</select><br/>
<textarea style="text-align:right" name="text_from" rows="9" cols="70"></textarea>
<br/><br/>
EOF
}

### Page footer: the submit button, then close the form opened in the page
### header (its end tag was previously missing), then close the document.
print '<input type="submit" value="Submit" />'."\n";
print "</form>\n";
print "</body>\n</html>\n";
