#!/bin/csh -f
#
#
# This program builds
#     1. a `Dictionary with prob' (dictionary with probabilities) from CORPUS
#
# Part 1.
#
# Usage: <this script> CORPUS DICTIONARY
#   CORPUS     (argument 1) - input corpus file; each line is split on "+"
#                             into items of the form word/tag.
#   DICTIONARY (argument 2) - path the finished dictionary is moved to.
#
# The helper programs printhan, freq and put_irr are run from $TOPDIR.
# Scratch files CORPUS.tmp ... CORPUS.tmp8 are created next to CORPUS, and
# the files yeong and notyeong are created in the current directory; all of
# them are removed (or renamed) by the end of the run.  The file appenddict
# is read from the current directory and appended to the result.
setenv TOPDIR .

# Refuse to start without both arguments: with no CORPUS the first awk would
# read standard input, and with no DICTIONARY the final mv would fail only
# after all the work had been done.
if ($#argv < 2) then
    echo "Usage: $0 corpus-file output-dictionary"
    exit 1
endif
if (! -r "$1") then
    echo "Cannot read corpus file $1"
    exit 1
endif

# Keep the arguments in named variables and quote every use so that paths
# containing blanks or glob characters stay a single word.
set corpus = "$1"
set dict = "$2"

# Step 1: one "+"-separated item per output line.
echo 'Getting all the words from CORPUS'
awk -F+ ' { for(i = 1 ; i <= NF ; i++) print $i } ' "${corpus}" > "${corpus}.tmp"
echo 'DONE....'
#
#
# Step 2: sort the items so that identical ones become adjacent.
echo 'Sorting all the words'
sort "${corpus}.tmp" > "${corpus}.tmp2"
echo 'DONE....'
/bin/rm "${corpus}.tmp"
#
#
# Step 3: filter through printhan (presumably keeps only the hangul items;
# its source is not part of this file), then turn word/tag into "word tag".
echo 'Getting all the hanguel-words from Word-Tag Pairs'
$TOPDIR/printhan < "${corpus}.tmp2" > "${corpus}.tmp3"
awk -F/ ' { print $1 , $2 } ' "${corpus}.tmp3" > "${corpus}.tmp4"
echo 'DONE....'
/bin/rm "${corpus}.tmp2"
/bin/rm "${corpus}.tmp3"
#
#
# Step 4: freq reads the "word tag" lines; the next step relies on its
# output carrying a third column (the frequency).
echo 'Getting Frequencies of Word-Tag Pairs'
$TOPDIR/freq < "${corpus}.tmp4" > "${corpus}.tmp5"
echo 'DONE....'
/bin/rm "${corpus}.tmp4"
#
#
###echo 'Getting P(tag|word) from Frequencies of Word-Tag Pairs'
###wordambi < $1.tmp5 > $1.tmp6

# Step 5: swap the first two columns to get (tag, word, frequency) and sort,
# which groups the entries by tag.
echo 'write (Tag , Word , Frequency)'
awk ' { print $2 , $1 , $3 } ' "${corpus}.tmp5" > "${corpus}.tmp6"
sort < "${corpus}.tmp6" > "${corpus}.tmp7"
echo 'DONE....'
/bin/rm "${corpus}.tmp5"
/bin/rm "${corpus}.tmp6"
#
#
# Step 6: split the sorted list in two.  Everything before the first line
# that contains "pa" goes to notyeong; that line and all following lines go
# to yeong.  The split therefore depends on the sort order produced above.
# awk only truncates a file it actually writes to, so leftovers from an
# earlier aborted run are removed first to keep them out of the result.
echo 'Splitting the Dictionary into Not-YeongEon and YeongEon'
/bin/rm -f notyeong yeong
awk ' BEGIN { tag = 0 } /pa/ {tag = 1} { if (tag ==0) print $0 > "notyeong" } { if (tag == 1) print $0 > "yeong" } ' "${corpus}.tmp7"
echo 'DONE....'
#
#
# Step 7: put_irr is given yeong and CORPUS.tmp8 as arguments; the merge
# step below reads CORPUS.tmp8 as its result.
echo 'Putting the irr-code into YeongEon-Dictionary'
$TOPDIR/put_irr yeong "${corpus}.tmp8"
echo 'DONE....'
/bin/rm "${corpus}.tmp7"
#
#
# Step 8: append the put_irr output and the extra entries from appenddict
# to notyeong.
echo 'Merging the Not-YeongEon and YeongEon with irr-code'
cat "${corpus}.tmp8" >> notyeong
cat appenddict >> notyeong
echo 'DONE....'
/bin/rm yeong
/bin/rm "${corpus}.tmp8"
#
#
# Step 9: publish the merged file under the requested name.
/bin/mv notyeong "${dict}"
echo 'Now, The Dictionary with Probability is ' "${dict}"
#
#
