#!/bin/tcsh -f

# Exactly four arguments are required; with any other count, print the
# usage text and stop.
if ($#argv != 4) then
  echo "Usage: grow.tree file_stem train_set_size tree_type seed"
  echo ""
  echo "For example, to train a C4 decision tree on 100 cases"
  echo "drawn from the glass data set using random seed 123:"
  echo ""
  echo "grow.tree glass 100 c4 123 | more"
  # A bare "exit" would return the status of the last echo (0); report
  # the usage error to the caller with a non-zero status instead.
  exit 1
endif

# Name the four positional arguments.
set stem  = "$1"
set train = "$2"
set prog  = "$3"
set seed  = "$4"

# All cases are read from ${stem}.dta; stop early if it cannot be read
# rather than letting every later stage run on empty input.
if (! -r "${stem}.dta") then
  echo "grow.tree: cannot read ${stem}.dta"
  exit 1
endif

# total = number of lines in the data file
set total = `wc -l < "${stem}.dta"`

# Lines 1..train become the build set and lines train+1..total the test
# set, so train must leave at least one line on each side.
if ($train < 1 || $train >= $total) then
  echo "grow.tree: train_set_size must be at least 1 and less than $total"
  exit 1
endif

# test = first line number of the test split
set test = `echo $train | dm "x1 + 1"`

# Permute the data file with the given seed and cut it into the two
# splits.  Both perm runs use the same seed and input; the split relies
# on them producing the same ordering (NOTE(review): confirm perm -s is
# deterministic for a fixed seed).
/bin/rm -f "${stem}.bld" "${stem}.tst"
perm -s $seed < "${stem}.dta" | linex 1-$train     > "${stem}.bld"
perm -s $seed < "${stem}.dta" | linex $test-$total > "${stem}.tst"

# Passing the optional argument "-G $huge" prevents IND from subsampling
# from the train set when deciding what test to install.  huge is one
# more than the line count of the data file.
set huge = `echo ${total} | dm "x1 + 1"`

# Grow the tree; linex keeps only lines 1-5 and 10 of mktree's output.
mktree -v -e -o "-G ${huge}" -s ${prog} ${stem} \
  | linex 1-5 10
echo ""

# Write tprint's rendering of ${stem}.treec to the file "tree", then
# page through it.
/bin/rm -f tree
tprint -cdi ${stem}.attr ${stem}.treec > tree
less < tree
echo ""

# uncomment this to look at the pruned tree
# /bin/rm -f tree.p
# tprint -p ${stem} > tree.p
# cat tree.p | less
# echo ""

# ---- accuracy on the build (training) split ----
/bin/rm -f targets preds

# Column 1 of every case goes to "targets".
colex 1 < ${stem}.bld > targets

# Run tclass on the build split, turn each "+" in its output into a tab,
# paste the targets in front, and reformat into "preds".
tclass -e -p ${stem}.attr ${stem}.tree ${stem}.bld \
  | tr "+" "\t" \
  | abut targets - \
  | colex -it 25a1 8.4n2-10 \
  > preds
cat -n preds | less

# Per line the dm expression yields 1 when (x2 > x3 and x1 is 1) or
# (x2 <= x3 and x1 is 2), and 0 for the opposite pairing; the mean of
# those values is reported as the accuracy.
# NOTE(review): this presumes two classes coded 1 and 2, with x2/x3
# being tclass's scores for them -- confirm against the tclass output.
set acc = `dm "if x2>x3 then 1-(x1-1) else x1-1" < preds | stats mean`
echo "train set accuracy = $acc" | less


# ---- accuracy on the held-out test split ----
/bin/rm -f targets preds

# Column 1 of every case goes to "targets".
colex 1 < ${stem}.tst > targets

# Run tclass on the test split, turn each "+" in its output into a tab,
# paste the targets in front, and reformat into "preds".
tclass -e -p ${stem}.attr ${stem}.tree ${stem}.tst \
  | tr "+" "\t" \
  | abut targets - \
  | colex -it 25a1 8.4n2-10 \
  > preds
cat -n preds | less

# Per line the dm expression yields 1 when (x2 > x3 and x1 is 1) or
# (x2 <= x3 and x1 is 2), and 0 for the opposite pairing; the mean of
# those values is reported as the accuracy.
# NOTE(review): this presumes two classes coded 1 and 2, with x2/x3
# being tclass's scores for them -- confirm against the tclass output.
set acc = `dm "if x2>x3 then 1-(x1-1) else x1-1" < preds | stats mean`
echo "test set accuracy = $acc" | less
