# Start from a clean output directory: create it if it is missing (the
# steps below write into it) and remove .txt files left by a previous run.
mkdir -p data/scrubbed
rm -f data/scrubbed/*.txt
# The Agilent files have extra headers; we only want the part of each file
# that starts with "FEATURES". Remove the extra headers and split the files
# into sets where the name reflects the array used.

# Copy file "$1", from its first line whose first tab-separated field is
# "FEATURES" through to the end, into directory "$2" as <basename>.<n>.txt,
# where <n> is the number of lines following that FEATURES line.
# Prints "<input path> <n>" on stdout. Returns non-zero if awk fails.
scrub_agilent_file() {
  local src=$1 outdir=$2
  local base tmp nlines n
  base=$(basename "$src" .txt)
  # Per-process temp name without a .txt suffix, so concurrent runs do not
  # clobber each other and a half-written file never matches a *.txt glob.
  tmp=${outdir}/${base}.tmp.$$
  rm -f -- "$tmp"
  if ! awk 'BEGIN { FS = OFS = "\t" }
            $1 == "FEATURES" { seen = 1 }
            seen { print }' "$src" > "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi
  nlines=$(wc -l < "$tmp")
  # Do not count the FEATURES header line itself.
  n=$((nlines - 1))
  mv -- "$tmp" "${outdir}/${base}.${n}.txt"
  echo "$src $n"
}

for f in data/{LT,NJ,mi}*.txt; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$f" ] || continue
  scrub_agilent_file "$f" data/scrubbed
done
# for each chip used, create a target file.
for N in 45015 62976; do
T=data/scrubbed/${N}.targets.txt
echo $'FileName\tCondition' > $T
for f in data/scrubbed/*${N}.txt; do
grp=$(echo $f | perl -pe 's/^.+_(\w+)\.\d+.*/$1/;s/IDL/ILD/;s/.+CTRL/CTRL/;')
echo "$f $grp" >> $T
done
done