# Armanni TODO required file DONE
# grabbed text from file DONE
# filter the text
# split it with empty space DONE
# downcase text DONE
# create empty hash DONE
# looking through the text if not in the hash then set number to 1 DONE
# if in the hash use hash index += to increase the number DONE
# after sorting it becomes an array
# sort by value
# make it word => count
# then fetch top item
# goal: find the top 100 most frequently used words
# Tokens excluded from the frequency count. The first literal holds what
# appear to be dataset-specific tokens (names, a handle, stray punctuation);
# the second holds common English function words, grouped by first letter.
# Element order is irrelevant to lookups but is kept as originally written.
stop_words = %w[trump donald - @realdonaldtrump &] + %w[
  a about above after again against all am an and any are aren't as at
  be because been before being below between both but by
  can't cannot could couldn't
  did didn't do does doesn't doing don't down during
  each
  few for from further
  had hadn't has hasn't have haven't having
  he he'd he'll he's her here here's hers herself him himself his how how's
  i i'd i'll i'm i've if in into is isn't it it's its itself
  let's
  me more most mustn't my myself
  no nor not
  of off on once only or other ought our ours ourselves out over own
  same shan't she she'd she'll she's should shouldn't so some such
  than that that's the their theirs them themselves then there there's
  these they they'd they'll they're they've this those through to too
  under until up
  very
  was wasn't we we'd we'll we're we've were weren't
  what what's when when's where where's which while
  who who's whom why why's with won't would wouldn't
  you you'd you'll you're you've your yours yourself yourselves
]
require "csv"
# Tally every whitespace-separated, lower-cased token found in the first
# column of data.csv. Counting row by row yields the same tokens as joining
# all cells with spaces first, but avoids growing one huge string with `+=`,
# which re-copies the accumulated text on every row.
word_count = Hash.new(0)
CSV.foreach("data.csv") do |row|
  text = row[0]
  next if text.nil? # row has no value in its first column: nothing to count

  text.downcase.split(" ").each { |word| word_count[word] += 1 }
end

# Drop stop words once, after counting: one hash delete per stop word instead
# of a linear scan of the stop-word array for every token in the file.
stop_words.each { |word| word_count.delete(word) }

# Sorting a hash yields [word, count] pairs. Order by count, highest first,
# and break ties alphabetically so the output is identical on every run
# (sort_by alone gives no guarantee about the order of equal counts).
result = word_count.sort_by { |word, count| [-count, word] }

# Print the 100 most frequent remaining words (fewer if there are fewer).
p result.first(100)
#remove stop words