# Make sure the directory used as the output path does not already exist
# Word count over a text file, run from the PySpark shell (`sc` is the
# SparkContext that the shell provides).
#
# Input location: use the local file below, or switch to the cluster data set
# "/public/randomtextwriter/part-m-00000" by assigning that string instead.
path = "/Users/itversity/Research/data/wordcount.txt"
lines = sc.textFile(path)

# Without coalesce the first stage will try to use 9 tasks; reduce the
# number of input partitions to 5 before tokenizing.
lines_coalesce = lines.coalesce(5)

# Tokenize the coalesced RDD (not the raw `lines` RDD) so that the reduced
# partition count actually applies to the first stage.
words = lines_coalesce.flatMap(lambda rec: rec.split(" "))
tuples = words.map(lambda rec: (rec, 1))

# Sum the per-word counts.
wordByCount = tuples.reduceByKey(lambda total, agg: total + agg)

# Second stage will use only 2 tasks.
wbcCoalesce = wordByCount.coalesce(2)

# Preview up to 100 (word, count) pairs on the driver.
for i in wbcCoalesce.take(100):
    print(i)