# Small demo frame with repeated rows, used to show pandas' de-duplication helpers.
dframe = DataFrame(
    {
        'key1': ['A', 'A', 'B', 'B', 'B'],
        'key2': [2, 2, 2, 3, 3],
    }
)

# Boolean Series with one entry per row of dframe: True where the row repeats
# an earlier row. For this data: [False, True, False, False, True].
dframe.duplicated()

# Drop fully duplicated rows, keeping the first occurrence (rows 0, 2 and 3 remain).
dframe.drop_duplicates()

# Judge duplicates on 'key1' alone, keeping the first row per key (rows 0 and 2 remain).
dframe.drop_duplicates(subset=['key1'])

# Same 'key1'-only comparison, but keep the last row per key (rows 1 and 4 remain).
dframe.drop_duplicates(subset=['key1'], keep='last')