python sorting_parts_of_speech.py

Posted 2021-05-09

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了python sorting_parts_of_speech.py相关的知识，希望对你有一定的参考价值。

# Let's make a single function to determine the parts of speech

import re
import nltk
import os
#from collections import Counter # Is this used?

# First we break the text into tokens
def tokinze_text(raw_text):
    tokens = nltk.word_tokenize(raw_text)
    return tokens
tokens = tokinze_text(text)

def mytagger(tokens):
    '''This function inputs tokens'''
    tags = nltk.pos_tag(tokens)
    return tags

tagged = mytagger(tokens)

# Note that IN can be either a preposition or a conjunction, for now we're going to list it with the prepositions
common_noun_pos = ['NN', 'NNS']
common_nouns = []
verb_pos = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
verbs=[]
adjective_pos = ['JJ', 'JJR', 'JJS']
adjectives = []
pronoun_pos = ['PRP', 'PRP$', 'WP', 'WP$']
pronouns = []
adverb_pos = ['RB', 'RBR', 'RBS', 'WRB']
adverbs = []
proper_noun_pos = ['NNP', 'NNPS']
proper_nouns = []
conjunction_pos = ['CC']
conjunctions = []
preposition_pos = ['IN', 'TO']
prepositions = []
interjection_pos = ['UH']
interjections = []
modal_pos = ['MD'] # But these are also verbs, so let's make sure they show up as such
modals = []
tagged_other_pos = ['CD', 'DT', 'EX', 'FW', 'LS', 'PDT', 'POS', 'RP', 'SYM', 'WDT']
tagged_others = []
other = []

for idx, token in enumerate(tagged):
    if token[1] in common_noun_pos:
        common_nouns.append(token)
    elif token[1] in verb_pos:
        verbs.append(token)
    elif token[1] in adjective_pos:
        adjectives.append(token)
    elif token[1] in pronoun_pos:
        pronouns.append(token)
    elif token[1] in adverb_pos:
        adverbs.append(token)
    elif token[1] in proper_noun_pos:
        proper_nouns.append(token)
    elif token[1] in conjunction_pos:
        conjunctions.append(token)
    elif token[1] in preposition_pos:
        prepositions.append(token)
    elif token[1] in interjection_pos:
        interjections.append(token)
    elif token[1] in modal_pos:
        modals.append(token)
    elif token[1] in tagged_other_pos:
        tagged_others.append(token)
    else:
        other.append(token)
    

parts_of_speech = [common_nouns, verbs, adjectives, pronouns, adverbs, proper_nouns, conjunctions, prepositions, interjections, modals]
   
# Apped modals to verbs
# Create nouns that is both proper nouns and common nouns

以上是关于python sorting_parts_of_speech.py的主要内容，如果未能解决你的问题，请参考以下文章

Python代写，Python作业代写，代写Python，代做Python