[머신러닝] COUNT VECTORIZER

jasonshin 2021. 11. 26. 11:21

from sklearn.feature_extraction.text import CountVectorizer

# Toy corpus: five short English sentences used to demonstrate bag-of-words
# counting with CountVectorizer.
sample_data = [
    'This is the first document',
    'I loved them',
    'this document is the second document',
    'I am loving you',
    'And this is the third one',
]

vectorizer = CountVectorizer()

# Fit the vocabulary and build the document-term matrix in a single pass.
# (The original transcript called fit_transform twice and threw the first
# result away; one call is enough.)  fit_transform returns a sparse matrix,
# so convert it to a dense array to make the counts easy to inspect.
X = vectorizer.fit_transform(sample_data).toarray()

# Recorded value of X (one row per sentence, one column per vocabulary term):
# array([[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0],
#        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
#        [0, 0, 2, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0],
#        [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
#        [0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0]])
# Note the 2 in the third row: 'document' occurs twice in that sentence.

# Column labels for X, in column order.
feature_names = vectorizer.get_feature_names_out()

# Recorded value of feature_names:
# array(['am', 'and', 'document', 'first', 'is', 'loved', 'loving', 'one',
#        'second', 'the', 'them', 'third', 'this', 'you'], dtype=object)
# The terms are lowercased ('This'/'this' share one column) and the
# one-letter word 'I' has no column; 'loved' and 'loving' stay separate
# because no stemming is applied.

저작자표시 (새창열림)