AI 이론/Machine Learning
[머신러닝] COUNT VECTORIZER
jasonshin
2021. 11. 26. 11:21
from sklearn.feature_extraction.text import CountVectorizer
# Toy corpus: five short English sentences, one document per list entry.
sample_data = [
    'This is the first document',
    'I loved them',
    'this document is the second document',
    'I am loving you',
    'And this is the third one',
]

# With its default settings CountVectorizer lowercases the text and keeps only
# tokens of two or more word characters, so "This"/"this" share one column and
# the single-letter word "I" gets no column at all.
vectorizer = CountVectorizer()

# Learn the vocabulary and build the document-term count matrix in one pass.
# The fit is done exactly once; an earlier duplicate call whose result was
# discarded has been removed because it only repeated the same work.
X = vectorizer.fit_transform(sample_data)

# fit_transform returns a sparse matrix; convert it to a dense NumPy array so
# the individual counts can be inspected directly.
X = X.toarray()

# Notebook-style display: one row per document, one column per vocabulary term.
X
array([[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 2, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0], [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1], [0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0]])
# Show the vocabulary terms learned by the fitted vectorizer; entry i names
# column i of the count matrix produced by fit_transform.
vectorizer.get_feature_names_out()
array(['am', 'and', 'document', 'first', 'is', 'loved', 'loving', 'one', 'second', 'the', 'them', 'third', 'this', 'you'], dtype=object)
반응형