
120 | 第 7 章
self._yes = []
self._xes = None
for corpus in self._corpora:
vectors = self.feature_matrix(corpus)
if self._xes is None:
self._xes = vectors
else:
self._xes = vstack((self._xes, vectors))
self._yes.extend([corpus.sentiment_code] * vectors.shape[0])
def feature_matrix(self, corpus):
data = []
indices = []
indptr = [0]
for sentence in corpus.get_sentences():
sentence_indices = self._get_indices(sentence)
indices.extend(sentence_indices)
data.extend([1] * len(sentence_indices))
indptr.append(len(indices))
feature_matrix = csr_matrix((data, indices, indptr),
shape=(len(indptr) - 1,
len(self._words)),
dtype=np.float64)
feature_matrix.sort_indices() ...