
30 | 第 2 章
... 'data_channel_is_entertainment', 'data_channel_is_bus',
... 'data_channel_is_socmed', 'data_channel_is_tech',
... 'data_channel_is_world']
>>> X = df[features]
>>> y = df[['shares']]
# 创建成对的交互特征,跳过常数偏置项
>>> X2 = preproc.PolynomialFeatures(include_bias=False).fit_transform(X)
>>> X2.shape
(39644, 170)
# 为两个特征集创建训练集和测试集
>>> X1_train, X1_test, X2_train, X2_test, y_train, y_test = \
... train_test_split(X, X2, y, test_size=0.3, random_state=123)
>>> def evaluate_feature(X_train, X_test, y_train, y_test):
... """Fit a linear regression model on the training set and
... score on the test set"""
... model = linear_model.LinearRegression().fit(X_train, ...