幸福感预测 Task14:集成学习案例一( 五 )

<8.8f}".format(mean_squared_error(oof_xgb_263, target)))
r
# RandomForestRegressor (random forest): 5-fold cross-validation on the
# 263-feature data set. Produces out-of-fold predictions for the training rows
# and fold-averaged predictions for the test rows.
# NOTE(review): `rfr` is presumably an alias of sklearn's RandomForestRegressor
# imported outside this excerpt -- confirm against the import cell.
folds = KFold(n_splits=5, shuffle=True, random_state=2019)
oof_rfr_263 = np.zeros(len(X_train_263))
predictions_rfr_263 = np.zeros(len(X_test_263))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_263, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x, tr_y = X_train_263[trn_idx], y_train[trn_idx]

    # verbose=1 asks the estimator to log progress while it works.
    # NOTE(review): the original note described 0 = silent, 1 = progress bar,
    # 2 = one line per epoch; check the estimator's own docs for exact levels.
    rfr_263 = rfr(
        n_estimators=1600,
        max_depth=9,
        min_samples_leaf=9,
        min_weight_fraction_leaf=0.0,
        max_features=0.25,
        verbose=1,
        n_jobs=-1,
    )
    rfr_263.fit(tr_x, tr_y)

    # Rows held out in this fold get their out-of-fold prediction here.
    oof_rfr_263[val_idx] = rfr_263.predict(X_train_263[val_idx])
    # Each fold contributes an equal share of the final test prediction.
    predictions_rfr_263 += rfr_263.predict(X_test_263) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_rfr_263, target)))
essor
# GradientBoostingRegressor (gradient-boosted decision trees): 5-fold
# cross-validation on the 263-feature data set. Produces out-of-fold
# predictions for the training rows and fold-averaged test predictions.
# NOTE(review): `gbr` is presumably an alias of sklearn's
# GradientBoostingRegressor imported outside this excerpt -- confirm.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2018)
# Size the out-of-fold buffer from the matrix that is actually split below, so
# every val_idx is a valid index (matches how the sibling model blocks do it).
oof_gbr_263 = np.zeros(len(X_train_263))
predictions_gbr_263 = np.zeros(len(X_test_263))

# StratifiedKFold stratifies on y_train, so it assumes a discrete target.
for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_263, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_263[trn_idx]
    tr_y = y_train[trn_idx]

    gbr_263 = gbr(
        n_estimators=400,
        learning_rate=0.01,
        subsample=0.65,
        max_depth=7,
        min_samples_leaf=20,
        max_features=0.22,
        verbose=1,
    )
    gbr_263.fit(tr_x, tr_y)

    # Rows held out in this fold get their out-of-fold prediction here.
    oof_gbr_263[val_idx] = gbr_263.predict(X_train_263[val_idx])
    # Each fold contributes an equal share of the final test prediction.
    predictions_gbr_263 += gbr_263.predict(X_test_263) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_gbr_263, target)))
至此,我们得到了以上5种模型的预测结果以及模型架构及参数。接下来,在每一种特征工程中,以这5种模型的预测结果作为输入,进行5折交叉验证并重复两次(使用 Kernel Ridge Regression,核岭回归),取得每一个特征数下的模型的结果。
# Stacking for the 263-feature models: the base models' out-of-fold
# predictions become the training features of a second-level regressor, and
# their test predictions become its test features.
# vstack puts one model per row; transpose() swaps the axes so that each row is
# a sample and each column is one base model.
train_stack2 = np.vstack(
    [oof_lgb_263, oof_xgb_263, oof_gbr_263, oof_rfr_263, oof_etr_263]
).transpose()
test_stack2 = np.vstack(
    [
        predictions_lgb_263,
        predictions_xgb_263,
        predictions_gbr_263,
        predictions_rfr_263,
        predictions_etr_263,
    ]
).transpose()

# Cross-validation: 5 folds, repeated 2 times.
folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=7)
# Total number of fitted models (n_splits * n_repeats); used to average the
# test predictions instead of a hard-coded constant.
n_stack_fits = folds_stack.get_n_splits()
oof_stack2 = np.zeros(train_stack2.shape[0])
predictions_lr2 = np.zeros(test_stack2.shape[0])

for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack2, target)):
    print("fold {}".format(fold_))
    trn_data, trn_y = train_stack2[trn_idx], target.iloc[trn_idx].values
    val_data = train_stack2[val_idx]

    # Kernel Ridge Regression as the second-level model.
    # NOTE(review): `kr` is presumably an alias of sklearn's KernelRidge
    # imported outside this excerpt -- confirm.
    lr2 = kr()
    lr2.fit(trn_data, trn_y)

    # With repeated K-fold every row is held out once per repeat, so the value
    # written in the first repeat is overwritten by the second.
    oof_stack2[val_idx] = lr2.predict(val_data)
    predictions_lr2 += lr2.predict(test_stack2) / n_stack_fits

# Bare expression: shown as the cell output when run in a notebook.
mean_squared_error(target.values, oof_stack2)
之后,我们对49维的数据和383维的数据进行与上述263维数据类似的操作。
6.2 对于49维数据的处理:
##### lgb_49lgb_49_param = {'num_leaves': 9,'min_data_in_leaf': 23,'objective':'regression','max_depth': -1,'learning_rate': 0.002,"boosting": "gbdt","feature_fraction": 0.45,"bagging_freq": 1,"bagging_fraction": 0.65,"bagging_seed": 15,"metric": 'mse',"lambda_l2": 0.2, "verbosity": -1}folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=9)oof_lgb_49 = np.zeros(len(X_train_49))predictions_lgb_49 = np.zeros(len(X_test_49))for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):print("fold n°{}".format(fold_+1))trn_data = http://www.kingceram.com/post/lgb.Dataset(X_train_49[trn_idx], y_train[trn_idx])val_data = lgb.Dataset(X_train_49[val_idx], y_train[val_idx])num_round = 12000lgb_49 = lgb.train(lgb_49_param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=1000, early_stopping_rounds = 1000)oof_lgb_49[val_idx] = lgb_49.predict(X_train_49[val_idx], num_iteration=lgb_49.best_iteration)predictions_lgb_49 += lgb_49.predict(X_test_49, num_iteration=lgb_49.best_iteration) / folds.n_splitsprint("CV score: {: