我有一段代码,当样本量小于 5 时会抛出 ValueError。我希望在这种情况下不再抛出错误,而是输出“数据不足”(Insufficient data)的提示;当样本量大于或等于 5 时,则继续执行后面的代码。我已经写了下面这段代码,但它不起作用。请帮我修改这段代码。
```python
# Cross-validated evaluation script from the question.
#
# For every item `j` of `bentonite`, each column other than 'component_id' is
# used in turn as the target `y`, the remaining columns are standardised and
# used as features `X`, and `lm` is scored with 5-fold cross-validation.
# Per-fold RMSE / R^2 / adjusted R^2 are stored in dynamically named
# DataFrames (created through exec), then averaged per target and stored in
# `comp1` under the component id.
#
# NOTE(review): the original paste was flattened onto two lines; the nesting
# below is reconstructed from the statements themselves and should be
# confirmed against the author's real file.
# NOTE(review): `bentonite`, `lm`, `d2`, `pd`, `np`, `StandardScaler` and
# `r2_score` are defined outside this snippet; `j` is presumably a pandas
# DataFrame (it is used with .drop and .component_id) - confirm.
df_names = []
comp1={}
for j in bentonite:
    # This is the construct the question is about. As written, an item with
    # fewer than 5 rows prints the message and then FALLS THROUGH to the
    # modelling code, while an item with 5 or more rows hits `continue` and
    # is skipped entirely - the opposite of the stated intent.
    if len(j)<5:
        print('Insufficient data')
    else:
        continue
    for i in j:
        # NOTE(review): assumed that everything down to df_names.append(...)
        # belongs inside this `if`; the flattened source does not show it.
        if i!='component_id':
            X=j.drop([i,'component_id'],axis=1)
            y = j[i]
            # Stop iterating over the columns once 'loi_' is reached.
            if i == 'loi_':
                break
            sc=StandardScaler()
            X=sc.fit_transform(X)
            X = pd.DataFrame(X)
            from sklearn.model_selection import KFold
            from sklearn import metrics
            n_split = 5
            # Per the question, this is where a ValueError appears when the
            # item has fewer than 5 samples (n_splits is fixed at 5).
            kf=KFold(n_splits=n_split,shuffle=True,random_state=0)
            for model,name in zip([lm],['lm']):
                # One list entry per fold for each metric.
                rmse_test=[]
                r2_test=[]
                adj_r2_test=[]
                rmse_train=[]
                r2_train=[]
                adj_r2_train=[]
                for train,test in kf.split(X,y):
                    X_train,X_test=X.iloc[train,:],X.iloc[test,:]
                    y_train,y_test=y.iloc[train],y.iloc[test]
                    model.fit(X_train,y_train) #fitting the model
                    y_predict_test=model.predict(X_test) #predicting the test result
                    mse_test=round(metrics.mean_squared_error(y_test,y_predict_test),3) #calculating mse
                    rmse_test.append(np.sqrt(mse_test)) # calculating rmse for test
                    rSquare_test = round(r2_score((y_test),(y_predict_test)),3) #calculating rsquared for test
                    # NOTE(review): the adjustment uses len(y) (all rows of the
                    # item), not the size of the current fold - confirm intent.
                    adj_rsquare_test= round(1-(1-rSquare_test)*(len(y)-1)/(len(y)-X.shape[1]-1),3) #calculating adjusted r2 for test
                    r2_test.append(rSquare_test) #appending the result to the empty list
                    adj_r2_test.append(adj_rsquare_test) #appending the result to the empty list
                    y_predict_train=model.predict(X_train) #predicting the result for train
                    mse_train=round(metrics.mean_squared_error(y_train,y_predict_train),3) #calculating mse for train
                    rmse_train.append(np.sqrt(mse_train)) # calculating rmse for train
                    rSquare_train = round(r2_score((y_train),(y_predict_train)),3) #calculating rsquared for train
                    adj_rsquare_train= round(1-(1-rSquare_train)*(len(y)-1)/(len(y)-X.shape[1]-1),3) #calculating adjusted r2 for train
                    r2_train.append(rSquare_train) #appending the result to the empty list
                    adj_r2_train.append(adj_rsquare_train) #appending the result to the empty list
                # Build a DataFrame named df_<model name>_<target column> via
                # exec and remember its name so it can be concatenated below.
                exec(f"df_{name}_{i} = pd.DataFrame()")
                exec(f"df_{name}_{i}['Name'] =[name]*n_split")
                exec(f"df_{name}_{i}['rmse_train'] =rmse_train")
                exec(f"df_{name}_{i}['rmse_test'] =rmse_test")
                exec(f"df_{name}_{i}['r2_train'] =r2_train")
                exec(f"df_{name}_{i}['r2_test'] =r2_test")
                exec(f"df_{name}_{i}['adj_r2_train'] =adj_r2_train")
                exec(f"df_{name}_{i}['adj_r2_test'] =adj_r2_test")
                exec(f"df_{name}_{i}['output'] = [i]*n_split")
                df_names.append(f"df_{name}_{i}")
    # NOTE(review): assumed to run once per item `j` (it reads j.component_id
    # below). `df_names` is never reset, so names appended for earlier items
    # are concatenated again here.
    collection = pd.DataFrame() #collecting the results in dataframe
    for nn in df_names:
        exec(f"collection=pd.concat([collection,{nn}])")
    collection['new_name'] = collection['output']
    df4 = collection.groupby(['new_name']).mean()#finding the mean of the results to find the overall best model
    #df4['Mean'] = (df4['rmse_train'] + df4['rmse_test'] + (1-df4['r_squared_train']) + (1-df4['r_squared_test']))/4
    #df4.sort_values(by='Mean',inplace=True)
    abc = list(d2.columns)
    new = pd.DataFrame(columns=df4.columns)
    # For each column name of d2, take the first row of df4 whose index label
    # contains that name and append it to `new`.
    for names in range(len(abc)):
        val = df4[df4.index.isin([i for i in df4.index if abc[names] in i])].iloc[0,:]
        df5=pd.DataFrame(val.values.reshape(1,-1),columns=new.columns,index=[val.name])
        new=pd.concat([new,df5])
    # Store the summary table under the item's first unique component id and
    # print it with a bold (ANSI escape) heading.
    comp=j.component_id.unique()[0]
    comp1[comp]=new
    print('\n')
    print('\033[1m'+ comp+':')
    print('\n')
    print(new)
    print('\n')
```
回答:
`continue` 语句的位置不对。它的意思是“在此处结束当前这次迭代,直接进入循环的下一次迭代”。因此,要跳过数据点过少的情况,需要把 `continue` 语句直接放在 `print` 语句之后,而不是放在单独的 `else` 块中。
# Variant 1: `continue` sits directly after the message, so an item with
# fewer than 5 rows is reported and skipped; all other items reach the loop.
for j in bentonite:
    if len(j)<5:
        print('Insufficient data')
        continue
    for i in j:
        ...
另一个等效的做法是保留 `else` 语句,但删除 `continue`,并把后续的循环放进 `else` 块中。
# Variant 2: no `continue`; the per-column loop lives in the `else` branch,
# so it only runs for items with at least 5 rows.
for j in bentonite:
    if len(j)<5:
        print('Insufficient data')
    else:
        for i in j:
            ...