我在尝试使用KNN算法预测类别,但不知道为什么会出现如下错误:“在外部函数调用中出现NA/NaN/Inf(参数6)”(英文原文:NA/NaN/Inf in foreign function call (arg 6))。我已经使用na.omit(A)函数移除了NA值,但仍然得到这个错误。
data.csv
RegionName,RetailerId,PartyName,Address1,Address2,Area,City,ContactPerson,CSTNumber,Email,LicenseNumber,Telephone,MobileNumberMUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES,"2,GROUND FLOOR,ABDUL REHAMAN CHAWL,MAROL GAON",SHREE HANUMAN MANDIR ROAD,MAROL,ANDHERI EAST,HARSHIT JAIN,20 Z6 59 90B,[email protected],21 Z6 59 90B,29207788 / 07666464888,"82,864,534,619,867,000,000"MUMBAI,297,BHAGWATI MEDICAL [MAROL],"SHRI HANUMAN MANDIR RD;MAROL GAON,","ANDHERI[E],MUMBAI-59.",,ANDHERI [E],MR.DINESH KOTHARI,20Z-6-59-908,[email protected],21Z-6-59-908,29207788,MUMBAI,297,BHAGWATI MEDICAL [MAROL],"SHRI HANUMAN MANDIR RD;MAROL GAON,","ANDHERI[E],MUMBAI-59.",,ANDHERI [E],MR.DINESH KOTHARI,20Z-6-59-908,[email protected],21Z-6-59-908,29207788,MUMBAI,297,BHAGWATI MEDICAL [MAROL],"SHRI HANUMAN MANDIR RD;MAROL GAON,","ANDHERI[E],MUMBAI-59.",,ANDHERI [E],MR.DINESH KOTHARI,20Z-6-59-908,[email protected],21Z-6-59-908,29207788,MUMBAI,297,BHAGWATI MEDICAL & GENRAL STORE,"SHRI HANUMAN MANDIR ROAD,",MAROL VILLAGE,MAROL,MUMBAI,DINESH,20/Z-6/59/908,[email protected],20C/Z-6/59/908,29207788/8286453461,98670976670MUMBAI,297,$BHAGWATI MEDICAL.,"SHOP NO.2,ABDUL REHMAN CHAWL SHRI HANUMAN MANDIR ROAD",,ANDHERI(E),MUMABAI,,20-21-Z-1,[email protected],59-908-20C,29207788/8286453461,MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORE,SHOP NO.2 ABDUL REHMAN CHAWL SHRI HANUMAN MANDIR MARG,"MAROL VILLAGE,",,ANDHERI (E),,20/Z-6/59/908,[email protected],21/Z-6/59/908,29207788 / 9867097667,7666464888MUMBAI,297,BHAGWATI MED. & GEN. 
STORES.,"SHREE HANUMAN MANDIR ROAD, MAROL VILLEG",,MAROL,MUMBAI,DINESH BHIMRAJ,20Z-6/59/908,[email protected],20C-Z-6/59/940,29207788,9869260832MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES.,"SHOP NO.2, GR FLOOR, MEZZAINI FLR,ABDULREHMAN CHAWL,","SHREE HANUMAN MANDIR ROAD,MAROL GAON",ANDHERI(E),MUMBAI,,"20,21/Z-6/59/90B",[email protected],20C/Z-6/59/940,7977458967,9867097667MUMBAI,297,BHAGWATI MEDICAL,"SHRI HANUMAN MANDIR RD,","MAROL GAON,MAROL, ANDHERI(E)",VP(E)-A(E)-MA,MUMBAI,,"20,21/Z-6/59/908",,20C/Z-6/59/940,29207788,7738788474MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES.,"SHOP NO.2,ABDUL REHMAN CHWAL,HANUMAN MANDIR,MAROL VILLADGE REZY COELHO CHAWL,",ANDHERI(E),ANDHERI (E),MUMBAI,DINESH BHAI,21Z-6/59/908,[email protected],20Z-6/59/908,29207788/7666464888,MUMBAI,297,BHAGWATI MED.& GEN. ST.,2 GR.FL.ABDUL REHMAN CHAWL,HANUMAN MANDIR RD.,MAROL GAON,ANDHERI-E,DINESH KOTHARI,"20,21/Z-6/59/908",[email protected],20C/Z-6/59/940,9869260832,29207788MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES.,SHOP NO 2.ABDUL REHMAN CHAWL.,"SHRI HANUMAN MANDIR ROAD, MAROL VILLAGE",MAROL - ANDHERI - EAST,MUMBAI,MAROL,20-Z6/59/908,[email protected],21-Z6/59/908,29207788/7738788474/9869260832,9867097667MUMBAI,297,BHAGWATI MEDICAL,"SHRI HANUMAN MANDIR ROAD,","MAROL GAON,",ANDHERI (E),MUMBAI,,,,,29207788/8286453461,MUMBAI,297,BHAGWATI MEDI & GEN.STORES,SHRI HANUMAN MANDIR ROAD MAROL VILLAGE,MAROL,,MAROL,,20/Z/6/59/749,,20 C/Z-6/59/788,29207788,MUMBAI,297,BHAGWATI MED ST 29207788,2 GR FL MEZZANIN ABDUL REHAMAN,CHAWLHUMAN MANDIR RDMAROL,ANDHERI,,,27390646287V,[email protected],20-21Z-59-908-20CZ6-59-940,,7666464888MUMBAI,297,BHAGWATI MEDICAL,"SHRI HANUMAN MANDIR ROAD,MAROL GAON,MAROL,ANDHERI-E",,,,,,,,,8286453461MUMBAI,297,BHAGWATI MED & GEN STORES,,ANDHERI (E),ANDHERI [W],,,,,/,,MUMBAI,297,BHAGWATI MEDICAL STORE,SH NO.2BRFLR.MAZALIN FLR.,ABDUL REHMAN CHL.HANUMAN MAND,ANDHERI (WEST),,,27390646287 V,[email protected],20-21-Z-6-59-90B,9867097667 / 8286453461,MUMBAI,297,BHAGWATI MEDICAL MAROL,SHOP 
NO 2 ABDULREHMAN CHAWL SH,ANDHERI E,,GENERAL,,20/21-Z6-59-908,,20C-Z6-59-940,29207788,MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES,"SHRI HANUMAN MANDIR ROAD,, MAROL VILLAGE,, ANDHERI (E),",", MUMBAI.",ANDHERI (E),MUMBAI,,C_00121689190,MUMBAI,20/21-Z-6/59/908,,9867097667MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST. #,"22,LAXMI CHAYYA BLDG","L.T.ROAD,BABHAI NAKA",BORIVLI,BORIVALI WEST,8959202,20/Z7/92/2221,[email protected],21/Z7/92/2221,9821287221/28959202,MUMBAI,389,GOPAL KRISHNA MED & GEN STORES,"22,LAXMI CHHAYA,L.T.ROAD","BABAI NAKA ,EKSAR ROAD",BORIVALI (WEST),MUMBAI,MR CHANDRAKANT,20/Z7/92/2221,[email protected],21/Z7/92/2221,28959202/983381929,9821287221MUMBAI,389,GOPAL KRISHNA MEDICAL & GENERAL STORES,"22, LAXMI CHHAYA, L.T.ROAD",BABHAI NAKA,BORIVALI W,MUMBAI,,20/Z/7/92/2221,[email protected],21/Z/7/92/2221,28959202,MUMBAI,389,NEW GOPAL KRISHNA MEDICAL & GEN.STORES,"22, LAXMI CHHAYA, BABHAI NAKA",EKSAR ROAD,L.T.ROAD,BORIVALI (W),CHANDHUBHAI,20-MH-MZ7-192791,[email protected],21-MH-MZ7/192792,28959202,9833819296/9821287221MUMBAI,389,GOPAL KRISHNA MED.&GEN.STORES,"22,LAXMI CHHAYA,L.T.ROAD,BABHAI","NAKA,WEST MUMBAI",,BORIVALI,CHANDRAKANTBHAI,20Z-7/92/2221,[email protected],21Z-7/92/2221,28959202/69931501,9833819296MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST;[BORIVALI-W],"22,LAXMI CHHAYA ,L.T.RD;BHABAI NAKA,","BORIVALI[W],MUMBAI-92.",,BORIVALI [W],MR.CHANDUBHAI,20-Z-7/92/2221,,21-Z-7/92/2221,28959202,MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST;[BORIVALI-W],"22,LAXMI CHHAYA ,L.T.RD;BHABAI NAKA,","BORIVALI[W],MUMBAI-92.",,BORIVALI [W],MR.CHANDUBHAI,20-Z-7/92/2221,,21-Z-7/92/2221,28959202,MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST;[BORIVALI-W],"22,LAXMI CHHAYA ,L.T.RD;BHABAI NAKA,","BORIVALI[W],MUMBAI-92.",,BORIVALI [W],MR.CHANDUBHAI,20-Z-7/92/2221,,21-Z-7/92/2221,28959202,MUMBAI,389,GOPAL KRISHNA MED &. GENERAL STORES,"22, LAXMI CHHAYA BLDG,","BABHAI NAKA, EKSAR RD,",BORIVALI (W),MUMBAI,,20/Z/7/92/2221,,21/Z/7/92/2221,28959202 / 9821287221,MUMBAI,389,GOPAL KRISHNA MED. & GEN. 
STORES,"22,LAXMI CHHAYA,","L.T. ROAD,BABHAI NAKA,",,BORIVALI{WEST},,20&21-Z-7/92/2221,[email protected],20C-Z-7/92/2124,"289,592,029,821,287,000",9833819296MUMBAI,389,GOPAL KRISHNA MEDICAL,22LAXMI CHHAYYA,BABHAI NAKA EKSAR ROAD,(S) BORIVALI (WEST).,,,,,20-Z-7/92/187121-Z-7/92/1871 20C-Z-7/92/1817. DT.6.10.08,9821287221/9892695575,MUMBAI,389,GOPALKRISHNA MEDICAL STORE,,,BORIVALI (WEST),MUMBAI,,,,,28959202,MUMBAI,389,GOPAL KRISHNA MED &. GENERAL STORES,"22, LAXMI CHHAYA BLDG,L.T.RD","BABHAI NAKA, EKSAR RD,",BORIVALI (W),MUMBAI,,20-MH-MZ7-192791,[email protected],21-MH-MZ7-192792,28959202 / 9821287221,MUMBAI,389,ZZGOPAL KRISHNA MED.ST.,22 LAXMI CHAYA,BABHAI NAKA,L.T.RD,BORIVALI-W,CHANDU BHAI,"20,21/Z-7/92/2221",[email protected],20C/Z-7/92/2124,28959202,MUMBAI,389,GOPAL KRISHNA MED & GEN STORES,"22,LAXMI CHHAYA, L.T.RD,BABHAI NAKA",,,BORIVALI-W,,"20-Z-7/92/1536,21-Z-7/92/1536",,21-C-Z/92/1481,,MUMBAI,389,GOPALKRISHNA MEDICAL.,"L.T.ROAD, BABHAI NAKA",BORIVALI (W),,BORIVALI (W),,,,,9821287221,MUMBAI,389,GOPAL KRISHNA MEDICAL,"SH-22,L.T.RD,BABAI NAKA",,BORIVALI(W),MUMBAI,,,,,9821287221/28959202,MUMBAI,389,GOPAL KRISHNA MED.&GEN.STORE,22/LAXMI CHHAYA; L.T.ROAD,BORIVALI (WEST) BABHAI NAKA,BORIVALI,,CHANDU BHAI - 9833819296,27480593421V,[email protected],20-Z-7/92/2221*21-Z-7/92/2221 20C-Z-7/92/2124,28959202,MUMBAI,389,GOPAL KRISHNA MED.(CLOSED-,"22,LAXMI CHHAYA,","L.T.ROAD,BABHAI NAKA, BORAVALI WEST,MUMBAI-400092",,BORIVALI- WEST,,20-Z-7/92/1536,,21-Z-7/92/1536,28959202,MUMBAI,389,GOPAL KRISHNA MED & GEN STO,22 LAXMI CHHAYA L T RD,BABHAI NAKABORIVLI W MUM-92,BORIVALI,,9821287221 9892695575,27480593421.V,[email protected],20-21Z7922221 20C2124,28959202,MUMBAI,389,GOPAL KRISHNA MED & GEN STORE,22/LAXMI CHHAYA,L.T.ROAD,BORIVALI (WEST),,,,,20-7-7/92/1536 /21-Z-7/92/1536,,
RCode
# Reproduction script from the question: load the retailer data, split it into
# training and testing sets, and try to predict RetailerId with 1-nearest-
# neighbour. The knn() call is intentionally left as the asker wrote it,
# because it is the call that raises the reported error.

# Load the data and drop every row that contains an NA.
A <- read.csv("data.csv")
A <- data.frame(na.omit(A))
str(A)

########
# split training and testing set
########
set.seed(123)
# Assign each row to group 1 (training, p = 0.9) or group 2 (testing, p = 0.1).
sf <- sample(2, nrow(A), replace = TRUE, prob = c(0.9, 0.1))
trd <- A[sf == 1, ]
tsd <- A[sf == 2, ]

# lists out the variables that are problematic
# (columns with fewer than two distinct values)
which(vapply(A, function(x) length(unique(x)) < 2, logical(1)))

# Converts Dependent Variable into Factor
Train_RetailerId <- as.factor(trd[, 2])

########
# KNN
########
library(class)
# NOTE(review): trd and tsd are passed with all of their columns, including
# the non-numeric ones and the target column itself. This is the call that
# fails with "NA/NaN/Inf in foreign function call (arg 6)".
Predicted.RetailerId <- knn(trd, tsd, Train_RetailerId, k = 1)
# NOTE(review): A$RetailerId has one entry per row of A, while the predictions
# cover only the rows of tsd, so the two vectors differ in length; the
# comparison was presumably meant to use tsd$RetailerId.
print(mean(A$RetailerId != Predicted.RetailerId))
Result <- cbind(Predicted.RetailerId, tsd)
# NOTE(review): confusionMatrix() is not provided by the class package;
# presumably it comes from caret, which is never loaded here -- confirm.
confusionMatrix(Predicted.RetailerId, tsd$RetailerId)
Structure of Dataset
> str(A)
'data.frame': 42 obs. of 13 variables:
 $ RegionName : Factor w/ 1 level "MUMBAI": 1 1 1 1 1 1 1 1 1 1 ...
 $ RetailerId : int 297 297 297 297 297 297 297 297 297 297 ...
 $ PartyName : Factor w/ 32 levels "$BHAGWATI MEDICAL.",..: 12 15 15 15 14 1 11 5 13 8 ...
 $ Address1 : Factor w/ 36 levels "","2 GR FL MEZZANIN ABDUL REHAMAN",..: 4 32 32 32 34 27 25 29 26 31 ...
 $ Address2 : Factor w/ 31 levels "",", MUMBAI.",..: 29 7 7 7 26 1 27 1 30 25 ...
 $ Area : Factor w/ 19 levels "","(S) BORIVALI (WEST).",..: 16 1 1 1 16 7 1 16 7 19 ...
 $ City : Factor w/ 16 levels "","ANDHERI-E",..: 5 4 4 4 16 15 3 16 16 16 ...
 $ ContactPerson: Factor w/ 16 levels "","8959202","9821287221 9892695575",..: 12 16 16 16 8 1 1 10 1 1 ...
 $ CSTNumber : Factor w/ 26 levels "","20-21-Z-1",..: 8 18 18 18 14 2 14 19 11 10 ...
 $ Email : Factor w/ 4 levels "","[email protected]",..: 2 2 2 2 2 2 2 2 2 1 ...
 $ LicenseNumber: Factor w/ 30 levels "","/","20-21-Z-6-59-90B",..: 24 28 28 28 14 30 25 11 15 15 ...
 $ Telephone : Factor w/ 18 levels "","289,592,029,821,287,000",..: 9 7 7 7 12 12 8 7 13 7 ...
 $ MobileNumber : Factor w/ 12 levels "","29207788",..: 5 1 1 1 11 1 3 12 10 4 ...
回答:
knn 源代码的第一行(在控制台输入 knn 即可查看)是 train <- as.matrix(train),它将数据框转换为矩阵。由于矩阵只能包含一种数据类型,而你的数据框中含有因子列,因此它被转换为字符矩阵。显然,knn 以及几乎所有其他算法都需要一个数值矩阵才能进行计算;这些字符值在被强制转换为数值时会变成NA,这正是错误信息所指的第6个参数(即训练数据)中出现NA的原因,所以事先调用na.omit也无济于事。
# Coerce the training data frame to a matrix, mirroring the as.matrix() call
# quoted from knn's source, and inspect the resulting storage type.
trd_mat <- as.matrix(trd)
typeof(trd_mat)
#[1] "character"
除了RetailerId之外,你的所有变量都是因子类型,并且包含相当多的水平(level)。唯一能让它工作的方法是先将这些因子转换为虚拟变量(这样数据框中就全是0-1变量),然后在该数据框上运行knn。鉴于你的因子变量有很多水平,得到的数据框将会非常稀疏,这可能会使knn的效果变差。
如果你想走这条路,有很多关于如何将你的因子转换为虚拟变量的教程。我提供一个链接作为参考。
作为替代方案,考虑到你的因子变量,随机森林可能会给你更好的结果。