使用的代码如下:
# Fit an SVM with the linear ("vanilladot") kernel, using `corpus` as the
# response and every other column of df.train as a predictor.
svmModel <- ksvm(corpus ~ ., data = df.train, kernel = "vanilladot")
# Predict the class of each row of the test set.
svmPred <- predict(svmModel, df.test)
# Cross-tabulate predictions against the true labels; this is the call that
# raises the reported error.
confMatrix <- confusionMatrix(svmPred, df.test$corpus)
错误在运行 confusionMatrix 时出现,我已经安装了所有必要的库(caret, kernlab)。
当我查看 svmPred 的结构时,它是一个因子,而 df.test$corpus 的结构是一个包含数值变量的数据框。请看以下代码:
> table(factor(svmPred, levels=min(df.test):max(df.test)),
+       factor(df.test, levels=min(df.test):max(df.test)))
Error in FUN(X[[i]], ...) : only defined on a data frame with all numeric variables
有什么建议吗?顺便说一下,这是使用 20 Newsgroups(20 个新闻组)数据集进行的文本分类。
> dput(head(df.train,10))structure(list(better = c(1, 0, 0, 0, 0, 0, 2, 0, 1, 0), can = c(1, 0, 1, 1, 0, 1, 0, 0, 0, 0), case = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), doesnt = c(1, 0, 0, 0, 0, 0, 0, 0, 2, 0), far = c(2, 0, 0, 0, 0, 0, 0, 0, 0, 0), get = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 0), going = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0), got = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 0), im = c(2, 0, 0, 0, 1, 0, 0, 0, 0, 0), just = c(1, 0, 1, 0, 0, 0, 0, 0, 0, 0), keep = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), lines = c(1, 1, 1, 1, 2, 2, 1, 1, 1, 1), much = c(1, 0, 0, 1, 1, 0, 0, 0, 0, 0), new = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), next. = c(1, 0, 0, 0, 0, 0, 1, 2, 0, 0), nntppostinghost = c(1, 0, 0, 1, 1, 0, 0, 0, 0, 0), organization = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), place = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), possible = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), post = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), put = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), re = c(1, 0, 1, 1, 1, 0, 1, 1, 0, 0), second = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), see = c(1, 0, 0, 0, 0, 0, 0, 0, 1, 0), state = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), still = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), subject = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), tell = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0), think = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), time = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0), try = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), university = c(1, 0, 1, 0, 0, 0, 0, 0, 1, 0), version = c(1, 0, 0, 0, 1, 0, 1, 0, 0, 0), way = c(1, 0, 3, 0, 0, 0, 0, 0, 0, 0), well = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), work = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), wrong = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0), wrote = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0), access = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), anything = c(0, 1, 0, 0, 0, 0, 0, 2, 0, 0), available = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), every = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), information = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), internet = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0 ), mail = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 0), old = c(0, 3, 0, 0, 0, 0, 0, 0, 0, 1), please = c(0, 1, 0, 1, 0, 0, 0, 1, 1, 0), point = c(0, 1, 
0, 0, 0, 0, 0, 0, 0, 0), public = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), right = c(0, 1, 0, 0, 0, 0, 1, 1, 0, 0), something = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), v = c(0, 1, 0, 2, 0, 0, 0, 0, 0, 0), x = c(0, 1, 0, 2, 0, 0, 0, 0, 0, 1), another = c(0, 0, 2, 0, 0, 0, 0, 0, 0, 0), article = c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0), computer = c(0, 0, 1, 1, 0, 0, 0, 0, 1, 0), department = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), find = c(0, 0, 2, 0, 0, 0, 0, 0, 0, 0), know = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 0), maybe = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), really = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), science = c(0, 0, 1, 1, 0, 0, 0, 0, 0, 0), use = c(0, 0, 2, 0, 0, 1, 1, 0, 1, 0), used = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), windows = c(0, 0, 1, 0, 2, 0, 0, 0, 0, 0), writes = c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0), able = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 0), anyone = c(0, 0, 0, 1, 0, 0, 0, 2, 1, 0), d = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), distribution = c(0, 0, 0, 1, 1, 0, 1, 0, 1, 0), ive = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), problem = c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0), problems = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), replyto = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), send = c(0, 0, 0, 1, 1, 0, 0, 0, 2, 0), set = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), system = c(0, 0, 0, 2, 0, 0, 0, 0, 1, 0), using = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), world = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), c = c(0, 0, 0, 0, 6, 0, 0, 0, 0, 0), call = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), david = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), etc = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), many = c(0, 0, 0, 0, 1, 0, 0, 1, 0, 0), message = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), software = c(0, 0, 0, 0, 2, 1, 0, 0, 0, 0), sure = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), usa = c(0, 0, 0, 0, 1, 0, 1, 0, 1, 0), will = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), bit = c(0, 0, 0, 0, 0, 3, 0, 0, 0, 0), e = c(0, 0, 0, 0, 0, 1, 0, 0, 1, 0), general = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), help = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), however = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), m = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), name = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), 
someone = c(0, 0, 0, 0, 0, 1, 0, 2, 0, 0), thanks = c(0, 0, 0, 0, 0, 1, 0, 0, 1, 0), away = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), b = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0), days = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), dont = c(0, 0, 0, 0, 0, 0, 1, 2, 0, 0), ever = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0), good = c(0, 0, 0, 0, 0, 0, 2, 0, 0, 0), great = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), later = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), law = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), look = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), must = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), now = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), order = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), people = c(0, 0, 0, 0, 0, 0, 4, 1, 0, 0), power = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), thought = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0), us = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), also = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), back = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), best = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 1), certainly = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), fact = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), found = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), group = c(0, 0, 0, 0, 0, 0, 0, 2, 0, 0), high = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), idea = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), isnt = c(0, 0, 0, 0, 0, 0, 0, 2, 0, 0), opinions = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), probably = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), read = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), since = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), years = c(0, 0, 0, 0, 0, 0, 0, 3, 0, 2), believe = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), center = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), news = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), r = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), wouldnt = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), around = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1), keywords = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1), let = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1), like = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1), without = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1), give = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), important = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), long = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), may = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), need = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0), number = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), one = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), seems = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), systems = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), times = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trying = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), yes = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), different = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), getting = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), line = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), make = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), game = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), looking = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), email = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), actually = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), always = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), come = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), enough = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), even = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), go = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), id = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), last = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), least = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lot = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), might = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), part = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), pretty = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), s = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), take = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), thats = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), though = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), two = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), understand = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), want = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), year = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), big = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), either = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), government = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), phone = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), else. 
= c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), john = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), question = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), says = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), drive = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hard = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), cant = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), others = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), run = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), say = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ask = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), less = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), never = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), person = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), didnt = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), makes = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), support = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), made = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), whether = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), left = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), mark = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), program = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rather = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), seen = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), thing = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), done = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), first = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), things = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), three = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), course = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), inc = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), stuff = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), today = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), little = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), bad = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), quite = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), said = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), mean = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), end = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), small = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), true = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), whole = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), several = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), following = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), reason = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), yet = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), given = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), life = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), real 
= c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), kind = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), perhaps = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), free = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), example = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), nothing = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), youre = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), god = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), list = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), corpus = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "data.frame")
回答:
问题在于 `df.test$corpus` 不是因子,而 `svmPred` 是因子:
至少使用你通过 `dput()` 提供的数据,我得到的结果是:
> class(svmPred)
[1] "factor"
> class(df.test$corpus)
[1] "character"
在调用 `confusionMatrix()` 时,对 `df.test$corpus` 使用 `as.factor()` 之后,报告的错误消息就消失了:
# Coerce the true labels to a factor so that both arguments are factors.
confMatrix <- confusionMatrix(
  data = svmPred,
  reference = as.factor(df.test$corpus)
)
此外,由于 `df.test` 数据集中 `corpus` 变量的取值可能与 `df.train` 数据集中的取值不同,你可能还需要确保 `df.test$corpus`、`df.train$corpus` 以及 `svmPred` 的因子水平(levels)是相同的。
你可以通过将 `df.test$corpus` 重新定义为因子变量(或者创建一个新的因子变量,例如 `df.test$corpus_factor`)来实现这一点:
# Build the reference labels as a factor that shares its levels with the
# predictions, so confusionMatrix() compares like with like. Test labels
# that are not among levels(svmPred) become NA.
df.test$corpus_factor <- factor(df.test$corpus, levels = levels(svmPred))
然后直接运行:
# Compare the predictions with the level-aligned reference factor.
confMatrix <- confusionMatrix(
  data = svmPred,
  reference = df.test$corpus_factor
)