为什么我会收到这个错误:`data` 和 `reference` 应为具有相同级别的因子

使用的代码如下:

svmModel<-ksvm(corpus~., data= df.train, kernel = "vanilladot")
svmPred<-predict(svmModel, df.test)
confMatrix<-confusionMatrix(svmPred, df.test$corpus)

错误在运行 confusionMatrix 时出现,我已经安装了所有必要的库(caret, kernlab)。

当我查看 svmPred 的结构时,它是一个因子,而 df.test$corpus 的结构是一个包含数值变量的数据框。请看以下代码:

> table(factor(svmPred, levels=min(df.test):max(df.test)),
+       factor(df.test, levels=min(df.test):max(df.test)))
Error in FUN(X[[i]], ...) : only defined on a data frame with all numeric variables

有任何建议吗?顺便说一下,这是使用 20 个新闻组数据进行的文本分类。

> dput(head(df.train,10))structure(list(better = c(1, 0, 0, 0, 0, 0, 2, 0, 1, 0), can = c(1, 0, 1, 1, 0, 1, 0, 0, 0, 0), case = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), doesnt = c(1, 0, 0, 0, 0, 0, 0, 0, 2, 0), far = c(2, 0, 0, 0, 0, 0, 0, 0, 0, 0), get = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 0), going = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0), got = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 0), im = c(2, 0, 0, 0, 1, 0, 0, 0, 0, 0), just = c(1, 0, 1, 0, 0, 0, 0, 0, 0, 0), keep = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), lines = c(1, 1, 1, 1, 2, 2, 1, 1, 1, 1), much = c(1, 0, 0, 1, 1, 0, 0, 0, 0, 0), new = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), next. = c(1, 0, 0, 0, 0, 0, 1, 2, 0, 0), nntppostinghost = c(1, 0, 0, 1, 1, 0, 0, 0, 0, 0), organization = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), place = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), possible = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), post = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), put = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0), re = c(1, 0, 1, 1, 1, 0, 1, 1, 0, 0), second = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), see = c(1, 0, 0, 0, 0, 0, 0, 0, 1, 0), state = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0), still = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0),     subject = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), tell = c(1, 0,     0, 0, 0, 1, 0, 0, 0, 0), think = c(1, 0, 0, 0, 0, 0, 0, 1,     0, 0), time = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0), try = c(1,     0, 0, 0, 0, 0, 0, 0, 0, 0), university = c(1, 0, 1, 0, 0,     0, 0, 0, 1, 0), version = c(1, 0, 0, 0, 1, 0, 1, 0, 0, 0),     way = c(1, 0, 3, 0, 0, 0, 0, 0, 0, 0), well = c(1, 0, 0,     0, 0, 0, 0, 0, 0, 0), work = c(1, 0, 0, 0, 0, 0, 0, 0, 0,     0), wrong = c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0), wrote = c(1,     0, 0, 0, 0, 0, 1, 0, 0, 0), access = c(0, 1, 0, 0, 0, 0,     0, 0, 0, 0), anything = c(0, 1, 0, 0, 0, 0, 0, 2, 0, 0),     available = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), every = c(0,     1, 0, 0, 0, 0, 0, 0, 0, 0), information = c(0, 1, 0, 0, 0,     0, 0, 0, 0, 0), internet = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0    ), mail = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 0), old = c(0, 3,     0, 0, 0, 0, 0, 0, 0, 1), 
please = c(0, 1, 0, 1, 0, 0, 0,     1, 1, 0), point = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), public = c(0,     1, 0, 0, 0, 0, 0, 0, 0, 0), right = c(0, 1, 0, 0, 0, 0, 1,     1, 0, 0), something = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0), v = c(0,     1, 0, 2, 0, 0, 0, 0, 0, 0), x = c(0, 1, 0, 2, 0, 0, 0, 0,     0, 1), another = c(0, 0, 2, 0, 0, 0, 0, 0, 0, 0), article = c(0,     0, 1, 1, 0, 0, 0, 1, 0, 0), computer = c(0, 0, 1, 1, 0, 0,     0, 0, 1, 0), department = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0),     find = c(0, 0, 2, 0, 0, 0, 0, 0, 0, 0), know = c(0, 0, 1,     1, 0, 0, 0, 1, 1, 0), maybe = c(0, 0, 1, 0, 0, 0, 0, 0, 0,     0), really = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), science = c(0,     0, 1, 1, 0, 0, 0, 0, 0, 0), use = c(0, 0, 2, 0, 0, 1, 1,     0, 1, 0), used = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), windows = c(0,     0, 1, 0, 2, 0, 0, 0, 0, 0), writes = c(0, 0, 1, 1, 0, 0,     0, 1, 0, 0), able = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 0), anyone = c(0,     0, 0, 1, 0, 0, 0, 2, 1, 0), d = c(0, 0, 0, 1, 0, 0, 0, 0,     0, 0), distribution = c(0, 0, 0, 1, 1, 0, 1, 0, 1, 0), ive = c(0,     0, 0, 1, 0, 0, 0, 0, 0, 0), problem = c(0, 0, 0, 1, 0, 0,     0, 0, 1, 0), problems = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0),     replyto = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), send = c(0, 0,     0, 1, 1, 0, 0, 0, 2, 0), set = c(0, 0, 0, 1, 0, 0, 0, 0,     0, 0), system = c(0, 0, 0, 2, 0, 0, 0, 0, 1, 0), using = c(0,     0, 0, 1, 0, 0, 0, 0, 0, 0), world = c(0, 0, 0, 1, 0, 0, 0,     0, 0, 0), c = c(0, 0, 0, 0, 6, 0, 0, 0, 0, 0), call = c(0,     0, 0, 0, 1, 0, 0, 0, 0, 0), david = c(0, 0, 0, 0, 1, 0, 0,     0, 0, 0), etc = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), many = c(0,     0, 0, 0, 1, 0, 0, 1, 0, 0), message = c(0, 0, 0, 0, 1, 0,     0, 0, 0, 0), software = c(0, 0, 0, 0, 2, 1, 0, 0, 0, 0),     sure = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), usa = c(0, 0, 0,     0, 1, 0, 1, 0, 1, 0), will = c(0, 0, 0, 0, 1, 0, 0, 0, 0,     0), bit = c(0, 0, 0, 0, 0, 3, 0, 0, 0, 0), e = c(0, 0, 0,     0, 0, 1, 0, 0, 1, 0), general = c(0, 0, 0, 0, 0, 
1, 0, 0,     0, 0), help = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), however = c(0,     0, 0, 0, 0, 1, 0, 0, 0, 0), m = c(0, 0, 0, 0, 0, 1, 0, 0,     0, 0), name = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), someone = c(0,     0, 0, 0, 0, 1, 0, 2, 0, 0), thanks = c(0, 0, 0, 0, 0, 1,     0, 0, 1, 0), away = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), b = c(0,     0, 0, 0, 0, 0, 1, 1, 0, 0), days = c(0, 0, 0, 0, 0, 0, 1,     0, 0, 0), dont = c(0, 0, 0, 0, 0, 0, 1, 2, 0, 0), ever = c(0,     0, 0, 0, 0, 0, 1, 1, 0, 0), good = c(0, 0, 0, 0, 0, 0, 2,     0, 0, 0), great = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), later = c(0,     0, 0, 0, 0, 0, 1, 0, 0, 0), law = c(0, 0, 0, 0, 0, 0, 1,     0, 0, 0), look = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), must = c(0,     0, 0, 0, 0, 0, 1, 0, 0, 0), now = c(0, 0, 0, 0, 0, 0, 1,     0, 0, 0), order = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0), people = c(0,     0, 0, 0, 0, 0, 4, 1, 0, 0), power = c(0, 0, 0, 0, 0, 0, 1,     0, 0, 0), thought = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0), us = c(0,     0, 0, 0, 0, 0, 1, 0, 0, 0), also = c(0, 0, 0, 0, 0, 0, 0,     1, 0, 0), back = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), best = c(0,     0, 0, 0, 0, 0, 0, 1, 0, 1), certainly = c(0, 0, 0, 0, 0,     0, 0, 1, 0, 0), fact = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), found = c(0,     0, 0, 0, 0, 0, 0, 1, 0, 0), group = c(0, 0, 0, 0, 0, 0, 0,     2, 0, 0), high = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), idea = c(0,     0, 0, 0, 0, 0, 0, 1, 0, 0), isnt = c(0, 0, 0, 0, 0, 0, 0,     2, 0, 0), opinions = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), probably = c(0,     0, 0, 0, 0, 0, 0, 1, 0, 0), read = c(0, 0, 0, 0, 0, 0, 0,     1, 0, 0), since = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), years = c(0,     0, 0, 0, 0, 0, 0, 3, 0, 2), believe = c(0, 0, 0, 0, 0, 0,     0, 0, 1, 0), center = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), news = c(0,     0, 0, 0, 0, 0, 0, 0, 1, 0), r = c(0, 0, 0, 0, 0, 0, 0, 0,     1, 0), wouldnt = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), around = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 1), keywords = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 1), let = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 1), like = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 1), without = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 1), give = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), important = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), long = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), may = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), need = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), number = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), one = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), seems = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), systems = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), times = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trying = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), yes = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), different = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), getting = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), line = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), make = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), game = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), looking = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), email = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), actually = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), always = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), come = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), enough = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), even = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), go = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), id = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), last = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), least = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lot = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), might = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), part = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), pretty = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), s = c(0, 0, 0, 0, 0, 0, 0, 0,     0, 0), take = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), thats = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), though = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), two = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), understand = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), want = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), year = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), big = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), either = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), government = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),     phone = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), else. = c(0, 0,     0, 0, 0, 0, 0, 0, 0, 0), john = c(0, 0, 0, 0, 0, 0, 0, 0,     0, 0), question = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), says = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), drive = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), hard = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), cant = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), others = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), run = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), say = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), ask = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), less = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), never = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), person = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), didnt = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), makes = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), support = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), made = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), whether = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), left = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), mark = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), program = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), rather = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), seen = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), thing = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), done = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), first = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), things = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), three = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), course = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), inc = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), stuff = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), today = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), little = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), bad = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), quite = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), said = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), mean = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), end = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), small = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), true = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), whole = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), several = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), following = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),     reason = 
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), yet = c(0, 0, 0,     0, 0, 0, 0, 0, 0, 0), given = c(0, 0, 0, 0, 0, 0, 0, 0, 0,     0), life = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), real = c(0, 0,     0, 0, 0, 0, 0, 0, 0, 0), kind = c(0, 0, 0, 0, 0, 0, 0, 0,     0, 0), perhaps = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), free = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), example = c(0, 0, 0, 0, 0, 0,     0, 0, 0, 0), nothing = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), youre = c(0,     0, 0, 0, 0, 0, 0, 0, 0, 0), god = c(0, 0, 0, 0, 0, 0, 0,     0, 0, 0), list = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), corpus = c("1",     "2", "3", "4", "5", "6", "7", "8", "9", "10")), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "data.frame")

回答:

问题在于 df.test$corpus 不是因子,而 svmPred 是因子:

至少使用你通过 dput() 传递的数据,我得到的是:

> class(svmPred)
[1] "factor"
> class(df.test$corpus)
[1] "character"

在调用 confusionMatrix() 时,对 df.test$corpus 添加 as.factor() 使报告的错误消息消失了:

confMatrix<-confusionMatrix(svmPred, as.factor(df.test$corpus))

此外,由于 df.test 数据集中 corpus 变量的值可能与 df.train 数据集中的值不同,你可能还需要确保 df.test$corpus、df.train$corpus 以及 svmPred 中的级别是相同的。

你可以通过重新定义 df.test$corpus(或者创建一个新的变量,例如 df.test$corpus_factor)为因子变量来实现这一点:

df.test$corpus_factor = factor(df.test$corpus, levels=levels(svmPred))

然后直接运行:

confMatrix<-confusionMatrix(svmPred, df.test$corpus_factor)

Related Posts

使用LSTM在Python中预测未来值

这段代码可以预测指定股票的当前日期之前的值,但不能预测…

如何在gensim的word2vec模型中查找双词组的相似性

我有一个word2vec模型,假设我使用的是googl…

dask_xgboost.predict 可以工作但无法显示 – 数据必须是一维的

我试图使用 XGBoost 创建模型。 看起来我成功地…

ML Tuning – Cross Validation in Spark

我在https://spark.apache.org/…

如何在React JS中使用fetch从REST API获取预测

我正在开发一个应用程序,其中Flask REST AP…

如何分析ML.NET中多类分类预测得分数组?

我在ML.NET中创建了一个多类分类项目。该项目可以对…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注