最近有小伙伴反映收不到推送,因为公众号改了推送算法,现在需要加星标,多点赞、点在看,才能准时收到推送哦。
导语:以下是在R中实现最常用、最全面的机器学习模型的代码示例,包括数据准备、模型训练、评估和可视化。
北京墨因生物已经与国内50+知名医院的老师或名牌大学实验室合作(协和、哈工大、同济、哈医大等)。欢迎有生信分析需求的老师垂询,公共数据库数据挖掘或自测数据分析均可。

# Packages used throughout this script.
requiredPkgs <- c("caret", "randomForest", "glmnet", "e1071", "xgboost",
                  "rpart", "kernlab", "nnet", "pROC", "ggplot2")
# Install only the packages that are not installed yet, so re-running the
# script does not reinstall everything on every run.
missingPkgs <- setdiff(requiredPkgs, rownames(installed.packages()))
if (length(missingPkgs) > 0) {
  install.packages(missingPkgs)
}
# Load packages
library(caret) # unified machine-learning interface
library(randomForest) # random forest
library(glmnet) # regularized regression
library(e1071) # SVM
library(xgboost) # XGBoost
library(rpart) # decision trees
library(kernlab) # other kernel methods
library(nnet) # neural networks
library(pROC) # ROC curves
library(ggplot2) # visualization
# Fix the random seed for reproducibility
set.seed(123)
# Load the example data set (the built-in iris data)
data(iris)
# Stratified 70/30 train/test split on the class label.
trainIndex <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]
# Estimate centering/scaling parameters on the training predictors only
# (column 5 is Species), then apply the same transformation to both sets so
# no information from the test set leaks into preprocessing.
preProcValues <- preProcess(trainData[, -5], method = c("center", "scale"))
trainTransformed <- predict(preProcValues, trainData)
testTransformed <- predict(preProcValues, testData)
str(trainTransformed)
# Binary logistic regression.
# NOTE: Species has three levels. With a factor response the binomial family
# treats the first level ("setosa") as failure and all other levels as
# success, so this model is effectively setosa-vs-rest; expect separation
# warnings from glm() on this data.
logitModel <- glm(Species ~ .,
                  data = trainTransformed,
                  family = binomial(link = "logit"))
# Multinomial logistic regression handles all three classes directly.
multinomModel <- multinom(Species ~ ., data = trainTransformed)
# Predicted probability of "not setosa" from the binary model.
logitPred <- predict(logitModel, newdata = testTransformed, type = "response")
# Predicted class labels from the multinomial model.
multinomPred <- predict(multinomModel, newdata = testTransformed)
confusionMatrix(multinomPred, testTransformed$Species)
# Regularized multinomial regression with glmnet.
# glmnet needs a numeric design matrix; drop the intercept column that
# model.matrix() adds.
x <- model.matrix(Species ~ ., trainTransformed)[, -1]
y <- trainTransformed$Species
# alpha = 1 selects the lasso penalty (alpha = 0 would be ridge); the penalty
# strength lambda is chosen by cross-validation.
cvFit <- cv.glmnet(x, y, family = "multinomial", alpha = 1)
plot(cvFit)
# The name ridgePred is kept from the original script even though the fit
# above uses the lasso penalty.
ridgePred <- predict(cvFit,
                     newx = model.matrix(Species ~ ., testTransformed)[, -1],
                     s = "lambda.min",
                     type = "class")
# Pin the factor levels so the confusion matrix stays well-formed even if a
# class is never predicted.
confusionMatrix(factor(ridgePred, levels = levels(testTransformed$Species)),
                testTransformed$Species)
# Classification tree (rpart).
treeModel <- rpart(Species ~ ., data = trainTransformed, method = "class")
# Draw the tree and label each node; use.n = TRUE adds per-class counts.
plot(treeModel)
text(treeModel, use.n = TRUE)
treePred <- predict(treeModel, newdata = testTransformed, type = "class")
confusionMatrix(treePred, testTransformed$Species)
# Random forest with 500 trees.
rfModel <- randomForest(Species ~ ., data = trainTransformed, ntree = 500)
# Variable-importance plot for the fitted forest.
varImpPlot(rfModel)
rfPred <- predict(rfModel, newdata = testTransformed)
confusionMatrix(rfPred, testTransformed$Species)
# Support vector machine with a radial (RBF) kernel, via e1071.
svmModel <- svm(Species ~ ., data = trainTransformed, kernel = "radial")
svmPred <- predict(svmModel, newdata = testTransformed)
confusionMatrix(svmPred, testTransformed$Species)
# Tune the radial SVM through caret: cross-validation over 9 candidate
# parameter settings (tuneLength = 9).
svmTune <- train(Species ~ .,
                 data = trainTransformed,
                 method = "svmRadial",
                 tuneLength = 9,
                 trControl = trainControl(method = "cv"))
# XGBoost needs a numeric matrix and 0-based integer class labels.
# NOTE(review): written against the classic xgboost(data =, label =)
# interface; confirm against the installed xgboost version.
trainX <- model.matrix(Species ~ ., trainTransformed)[, -1]
trainY <- as.numeric(trainTransformed$Species) - 1
testX <- model.matrix(Species ~ ., testTransformed)[, -1]
xgbModel <- xgboost(data = trainX,
                    label = trainY,
                    nrounds = 100,
                    objective = "multi:softprob",
                    num_class = length(levels(iris$Species)),
                    eval_metric = "mlogloss")
# "multi:softprob" yields one probability per class per row. If predict()
# returns them as a flat vector (each row's classes contiguous), reshape to
# an n x num_class matrix before taking the row-wise arg-max.
xgbPred <- predict(xgbModel, testX)
if (is.null(dim(xgbPred))) {
  xgbPred <- matrix(xgbPred, ncol = length(levels(iris$Species)), byrow = TRUE)
}
# ties.method = "first" keeps the arg-max deterministic (the default breaks
# ties at random).
xgbPred <- levels(iris$Species)[max.col(xgbPred, ties.method = "first")]
confusionMatrix(factor(xgbPred, levels = levels(testTransformed$Species)),
                testTransformed$Species)
# Single-hidden-layer neural network (nnet): 5 hidden units, weight decay
# 0.01, at most 200 iterations, with optimizer tracing switched off.
nnModel <- nnet(Species ~ .,
                data = trainTransformed,
                size = 5,
                decay = 0.01,
                maxit = 200,
                trace = FALSE)
# type = "class" returns predicted labels as character strings.
nnPred <- predict(nnModel, newdata = testTransformed, type = "class")
# Pin the factor levels so the confusion matrix stays well-formed even if a
# class is never predicted.
confusionMatrix(factor(nnPred, levels = levels(testTransformed$Species)),
                testTransformed$Species)
# Compare several caret models under 5-fold cross-validation.
# The folds are created once and passed via `index` so every model is
# resampled on exactly the same partitions; resamples() compares models
# fold-by-fold, which is only meaningful with shared folds.
cvFolds <- createFolds(trainTransformed$Species, k = 5, returnTrain = TRUE)
ctrl <- trainControl(method = "cv", number = 5, classProbs = TRUE,
                     index = cvFolds)
modelList <- list(
  logistic = train(Species ~ ., data = trainTransformed, method = "multinom", trControl = ctrl),
  rf = train(Species ~ ., data = trainTransformed, method = "rf", trControl = ctrl),
  svm = train(Species ~ ., data = trainTransformed, method = "svmRadial", trControl = ctrl),
  xgb = train(Species ~ ., data = trainTransformed, method = "xgbTree", trControl = ctrl)
)
# Collect the per-fold performance of all models and summarize/plot it.
results <- resamples(modelList)
summary(results)
dotplot(results)
# Simple soft-voting ensemble: average the class probabilities predicted by
# three of the caret models and pick the most probable class per row.
pred1 <- predict(modelList$logistic, testTransformed, type = "prob")
pred2 <- predict(modelList$rf, testTransformed, type = "prob")
pred3 <- predict(modelList$svm, testTransformed, type = "prob")
ensemblePred <- (pred1 + pred2 + pred3) / 3
# ties.method = "first" keeps the arg-max deterministic.
finalPred <- colnames(ensemblePred)[max.col(as.matrix(ensemblePred),
                                            ties.method = "first")]
confusionMatrix(factor(finalPred, levels = levels(testTransformed$Species)),
                testTransformed$Species)
# Variable importance of the caret random-forest model.
importance <- varImp(modelList$rf)
plot(importance)
# One-vs-rest ROC curve for the "setosa" class, using the probabilities
# predicted by the caret multinomial model (pred1).
rocCurve <- roc(response = as.numeric(testTransformed$Species == "setosa"),
                predictor = as.numeric(pred1[, "setosa"]))
plot(rocCurve, print.auc = TRUE)
# Scatter plot of two standardized features, colored by class, with a
# per-class ellipse drawn by stat_ellipse().
ggplot(trainTransformed, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
  geom_point() +
  stat_ellipse() +
  ggtitle("Feature Space with Decision Boundaries")
以上代码使用iris数据集作为示例,实际应用中应替换为您自己的数据集。对于大数据集,某些模型可能需要较长的训练时间;请根据具体问题调整模型参数(如学习率、树的数量、网络结构等)。分类和回归问题的代码略有不同:上述示例主要针对分类问题,对于回归问题,可以在caret的train()中使用method = "lm"、method = "gbm"等替代方案。
码字不易,欢迎读者分享或转发到朋友圈,任何公众号或其他媒体未经许可不得私自转载或抄袭。由于微信平台算法改版,公众号内容将不再以时间排序展示,建议将“作图丫”公众号设为星标,防止丢失。星标具体步骤为:(1)进入“作图丫”公众号主页;(2)点击右上角的小点点,在弹出界面选择“设为星标”即可。