python决策树DecisionTreeClassifier模型 - Go语言中文社区

python决策树DecisionTreeClassifier模型


运行环境：Windows 10（64 位），Python 3.6，PyCharm 2018.1.1
import numpy as np
from sklearn.tree import    DecisionTreeClassifier
from sklearn import cross_validation
import  matplotlib.pyplot as plt
from sklearn import datasets
# Load the data
def load_data():
    """Load the iris dataset and split it into training and test sets.

    Returns:
        The sequence ``X_train, X_test, y_train, y_test`` produced by
        ``train_test_split``. 25% of the samples are held out for testing,
        the split is stratified on the class labels, and ``random_state=0``
        is passed so the split is seeded.
    """
    # sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed
    # in 0.20; sklearn.model_selection provides train_test_split with the same
    # arguments. Imported here so this function does not depend on the old module.
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    # These hold the FULL dataset; the train/test partition happens below.
    features = iris.data
    labels = iris.target
    return train_test_split(
        features, labels, test_size=0.25, random_state=0, stratify=labels
    )
# Classify with a decision tree
def test_DecisionTreeClassifier(*data):
    """Fit a default DecisionTreeClassifier and print its train/test scores.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    model = DecisionTreeClassifier()
    model.fit(train_X, train_y)
    train_score = model.score(train_X, train_y)
    print("Traing score:%f" % train_score)
    test_score = model.score(test_X, test_y)
    print("Testing score:%f" % test_score)

# Run the baseline experiment: split the data, then fit and score a default tree.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier(X_train, X_test, y_train, y_test)
# Examine how the split-quality criterion affects classification performance
def test_DecisionTreeClassifier_criterion(*data):
    """Fit one tree per split criterion and print each one's train/test scores.

    The criteria tried are 'gini' and 'entropy', in that order.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    for name in ('gini', 'entropy'):
        model = DecisionTreeClassifier(criterion=name)
        model.fit(train_X, train_y)
        print('criterion:%s' % name)
        train_score = model.score(train_X, train_y)
        print("Traing score:%f" % train_score)
        test_score = model.score(test_X, test_y)
        print("Testing score:%f" % test_score)
# Run the criterion comparison on a freshly loaded train/test split.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier_criterion(X_train, X_test, y_train, y_test)
# Compare the effect of random splitting versus best splitting
def test_DecisionTreeClassifier_splitter(*data):
    """Fit one tree per splitter strategy and print each one's train/test scores.

    The strategies tried are 'best' and 'random', in that order.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    for strategy in ('best', 'random'):
        model = DecisionTreeClassifier(splitter=strategy)
        model.fit(train_X, train_y)
        print("splitter:%s" % strategy)
        train_score = model.score(train_X, train_y)
        print("Traing score:%f" % train_score)
        test_score = model.score(test_X, test_y)
        print("Testing score:%f" % test_score)
# Run the splitter comparison on a freshly loaded train/test split.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier_splitter(X_train, X_test, y_train, y_test)
# Examine how tree depth affects the classification decision tree
def test_DecisionTreeClassifiter_depth(*data, maxdepth):
    """Plot train and test scores of decision trees against ``max_depth``.

    One tree is fitted for every depth in ``np.arange(1, maxdepth)``, i.e.
    depths 1 through ``maxdepth - 1``; ``maxdepth`` itself is not tried.

    Args:
        *data: Exactly four positional values, in this order:
            X_train, X_test, y_train, y_test.
        maxdepth: Keyword-only exclusive upper bound of the depths to try.
    """
    train_X, test_X, train_y, test_y = data
    depths = np.arange(1, maxdepth)

    training_scores, testing_scores = [], []
    for depth in depths:
        model = DecisionTreeClassifier(max_depth=depth)
        model.fit(train_X, train_y)
        training_scores.append(model.score(train_X, train_y))
        testing_scores.append(model.score(test_X, test_y))

    # Plot both score curves on a single set of axes
    fig = plt.figure()
    ax = fig.add_subplot(111)
    curves = (
        (training_scores, 'traing score', 'o'),
        (testing_scores, 'testing score', '*'),
    )
    for scores, curve_label, curve_marker in curves:
        ax.plot(depths, scores, label=curve_label, marker=curve_marker)
    ax.set(xlabel='maxdepth', ylabel='score', title='Decision Tree Classification')
    ax.legend(framealpha=0.5, loc='best')
    plt.show()
# Run the depth experiment on a freshly loaded split, with maxdepth=20.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifiter_depth(X_train, X_test, y_train, y_test,maxdepth=20)

这里写图片描述

版权声明:本文来源CSDN,感谢博主原创文章,遵循 CC 4.0 by-sa 版权协议,转载请附上原文出处链接和本声明。
原文链接:https://blog.csdn.net/dingming001/article/details/80686473
站方申明:本站部分内容来自社区用户分享,若涉及侵权,请联系站方删除。
  • 发表于 2020-03-01 22:45:32
  • 阅读 ( 1193 )
  • 分类:

0 条评论

请先 登录 后评论

官方社群

GO教程

猜你喜欢