机器学习实验报告(DOC).doc
- 文档编号:1859293
- 上传时间:2023-05-01
- 格式:DOC
- 页数:19
- 大小:179.05KB
机器学习实验报告(DOC).doc
《机器学习实验报告(DOC).doc》由会员分享,可在线阅读,更多相关《机器学习实验报告(DOC).doc(19页珍藏版)》请在冰点文库上搜索。
《机器学习》
课内实验报告
(1)ID3算法实现决策树
2015-2016学年第2学期
专业:
智能科学与技术
班级:
智能1301班
学号:
06133029
姓名:
张争辉
一、实验目的:
理解ID3算法的基本原理,并且编程实现。
二、实验要求:
使用C/C++/MATLAB实现ID3算法。
输入:
若干行,每行5个字符串,表示
Outlook Temperature Humidity Wind PlayBall
如上表。
输出:
决策树。
实验结果如下:
输入:
Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No
输出:
Outlook
    Rain -> Wind
        Strong -> No
        Weak -> Yes
    Overcast -> Yes
    Sunny -> Humidity
        Normal -> Yes
        High -> No
三、具体实现:
实现算法如下:
// NOTE(review): the four header names were lost when the page was extracted
// (everything from '<' to the next space was stripped). The headers below
// cover the library names the program uses -- cout/endl, strcpy/strcmp,
// log, and free/NULL -- confirm against the original listing.
#include <iostream>
#include <cstring>
#include <cmath>
#include <cstdlib>
using namespace std;

#define ROW 14  // number of training samples in Examples
#define COL 5   // columns per sample: four attributes plus the class label
// ln(2). Gain() divides natural logarithms by this to obtain base-2 values.
// The macro must stay below the #include lines: it replaces every later
// occurrence of the token log2, including the standard log2() function.
#define log2 0.69314718055
// Decision-tree node linked in first-child / next-sibling form.
typedef struct TNode
{
    char data[15];    // node label: an attribute name, or the class "Yes"/"No" at a leaf
    char weight[15];  // initialised to "" for the root; presumably the attribute value
                      // on the edge from the parent -- assigned outside this excerpt, confirm
    struct TNode *firstchild, *nextsibling;
} *tree;
// One training sample, stored as a node of a singly linked list.
// Lists of samples start with a header node whose fields are unused.
typedef struct LNode
{
    char OutLook[15];
    char Temperature[15];
    char Humidity[15];
    char Wind[15];
    char PlayTennis[5];  // class label: "Yes" or "No"
    struct LNode *next;
} *link;
// One candidate attribute, stored as a node of a singly linked list.
// Lists of attributes start with a header node whose fields are unused.
typedef struct AttrNode
{
    char attributes[15];  // attribute name, e.g. "OutLook"
    int attr_Num;         // number of distinct values this attribute can take
    struct AttrNode *next;
} *Attributes;
// Training set: one row per sample, columns are
// OutLook, Temperature, Humidity, Wind, PlayTennis.
// Two alternative samples were disabled in the original listing:
//   "OverCast","Cool","High","Strong","No"
//   "Rain","Hot","Normal","Strong","Yes"
char *Examples[ROW][COL] = {
    { "Sunny",    "Hot",  "High",   "Weak",   "No"  },
    { "Sunny",    "Hot",  "High",   "Strong", "No"  },
    { "OverCast", "Hot",  "High",   "Weak",   "Yes" },
    { "Rain",     "Mild", "High",   "Weak",   "Yes" },
    { "Rain",     "Cool", "Normal", "Weak",   "Yes" },
    { "Rain",     "Cool", "Normal", "Strong", "No"  },
    { "OverCast", "Cool", "Normal", "Strong", "Yes" },
    { "Sunny",    "Mild", "High",   "Weak",   "No"  },
    { "Sunny",    "Cool", "Normal", "Weak",   "Yes" },
    { "Rain",     "Mild", "Normal", "Weak",   "Yes" },
    { "Sunny",    "Mild", "Normal", "Strong", "Yes" },
    // Humidity corrected from "Normal" to "High" so that this row matches
    // the input listed in the report (and the standard PlayTennis table).
    { "OverCast", "Mild", "High",   "Strong", "Yes" },
    { "OverCast", "Hot",  "Normal", "Weak",   "Yes" },
    { "Rain",     "Mild", "High",   "Strong", "No"  }
};
// Attribute names, in the column order used by Examples.
char *Attributes_kind[4] = { "OutLook", "Temperature", "Humidity", "Wind" };

// Attr_kind[i] is the number of distinct values of Attributes_kind[i],
// i.e. the length of the matching *_kind table below.
int Attr_kind[4] = { 3, 3, 2, 2 };

// Possible values of each attribute.
char *OutLook_kind[3]     = { "Sunny", "OverCast", "Rain" };
char *Temperature_kind[3] = { "Hot", "Mild", "Cool" };
char *Humidity_kind[2]    = { "High", "Normal" };
char *Wind_kind[2]        = { "Weak", "Strong" };
/*int i_Exampple[14][5]={0,0,0,0,1,
0,0,0,1,1,
1,0,0,1,0,
2,1,0,0,0,
2,2,1,0,0,
2,2,1,1,1,
1,2,1,1,0,
0,1,0,0,1,
0,2,1,0,0,
2,1,1,0,0,
0,1,1,1,0,
1,1,1,1,0,
1,1,1,0,0,
2,1,0,0,1
};*/
voidtreelists(treeT);
voidInitAttr(Attributes&attr_link,char*Attributes_kind[],intAttr_kind[]);
voidInitLink(link&L,char*Examples[][COL]);
voidID3(tree&T,linkL,linkTarget_Attr,Attributesattr);
voidPN_Num(linkL,int&positve,int&negative);
doubleGain(intpositive,intnegative,char*atrribute,linkL,Attributesattr_L);
voidmain()
{
linkLL,p;
Attributesattr_L,q;
treeT;
T=newTNode;
T->firstchild=T->nextsibling=NULL;
strcpy(T->weight,"");
strcpy(T->data,"");
attr_L=newAttrNode;
attr_L->next=NULL;
LL=newLNode;
LL->next=NULL;
//成功建立两个链表
InitLink(LL,Examples);
InitAttr(attr_L,Attributes_kind,Attr_kind);
ID3(T,LL,NULL,attr_L);
cout<<"决策树以广义表形式输出如下:
"< treelists(T);//以广义表的形式输出树 // cout< cout< } //以广义表的形式输出树 voidtreelists(treeT) { treep; if(! T) return; cout<<"{"< cout< p=T->firstchild; if(p) { cout<<"("; while(p) { treelists(p); p=p->nextsibling; if(p)cout<<','; } cout<<")"; } } voidInitAttr(Attributes&attr_link,char*Attributes_kind[],intAttr_kind[]) { Attributesp; for(inti=0;i<4;i++) { p=newAttrNode; p->next=NULL; strcpy(p->attributes,Attributes_kind[i]); p->attr_Num=Attr_kind[i]; p->next=attr_link->next; attr_link->next=p; } } voidInitLink(link&LL,char*Examples[][COL]) { linkp; for(inti=0;i { p=newLNode; p->next=NULL; strcpy(p->OutLook,Examples[i][0]); strcpy(p->Temperature,Examples[i][1]); strcpy(p->Humidity,Examples[i][2]); strcpy(p->Wind,Examples[i][3]); strcpy(p->PlayTennis,Examples[i][4]); p->next=LL->next; LL->next=p; } } voidPN_Num(linkL,int&positve,int&negative) { positve=0; negative=0; linkp; p=L->next; while(p) { if(strcmp(p->PlayTennis,"No")==0) negative++; elseif(strcmp(p->PlayTennis,"Yes")==0) positve++; p=p->next; } } //计算信息增益 //linkL: 样本集合S //attr_L: 属性集合 doubleGain(intpositive,intnegative,char*atrribute,linkL,Attributesattr_L) { intatrr_kinds;//每个属性中的值的个数 Attributesp=attr_L->next; linkq=L->next; intattr_th=0;//第几个属性 while(p) { if(strcmp(p->attributes,atrribute)==0) { atrr_kinds=p->attr_Num; break; } p=p->next; attr_th++; } doubleentropy,gain=0; doublep1=1.0*positive/(positive+negative); doublep2=1.0*negative/(positive+negative); entropy=-p1*log(p1)/log2-p2*log(p2)/log2;//集合熵 gain=entropy; //获取每个属性值在训练样本中出现的个数 //获取每个属性值所对应的正例和反例的个数 //声明一个3*atrr_kinds的数组 int**kinds=newint*[3]; for(intj=0;j<3;j++) { kinds[j]=newint[atrr_kinds];//保存每个属性值在训练样本中出现的个数 } //初始化 for(intj=0;j<3;j++) { for(inti=0;i { kinds[j][i]=0; } } while(q) { if(strcmp("OutLook",atrribute)==0) { for(inti=0;i { if(strcmp(q->OutLook,OutLook_kind[i])==0) { kinds[0][i]++; if(strcmp(q->PlayTennis,"Yes")==0) kinds[1][i]++; else kinds[2][i]++; } } } elseif(strcmp("Temperature",atrribute)==0) { for(inti=0;i { 
if(strcmp(q->Temperature,Temperature_kind[i])==0) { kinds[0][i]++; if(strcmp(q->PlayTennis,"Yes")==0) kinds[1][i]++; else kinds[2][i]++; } } } elseif(strcmp("Humidity",atrribute)==0) { for(inti=0;i { if(strcmp(q->Humidity,Humidity_kind[i])==0) { kinds[0][i]++; if(strcmp(q->PlayTennis,"Yes")==0) kinds[1][i]++;// else kinds[2][i]++; } } } elseif(strcmp("Wind",atrribute)==0) { for(inti=0;i { if(strcmp(q->Wind,Wind_kind[i])==0) { kinds[0][i]++; if(strcmp(q->PlayTennis,"Yes")==0) kinds[1][i]++; else kinds[2][i]++; } } } q=q->next; } //计算信息增益 double*gain_kind=newdouble[atrr_kinds]; intpositive_kind=0,negative_kind=0; for(intj=0;j { if(kinds[0][j]! =0&&kinds[1][j]! =0&&kinds[2][j]! =0) { p1=1.0*kinds[1][j]/kinds[0][j]; p2=1.0*kinds[2][j]/kinds[0][j]; gain_kind[j]=-p1*log(p1)/log2-p2*log(p2)/log2; gain=gain-(1.0*kinds[0][j]/(positive+negative))*gain_kind[j]; } else gain_kind[j]=0; } returngain; } //在ID3算法中的训练样本子集合与属性子集合的链表需要进行清空 voidFreeLink(link&Link) { linkp,q; p=Link->next; Link->next=NULL; while(p) { q=p; p=p->next; free(q); } } voidID3(tree&T,linkL,linkTarget_Attr,Attributesattr) { Attributesp,max,attr_child,p1; linkq,link_child,q1; treer,tree_p; intpositive=0,negative=0; PN_Num(L,positive,negative); //初始化两个子集合 attr_child=newAttrNode; attr_child->next=NULL; link_child=newLNode; link_child->next=NULL; if(positive==0)//全是反例 { strcpy(T->data,"No"); return; } elseif(negative==0)//全是正例 { strcpy(T->data,"Yes"); return; } p=attr->next;//属性链表 doublegain,g=0; /************************************************************************/ /*建立属性子集合与训练样本子集合有两个方案: 一: 在原来链表的基础上进行删除; 二: 另外申请空间进行存储子集合; 采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱 */ /************************************************************************/ if(p) { while(p) { gain=Gain(positive,negative,p->attributes,L,attr); cout< if(gain>g) { g=gain; max=p;//寻找信息增益最大的属性 } p=p->next; } strcpy(T->data,max->attributes);//增加决策树的节点
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 机器 学习 实验 报告 DOC