Hierarchical clustering 2: Dendrogram
AlonPolegPolsky
The code constructs a dendrogram from a dissimilarity matrix (see Hierarchical clustering for a code to generate the matrix).
Function BuildDendrogram iterates over cells in the matrix and finds the cell with the lowest distance. The cell is removed from the matrix and added to a branch of a dendrogram. The output is to wave: sortedDissimilaryMatrix which is the sorted / final output and DendPosX and DendPosY waves which construct the dendrogram. Old cell position is saved in ClusXTxT wave.
Example usage:
make/o/n=(100,10) testWave=gnoise(1)+mod(q,2)//---one cluster in even columnsand another in odd columns
MakeDisSimilarityMatrix(testWave)//---See Hierarchical clustering for the code. The resulting disSimilaryMatrix looks pixelated
BuildDendrogram(DisSimilarityMatrix)//---The sorted matrix is stored in sortedDissimilaryMatrix
Display/K=0 DendPosY vs DendPosX//---show the dendrogram
ModifyGraph userticks(bottom)={ClusX,ClusXTxT}//---show the original column information
//---helper function
Function DendMeanLoc(DendTxT,matchSt)
wave/t DendTxT
string matchSt
variable i,pos=0,numpnt=0
for(i=0;i<numpnts(DendTxT) ;i++)
if(whichListItem(DendTxT[i],matchSt)>-1)
pos+=i
numpnt++
endif
endfor
return pos/numpnt
end
//---computes the dendrogram and similarity matrix
Function BuildDendrogram(wave DisSimilarityMatrix)
duplicate/o DisSimilarityMatrix,TempDisSimilarityMatrix //---matrix that is reduced in size for each step
make/o/n=(dimsize(DisSimilarityMatrix,0))/t DendTxT=num2str(p),OrigDendTxT=num2str(p) //---original positions
make/o/n=(1,3)/t DendClusTxT="" //---saves the number of the original (unsorted) values
make/o/n=(1,3) DendClus=nan,DendClusX=nan //---the magnitude of the clusters (y axis on a dendrogram)
make/o/n=(0)/t ClusXTxT
make/o/n=0 DendPosX,DendPosY //---waves that draw the dendrogram
variable run,clus
//---iterate over the dissimilarity matrix to find the smallest distance and remove that cell from the matrix
for(run=0;run<dimsize(DisSimilarityMatrix,0)-1;run++) //---over all cells in the dissimilarity matrix
//---for each iteration find the cell with the smallest distance (value)
imagestats/m=1 TempDisSimilarityMatrix
variable minloc=min(V_minColLoc,V_minRowLoc) //---removed row
variable maxloc=max(V_minColLoc,V_minRowLoc) //---removed col
//---set the columns and the rows on the smallest value cell to the max value of the corresponding row and column
TempDisSimilarityMatrix[minloc][]=max(TempDisSimilarityMatrix[minloc][q],TempDisSimilarityMatrix[maxloc][q]) //---find the max distance
TempDisSimilarityMatrix[][minloc]=TempDisSimilarityMatrix[minloc][p]
//---save position information and start building the dendrogram
insertpoints 0,1, DendClusTxT,DendClus,DendClusX
DendClusTxT[0][0]=DendTxT[minloc] //---info about child 1 (all the cells #)
DendClusTxT[0][1]=DendTxT[maxloc] //---info about child 2
DendClusTxT[0][2]=DendClusTxT[0][0]+";"+DendClusTxT[0][1] //---parent - a list of all the cells in child1 and child 2
//---modify the positions for the dendrogram
for(clus=0;clus<numpnts(OrigDendTxT);clus++)
if(stringmatch(OrigDendTxT[clus],DendClusTxT[0][0])) //---child 1 is a terminal branch
insertpoints numpnts(ClusXTxT),1,ClusXTxT
ClusXTxT[ numpnts(ClusXTxT)-1 ]=OrigDendTxT[clus] //---save the original position
DendClus[0][0]=0
endif
if(stringmatch(OrigDendTxT[clus],DendClusTxT[0][1])) //---child 2 is a terminal branch
insertpoints numpnts(ClusXTxT),1,ClusXTxT
ClusXTxT[ numpnts(ClusXTxT)-1 ]=OrigDendTxT[clus] //---save the original position
DendClus[0][1]=0
endif
endfor
for(clus=0;clus<dimsize(DendClusTxT,0);clus++)
if(stringmatch(DendClusTxT[0][0],DendClusTxT[clus][2]))
DendClus[0][0]=DendClus[clus][2]
endif
if(stringmatch(DendClusTxT[0][1],DendClusTxT[clus][2]))
DendClus[0][1]=DendClus[clus][2]
endif
endfor
//---ready to remove the cell
DendClus[0][2]=v_min
DendTxT[minloc]=DendClusTxT[0][2]
DendClusX[0][0]=DendMeanLoc(ClusXTxT,DendClusTxT[0][0]) //---X position info (for display only)
deletepoints /m=0 maxloc,1, TempDisSimilarityMatrix,DendTxT
deletepoints /m=1 maxloc,1, TempDisSimilarityMatrix
endfor
killwaves/z TempDisSimilarityMatrix
//---normalize the dendrogram
variable PeakVal=wavemax(DendClus)
DendClus=DendClus/PeakVal*100
variable i
//---sort the dendrogram
make/o/n=(itemsinlist(DendClusTxT[0][2]))/t ClusXTxT
string/g ClusSortedLocation=""
for(i=0;i<itemsinlist(DendClusTxT[0][2]);i++)
ClusXTxT[i]=stringfromlist(i,DendClusTxT[0][2]) //---new position
ClusSortedLocation+=ClusXTxT[i]+";"
endfor
//---drawing utilities
for(clus=0;clus<dimsize(DendClus,0);clus++)
insertpoints 0,5,DendPosX,DendPosY //---position of the branches
DendPosX[0,1]=DendMeanLoc(ClusXTxT,DendClusTxT[clus][0])
DendPosX[2,3]=DendMeanLoc(ClusXTxT,DendClusTxT[clus][1])
DendPosX[4]=nan
DendPosY[0]=DendClus[clus][0]
DendPosY[1,2]=DendClus[clus][2]
DendPosY[3]=DendClus[clus][1]
DendPosY[4]=nan
endfor
//---sorting
make/o/n=(numpnts(ClusXTxT)) ClusX=p //---just the position (p)
duplicate/o DisSimilarityMatrix,SortedDisSimilarityMatrix
SortedDisSimilarityMatrix[][]=DisSimilarityMatrix[str2num(ClusXTxT[p])][str2num(ClusXTxT[q])]
end
Function DendMeanLoc(DendTxT,matchSt)
wave/t DendTxT
string matchSt
variable i,pos=0,numpnt=0
for(i=0;i<numpnts(DendTxT) ;i++)
if(whichListItem(DendTxT[i],matchSt)>-1)
pos+=i
numpnt++
endif
endfor
return pos/numpnt
end
//---computes the dendrogram and similarity matrix
Function BuildDendrogram(wave DisSimilarityMatrix)
duplicate/o DisSimilarityMatrix,TempDisSimilarityMatrix //---matrix that is reduced in size for each step
make/o/n=(dimsize(DisSimilarityMatrix,0))/t DendTxT=num2str(p),OrigDendTxT=num2str(p) //---original positions
make/o/n=(1,3)/t DendClusTxT="" //---saves the number of the original (unsorted) values
make/o/n=(1,3) DendClus=nan,DendClusX=nan //---the magnitude of the clusters (y axis on a dendrogram)
make/o/n=(0)/t ClusXTxT
make/o/n=0 DendPosX,DendPosY //---waves that draw the dendrogram
variable run,clus
//---iterate over the dissimilarity matrix to find the smallest distance and remove that cell from the matrix
for(run=0;run<dimsize(DisSimilarityMatrix,0)-1;run++) //---over all cells in the dissimilarity matrix
//---for each iteration find the cell with the smallest distance (value)
imagestats/m=1 TempDisSimilarityMatrix
variable minloc=min(V_minColLoc,V_minRowLoc) //---removed row
variable maxloc=max(V_minColLoc,V_minRowLoc) //---removed col
//---set the columns and the rows on the smallest value cell to the max value of the corresponding row and column
TempDisSimilarityMatrix[minloc][]=max(TempDisSimilarityMatrix[minloc][q],TempDisSimilarityMatrix[maxloc][q]) //---find the max distance
TempDisSimilarityMatrix[][minloc]=TempDisSimilarityMatrix[minloc][p]
//---save position information and start building the dendrogram
insertpoints 0,1, DendClusTxT,DendClus,DendClusX
DendClusTxT[0][0]=DendTxT[minloc] //---info about child 1 (all the cells #)
DendClusTxT[0][1]=DendTxT[maxloc] //---info about child 2
DendClusTxT[0][2]=DendClusTxT[0][0]+";"+DendClusTxT[0][1] //---parent - a list of all the cells in child1 and child 2
//---modify the positions for the dendrogram
for(clus=0;clus<numpnts(OrigDendTxT);clus++)
if(stringmatch(OrigDendTxT[clus],DendClusTxT[0][0])) //---child 1 is a terminal branch
insertpoints numpnts(ClusXTxT),1,ClusXTxT
ClusXTxT[ numpnts(ClusXTxT)-1 ]=OrigDendTxT[clus] //---save the original position
DendClus[0][0]=0
endif
if(stringmatch(OrigDendTxT[clus],DendClusTxT[0][1])) //---child 2 is a terminal branch
insertpoints numpnts(ClusXTxT),1,ClusXTxT
ClusXTxT[ numpnts(ClusXTxT)-1 ]=OrigDendTxT[clus] //---save the original position
DendClus[0][1]=0
endif
endfor
for(clus=0;clus<dimsize(DendClusTxT,0);clus++)
if(stringmatch(DendClusTxT[0][0],DendClusTxT[clus][2]))
DendClus[0][0]=DendClus[clus][2]
endif
if(stringmatch(DendClusTxT[0][1],DendClusTxT[clus][2]))
DendClus[0][1]=DendClus[clus][2]
endif
endfor
//---ready to remove the cell
DendClus[0][2]=v_min
DendTxT[minloc]=DendClusTxT[0][2]
DendClusX[0][0]=DendMeanLoc(ClusXTxT,DendClusTxT[0][0]) //---X position info (for display only)
deletepoints /m=0 maxloc,1, TempDisSimilarityMatrix,DendTxT
deletepoints /m=1 maxloc,1, TempDisSimilarityMatrix
endfor
killwaves/z TempDisSimilarityMatrix
//---normalize the dendrogram
variable PeakVal=wavemax(DendClus)
DendClus=DendClus/PeakVal*100
variable i
//---sort the dendrogram
make/o/n=(itemsinlist(DendClusTxT[0][2]))/t ClusXTxT
string/g ClusSortedLocation=""
for(i=0;i<itemsinlist(DendClusTxT[0][2]);i++)
ClusXTxT[i]=stringfromlist(i,DendClusTxT[0][2]) //---new position
ClusSortedLocation+=ClusXTxT[i]+";"
endfor
//---drawing utilities
for(clus=0;clus<dimsize(DendClus,0);clus++)
insertpoints 0,5,DendPosX,DendPosY //---position of the branches
DendPosX[0,1]=DendMeanLoc(ClusXTxT,DendClusTxT[clus][0])
DendPosX[2,3]=DendMeanLoc(ClusXTxT,DendClusTxT[clus][1])
DendPosX[4]=nan
DendPosY[0]=DendClus[clus][0]
DendPosY[1,2]=DendClus[clus][2]
DendPosY[3]=DendClus[clus][1]
DendPosY[4]=nan
endfor
//---sorting
make/o/n=(numpnts(ClusXTxT)) ClusX=p //---just the position (p)
duplicate/o DisSimilarityMatrix,SortedDisSimilarityMatrix
SortedDisSimilarityMatrix[][]=DisSimilarityMatrix[str2num(ClusXTxT[p])][str2num(ClusXTxT[q])]
end
Forum
Support
Gallery
Igor Pro 9
Learn More
Igor XOP Toolkit
Learn More
Igor NIDAQ Tools MX
Learn More