自适应DBSCAN成就——Anaplan社区

jainsaniya · ‎08-28-2020

I am doing the DBSCAN clustering inpython. I want to achieve an adaptive way to return the number of clusters by self calculating its eps and Minpts parameters. Below is my code.

import math
import copy
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN

德f loadDataSet(fileName, splitChar='\t'):

dataSet = []
with open(fileName) as fr:
for line in fr.readlines():
curline = line.strip().split(splitChar)
fltline = list(map(float, curline))
dataSet.append(fltline)
return dataSet

德f dist(a,b):

return math.sqrt(math.pow(a[0]-b[0],2) + math.pow(a[1]-b[1],2))

德f returnDk(matrix,k):

Dk = []
for i in range(len(matrix)):
Dk.append(matrix[i][k])
return Dk

德f returnDkAverage(Dk):

sum = 0
for i in range(len(Dk)):
sum = sum + Dk[i]
return sum/len(Dk)

德f CalculateDistMatrix(dataset):

DistMatrix = [[0 for j in range(len(dataset))] for i in range(len(dataset))]
for i in range(len(dataset)):
for j in range(len(dataset)):
DistMatrix[i][j] = dist(dataset[i], dataset[j])
return DistMatrix

德f returnEpsCandidate(dataSet):

DistMatrix = CalculateDistMatrix(dataSet)
tmp_matrix = copy.deepcopy(DistMatrix)
for i in range(len(tmp_matrix)):
tmp_matrix[i].sort()
EpsCandidate = []
for k in range(1,len(dataSet)):
Dk = returnDk(tmp_matrix,k)
DkAverage = returnDkAverage(Dk)
EpsCandidate.append(DkAverage)
return EpsCandidate

德f returnMinptsCandidate(DistMatrix,EpsCandidate):

MinptsCandidate = []
for k in range(len(EpsCandidate)):
tmp_eps = EpsCandidate[k]
tmp_count = 0
for i in range(len(DistMatrix)):
for j in range(len(DistMatrix[i])):
if DistMatrix[i][j] <= tmp_eps:
tmp_count = tmp_count + 1
MinptsCandidate.append(tmp_count/len(dataSet))
return MinptsCandidate

德f returnClusterNumberList(dataset,EpsCandidate,MinptsCandidate):

np_dataset = np.array(dataset)
ClusterNumberList = []
for i in range(len(EpsCandidate)):
clustering = DBSCAN(eps= EpsCandidate[i],min_samples= MinptsCandidate[i]).fit(np_dataset)
num_clustering = max(clustering.labels_)
ClusterNumberList.append(num_clustering)
return ClusterNumberList

if __name__ == '__main__':
data = pd.read_csv('/Users/Desktop/Mic/recorder_test1/New folder/MFCCresultsforclustering/MFCCresultsforclustering.csv')
dataSet = data.iloc[:,0:13].values
EpsCandidate = returnEpsCandidate(dataSet)
DistMatrix = CalculateDistMatrix(dataSet)
MinptsCandidate = returnMinptsCandidate(DistMatrix,EpsCandidate)
ClusterNumberList = returnClusterNumberList(dataSet,EpsCandidate,MinptsCandidate)
print(EpsCandidate)
print(MinptsCandidate)
print('cluster number list is')
print(ClusterNumberList)
However, the output with the loading data set is all [-1]s. I am wondering where is the mistake. Am I right for this general direction? If not, how can I achieve the adaptive DBSCAN clustering?

Adaptive DBSCAN achievement

Adaptive DBSCAN achievement