Jainsaniya
新贡献者

自适应DBSCAN成就

我正在做DBSCAN聚类Python。我想通过自我计算其EPS和MINPTS参数来实现一种自适应方法来返回簇数。以下是我的代码。

导入数学
导入副本
导入numpy作为NP
导入大熊猫作为pd
来自Sklearn.Cluster Import DBSCAN


def LoadDataSet(文件名,splitchar ='\ t'):

数据集= []
以开放式(文件名)为fr:
对于fr.Readlines()中的行:
curline = line.strip()。拆分(splitchar)
fltline = list(MAP(float,Curline))
dataset.append(fltline)
返回数据集


DEF DIST(A,B):

return Math.sqrt(Math.pow(A [0] -b [0],2) + Math.pow(A [1] -b [1],2))


def returndk(Matrix,K):

dk = []
对于我的范围(len(矩阵)):
dk.append(矩阵[i] [k])
返回DK


def returndkaverage(DK):

总和= 0
对于我的范围(len(dk)):
sum = sum + dk [i]
返回总和/len(DK)


DEF CACTURALUTISTMATRIX(数据集):

distmatrix = [[[0 for j in range(len(dataset))] for range(len(dataset))]
对于我的范围(LEN(DATASET)):
对于J范围(LEN(DATASET))):
distmatrix [i] [j] = dist(dataset [i],dataset [j])
返回distmatrix


def returnepscandidate(数据集):

distmatrix = carculatedistMatrix(数据集)
tmp_matrix = copy.deepcopy(distmatrix)
对于我的范围(len(tmp_matrix)):
tmp_matrix [i] .sort()
epscandidate = []
对于范围内的K(1,Len(数据集)):
dk = returndk(tmp_matrix,k)
dkaverage = returndkaverage(DK)
epscandidate.append(dkaverage)
返回epscandidate


def return minptscandidate(Distmatrix,Epscandidate):

minptscandidate = []
对于K范围(Len(Epscandidate)):
tmp_eps = epscandidate [k]
tmp_count = 0
对于我的范围(len(distmatrix)):
对于J中的J(Len(Distmatrix [i])):
如果distmatrix [i] [j] <= tmp_eps:
tmp_count = tmp_count + 1
minptScandidate.Append(tmp_count/len(数据集))
返回minptscandidate


DEF return Clusternumberlist(数据集,Epscandidate,MinptScandidate):

np_dataset = np.array(数据集)
clusternumberlist = []
对于我的范围(Len(Epscandidate)):
clustering = dbscan(eps = epscandidate [i],min_samples = minptScandidate [i])。fit(np_dataset)
num_clustering = max(clustering.labels_)
clusternumberlist.append(num_clustering)
返回clusternumberlist

如果__name__ =='__ main __':
data = pd.read_csv('/users/desktop/mic/recorder_test1/new文件夹/mfccresultsforclustering/mfccresultsforclustering.csv')
dataset = data.iloc [:,0:13]。值
epscandidate = returnepscandidate(数据集)
distmatrix = carculatedistMatrix(数据集)
minptScandidate = return minptscandidate(Distmatrix,Epscandidate)
clusternumberlist = returnClusternumberlist(数据集,Epscandidate,MinptScandidate)
打印(Epscandidate)
打印(MinptScandidate)
打印('集群号列表')
印刷(Clusternumberlist)
但是,带有加载数据集的输出为[-1] s。我想知道错误在哪里。我对这个总体方向是正确的吗?如果没有,我该如何实现自适应DBSCAN聚类?

0答复0