77
def ClearDir(dirpath):
    """Reset ``dirpath`` to an empty directory.

    If ``dirpath`` already exists, its whole tree is removed (a notice is
    printed first) and the directory is then re-created empty.

    NOTE(review): the original indentation was not recoverable, so it is
    assumed that the directory is created even when it did not exist
    beforehand -- confirm against callers.

    Args:
        dirpath: Path of the directory to clear.
    """
    if os.path.exists(dirpath):
        print("Deleting ...", dirpath)
        shutil.rmtree(path=dirpath)
    # Re-create the directory so callers can write into it right away.
    os.makedirs(dirpath)
1313
@@ -31,7 +31,7 @@ def GetPointSets(pts_size,
3131 half_space_dist_ ,
3232 dim ,
3333 gauss_delta_ ,
34- _means = np .array ([])): # 获得最初的point set(包含num_clusters个簇,每个簇
34+ _means = np .array ([])): # generate gaussian clusters with specified parameters
3535 _points = []
3636 _labels = []
3737 # generate a multivariate Gaussian distribution
@@ -62,17 +62,12 @@ def GetPointSets(pts_size,
def GenDistubIds(pts_size, keep_ratio):
    """Randomly split the indices ``0 .. pts_size - 1`` into two groups.

    Args:
        pts_size: Total number of point indices.
        keep_ratio: Fraction of indices to place in the "keep" group; the
            group size is ``int(keep_ratio * pts_size)``.

    Returns:
        A tuple ``(keep_ids, dist_ids)`` of two ascending lists. ``keep_ids``
        holds the randomly sampled indices and ``dist_ids`` holds every
        remaining index.
    """
    ids = range(0, pts_size)

    keep_ids = random.sample(ids, int(keep_ratio * pts_size))
    keep_ids.sort()

    # Set membership keeps the complement computation linear instead of
    # scanning the keep list once per index.
    kept = set(keep_ids)
    dist_ids = [i for i in ids if i not in kept]

    return keep_ids, dist_ids
7270
73- # keep_ids, dist_ids = GenDistubIds(pts_size, keep_ratio)
74-
75-
7671# disturb given points
7772def DisturbPoints (inputs , dim , keep_ids , dist_ids , disturb_dist , HARD_MOVE ):
7873 output = inputs
@@ -91,8 +86,6 @@ def DisturbPoints(inputs, dim, keep_ids, dist_ids, disturb_dist, HARD_MOVE):
9186def DisturbClusters (inputs , dim , labels , disturb_label , pts_size , disturb_dist ,
9287 HARD_MOVE ):
9388 ids = range (0 , pts_size )
94- # keep_ids 相似性的点
95- # dist_ids 不相似的点
9689 dist_ids = [i for i in range (len (labels )) if labels [i ] == disturb_label ]
9790 keep_ids = [i for i in ids if i not in dist_ids ]
9891
@@ -128,8 +121,6 @@ def DisturbPointSets(inputs, labels, disturb_label_num, keep_ratio):
128121
129122 output = inputs
130123 ids = range (0 , pts_size )
131- # keep_ids 相似性的点
132- # dist_ids 不相似的点
133124 disturb_labels = random .sample (range (num_clusters ), disturb_label_num )
134125
135126 num_each_cluster = int ((1. - keep_ratio ) * pts_size / disturb_label_num )
@@ -194,23 +185,19 @@ def overlapClusters(inputs, pts_size, dim, means, labels, merge_labels):
194185 ids = range (0 , pts_size )
195186
196187 merge_mean = np .zeros ((dim ))
197- # 计算出要合并的几个簇中心的中心
188+ # compute the centroid of the centers of the clusters being merged
198189 for label in merge_labels :
199190 merge_mean += means [label ]
200191 merge_mean /= len (merge_labels )
201192
202193 for id in ids :
203194 # move the cluster center to the same location
204195 if labels [id ] in merge_labels :
205- # print(output[id, :].shape)
206- # print(merge_mean.shape)
207- # print(means[labels[id], :].shape)
208196 output [id , :] += merge_mean - means [labels [id ], :].reshape (dim , )
209197
210198 return output
211199
212200
213- # 缩小整个类簇而不改变knn
214201def scaleCluster (inputs ,
215202 pts_size ,
216203 dim ,
@@ -228,7 +215,6 @@ def scaleCluster(inputs,
228215 pts_size = pts_size ,
229216 dim = dim )
230217
231- # 将所有点围绕该中心进行缩放,并移动到
232218 shrink_ids = [id for id in ids if labels [id ] == shrink_label ]
233219 for id in shrink_ids :
234220 output [id ] = new_center + (output [id ] - scale_center ) * scale_factor
@@ -312,63 +298,3 @@ def DistOfEdges(dists, indices):
def writeInfo(filepath, info):
    """Serialize ``info`` as JSON into ``filepath``.

    The file is opened in write mode with UTF-8 encoding, so any existing
    content is overwritten.

    Args:
        filepath: Destination file path.
        info: Object to serialize; it is passed directly to ``json.dump``.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(info, f)
315-
316-
317- # # Amplify the cluster
318- # def DiffuseCluster(inputs, labels, disturb_label):
319- # output = inputs
320- # assert(disturb_label>=0 and disturb_label<=num_clusters)
321-
322- # # compute the centroid of the cluster
323- # disturb_ids = [i for i in range(len(labels)) if labels[i] == disturb_label]
324- # disturb_inputs = [inputs[id] for id in disturb_ids]
325- # disturb_inputs = np.array(disturb_ids)
326-
327- # centroid = np.sum(disturb_ids, 0) / disturb_ids.shape(0)
328-
329- # # for each point, compute the vector
330- # for i in disturb_inputs.shape[0]:
331- # # each point go through the vector
332- # output[i] +=
333-
334- # return output
335-
336- # # random select keeping edges. NEVER USE IT
337- # def DisturbEdges(inputs, kd_tree, keep_ratio=0.7):
338- # outputs = inputs
339- # dists, indices = kd_tree.query(
340- # inputs, k=k_closest_count) # 一口气对所有points构建knn
341- # edge_size = indices.shape[0]*(k_closest_count-1)
342-
343- # np.arange(0, points.shape[0])
344- # ids = range(0, pts_size)
345-
346- # # keep_edges 相似性的边
347- # keep_edges = []
348- # while len(keep_edges) != edge_size*keep_ratio:
349- # keep_ids_0 = np.random.randint(0, pts_size)
350- # keep_ids_1 = np.random.randint(0, pts_size)
351- # # ensure no self-loop and duplicate
352- # if keep_ids_0 == keep_ids_1 or [keep_ids_0, keep_ids_1] in keep_edges:
353- # continue
354- # keep_edges.append([keep_ids_0, keep_ids_1])
355-
356- # keep_edges.sort()
357- # # dist_edges 不相似的边
358- # dist_edges = []
359- # for i in range(len(indices)):
360- # for j in indices[i]:
361- # # print(i)
362- # # print(j)
363- # if [i, j] not in keep_edges:
364- # dist_edges.append([i, j])
365-
366- # # disturb edge endpoints
367- # for [i, j] in dist_edges:
368- # _moveVec_i = np.random.uniform(-0.5, 0.5, (dim))
369- # _moveVec_j = np.random.uniform(-0.5, 0.5, (dim))
370-
371- # outputs[i] += _moveVec_i
372- # outputs[j] += _moveVec_j
373-
374- # return outputs, keep_edges
0 commit comments