Skip to content

Commit 38bf385

Browse files
committed
first refactor
1 parent 0882377 commit 38bf385

27 files changed

Lines changed: 264 additions & 6477 deletions

.DS_Store

6 KB
Binary file not shown.

codes/.DS_Store

0 Bytes
Binary file not shown.

codes/graphBuild/dg_utils.py

Lines changed: 3 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
def ClearDir(dirpath):
99
if os.path.exists(dirpath):
10-
print("正在删除.....", dirpath)
10+
print("Deleting...", dirpath)
1111
shutil.rmtree(path=dirpath)
1212
os.makedirs(dirpath)
1313

@@ -31,7 +31,7 @@ def GetPointSets(pts_size,
3131
half_space_dist_,
3232
dim,
3333
gauss_delta_,
34-
_means=np.array([])): # 获得最初的point set(包含num_clusters个簇,每个簇
34+
_means=np.array([])): # generate gaussian clusters with specified parameters
3535
_points = []
3636
_labels = []
3737
# generate a multi-dimensional Gaussian distribution
@@ -62,17 +62,12 @@ def GetPointSets(pts_size,
6262
def GenDistubIds(pts_size, keep_ratio):
6363
ids = range(0, pts_size)
6464

65-
# keep_ids 相似性的点
66-
# dist_ids 不相似的点
6765
keep_ids = random.sample(ids, int(keep_ratio * pts_size))
6866
keep_ids.sort()
6967
dist_ids = [i for i in ids if i not in keep_ids]
7068

7169
return keep_ids, dist_ids
7270

73-
# keep_ids, dist_ids = GenDistubIds(pts_size, keep_ratio)
74-
75-
7671
# disturb given points
7772
def DisturbPoints(inputs, dim, keep_ids, dist_ids, disturb_dist, HARD_MOVE):
7873
output = inputs
@@ -91,8 +86,6 @@ def DisturbPoints(inputs, dim, keep_ids, dist_ids, disturb_dist, HARD_MOVE):
9186
def DisturbClusters(inputs, dim, labels, disturb_label, pts_size, disturb_dist,
9287
HARD_MOVE):
9388
ids = range(0, pts_size)
94-
# keep_ids 相似性的点
95-
# dist_ids 不相似的点
9689
dist_ids = [i for i in range(len(labels)) if labels[i] == disturb_label]
9790
keep_ids = [i for i in ids if i not in dist_ids]
9891

@@ -128,8 +121,6 @@ def DisturbPointSets(inputs, labels, disturb_label_num, keep_ratio):
128121

129122
output = inputs
130123
ids = range(0, pts_size)
131-
# keep_ids 相似性的点
132-
# dist_ids 不相似的点
133124
disturb_labels = random.sample(range(num_clusters), disturb_label_num)
134125

135126
num_each_cluster = int((1. - keep_ratio) * pts_size / disturb_label_num)
@@ -194,23 +185,19 @@ def overlapClusters(inputs, pts_size, dim, means, labels, merge_labels):
194185
ids = range(0, pts_size)
195186

196187
merge_mean = np.zeros((dim))
197-
# 计算出要合并的几个簇中心的中心
188+
# the center of several clusters
198189
for label in merge_labels:
199190
merge_mean += means[label]
200191
merge_mean /= len(merge_labels)
201192

202193
for id in ids:
203194
# move the cluster center to the same location
204195
if labels[id] in merge_labels:
205-
# print(output[id, :].shape)
206-
# print(merge_mean.shape)
207-
# print(means[labels[id], :].shape)
208196
output[id, :] += merge_mean - means[labels[id], :].reshape(dim, )
209197

210198
return output
211199

212200

213-
# 缩小整个类簇而不改变knn
214201
def scaleCluster(inputs,
215202
pts_size,
216203
dim,
@@ -228,7 +215,6 @@ def scaleCluster(inputs,
228215
pts_size=pts_size,
229216
dim=dim)
230217

231-
# 将所有点围绕该中心进行缩放,并移动到
232218
shrink_ids = [id for id in ids if labels[id] == shrink_label]
233219
for id in shrink_ids:
234220
output[id] = new_center + (output[id] - scale_center) * scale_factor
@@ -312,63 +298,3 @@ def DistOfEdges(dists, indices):
312298
def writeInfo(filepath, info):
313299
with open(filepath, 'w', encoding='utf-8') as f:
314300
json.dump(info, f)
315-
316-
317-
# # Amplify the cluster
318-
# def DiffuseCluster(inputs, labels, disturb_label):
319-
# output = inputs
320-
# assert(disturb_label>=0 and disturb_label<=num_clusters)
321-
322-
# # compute the centroid of the cluster
323-
# disturb_ids = [i for i in range(len(labels)) if labels[i] == disturb_label]
324-
# disturb_inputs = [inputs[id] for id in disturb_ids]
325-
# disturb_inputs = np.array(disturb_ids)
326-
327-
# centroid = np.sum(disturb_ids, 0) / disturb_ids.shape(0)
328-
329-
# # for each point, compute the vector
330-
# for i in disturb_inputs.shape[0]:
331-
# # each point go through the vector
332-
# output[i] +=
333-
334-
# return output
335-
336-
# # random select keeping edges. NEVER USE IT
337-
# def DisturbEdges(inputs, kd_tree, keep_ratio=0.7):
338-
# outputs = inputs
339-
# dists, indices = kd_tree.query(
340-
# inputs, k=k_closest_count) # 一口气对所有points构建knn
341-
# edge_size = indices.shape[0]*(k_closest_count-1)
342-
343-
# np.arange(0, points.shape[0])
344-
# ids = range(0, pts_size)
345-
346-
# # keep_edges 相似性的边
347-
# keep_edges = []
348-
# while len(keep_edges) != edge_size*keep_ratio:
349-
# keep_ids_0 = np.random.randint(0, pts_size)
350-
# keep_ids_1 = np.random.randint(0, pts_size)
351-
# # ensure no self-loop and duplicate
352-
# if keep_ids_0 == keep_ids_1 or [keep_ids_0, keep_ids_1] in keep_edges:
353-
# continue
354-
# keep_edges.append([keep_ids_0, keep_ids_1])
355-
356-
# keep_edges.sort()
357-
# # dist_edges 不相似的边
358-
# dist_edges = []
359-
# for i in range(len(indices)):
360-
# for j in indices[i]:
361-
# # print(i)
362-
# # print(j)
363-
# if [i, j] not in keep_edges:
364-
# dist_edges.append([i, j])
365-
366-
# # disturb edge endpoints
367-
# for [i, j] in dist_edges:
368-
# _moveVec_i = np.random.uniform(-0.5, 0.5, (dim))
369-
# _moveVec_j = np.random.uniform(-0.5, 0.5, (dim))
370-
371-
# outputs[i] += _moveVec_i
372-
# outputs[j] += _moveVec_j
373-
374-
# return outputs, keep_edges

codes/graphBuild/run.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
1-
#!/usr/bin/env python3
21
# encoding = utf-8
32
import os
43
import sys
54
import numpy as np
65
from sklearn.neighbors import KDTree
7-
import shutil
8-
import random
96
import json
107
import time
118

@@ -17,7 +14,7 @@
1714
import dg_utils
1815

1916

20-
# read point sets and configuration from file.
17+
# read high dimensional dataset from file
2118
def ReadPointSets(filepath, norm_data):
2219
_points = []
2320
_labels = []
@@ -42,7 +39,6 @@ def ReadPointSets(filepath, norm_data):
4239
_flag = True
4340

4441
_points.append(_point)
45-
# label
4642
_labels.append(items[-1])
4743

4844
p = np.array(_points)
@@ -57,19 +53,20 @@ def ReadPointSets(filepath, norm_data):
5753
def sav_graph(filepath, points, labels, k_closest_count):
5854
tree = KDTree(points)
5955
dists, indices = tree.query(points,
60-
k=k_closest_count + 1) # 一口气对所有points构建knn
56+
k=k_closest_count + 1) # build knn graph from points
6157

62-
with open(filepath, 'w') as file: # 打开新文件fm_{0}.txt
63-
# first write in the number of nodes
64-
file.write(str(points.shape[0]) + "\n") # 写入当前点
58+
with open(filepath, 'w') as file:
59+
# write in the number of nodes
60+
file.write(str(points.shape[0]) + "\n")
6561

62+
# write each point
6663
for i in range(points.shape[0]):
67-
file.write(str(i) + "\t") # 写入当前点
64+
file.write(str(i) + "\t")
6865
count = 0
6966
for t in range(indices[i].shape[0]):
70-
if indices[i][t] == i: # 是否包含自身
67+
if indices[i][t] == i:
7168
continue
72-
if count == k_closest_count: # 只写入Indices中前k-1个点
69+
if count == k_closest_count: # write the k_closest_count neighbors
7370
break
7471
# write incident point and corresponding distance
7572
file.write(str(indices[i][t]) + "\t" + str(dists[i][t]) + "\t")
@@ -80,7 +77,7 @@ def sav_graph(filepath, points, labels, k_closest_count):
8077
if __name__ == '__main__':
8178
argv = sys.argv
8279
assert (len(argv) == 2)
83-
config_path = argv[1] #"../../config/config_0.json"
80+
config_path = argv[1]
8481

8582
start = time.perf_counter()
8683
with open(config_path, 'r') as f:
@@ -91,7 +88,7 @@ def sav_graph(filepath, points, labels, k_closest_count):
9188
graph_path = config.graph_dir
9289
dg_utils.ClearDir(graph_path)
9390

94-
k_closest_count = config.k_closest_count #min(3*perplexity, pts_size) # K近邻的个数+1(虽然是K=4,但由于包含自身,实际为K-1邻近)
91+
k_closest_count = config.k_closest_count
9592

9693
raw_files = []
9794
for filename in os.listdir(input_dir):
@@ -100,17 +97,16 @@ def sav_graph(filepath, points, labels, k_closest_count):
10097
if ext == ".txt":
10198
raw_files.append(raw_file)
10299

103-
# size_dims = []
104100
for filepath in raw_files:
105101
data_id = dg_utils.GetGraphIDFromPath(filepath)
106-
print("当前处理: " + str(data_id) + " Graph")
102+
print("Processing graph " + str(data_id))
107103

108104
cur_points, labels, pts_size, dim = ReadPointSets(filepath,
109105
norm_data=True)
110106
print((pts_size, dim))
111107

112108
sav_graph(os.path.join(graph_path, "g_{}.txt".format(data_id)),
113-
cur_points, labels, k_closest_count) # 打开新文件fm_{1}.txt
109+
cur_points, labels, k_closest_count)
114110

115111
elapsed = (time.perf_counter() - start)
116112
print("Total time for building knn graph:", elapsed)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
QMAKE_MAC_SDK.macosx.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.1.sdk
2+
QMAKE_MAC_SDK.macosx.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
3+
QMAKE_MAC_SDK.macosx.SDKVersion = 11.1
4+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
5+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
6+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \
7+
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \
8+
-id
9+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \
10+
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \
11+
cq
12+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \
13+
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \
14+
-s
15+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
16+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
17+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_ACTOOL = /Applications/Xcode.app/Contents/Developer/usr/bin/actool
18+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_C = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
19+
QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_C_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
20+
QMAKE_CXX.QT_COMPILER_STDCXX = 199711L
21+
QMAKE_CXX.QMAKE_APPLE_CC = 6000
22+
QMAKE_CXX.QMAKE_APPLE_CLANG_MAJOR_VERSION = 12
23+
QMAKE_CXX.QMAKE_APPLE_CLANG_MINOR_VERSION = 0
24+
QMAKE_CXX.QMAKE_APPLE_CLANG_PATCH_VERSION = 0
25+
QMAKE_CXX.QMAKE_GCC_MAJOR_VERSION = 4
26+
QMAKE_CXX.QMAKE_GCC_MINOR_VERSION = 2
27+
QMAKE_CXX.QMAKE_GCC_PATCH_VERSION = 1
28+
QMAKE_CXX.COMPILER_MACROS = \
29+
QT_COMPILER_STDCXX \
30+
QMAKE_APPLE_CC \
31+
QMAKE_APPLE_CLANG_MAJOR_VERSION \
32+
QMAKE_APPLE_CLANG_MINOR_VERSION \
33+
QMAKE_APPLE_CLANG_PATCH_VERSION \
34+
QMAKE_GCC_MAJOR_VERSION \
35+
QMAKE_GCC_MINOR_VERSION \
36+
QMAKE_GCC_PATCH_VERSION
37+
QMAKE_CXX.INCDIRS = \
38+
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \
39+
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/12.0.0/include \
40+
/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.1.sdk/usr/include \
41+
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include
42+
QMAKE_CXX.LIBDIRS = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.1.sdk/usr/lib
43+
QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer
44+
QMAKE_XCODE_VERSION = 12.3

0 commit comments

Comments
 (0)