Python的treelib構建多叉樹——快速命名節點id
阿新 • • 發佈:2018-11-08
思想就是:為保證多叉樹節點的唯一性,主要就是根據巢狀list,構建對應的節點id
首先,將巢狀list中第一個list的各個元素,直接作為該list中對應節點的id;
其次,為了保證節點的命名不重複,建立一個字典來統計各個節點的出現次數 ;
若同一棵樹上有不在同一路徑上的相同子樹,怎麼知道該將這棵子樹放到哪個節點下面呢?
很簡單,就是建立臨時path,從當前list的第一個元素與之前的list相同位置元素進行比較,若第一個元素相同,則將對應的list抽出放到path,當前list節點命名與path中的同位置相同元素一樣,若不同,則將id命名為該元素與其當前計數的拼接str。
def encoding_tree_node_id(temps):
    """Assign a node ID to every element of every root-to-node path.

    Each inner list of ``temps`` is read as a path of node names. Paths that
    share a leading run of names share the IDs of that run, so they hang off
    the same nodes; the first position where a path diverges from every
    earlier path, and everything after it, receives fresh IDs.

    ID scheme:
      * The elements of the first path are used verbatim as their own IDs.
      * Any later node that needs a fresh ID gets ``name + str(count)``,
        where ``count`` is the running number of times ``name`` has been
        seen so far (the first path contributes 1 per distinct name).

    Because IDs are plain concatenations, names that themselves end in digits
    can in principle produce the same string as another generated ID; the
    first path's IDs are unique only if that path has no repeated element.

    Args:
        temps: Sequence of paths; each path is a sequence of hashable
            strings (elements are used as dict keys and concatenated with a
            number).

    Returns:
        A new list of lists, parallel to ``temps``, holding the node IDs.
        The input is not modified and no inner list is shared with it.
        An empty ``temps`` yields ``[]``; an empty path yields ``[]``.

    Example:
        >>> encoding_tree_node_id([['a', 'b'], ['a', 'c'], ['x', 'b']])
        [['a', 'b'], ['a', 'c1'], ['x1', 'b2']]
    """
    if not temps:
        return []

    # Copy the first path so the result never aliases the caller's list.
    first_ids = list(temps[0])
    temps_id = [first_ids]

    # Occurrence counter used to build fresh IDs. The first path counts each
    # distinct name once (kept as-is so existing outputs do not change).
    id_count_dict = dict.fromkeys(first_ids, 1)

    # Prefix tree of everything encoded so far:
    #   name -> (node_id, children), where children has the same shape.
    # Looking a path up level by level finds the IDs of its shared prefix.
    trie = {}
    level = trie
    for name in first_ids:
        children = {}
        level[name] = (name, children)
        level = children

    for current in temps[1:]:
        level = trie
        current_ids = []
        for name in current:
            # Count the occurrence *before* possibly issuing an ID, one
            # element at a time, so a name repeated inside a single path
            # gets distinct IDs instead of the same one twice.
            id_count_dict[name] = id_count_dict.get(name, 0) + 1
            if name not in level:
                # Path diverges here: create a new node under the current
                # parent. Its subtree is empty, so every following element
                # of this path will also be new.
                level[name] = (name + str(id_count_dict[name]), {})
            node_id, level = level[name]
            current_ids.append(node_id)
        temps_id.append(current_ids)

    return temps_id
例如:
>>> temps = [
['sa7','sa2','sa3','sa5'],
['sa1','sa3','sa5'],
['sa1','sa3','sa6'],
['sa1','sa5'],
['sa7','sa2','sa3','sa6'],
['sa7','sa2','sa4'],
['sa1','sa3','sa7','sa8'],
['sa1','sa5','sa8']
]
>>> encoding_tree_node_id(temps)
[['sa7', 'sa2', 'sa3', 'sa5'], ['sa11', 'sa32', 'sa52'], ['sa11', 'sa32', 'sa61'], ['sa11', 'sa53'], ['sa7', 'sa2', 'sa3', 'sa62'], ['sa7', 'sa2', 'sa41'], ['sa11', 'sa32', 'sa74', 'sa81'], ['sa11', 'sa53', 'sa82']]