-
Notifications
You must be signed in to change notification settings - Fork 2
/
SaveModel.py
56 lines (44 loc) · 2.26 KB
/
SaveModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import pandas as pd
import torch as th
import pickle
def _embedding_frame(ntype, mapping, node_feats):
    """Build the id + embedding table that is written out for one node type.

    Args:
        ntype: Node type name; used as the ``~label`` value and as the prefix
            of every ``~id`` value.
        mapping: Dict of original id -> graph node id for this node type.
        node_feats: 2-D array of embeddings; row ``k`` is looked up by graph
            node id ``k``.

    Returns:
        A DataFrame with columns ``~label``, ``~id`` and one
        ``val<i>:Double`` column per embedding dimension (``i`` is 1-based),
        one row per entry of ``mapping`` whose node id has an embedding row.

    Raises:
        ValueError: If ``mapping`` and ``node_feats`` disagree on the number
            of nodes.
    """
    num_nodes, num_feats = node_feats.shape
    if len(mapping) != num_nodes:
        raise ValueError(
            "id mapping for node type {!r} has {} entries but the model holds "
            "{} embedding rows".format(ntype, len(mapping), num_nodes)
        )

    # Original ids may be ints or other non-string values; cast explicitly so
    # building '<ntype>-<original id>' never fails on a str + non-str concat.
    ids_df = pd.DataFrame({
        '~label': [ntype] * num_nodes,
        '~id': [ntype + '-' + str(old_id) for old_id in mapping],
        'node_id': list(mapping.values()),
    })

    # One column per embedding dimension.
    feats_df = pd.DataFrame(
        {'val' + str(i + 1) + ':Double': node_feats[:, i] for i in range(num_feats)}
    )

    # Row k of feats_df belongs to graph node id k, so join the node_id column
    # against the positional index of the feature table.
    merged = ids_df.merge(feats_df, left_on='node_id', right_index=True)

    # node_id is only the join key; it is not part of the exported
    # (Gremlin-style ~id / ~label / <name>:<type>) column layout.
    return merged.drop(columns=['node_id'])


def save_model(g, model, model_dir, id_to_node, target_ntype='TransactionID'):
    """Persist the trained model, graph metadata and node embeddings.

    Writes into ``model_dir`` (created if missing):

    * ``model.pth`` - the PyTorch ``state_dict`` of ``model``.
    * ``metadata.pkl`` - a pickled dict with ``'etypes'`` (the graph's
      canonical edge types) and ``'ntype_cnt'`` (node count per node type),
      used by the inference code to initialize the RGCN model.
    * ``<ntype>.csv`` - one file per non-target node type, holding the
      original ids and the trained embedding of every node of that type.

    Args:
        g: Graph exposing ``canonical_etypes``, ``ntypes`` and
            ``number_of_nodes(ntype)``.
        model: Model exposing ``state_dict()`` and an ``embed`` mapping from
            node type to a 2-D embedding tensor.
        model_dir: Output directory.
        id_to_node: Dict of node type -> {original id -> graph node id}.
        target_ntype: Node type to skip when exporting embeddings (the
            prediction target). Defaults to ``'TransactionID'``.

    Raises:
        ValueError: If, for an exported node type, the id mapping and the
            embedding matrix disagree on the number of nodes.
    """
    os.makedirs(model_dir, exist_ok=True)

    # Save the PyTorch model's parameters.
    th.save(model.state_dict(), os.path.join(model_dir, 'model.pth'))

    # Save the graph's structure information for the inference code.
    metadata = {
        'etypes': g.canonical_etypes,
        'ntype_cnt': {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes},
    }
    with open(os.path.join(model_dir, 'metadata.pkl'), 'wb') as f:
        pickle.dump(metadata, f)

    # Convert each non-target id mapping plus its embeddings into a CSV file.
    for ntype, mapping in id_to_node.items():
        if ntype == target_ntype:
            continue
        node_feats = model.embed[ntype].detach().cpu().numpy()
        frame = _embedding_frame(ntype, mapping, node_feats)
        frame.to_csv(os.path.join(model_dir, ntype + '.csv'),
                     index=False, header=True, encoding='utf-8')