From 63ba5e321fb77e9bbcb7250ed8b2ad2506b80142 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sat, 27 Apr 2024 20:37:03 -0400 Subject: [PATCH 01/22] Add DDP. --- alignn/data.py | 13 +++++--- alignn/graphs.py | 3 +- alignn/train.py | 31 ++++++++++++----- alignn/train_alignn.py | 75 +++++++++++++++++++++++------------------- 4 files changed, 76 insertions(+), 46 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index b5872501..17724d9f 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -16,10 +16,12 @@ # from jarvis.core.graphs import Graph, StructureDataset from jarvis.db.figshare import data as jdata -from torch.utils.data import DataLoader + +# from torch.utils.data import DataLoader from tqdm import tqdm import math from jarvis.db.jsonutils import dumpjson +from dgl.dataloading import GraphDataLoader # from sklearn.pipeline import Pipeline import pickle as pk @@ -570,7 +572,8 @@ def get_train_val_loaders( collate_fn = train_data.collate_line_graph # use a regular pytorch dataloader - train_loader = DataLoader( + train_loader = GraphDataLoader( + # train_loader = DataLoader( train_data, batch_size=batch_size, shuffle=True, @@ -580,7 +583,8 @@ def get_train_val_loaders( pin_memory=pin_memory, ) - val_loader = DataLoader( + val_loader = GraphDataLoader( + # val_loader = DataLoader( val_data, batch_size=batch_size, shuffle=False, @@ -591,7 +595,8 @@ def get_train_val_loaders( ) test_loader = ( - DataLoader( + GraphDataLoader( + # DataLoader( test_data, batch_size=1, shuffle=False, diff --git a/alignn/graphs.py b/alignn/graphs.py index 74a10b7f..d8c1a405 100644 --- a/alignn/graphs.py +++ b/alignn/graphs.py @@ -13,6 +13,7 @@ # from jarvis.core.atoms import Atoms from collections import defaultdict from typing import List, Tuple, Sequence, Optional +from dgl.data import DGLDataset import torch import dgl @@ -711,7 +712,7 @@ def compute_bond_cosines(edges): return {"h": bond_cosine} -class StructureDataset(torch.utils.data.Dataset): +class StructureDataset(DGLDataset): """Dataset of crystal DGLGraphs.""" def __init__( diff --git a/alignn/train.py b/alignn/train.py index 4854c178..aedfd61b 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -5,6 +5,8 @@ then `tensorboard --logdir tb_logs/test` to monitor results... """ +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP from functools import partial from typing import Any, Dict, Union import torch @@ -28,6 +30,18 @@ warnings.filterwarnings("ignore", category=RuntimeWarning) torch.set_default_dtype(torch.float32) +device = "cpu" +if torch.cuda.is_available(): + device = torch.device("cuda") + + +def setup(rank, world_size): + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + # Initialize the distributed environment. + dist.init_process_group("nccl", rank=rank, world_size=world_size) + torch.cuda.set_device(rank) + def activated_output_transform(output): """Exponentiate output.""" @@ -101,6 +115,8 @@ def train_dgl( model: nn.Module = None, # checkpoint_dir: Path = Path("./"), train_val_test_loaders=[], + rank="", + world_size="", # log_tensorboard: bool = False, ): """Training entry point for DGL networks. 
@@ -108,6 +124,8 @@ def train_dgl( `config` should conform to alignn.conf.TrainingConfig, and if passed as a dict with matching keys, pydantic validation is used """ + print("rank", rank) + setup(rank, world_size) print(config) if type(config) is dict: try: @@ -179,9 +197,8 @@ def train_dgl( val_loader = train_val_test_loaders[1] test_loader = train_val_test_loaders[2] prepare_batch = train_val_test_loaders[3] - device = "cpu" - if torch.cuda.is_available(): - device = torch.device("cuda") + # rank=0 + device = torch.device(f"cuda:{rank}") prepare_batch = partial(prepare_batch, device=device) if classification: config.model.classification = True @@ -208,12 +225,10 @@ def train_dgl( net = _model.get(config.model.name)(config.model) else: net = model + + print("net", net) net.to(device) - if config.data_parallel and torch.cuda.device_count() > 1: - # For multi-GPU training make data_parallel:true in config.json file - device_ids = [cid for cid in range(torch.cuda.device_count())] - print("Let's use", torch.cuda.device_count(), "GPUs!") - net = torch.nn.DataParallel(net, device_ids=device_ids).cuda() + net = DDP(net, device_ids=[rank]) # group parameters to skip weight decay for bias and batchnorm params = group_decay(net) optimizer = setup_optimizer(params, config) diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index 79f78961..9aaf4e54 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -115,24 +115,24 @@ def train_for_folder( - root_dir="examples/sample_data", - config_name="config.json", - # keep_data_order=False, - classification_threshold=None, - batch_size=None, - epochs=None, - id_key="jid", - target_key="total_energy", - atomwise_key="forces", - gradwise_key="forces", - stresswise_key="stresses", - file_format="poscar", - restart_model_path=None, - # subtract_mean=False, - # normalize_with_natoms=False, - output_dir=None, + rank, + world_size, + root_dir, + config_name, + classification_threshold, + batch_size, + epochs, + id_key, + target_key, + atomwise_key, + gradwise_key, + stresswise_key, + file_format, + restart_model_path, + output_dir, ): """Train for a folder.""" + print("root_dir", root_dir) id_prop_json = os.path.join(root_dir, "id_prop.json") id_prop_json_zip = os.path.join(root_dir, "id_prop.json.zip") id_prop_csv = os.path.join(root_dir, "id_prop.csv") @@ -372,6 +372,9 @@ def train_for_folder( ) # print("dataset", dataset[0]) t1 = time.time() + # world_size = torch.cuda.device_count() + print("rank ht1", rank) + print("world_size ht1", world_size) train_dgl( config, model=model, @@ -381,6 +384,8 @@ def train_for_folder( test_loader, prepare_batch, ], + rank=rank, + world_size=world_size, ) t2 = time.time() print("Time taken (s)", t2 - t1) @@ -390,21 +395,25 @@ def train_for_folder( if __name__ == "__main__": args = parser.parse_args(sys.argv[1:]) - train_for_folder( - root_dir=args.root_dir, - config_name=args.config_name, - # keep_data_order=args.keep_data_order, - classification_threshold=args.classification_threshold, - output_dir=args.output_dir, - batch_size=(args.batch_size), - epochs=(args.epochs), - target_key=(args.target_key), - id_key=(args.id_key), - atomwise_key=(args.atomwise_key), - gradwise_key=(args.force_key), - stresswise_key=(args.stresswise_key), - restart_model_path=(args.restart_model_path), - # subtract_mean=(args.subtract_mean), - # normalize_with_natoms=(args.normalize_with_natoms), - file_format=(args.file_format), + world_size = int(torch.cuda.device_count()) + rank = [0, 1] + torch.multiprocessing.spawn( + 
train_for_folder, + args=( + world_size, + args.root_dir, + args.config_name, + args.classification_threshold, + args.batch_size, + args.epochs, + args.id_key, + args.target_key, + args.atomwise_key, + args.force_key, + args.stresswise_key, + args.file_format, + args.restart_model_path, + args.output_dir, + ), + nprocs=world_size, ) From 6e19d56a63cc2d635c1f7c83e3382c18c6fc65f1 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sat, 27 Apr 2024 21:36:33 -0400 Subject: [PATCH 02/22] Lint fix. --- alignn/train.py | 1 + alignn/train_alignn.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/alignn/train.py b/alignn/train.py index aedfd61b..ddfe602d 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -36,6 +36,7 @@ def setup(rank, world_size): + """Set up multi GPU rank.""" os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "12355" # Initialize the distributed environment. diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index 9aaf4e54..cdc61a19 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -396,7 +396,7 @@ def train_for_folder( if __name__ == "__main__": args = parser.parse_args(sys.argv[1:]) world_size = int(torch.cuda.device_count()) - rank = [0, 1] + print("world_size", world_size) torch.multiprocessing.spawn( train_for_folder, args=( From db278b5a86070c038743e5561b85c3bcbee1696d Mon Sep 17 00:00:00 2001 From: knc6 Date: Sat, 27 Apr 2024 21:57:17 -0400 Subject: [PATCH 03/22] Add find_unused_true. --- alignn/data.py | 3 +++ alignn/train.py | 20 ++++++++++---------- alignn/train_alignn.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index 17724d9f..405f5a25 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -581,6 +581,7 @@ def get_train_val_loaders( drop_last=True, num_workers=workers, pin_memory=pin_memory, + use_ddp=True, ) val_loader = GraphDataLoader( @@ -592,6 +593,7 @@ def get_train_val_loaders( drop_last=True, num_workers=workers, pin_memory=pin_memory, + use_ddp=True, ) test_loader = ( @@ -604,6 +606,7 @@ def get_train_val_loaders( drop_last=False, num_workers=workers, pin_memory=pin_memory, + use_ddp=True, ) if len(dataset_test) > 0 else None diff --git a/alignn/train.py b/alignn/train.py index ddfe602d..9fe41474 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -35,13 +35,13 @@ device = torch.device("cuda") -def setup(rank, world_size): - """Set up multi GPU rank.""" - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - # Initialize the distributed environment. - dist.init_process_group("nccl", rank=rank, world_size=world_size) - torch.cuda.set_device(rank) +# def setup(rank, world_size): +# """Set up multi GPU rank.""" +# os.environ["MASTER_ADDR"] = "localhost" +# os.environ["MASTER_PORT"] = "12355" +# # Initialize the distributed environment. 
+# dist.init_process_group("nccl", rank=rank, world_size=world_size) +# torch.cuda.set_device(rank) def activated_output_transform(output): @@ -125,8 +125,8 @@ def train_dgl( `config` should conform to alignn.conf.TrainingConfig, and if passed as a dict with matching keys, pydantic validation is used """ - print("rank", rank) - setup(rank, world_size) + # print("rank", rank) + # setup(rank, world_size) print(config) if type(config) is dict: try: @@ -229,7 +229,7 @@ def train_dgl( print("net", net) net.to(device) - net = DDP(net, device_ids=[rank]) + net = DDP(net, device_ids=[rank], find_unused_parameters=True) # group parameters to skip weight decay for bias and batchnorm params = group_decay(net) optimizer = setup_optimizer(params, config) diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index cdc61a19..a60ea8d4 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -2,6 +2,7 @@ """Module to train for a folder with formatted dataset.""" import os +import torch.distributed as dist import csv import sys import json @@ -21,6 +22,15 @@ device = torch.device("cuda") +def setup(rank, world_size): + """Set up multi GPU rank.""" + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + # Initialize the distributed environment. + dist.init_process_group("nccl", rank=rank, world_size=world_size) + torch.cuda.set_device(rank) + + parser = argparse.ArgumentParser( description="Atomistic Line Graph Neural Network" ) @@ -132,6 +142,7 @@ def train_for_folder( output_dir, ): """Train for a folder.""" + setup(rank, world_size) print("root_dir", root_dir) id_prop_json = os.path.join(root_dir, "id_prop.json") id_prop_json_zip = os.path.join(root_dir, "id_prop.json.zip") From ae3842a929d25f81792a827f0120f543a693ef82 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sun, 28 Apr 2024 16:10:40 -0400 Subject: [PATCH 04/22] CPU and GPU trainings. 
--- alignn/data.py | 38 ++- alignn/graphs.py | 1 + alignn/train.py | 573 +++++++++++++++++++++-------------------- alignn/train_alignn.py | 52 +++- 4 files changed, 364 insertions(+), 300 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index 405f5a25..af378871 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -1,11 +1,9 @@ -"""Jarvis-dgl data loaders and DGLGraph utilities.""" +"""ALIGNN data loaders and DGLGraph utilities.""" import random from pathlib import Path from typing import Optional - -# from typing import Dict, List, Optional, Set, Tuple - +from torch.utils.data.distributed import DistributedSampler import os import torch import dgl @@ -13,11 +11,7 @@ import pandas as pd from jarvis.core.atoms import Atoms from alignn.graphs import Graph, StructureDataset - -# from jarvis.core.graphs import Graph, StructureDataset from jarvis.db.figshare import data as jdata - -# from torch.utils.data import DataLoader from tqdm import tqdm import math from jarvis.db.jsonutils import dumpjson @@ -123,7 +117,7 @@ def atoms_to_graph(atoms): print("Converting to graphs!") graphs = [] # columns=dataset.columns - for ii, i in tqdm(dataset.iterrows()): + for ii, i in tqdm(dataset.iterrows(), total=len(dataset)): # print('iooooo',i) atoms = i["atoms"] structure = ( @@ -239,6 +233,7 @@ def get_torch_dataset( classification=False, output_dir=".", tmp_name="dataset", + sampler=None, ): """Get Torch Dataset.""" df = pd.DataFrame(dataset) @@ -274,6 +269,7 @@ def get_torch_dataset( line_graph=line_graph, id_tag=id_tag, classification=classification, + sampler=sampler, ) return data @@ -303,6 +299,7 @@ def get_train_val_loaders( filename: str = "sample", id_tag: str = "jid", use_canonize: bool = False, + # use_ddp: bool = False, cutoff: float = 8.0, cutoff_extra: float = 3.0, max_neighbors: int = 12, @@ -312,8 +309,11 @@ def get_train_val_loaders( keep_data_order=False, output_features=1, output_dir=None, + world_size=0, + rank=0, ): """Help function to set up JARVIS train and val dataloaders.""" + train_sample = filename + "_train.data" val_sample = filename + "_val.data" test_sample = filename + "_test.data" @@ -500,6 +500,18 @@ def get_train_val_loaders( print("Data error", exp) pass + if world_size > 1: + use_ddp = True + train_sampler = DistributedSampler( + dataset_train, num_replicas=world_size, rank=rank + ) + val_sampler = DistributedSampler( + dataset_val, num_replicas=world_size, rank=rank + ) + else: + use_ddp = False + train_sampler = None + val_sampler = None train_data = get_torch_dataset( dataset=dataset_train, id_tag=id_tag, @@ -517,6 +529,7 @@ def get_train_val_loaders( max_neighbors=max_neighbors, classification=classification_threshold is not None, output_dir=output_dir, + sampler=train_sampler, tmp_name="train_data", ) val_data = ( @@ -534,6 +547,7 @@ def get_train_val_loaders( line_graph=line_graph, cutoff=cutoff, cutoff_extra=cutoff_extra, + sampler=val_sampler, max_neighbors=max_neighbors, classification=classification_threshold is not None, output_dir=output_dir, @@ -581,7 +595,7 @@ def get_train_val_loaders( drop_last=True, num_workers=workers, pin_memory=pin_memory, - use_ddp=True, + use_ddp=use_ddp, ) val_loader = GraphDataLoader( @@ -593,7 +607,7 @@ def get_train_val_loaders( drop_last=True, num_workers=workers, pin_memory=pin_memory, - use_ddp=True, + use_ddp=use_ddp, ) test_loader = ( @@ -606,7 +620,7 @@ def get_train_val_loaders( drop_last=False, num_workers=workers, pin_memory=pin_memory, - use_ddp=True, + use_ddp=use_ddp, ) if len(dataset_test) > 0 else None diff 
--git a/alignn/graphs.py b/alignn/graphs.py index d8c1a405..53e772ab 100644 --- a/alignn/graphs.py +++ b/alignn/graphs.py @@ -728,6 +728,7 @@ def __init__( line_graph=False, classification=False, id_tag="jid", + sampler=None, ): """Pytorch Dataset for atomistic graphs. diff --git a/alignn/train.py b/alignn/train.py index 9fe41474..363aa8f5 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -5,7 +5,6 @@ then `tensorboard --logdir tb_logs/test` to monitor results... """ -import torch.distributed as dist from torch.nn.parallel import DistributedDataParallel as DDP from functools import partial from typing import Any, Dict, Union @@ -30,10 +29,6 @@ warnings.filterwarnings("ignore", category=RuntimeWarning) torch.set_default_dtype(torch.float32) -device = "cpu" -if torch.cuda.is_available(): - device = torch.device("cuda") - # def setup(rank, world_size): # """Set up multi GPU rank.""" @@ -116,8 +111,8 @@ def train_dgl( model: nn.Module = None, # checkpoint_dir: Path = Path("./"), train_val_test_loaders=[], - rank="", - world_size="", + rank=0, + world_size=0, # log_tensorboard: bool = False, ): """Training entry point for DGL networks. @@ -127,13 +122,14 @@ def train_dgl( """ # print("rank", rank) # setup(rank, world_size) - print(config) - if type(config) is dict: - try: - print(config) - config = TrainingConfig(**config) - except Exception as exp: - print("Check", exp) + if rank == 0: + print(config) + if type(config) is dict: + try: + print(config) + config = TrainingConfig(**config) + except Exception as exp: + print("Check", exp) if not os.path.exists(config.output_dir): os.makedirs(config.output_dir) @@ -154,6 +150,13 @@ def train_dgl( line_graph = False if config.model.alignn_layers > 0: line_graph = True + if world_size > 1: + use_ddp = True + else: + use_ddp = False + device = "cpu" + if torch.cuda.is_available(): + device = torch.device("cuda") if not train_val_test_loaders: # use input standardization for all real-valued feature sets # print("config.neighbor_strategy",config.neighbor_strategy) @@ -192,6 +195,7 @@ def train_dgl( standard_scalar_and_pca=config.standard_scalar_and_pca, keep_data_order=config.keep_data_order, output_dir=config.output_dir, + use_ddp=use_ddp, ) else: train_loader = train_val_test_loaders[0] @@ -199,7 +203,8 @@ def train_dgl( test_loader = train_val_test_loaders[2] prepare_batch = train_val_test_loaders[3] # rank=0 - device = torch.device(f"cuda:{rank}") + if use_ddp: + device = torch.device(f"cuda:{rank}") prepare_batch = partial(prepare_batch, device=device) if classification: config.model.classification = True @@ -227,9 +232,11 @@ def train_dgl( else: net = model - print("net", net) + # print("net", net) + # print("device", device) net.to(device) - net = DDP(net, device_ids=[rank], find_unused_parameters=True) + if use_ddp: + net = DDP(net, device_ids=[rank], find_unused_parameters=True) # group parameters to skip weight decay for bias and batchnorm params = group_decay(net) optimizer = setup_optimizer(params, config) @@ -468,23 +475,7 @@ def get_batch_errors(dat=[]): scheduler.step() train_final_time = time.time() train_ep_time = train_final_time - train_init_time - print( - "TrainLoss", - "Epoch", - e, - "total", - running_loss, - "out", - mean_out, - "atom", - mean_atom, - "grad", - mean_grad, - "stress", - mean_stress, - "time", - train_ep_time, - ) + # if rank == 0: # or world_size == 1: history_train.append([mean_out, mean_atom, mean_grad, mean_stress]) dumpjson( filename=os.path.join(config.output_dir, "history_train.json"), @@ -603,260 
+594,288 @@ def get_batch_errors(dat=[]): data=val_result, ) best_model = net - print( - "ValLoss", - "Epoch", - e, - "total", - val_loss, - "out", - mean_out, - "atom", - mean_atom, - "grad", - mean_grad, - "stress", - mean_stress, - saving_msg, - ) history_val.append([mean_out, mean_atom, mean_grad, mean_stress]) dumpjson( filename=os.path.join(config.output_dir, "history_val.json"), data=history_val, ) - - test_loss = 0 - test_result = [] - for dats, jid in zip(test_loader, test_loader.dataset.ids): - # for dats in test_loader: - info = {} - info["id"] = jid - optimizer.zero_grad() - # print('dats[0]',dats[0]) - # print('test_loader',test_loader) - # print('test_loader.dataset.ids',test_loader.dataset.ids) - result = net([dats[0].to(device), dats[1].to(device)]) - loss1 = 0 # Such as energy - loss2 = 0 # Such as bader charges - loss3 = 0 # Such as forces - loss4 = 0 # Such as stresses - if config.model.output_features is not None and not classification: - # print('result["out"]',result["out"]) - # print('dats[2]',dats[2]) - loss1 = config.model.graphwise_weight * criterion( - result["out"], dats[2].to(device) + # print('rank',rank) + # print('world_size',world_size) + if rank == 0: + print( + "TrainLoss", + "Epoch", + e, + "total", + running_loss, + "out", + mean_out, + "atom", + mean_atom, + "grad", + mean_grad, + "stress", + mean_stress, + "time", + train_ep_time, ) - info["target_out"] = dats[2].cpu().numpy().tolist() - info["pred_out"] = ( - result["out"].cpu().detach().numpy().tolist() + print( + "ValLoss", + "Epoch", + e, + "total", + val_loss, + "out", + mean_out, + "atom", + mean_atom, + "grad", + mean_grad, + "stress", + mean_stress, + saving_msg, ) - if config.model.atomwise_output_features > 0: - loss2 = config.model.atomwise_weight * criterion( - result["atomwise_pred"].to(device), - dats[0].ndata["atomwise_target"].to(device), - ) - info["target_atomwise_pred"] = ( - dats[0].ndata["atomwise_target"].cpu().numpy().tolist() - ) - info["pred_atomwise_pred"] = ( - result["atomwise_pred"].cpu().detach().numpy().tolist() - ) + if rank == 0 or world_size == 1: + test_loss = 0 + test_result = [] + for dats, jid in zip(test_loader, test_loader.dataset.ids): + # for dats in test_loader: + info = {} + info["id"] = jid + optimizer.zero_grad() + # print('dats[0]',dats[0]) + # print('test_loader',test_loader) + # print('test_loader.dataset.ids',test_loader.dataset.ids) + result = net([dats[0].to(device), dats[1].to(device)]) + loss1 = 0 # Such as energy + loss2 = 0 # Such as bader charges + loss3 = 0 # Such as forces + loss4 = 0 # Such as stresses + if ( + config.model.output_features is not None + and not classification + ): + # print('result["out"]',result["out"]) + # print('dats[2]',dats[2]) + loss1 = config.model.graphwise_weight * criterion( + result["out"], dats[2].to(device) + ) + info["target_out"] = dats[2].cpu().numpy().tolist() + info["pred_out"] = ( + result["out"].cpu().detach().numpy().tolist() + ) - if config.model.calculate_gradient: - loss3 = config.model.gradwise_weight * criterion( - result["grad"].to(device), - dats[0].ndata["atomwise_grad"].to(device), - ) - info["target_grad"] = ( - dats[0].ndata["atomwise_grad"].cpu().numpy().tolist() - ) - info["pred_grad"] = ( - result["grad"].cpu().detach().numpy().tolist() - ) - if config.model.stresswise_weight != 0: - loss4 = config.model.stresswise_weight * criterion( - # torch.flatten(result["stress"].to(device)), - # (dats[0].ndata["stresses"]).to(device), - # torch.flatten(dats[0].ndata["stresses"]).to(device), - 
result["stresses"].to(device), - torch.cat(tuple(dats[0].ndata["stresses"])).to(device), - # torch.flatten(torch.cat(dats[0].ndata["stresses"])).to(device), - # dats[0].ndata["stresses"][0].to(device), - ) - # loss4 = config.model.stresswise_weight * criterion( - # result["stress"][0].to(device), - # dats[0].ndata["stresses"].to(device), - # ) - info["target_stress"] = ( - torch.cat(tuple(dats[0].ndata["stresses"])) - .cpu() - .numpy() - .tolist() - ) - info["pred_stress"] = ( - result["stresses"].cpu().detach().numpy().tolist() - ) - test_result.append(info) - loss = loss1 + loss2 + loss3 + loss4 - if not classification: - test_loss += loss.item() - print("TestLoss", e, test_loss) - dumpjson( - filename=os.path.join(config.output_dir, "Test_results.json"), - data=test_result, - ) - last_model_name = "last_model.pt" - torch.save( - net.state_dict(), - os.path.join(config.output_dir, last_model_name), - ) - # return test_result - - if config.write_predictions and classification: - best_model.eval() - # net.eval() - f = open( - os.path.join(config.output_dir, "prediction_results_test_set.csv"), - "w", - ) - f.write("id,target,prediction\n") - targets = [] - predictions = [] - with torch.no_grad(): - ids = test_loader.dataset.ids # [test_loader.dataset.indices] - for dat, id in zip(test_loader, ids): - g, lg, target = dat - out_data = best_model([g.to(device), lg.to(device)])["out"] - # out_data = net([g.to(device), lg.to(device)])["out"] - # out_data = torch.exp(out_data.cpu()) - # print('target',target) - # print('out_data',out_data) - top_p, top_class = torch.topk(torch.exp(out_data), k=1) - target = int(target.cpu().numpy().flatten().tolist()[0]) - - f.write("%s, %d, %d\n" % (id, (target), (top_class))) - targets.append(target) - predictions.append( - top_class.cpu().numpy().flatten().tolist()[0] - ) - f.close() + if config.model.atomwise_output_features > 0: + loss2 = config.model.atomwise_weight * criterion( + result["atomwise_pred"].to(device), + dats[0].ndata["atomwise_target"].to(device), + ) + info["target_atomwise_pred"] = ( + dats[0].ndata["atomwise_target"].cpu().numpy().tolist() + ) + info["pred_atomwise_pred"] = ( + result["atomwise_pred"].cpu().detach().numpy().tolist() + ) - print("predictions", predictions) - print("targets", targets) - print( - "Test ROCAUC:", - roc_auc_score(np.array(targets), np.array(predictions)), - ) + if config.model.calculate_gradient: + loss3 = config.model.gradwise_weight * criterion( + result["grad"].to(device), + dats[0].ndata["atomwise_grad"].to(device), + ) + info["target_grad"] = ( + dats[0].ndata["atomwise_grad"].cpu().numpy().tolist() + ) + info["pred_grad"] = ( + result["grad"].cpu().detach().numpy().tolist() + ) + if config.model.stresswise_weight != 0: + loss4 = config.model.stresswise_weight * criterion( + # torch.flatten(result["stress"].to(device)), + # (dats[0].ndata["stresses"]).to(device), + # torch.flatten(dats[0].ndata["stresses"]).to(device), + result["stresses"].to(device), + torch.cat(tuple(dats[0].ndata["stresses"])).to(device), + # torch.flatten(torch.cat(dats[0].ndata["stresses"])).to(device), + # dats[0].ndata["stresses"][0].to(device), + ) + # loss4 = config.model.stresswise_weight * criterion( + # result["stress"][0].to(device), + # dats[0].ndata["stresses"].to(device), + # ) + info["target_stress"] = ( + torch.cat(tuple(dats[0].ndata["stresses"])) + .cpu() + .numpy() + .tolist() + ) + info["pred_stress"] = ( + result["stresses"].cpu().detach().numpy().tolist() + ) + test_result.append(info) + loss = loss1 + loss2 + loss3 
+ loss4 + if not classification: + test_loss += loss.item() + print("TestLoss", e, test_loss) + dumpjson( + filename=os.path.join(config.output_dir, "Test_results.json"), + data=test_result, + ) + last_model_name = "last_model.pt" + torch.save( + net.state_dict(), + os.path.join(config.output_dir, last_model_name), + ) + # return test_result + if rank == 0 or world_size == 1: + if config.write_predictions and classification: + best_model.eval() + # net.eval() + f = open( + os.path.join( + config.output_dir, "prediction_results_test_set.csv" + ), + "w", + ) + f.write("id,target,prediction\n") + targets = [] + predictions = [] + with torch.no_grad(): + ids = test_loader.dataset.ids # [test_loader.dataset.indices] + for dat, id in zip(test_loader, ids): + g, lg, target = dat + out_data = best_model([g.to(device), lg.to(device)])["out"] + # out_data = net([g.to(device), lg.to(device)])["out"] + # out_data = torch.exp(out_data.cpu()) + # print('target',target) + # print('out_data',out_data) + top_p, top_class = torch.topk(torch.exp(out_data), k=1) + target = int(target.cpu().numpy().flatten().tolist()[0]) + + f.write("%s, %d, %d\n" % (id, (target), (top_class))) + targets.append(target) + predictions.append( + top_class.cpu().numpy().flatten().tolist()[0] + ) + f.close() - if ( - config.write_predictions - and not classification - and config.model.output_features > 1 - ): - best_model.eval() - # net.eval() - mem = [] - with torch.no_grad(): - ids = test_loader.dataset.ids # [test_loader.dataset.indices] - for dat, id in zip(test_loader, ids): - g, lg, target = dat - out_data = best_model([g.to(device), lg.to(device)])["out"] - # out_data = net([g.to(device), lg.to(device)])["out"] - out_data = out_data.cpu().numpy().tolist() - if config.standard_scalar_and_pca: - sc = pk.load(open("sc.pkl", "rb")) - out_data = list( - sc.transform(np.array(out_data).reshape(1, -1))[0] - ) # [0][0] - target = target.cpu().numpy().flatten().tolist() - info = {} - info["id"] = id - info["target"] = target - info["predictions"] = out_data - mem.append(info) - dumpjson( - filename=os.path.join( - config.output_dir, "multi_out_predictions.json" - ), - data=mem, - ) - if ( - config.write_predictions - and not classification - and config.model.output_features == 1 - and config.model.gradwise_weight == 0 - ): - best_model.eval() - # net.eval() - f = open( - os.path.join(config.output_dir, "prediction_results_test_set.csv"), - "w", - ) - f.write("id,target,prediction\n") - targets = [] - predictions = [] - with torch.no_grad(): - ids = test_loader.dataset.ids # [test_loader.dataset.indices] - for dat, id in zip(test_loader, ids): - g, lg, target = dat - out_data = best_model([g.to(device), lg.to(device)])["out"] - # out_data = net([g.to(device), lg.to(device)])["out"] - out_data = out_data.cpu().numpy().tolist() - if config.standard_scalar_and_pca: - sc = pk.load( - open(os.path.join(tmp_output_dir, "sc.pkl"), "rb") - ) - out_data = sc.transform(np.array(out_data).reshape(-1, 1))[ - 0 - ][0] - target = target.cpu().numpy().flatten().tolist() - if len(target) == 1: - target = target[0] - f.write("%s, %6f, %6f\n" % (id, target, out_data)) - targets.append(target) - predictions.append(out_data) - f.close() - - print( - "Test MAE:", - mean_absolute_error(np.array(targets), np.array(predictions)), - ) - best_model.eval() - # net.eval() - f = open( - os.path.join( - config.output_dir, "prediction_results_train_set.csv" - ), - "w", - ) - f.write("target,prediction\n") - targets = [] - predictions = [] - with torch.no_grad(): - 
ids = train_loader.dataset.ids # [test_loader.dataset.indices] - for dat, id in zip(train_loader, ids): - g, lg, target = dat - out_data = best_model([g.to(device), lg.to(device)])["out"] - # out_data = net([g.to(device), lg.to(device)])["out"] - out_data = out_data.cpu().numpy().tolist() - if config.standard_scalar_and_pca: - sc = pk.load( - open(os.path.join(tmp_output_dir, "sc.pkl"), "rb") - ) - out_data = sc.transform(np.array(out_data).reshape(-1, 1))[ - 0 - ][0] - target = target.cpu().numpy().flatten().tolist() - # if len(target) == 1: - # target = target[0] - # if len(out_data) == 1: - # out_data = out_data[0] - for ii, jj in zip(target, out_data): - f.write("%6f, %6f\n" % (ii, jj)) - targets.append(ii) - predictions.append(jj) - f.close() + print("predictions", predictions) + print("targets", targets) + print( + "Test ROCAUC:", + roc_auc_score(np.array(targets), np.array(predictions)), + ) + + if ( + config.write_predictions + and not classification + and config.model.output_features > 1 + ): + best_model.eval() + # net.eval() + mem = [] + with torch.no_grad(): + ids = test_loader.dataset.ids # [test_loader.dataset.indices] + for dat, id in zip(test_loader, ids): + g, lg, target = dat + out_data = best_model([g.to(device), lg.to(device)])["out"] + # out_data = net([g.to(device), lg.to(device)])["out"] + out_data = out_data.cpu().numpy().tolist() + if config.standard_scalar_and_pca: + sc = pk.load(open("sc.pkl", "rb")) + out_data = list( + sc.transform(np.array(out_data).reshape(1, -1))[0] + ) # [0][0] + target = target.cpu().numpy().flatten().tolist() + info = {} + info["id"] = id + info["target"] = target + info["predictions"] = out_data + mem.append(info) + dumpjson( + filename=os.path.join( + config.output_dir, "multi_out_predictions.json" + ), + data=mem, + ) + if ( + config.write_predictions + and not classification + and config.model.output_features == 1 + and config.model.gradwise_weight == 0 + ): + best_model.eval() + # net.eval() + f = open( + os.path.join( + config.output_dir, "prediction_results_test_set.csv" + ), + "w", + ) + f.write("id,target,prediction\n") + targets = [] + predictions = [] + with torch.no_grad(): + ids = test_loader.dataset.ids # [test_loader.dataset.indices] + for dat, id in zip(test_loader, ids): + g, lg, target = dat + out_data = best_model([g.to(device), lg.to(device)])["out"] + # out_data = net([g.to(device), lg.to(device)])["out"] + out_data = out_data.cpu().numpy().tolist() + if config.standard_scalar_and_pca: + sc = pk.load( + open(os.path.join(tmp_output_dir, "sc.pkl"), "rb") + ) + out_data = sc.transform( + np.array(out_data).reshape(-1, 1) + )[0][0] + target = target.cpu().numpy().flatten().tolist() + if len(target) == 1: + target = target[0] + f.write("%s, %6f, %6f\n" % (id, target, out_data)) + targets.append(target) + predictions.append(out_data) + f.close() + + print( + "Test MAE:", + mean_absolute_error(np.array(targets), np.array(predictions)), + ) + best_model.eval() + # net.eval() + f = open( + os.path.join( + config.output_dir, "prediction_results_train_set.csv" + ), + "w", + ) + f.write("target,prediction\n") + targets = [] + predictions = [] + with torch.no_grad(): + ids = train_loader.dataset.ids # [test_loader.dataset.indices] + for dat, id in zip(train_loader, ids): + g, lg, target = dat + out_data = best_model([g.to(device), lg.to(device)])["out"] + # out_data = net([g.to(device), lg.to(device)])["out"] + out_data = out_data.cpu().numpy().tolist() + if config.standard_scalar_and_pca: + sc = pk.load( + 
open(os.path.join(tmp_output_dir, "sc.pkl"), "rb") + ) + out_data = sc.transform( + np.array(out_data).reshape(-1, 1) + )[0][0] + target = target.cpu().numpy().flatten().tolist() + # if len(target) == 1: + # target = target[0] + # if len(out_data) == 1: + # out_data = out_data[0] + for ii, jj in zip(target, out_data): + f.write("%6f, %6f\n" % (ii, jj)) + targets.append(ii) + predictions.append(jj) + f.close() if __name__ == "__main__": diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index a60ea8d4..c49bbee0 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -24,11 +24,18 @@ def setup(rank, world_size): """Set up multi GPU rank.""" - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - # Initialize the distributed environment. - dist.init_process_group("nccl", rank=rank, world_size=world_size) - torch.cuda.set_device(rank) + if world_size > 1: + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + # Initialize the distributed environment. + dist.init_process_group("nccl", rank=rank, world_size=world_size) + torch.cuda.set_device(rank) + + +def cleanup(world_size): + """Clean up distributed process.""" + if world_size > 1: + dist.destroy_process_group() parser = argparse.ArgumentParser( @@ -408,9 +415,30 @@ def train_for_folder( args = parser.parse_args(sys.argv[1:]) world_size = int(torch.cuda.device_count()) print("world_size", world_size) - torch.multiprocessing.spawn( - train_for_folder, - args=( + if world_size > 1: + torch.multiprocessing.spawn( + train_for_folder, + args=( + world_size, + args.root_dir, + args.config_name, + args.classification_threshold, + args.batch_size, + args.epochs, + args.id_key, + args.target_key, + args.atomwise_key, + args.force_key, + args.stresswise_key, + args.file_format, + args.restart_model_path, + args.output_dir, + ), + nprocs=world_size, + ) + else: + train_for_folder( + 0, world_size, args.root_dir, args.config_name, @@ -425,6 +453,8 @@ def train_for_folder( args.file_format, args.restart_model_path, args.output_dir, - ), - nprocs=world_size, - ) + ) + try: + cleanup(world_size) + except Exception: + pass From 114cc9e611fcf5c2a0f64cc3c2518a395cad3228 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sun, 28 Apr 2024 16:22:51 -0400 Subject: [PATCH 05/22] Lint fix. --- alignn/data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/alignn/data.py b/alignn/data.py index af378871..dddf577f 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -313,7 +313,6 @@ def get_train_val_loaders( rank=0, ): """Help function to set up JARVIS train and val dataloaders.""" - train_sample = filename + "_train.data" val_sample = filename + "_val.data" test_sample = filename + "_test.data" From 6b1103090197348fa20de181974fb50f04fe5902 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sun, 28 Apr 2024 16:40:00 -0400 Subject: [PATCH 06/22] CPU and GPU trainings. --- alignn/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alignn/train.py b/alignn/train.py index 363aa8f5..4c7d92ed 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -195,7 +195,7 @@ def train_dgl( standard_scalar_and_pca=config.standard_scalar_and_pca, keep_data_order=config.keep_data_order, output_dir=config.output_dir, - use_ddp=use_ddp, + # use_ddp=use_ddp, ) else: train_loader = train_val_test_loaders[0] From 9eb5c9c8907555bc30f57bedc4f1cdf96d5abc30 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sun, 28 Apr 2024 17:32:38 -0400 Subject: [PATCH 07/22] Pytest fix. 
--- alignn/tests/test_prop.py | 20 ++++++++++++++++---- alignn/train_alignn.py | 30 +++++++++++++++--------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/alignn/tests/test_prop.py b/alignn/tests/test_prop.py index aee4b638..d382db5e 100644 --- a/alignn/tests/test_prop.py +++ b/alignn/tests/test_prop.py @@ -12,6 +12,7 @@ from alignn.train_alignn import train_for_folder from jarvis.db.figshare import get_jid_data from alignn.ff.ff import AlignnAtomwiseCalculator, default_path, revised_path +import torch plt.switch_backend("agg") @@ -137,7 +138,10 @@ def test_alignn_train(): "../examples/sample_data/config_example.json", ) ) - train_for_folder(root_dir=root_dir, config_name=config) + world_size = int(torch.cuda.device_count()) + train_for_folder( + rank=0, world_size=world_size, root_dir=root_dir, config_name=config + ) root_dir = os.path.abspath( os.path.join( @@ -150,7 +154,9 @@ def test_alignn_train(): "../examples/sample_data/config_example.json", ) ) - train_for_folder(root_dir=root_dir, config_name=config) + train_for_folder( + rank=0, world_size=world_size, root_dir=root_dir, config_name=config + ) root_dir = os.path.abspath( os.path.join(os.path.dirname(__file__), "../examples/sample_data/") @@ -162,7 +168,11 @@ def test_alignn_train(): ) ) train_for_folder( - root_dir=root_dir, config_name=config, classification_threshold=0.01 + rank=0, + world_size=world_size, + root_dir=root_dir, + config_name=config, + classification_threshold=0.01, ) root_dir = os.path.abspath( @@ -174,7 +184,9 @@ def test_alignn_train(): "../examples/sample_data_ff/config_example_atomwise.json", ) ) - train_for_folder(root_dir=root_dir, config_name=config) + train_for_folder( + rank=0, world_size=world_size, root_dir=root_dir, config_name=config + ) def test_calculator(): diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index c49bbee0..76a25c56 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -132,21 +132,21 @@ def cleanup(world_size): def train_for_folder( - rank, - world_size, - root_dir, - config_name, - classification_threshold, - batch_size, - epochs, - id_key, - target_key, - atomwise_key, - gradwise_key, - stresswise_key, - file_format, - restart_model_path, - output_dir, + rank=0, + world_size=0, + root_dir="examples/sample_data", + config_name="config.json", + classification_threshold=None, + batch_size=None, + epochs=None, + id_key="jid", + target_key="total_energy", + atomwise_key="forces", + gradwise_key="forces", + stresswise_key="stresses", + file_format="poscar", + restart_model_path=None, + output_dir=None, ): """Train for a folder.""" setup(rank, world_size) From 4fa6accba6214baaa01445b9dfb747b84a86ec9e Mon Sep 17 00:00:00 2001 From: knc6 Date: Tue, 30 Apr 2024 12:05:19 -0400 Subject: [PATCH 08/22] First try LMDB. 
--- alignn/data.py | 166 ++------------------------------------- alignn/dataset.py | 164 ++++++++++++++++++++++++++++++++++++++ alignn/lmdb_dataset.py | 173 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 344 insertions(+), 159 deletions(-) create mode 100644 alignn/dataset.py create mode 100644 alignn/lmdb_dataset.py diff --git a/alignn/data.py b/alignn/data.py index dddf577f..cb6ea654 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -1,29 +1,19 @@ """ALIGNN data loaders and DGLGraph utilities.""" import random -from pathlib import Path from typing import Optional from torch.utils.data.distributed import DistributedSampler import os import torch -import dgl import numpy as np -import pandas as pd -from jarvis.core.atoms import Atoms -from alignn.graphs import Graph, StructureDataset from jarvis.db.figshare import data as jdata from tqdm import tqdm import math from jarvis.db.jsonutils import dumpjson from dgl.dataloading import GraphDataLoader - -# from sklearn.pipeline import Pipeline import pickle as pk - -# from sklearn.decomposition import PCA # ,KernelPCA from sklearn.preprocessing import StandardScaler -# use pandas progress_apply tqdm.pandas() @@ -64,96 +54,6 @@ def mean_absolute_deviation(data, axis=None): return np.mean(np.absolute(data - np.mean(data, axis)), axis) -def load_graphs( - dataset=[], - name: str = "dft_3d", - neighbor_strategy: str = "k-nearest", - cutoff: float = 8, - cutoff_extra: float = 3, - max_neighbors: int = 12, - cachedir: Optional[Path] = None, - use_canonize: bool = False, - id_tag="jid", - # extra_feats_json=None, -): - """Construct crystal graphs. - - Load only atomic number node features - and bond displacement vector edge features. - - Resulting graphs have scheme e.g. - ``` - Graph(num_nodes=12, num_edges=156, - ndata_schemes={'atom_features': Scheme(shape=(1,)} - edata_schemes={'r': Scheme(shape=(3,)}) - ``` - """ - - def atoms_to_graph(atoms): - """Convert structure dict to DGLGraph.""" - structure = ( - Atoms.from_dict(atoms) if isinstance(atoms, dict) else atoms - ) - return Graph.atom_dgl_multigraph( - structure, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - atom_features="atomic_number", - max_neighbors=max_neighbors, - compute_line_graph=False, - use_canonize=use_canonize, - neighbor_strategy=neighbor_strategy, - ) - - if cachedir is not None: - cachefile = cachedir / f"{name}-{neighbor_strategy}.bin" - else: - cachefile = None - - if cachefile is not None and cachefile.is_file(): - graphs, labels = dgl.load_graphs(str(cachefile)) - else: - # print('dataset',dataset,type(dataset)) - print("Converting to graphs!") - graphs = [] - # columns=dataset.columns - for ii, i in tqdm(dataset.iterrows(), total=len(dataset)): - # print('iooooo',i) - atoms = i["atoms"] - structure = ( - Atoms.from_dict(atoms) if isinstance(atoms, dict) else atoms - ) - g = Graph.atom_dgl_multigraph( - structure, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - atom_features="atomic_number", - max_neighbors=max_neighbors, - compute_line_graph=False, - use_canonize=use_canonize, - neighbor_strategy=neighbor_strategy, - id=i[id_tag], - ) - # print ('ii',ii) - if "extra_features" in i: - natoms = len(atoms["elements"]) - # if "extra_features" in columns: - g.ndata["extra_features"] = torch.tensor( - [i["extra_features"] for n in range(natoms)] - ).type(torch.get_default_dtype()) - graphs.append(g) - - # df = pd.DataFrame(dataset) - # print ('df',df) - - # graphs = df["atoms"].progress_apply(atoms_to_graph).values - # print ('graphs',graphs,graphs[0]) - if 
cachefile is not None: - dgl.save_graphs(str(cachefile), graphs.tolist()) - - return graphs - - def get_id_train_val_test( total_size=1000, split_seed=123, @@ -215,65 +115,6 @@ def get_id_train_val_test( return id_train, id_val, id_test -def get_torch_dataset( - dataset=[], - id_tag="jid", - target="", - target_atomwise="", - target_grad="", - target_stress="", - neighbor_strategy="", - atom_features="", - use_canonize="", - name="", - line_graph="", - cutoff=8.0, - cutoff_extra=3.0, - max_neighbors=12, - classification=False, - output_dir=".", - tmp_name="dataset", - sampler=None, -): - """Get Torch Dataset.""" - df = pd.DataFrame(dataset) - # df['natoms']=df['atoms'].apply(lambda x: len(x['elements'])) - # print(" data df", df) - vals = np.array([ii[target] for ii in dataset]) # df[target].values - print("data range", np.max(vals), np.min(vals)) - f = open(os.path.join(output_dir, tmp_name + "_data_range"), "w") - line = "Max=" + str(np.max(vals)) + "\n" - f.write(line) - line = "Min=" + str(np.min(vals)) + "\n" - f.write(line) - f.close() - - graphs = load_graphs( - df, - name=name, - neighbor_strategy=neighbor_strategy, - use_canonize=use_canonize, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - max_neighbors=max_neighbors, - id_tag=id_tag, - ) - data = StructureDataset( - df, - graphs, - target=target, - target_atomwise=target_atomwise, - target_grad=target_grad, - target_stress=target_stress, - atom_features=atom_features, - line_graph=line_graph, - id_tag=id_tag, - classification=classification, - sampler=sampler, - ) - return data - - def get_train_val_loaders( dataset: str = "dft_3d", dataset_array=None, @@ -311,8 +152,15 @@ def get_train_val_loaders( output_dir=None, world_size=0, rank=0, + use_lmdb: bool = False, ): """Help function to set up JARVIS train and val dataloaders.""" + if use_lmdb: + print("Using LMDB dataset.") + from alignn.lmdb_dataset import get_torch_dataset + else: + print("Not using LMDB dataset, memory footprint maybe high.") + from alignn.dataset import get_torch_dataset train_sample = filename + "_train.data" val_sample = filename + "_val.data" test_sample = filename + "_test.data" diff --git a/alignn/dataset.py b/alignn/dataset.py new file mode 100644 index 00000000..6baec251 --- /dev/null +++ b/alignn/dataset.py @@ -0,0 +1,164 @@ +"""Module to prepare ALIGNN dataset.""" + +from pathlib import Path +from typing import Optional +import os +import torch +import dgl +import numpy as np +import pandas as pd +from jarvis.core.atoms import Atoms +from alignn.graphs import Graph, StructureDataset +from tqdm import tqdm + +tqdm.pandas() + + +def load_graphs( + dataset=[], + name: str = "dft_3d", + neighbor_strategy: str = "k-nearest", + cutoff: float = 8, + cutoff_extra: float = 3, + max_neighbors: int = 12, + cachedir: Optional[Path] = None, + use_canonize: bool = False, + id_tag="jid", + # extra_feats_json=None, + map_size=1e12, +): + """Construct crystal graphs. + + Load only atomic number node features + and bond displacement vector edge features. + + Resulting graphs have scheme e.g. 
+ ``` + Graph(num_nodes=12, num_edges=156, + ndata_schemes={'atom_features': Scheme(shape=(1,)} + edata_schemes={'r': Scheme(shape=(3,)}) + ``` + """ + + def atoms_to_graph(atoms): + """Convert structure dict to DGLGraph.""" + structure = ( + Atoms.from_dict(atoms) if isinstance(atoms, dict) else atoms + ) + return Graph.atom_dgl_multigraph( + structure, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + atom_features="atomic_number", + max_neighbors=max_neighbors, + compute_line_graph=False, + use_canonize=use_canonize, + neighbor_strategy=neighbor_strategy, + ) + + if cachedir is not None: + cachefile = cachedir / f"{name}-{neighbor_strategy}.bin" + else: + cachefile = None + + if cachefile is not None and cachefile.is_file(): + graphs, labels = dgl.load_graphs(str(cachefile)) + else: + # print('dataset',dataset,type(dataset)) + print("Converting to graphs!") + graphs = [] + # columns=dataset.columns + for ii, i in tqdm(dataset.iterrows(), total=len(dataset)): + # print('iooooo',i) + atoms = i["atoms"] + structure = ( + Atoms.from_dict(atoms) if isinstance(atoms, dict) else atoms + ) + g = Graph.atom_dgl_multigraph( + structure, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + atom_features="atomic_number", + max_neighbors=max_neighbors, + compute_line_graph=False, + use_canonize=use_canonize, + neighbor_strategy=neighbor_strategy, + id=i[id_tag], + ) + # print ('ii',ii) + if "extra_features" in i: + natoms = len(atoms["elements"]) + # if "extra_features" in columns: + g.ndata["extra_features"] = torch.tensor( + [i["extra_features"] for n in range(natoms)] + ).type(torch.get_default_dtype()) + graphs.append(g) + + # df = pd.DataFrame(dataset) + # print ('df',df) + + # graphs = df["atoms"].progress_apply(atoms_to_graph).values + # print ('graphs',graphs,graphs[0]) + if cachefile is not None: + dgl.save_graphs(str(cachefile), graphs.tolist()) + + return graphs + + +def get_torch_dataset( + dataset=[], + id_tag="jid", + target="", + target_atomwise="", + target_grad="", + target_stress="", + neighbor_strategy="", + atom_features="", + use_canonize="", + name="", + line_graph="", + cutoff=8.0, + cutoff_extra=3.0, + max_neighbors=12, + classification=False, + output_dir=".", + tmp_name="dataset", + sampler=None, +): + """Get Torch Dataset.""" + df = pd.DataFrame(dataset) + # df['natoms']=df['atoms'].apply(lambda x: len(x['elements'])) + # print(" data df", df) + vals = np.array([ii[target] for ii in dataset]) # df[target].values + print("data range", np.max(vals), np.min(vals)) + f = open(os.path.join(output_dir, tmp_name + "_data_range"), "w") + line = "Max=" + str(np.max(vals)) + "\n" + f.write(line) + line = "Min=" + str(np.min(vals)) + "\n" + f.write(line) + f.close() + + graphs = load_graphs( + df, + name=name, + neighbor_strategy=neighbor_strategy, + use_canonize=use_canonize, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + max_neighbors=max_neighbors, + id_tag=id_tag, + ) + data = StructureDataset( + df, + graphs, + target=target, + target_atomwise=target_atomwise, + target_grad=target_grad, + target_stress=target_stress, + atom_features=atom_features, + line_graph=line_graph, + id_tag=id_tag, + classification=classification, + sampler=sampler, + ) + return data diff --git a/alignn/lmdb_dataset.py b/alignn/lmdb_dataset.py new file mode 100644 index 00000000..98ada8d6 --- /dev/null +++ b/alignn/lmdb_dataset.py @@ -0,0 +1,173 @@ +"""Module to prepare LMDB ALIGNN dataset.""" + +import os +import numpy as np +import lmdb +from jarvis.core.atoms import Atoms +from jarvis.db.figshare import data 
+from alignn.graphs import Graph
+import pickle as pk
+from torch.utils.data import Dataset
+import torch
+from tqdm import tqdm
+from typing import List, Tuple
+import dgl
+
+
+def prepare_line_graph_batch(
+    batch: Tuple[Tuple[dgl.DGLGraph, dgl.DGLGraph], torch.Tensor],
+    device=None,
+    non_blocking=False,
+):
+    """Send line graph batch to device.
+
+    Note: the batch is a nested tuple, with the graph and line graph together
+    """
+    g, lg, t, id = batch
+    batch = (
+        (
+            g.to(device, non_blocking=non_blocking),
+            lg.to(device, non_blocking=non_blocking),
+        ),
+        t.to(device, non_blocking=non_blocking),
+    )
+
+    return batch
+
+
+class TorchLMDBDataset(Dataset):
+    """Dataset of crystal DGLGraphs using LMDB."""
+
+    def __init__(self, lmdb_path="", ids=[]):
+        """Initialize with path and ids array."""
+        super(TorchLMDBDataset, self).__init__()
+        self.lmdb_path = lmdb_path
+        self.ids = ids
+        self.env = lmdb.open(self.lmdb_path, readonly=True, lock=False)
+        with self.env.begin() as txn:
+            self.length = txn.stat()["entries"]
+        self.prepare_batch = prepare_line_graph_batch
+
+    def __len__(self):
+        """Get length."""
+        return self.length
+
+    def __getitem__(self, idx):
+        """Get sample."""
+        with self.env.begin() as txn:
+            serialized_data = txn.get(f"{idx}".encode())
+        graph, line_graph, label = pk.loads(serialized_data)
+        return graph, line_graph, label
+
+    def close(self):
+        """Close connection."""
+        self.env.close()
+
+    def __del__(self):
+        """Delete connection."""
+        self.close()
+
+    @staticmethod
+    def collate(samples: List[Tuple[dgl.DGLGraph, torch.Tensor]]):
+        """Dataloader helper to batch graphs across `samples`."""
+        graphs, labels = map(list, zip(*samples))
+        batched_graph = dgl.batch(graphs)
+        return batched_graph, torch.tensor(labels)
+
+    @staticmethod
+    def collate_line_graph(
+        samples: List[Tuple[dgl.DGLGraph, dgl.DGLGraph, torch.Tensor]]
+    ):
+        """Dataloader helper to batch graphs across `samples`."""
+        graphs, line_graphs, labels = map(list, zip(*samples))
+        batched_graph = dgl.batch(graphs)
+        batched_line_graph = dgl.batch(line_graphs)
+        if len(labels[0].size()) > 0:
+            return batched_graph, batched_line_graph, torch.stack(labels)
+        else:
+            return batched_graph, batched_line_graph, torch.tensor(labels)
+
+
+def get_torch_dataset(
+    dataset=[],
+    id_tag="jid",
+    target="",
+    target_atomwise="",
+    target_grad="",
+    target_stress="",
+    neighbor_strategy="k-nearest",
+    atom_features="cgcnn",
+    use_canonize="",
+    name="",
+    line_graph=True,
+    cutoff=8.0,
+    cutoff_extra=3.0,
+    max_neighbors=12,
+    classification=False,
+    sampler=None,
+    output_dir=".",
+    tmp_name="dataset",
+    map_size=1e12,
+):
+    """Get Torch Dataset with LMDB."""
+    vals = np.array([ii[target] for ii in dataset])  # df[target].values
+    print("data range", np.max(vals), np.min(vals))
+    f = open(os.path.join(output_dir, tmp_name + "_data_range"), "w")
+    line = "Max=" + str(np.max(vals)) + "\n"
+    f.write(line)
+    line = "Min=" + str(np.min(vals)) + "\n"
+    f.write(line)
+    f.close()
+    ids = []
+    env = lmdb.open(tmp_name, map_size=int(map_size))
+    with env.begin(write=True) as txn:
+        for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)):
+            ids.append(d[id_tag])
+            g, lg = Graph.atom_dgl_multigraph(
+                Atoms.from_dict(d["atoms"]),
+                cutoff=float(cutoff),
+                max_neighbors=max_neighbors,
+                atom_features=atom_features,
+                compute_line_graph=line_graph,
+                use_canonize=use_canonize,
+                cutoff_extra=cutoff_extra,
+            )
+            label = torch.tensor(d[target]).type(torch.get_default_dtype())
+            # print('label',label,label.view(-1).long())
+            if
classification: + label = label.long() + # label = label.view(-1).long() + if "extra_features" in d: + natoms = len(d["atoms"]["elements"]) + g.ndata["extra_features"] = torch.tensor( + [d["extra_features"] for n in range(natoms)] + ).type(torch.get_default_dtype()) + if target_atomwise is not None and target_atomwise != "": + g.ndata[target_atomwise] = torch.tensor( + np.array(d[target_atomwise]) + ).type(torch.get_default_dtype()) + if target_grad is not None and target_grad != "": + g.ndata[target_grad] = torch.tensor( + np.array(d[target_grad]) + ).type(torch.get_default_dtype()) + if target_stress is not None and target_stress != "": + stress = np.array(d[target_stress]) + g.ndata[target_stress] = torch.tensor( + np.array([stress for ii in range(g.number_of_nodes())]) + ).type(torch.get_default_dtype()) + + # labels.append(label) + serialized_data = pk.dumps((g, lg, label)) + txn.put(f"{idx}".encode(), serialized_data) + + env.close() + lmdb_dataset = TorchLMDBDataset(lmdb_path=tmp_name, ids=ids) + return lmdb_dataset + + +if __name__ == "__main__": + dataset = data("dft_2d") + lmdb_dataset = get_torch_dataset( + dataset=dataset, target="formation_energy_peratom" + ) + print(lmdb_dataset) From 4f6f871dee0861515116e758844326c21fcb6671 Mon Sep 17 00:00:00 2001 From: knc6 Date: Tue, 30 Apr 2024 12:39:56 -0400 Subject: [PATCH 09/22] Add use_lmdb config option. --- alignn/config.py | 3 ++- alignn/data.py | 4 ++-- .../sample_data_ff/config_example_atomwise.json | 10 +++++++--- alignn/train.py | 10 +++++++--- alignn/train_alignn.py | 5 +++-- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/alignn/config.py b/alignn/config.py index c8a21279..d7c807ae 100644 --- a/alignn/config.py +++ b/alignn/config.py @@ -207,7 +207,8 @@ class TrainingConfig(BaseSettings): distributed: bool = False data_parallel: bool = False n_early_stopping: Optional[int] = None # typically 50 - output_dir: str = os.path.abspath(".") # typically 50 + output_dir: str = os.path.abspath(".") + use_lmdb: bool = True # alignn_layers: int = 4 # gcn_layers: int =4 # edge_input_features: int= 80 diff --git a/alignn/data.py b/alignn/data.py index cb6ea654..a542a02f 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -139,7 +139,7 @@ def get_train_val_loaders( save_dataloader: bool = False, filename: str = "sample", id_tag: str = "jid", - use_canonize: bool = False, + use_canonize: bool = True, # use_ddp: bool = False, cutoff: float = 8.0, cutoff_extra: float = 3.0, @@ -152,7 +152,7 @@ def get_train_val_loaders( output_dir=None, world_size=0, rank=0, - use_lmdb: bool = False, + use_lmdb: bool = True, ): """Help function to set up JARVIS train and val dataloaders.""" if use_lmdb: diff --git a/alignn/examples/sample_data_ff/config_example_atomwise.json b/alignn/examples/sample_data_ff/config_example_atomwise.json index 7c761a4d..2b915ddb 100644 --- a/alignn/examples/sample_data_ff/config_example_atomwise.json +++ b/alignn/examples/sample_data_ff/config_example_atomwise.json @@ -31,12 +31,13 @@ "progress": true, "log_tensorboard": false, "standard_scalar_and_pca": false, - "use_canonize": false, + "use_canonize": true, "num_workers": 0, "cutoff": 8.0, "max_neighbors": 12, "keep_data_order": true, "distributed":false, + "use_lmdb": true, "model": { "name": "alignn_atomwise", "atom_input_features": 92, @@ -48,7 +49,10 @@ "graphwise_weight":0.85, "gradwise_weight":0.05, "atomwise_weight":0.0, - "stresswise_weight":0.05 - + "stresswise_weight":0.05, + "add_reverse_forces":true, + "lg_on_fly":true + + } } diff --git 
a/alignn/train.py b/alignn/train.py index 4c7d92ed..a4822f84 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -123,7 +123,8 @@ def train_dgl( # print("rank", rank) # setup(rank, world_size) if rank == 0: - print(config) + print("config:") + # print(config) if type(config) is dict: try: print(config) @@ -136,7 +137,6 @@ def train_dgl( # checkpoint_dir = os.path.join(config.output_dir) # deterministic = False classification = False - print("config:") tmp = config.dict() f = open(os.path.join(config.output_dir, "config.json"), "w") f.write(json.dumps(tmp, indent=4)) @@ -195,7 +195,7 @@ def train_dgl( standard_scalar_and_pca=config.standard_scalar_and_pca, keep_data_order=config.keep_data_order, output_dir=config.output_dir, - # use_ddp=use_ddp, + use_lmdb=config.use_lmdb, ) else: train_loader = train_val_test_loaders[0] @@ -876,6 +876,10 @@ def get_batch_errors(dat=[]): targets.append(ii) predictions.append(jj) f.close() + if config.use_lmdb: + train_loader.dataset.close() + val_loader.dataset.close() + test_loader.dataset.close() if __name__ == "__main__": diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index 76a25c56..5f2b5016 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -387,12 +387,13 @@ def train_for_folder( standard_scalar_and_pca=config.standard_scalar_and_pca, keep_data_order=config.keep_data_order, output_dir=config.output_dir, + use_lmdb=config.use_lmdb, ) # print("dataset", dataset[0]) t1 = time.time() # world_size = torch.cuda.device_count() - print("rank ht1", rank) - print("world_size ht1", world_size) + print("rank", rank) + print("world_size", world_size) train_dgl( config, model=model, From 3f7ada2f8bcbe2251d0d0f0cf6dc7f69f360b89c Mon Sep 17 00:00:00 2001 From: knc6 Date: Tue, 30 Apr 2024 12:41:36 -0400 Subject: [PATCH 10/22] Add use_lmdb config option. --- alignn/pretrained.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alignn/pretrained.py b/alignn/pretrained.py index b5ddd9c8..6522689b 100644 --- a/alignn/pretrained.py +++ b/alignn/pretrained.py @@ -6,7 +6,7 @@ import zipfile from tqdm import tqdm from alignn.models.alignn import ALIGNN, ALIGNNConfig -from alignn.data import get_torch_dataset +from alignn.dataset import get_torch_dataset from torch.utils.data import DataLoader import tempfile import torch From 0428fe14abaeeb01234af7d8b57592b27e8a190a Mon Sep 17 00:00:00 2001 From: knc6 Date: Tue, 30 Apr 2024 13:16:56 -0400 Subject: [PATCH 11/22] Version update. 
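
Besides bumping the version to 2024.4.20, this records the new lmdb
dependency in environment.yml and setup.py and gives
get_multiple_predictions() a use_lmdb flag (default True) that selects the
dataset backend at call time, mirroring the switch already added to
data.py:

    # Backend selection pattern introduced here (illustrative excerpt).
    if use_lmdb:
        from alignn.lmdb_dataset import get_torch_dataset
    else:
        from alignn.dataset import get_torch_dataset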
--- alignn/__init__.py | 2 +- alignn/pretrained.py | 9 ++++++++- environment.yml | 1 + setup.py | 3 ++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/alignn/__init__.py b/alignn/__init__.py index 83674b0f..86b75d31 100644 --- a/alignn/__init__.py +++ b/alignn/__init__.py @@ -1,3 +1,3 @@ """Version number.""" -__version__ = "2024.4.10" +__version__ = "2024.4.20" diff --git a/alignn/pretrained.py b/alignn/pretrained.py index 6522689b..b3c7f8a3 100644 --- a/alignn/pretrained.py +++ b/alignn/pretrained.py @@ -6,7 +6,6 @@ import zipfile from tqdm import tqdm from alignn.models.alignn import ALIGNN, ALIGNNConfig -from alignn.dataset import get_torch_dataset from torch.utils.data import DataLoader import tempfile import torch @@ -340,8 +339,16 @@ def get_multiple_predictions( model=None, model_name="jv_formation_energy_peratom_alignn", print_freq=100, + use_lmdb=True, ): """Use pretrained model on a number of structures.""" + if use_lmdb: + print("Using LMDB dataset.") + from alignn.lmdb_dataset import get_torch_dataset + else: + print("Not using LMDB dataset, memory footprint may be high.") + from alignn.dataset import get_torch_dataset + # import glob # atoms_array=[] # for i in glob.glob("alignn/examples/sample_data/*.vasp"): diff --git a/environment.yml b/environment.yml index c7460d58..886addb5 100644 --- a/environment.yml +++ b/environment.yml @@ -168,6 +168,7 @@ dependencies: - pysocks=1.7.1=pyha2e5f31_6 - pytest=8.1.1=pyhd8ed1ab_0 - python=3.10.13=hd12c33a_0_cpython + - python-lmdb=1.4.1=py310hdf73078_1 - python-tzdata=2024.1=pyhd8ed1ab_0 - python_abi=3.10=4_cp310 - pytz=2024.1=pyhd8ed1ab_0 diff --git a/setup.py b/setup.py index e075929c..c3f078f3 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="alignn", - version="2024.4.10", + version="2024.4.20", author="Kamal Choudhary, Brian DeCost", author_email="kamal.choudhary@nist.gov", description="alignn", @@ -33,6 +33,7 @@ "pydocstyle>=6.0.0", "pyparsing>=2.2.1,<3", "ase", + "lmdb", # "pytorch-ignite>=0.5.0.dev20221024", # "accelerate>=0.20.3", # "dgl-cu101>=0.6.0", From 8bf645a108a87066ae46e629da52efc0f963420c Mon Sep 17 00:00:00 2001 From: knc6 Date: Tue, 30 Apr 2024 13:30:00 -0400 Subject: [PATCH 12/22] Version update.
--- alignn/pretrained.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/alignn/pretrained.py b/alignn/pretrained.py index b3c7f8a3..6e49e5b2 100644 --- a/alignn/pretrained.py +++ b/alignn/pretrained.py @@ -16,6 +16,7 @@ from alignn.graphs import Graph from jarvis.db.jsonutils import dumpjson import pandas as pd +from alignn.dataset import get_torch_dataset # from jarvis.core.graphs import Graph @@ -339,15 +340,15 @@ def get_multiple_predictions( model=None, model_name="jv_formation_energy_peratom_alignn", print_freq=100, - use_lmdb=True, + # use_lmdb=True, ): """Use pretrained model on a number of structures.""" - if use_lmdb: - print("Using LMDB dataset.") - from alignn.lmdb_dataset import get_torch_dataset - else: - print("Not using LMDB dataset, memory footprint may be high.") - from alignn.dataset import get_torch_dataset + # if use_lmdb: + # print("Using LMDB dataset.") + # from alignn.lmdb_dataset import get_torch_dataset + # else: + # print("Not using LMDB dataset, memory footprint may be high.") + # from alignn.dataset import get_torch_dataset # import glob # atoms_array=[] From 7e30e311b5d9ec43073291b7e0efa670807e1821 Mon Sep 17 00:00:00 2001 From: knc6 Date: Tue, 30 Apr 2024 14:07:19 -0400 Subject: [PATCH 13/22] Make sure to remove previous lmdb files. --- alignn/data.py | 4 ++++ alignn/tests/test_prop.py | 27 +++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index a542a02f..3192c549 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -164,6 +164,10 @@ def get_train_val_loaders( train_sample = filename + "_train.data" val_sample = filename + "_val.data" test_sample = filename + "_test.data" + if os.path.exists(train_sample): + print("If you are training from scratch, run:") + cmd = "rm -r " + train_sample + " " + val_sample + " " + test_sample + print(cmd) # print ('output_dir data',output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) diff --git a/alignn/tests/test_prop.py b/alignn/tests/test_prop.py index d382db5e..c31f1c42 100644 --- a/alignn/tests/test_prop.py +++ b/alignn/tests/test_prop.py @@ -128,7 +128,13 @@ def test_pretrained(): get_multiple_predictions(atoms_array=[Si, Si]) -def test_alignn_train(): +world_size = int(torch.cuda.device_count()) + + +def test_alignn_train_regression(): + # Regression + cmd = "rm -rf train_data test_data val_data" + os.system(cmd) root_dir = os.path.abspath( os.path.join(os.path.dirname(__file__), "../examples/sample_data/") ) @@ -138,11 +144,15 @@ def test_alignn_train(): "../examples/sample_data/config_example.json", ) ) - world_size = int(torch.cuda.device_count()) train_for_folder( rank=0, world_size=world_size, root_dir=root_dir, config_name=config ) + +def test_alignn_train_regression_multi_out(): + cmd = "rm -rf train_data test_data val_data" + os.system(cmd) + # Regression multi-out root_dir = os.path.abspath( os.path.join( os.path.dirname(__file__), "../examples/sample_data_multi_prop/" ) ) config = os.path.abspath( os.path.join( os.path.dirname(__file__), "../examples/sample_data/config_example.json", ) ) train_for_folder( rank=0, world_size=world_size, root_dir=root_dir, config_name=config ) + +def test_alignn_train_classification(): + cmd = "rm -rf train_data test_data val_data" + os.system(cmd) + # Classification root_dir = os.path.abspath( os.path.join(os.path.dirname(__file__), "../examples/sample_data/") ) config = os.path.abspath( os.path.join( os.path.dirname(__file__), "../examples/sample_data/config_example.json", ) ) train_for_folder( rank=0, world_size=world_size, root_dir=root_dir, config_name=config, classification_threshold=0.01, ) + +def test_alignn_train_ff(): + cmd = "rm -rf train_data test_data val_data" + os.system(cmd) + # FF root_dir =
os.path.abspath( os.path.join(os.path.dirname(__file__), "../examples/sample_data_ff/") ) @@ -248,6 +268,9 @@ def test_del_files(): os.system(cmd) +# test_alignn_train_ff() +# test_alignn_train_classification() +# test_alignn_train() # test_minor_configs() # test_pretrained() # test_runtime_training() From e1b71820cf9336e9359123b57ad233d505739430 Mon Sep 17 00:00:00 2001 From: knc6 Date: Wed, 1 May 2024 12:12:46 -0400 Subject: [PATCH 14/22] Add rank 0. --- alignn/data.py | 187 +++++++++++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 93 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index 3192c549..604487b3 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -363,29 +363,9 @@ def get_train_val_loaders( use_ddp = False train_sampler = None val_sampler = None - train_data = get_torch_dataset( - dataset=dataset_train, - id_tag=id_tag, - atom_features=atom_features, - target=target, - target_atomwise=target_atomwise, - target_grad=target_grad, - target_stress=target_stress, - neighbor_strategy=neighbor_strategy, - use_canonize=use_canonize, - name=dataset, - line_graph=line_graph, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - max_neighbors=max_neighbors, - classification=classification_threshold is not None, - output_dir=output_dir, - sampler=train_sampler, - tmp_name="train_data", - ) - val_data = ( - get_torch_dataset( - dataset=dataset_val, + if rank == 0: + train_data = get_torch_dataset( + dataset=dataset_train, id_tag=id_tag, atom_features=atom_features, target=target, @@ -398,91 +378,112 @@ def get_train_val_loaders( line_graph=line_graph, cutoff=cutoff, cutoff_extra=cutoff_extra, - sampler=val_sampler, max_neighbors=max_neighbors, classification=classification_threshold is not None, output_dir=output_dir, - tmp_name="val_data", + sampler=train_sampler, + tmp_name="train_data", ) - if len(dataset_val) > 0 - else None - ) - test_data = ( - get_torch_dataset( - dataset=dataset_test, - id_tag=id_tag, - atom_features=atom_features, - target=target, - target_atomwise=target_atomwise, - target_grad=target_grad, - target_stress=target_stress, - neighbor_strategy=neighbor_strategy, - use_canonize=use_canonize, - name=dataset, - line_graph=line_graph, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - max_neighbors=max_neighbors, - classification=classification_threshold is not None, - output_dir=output_dir, - tmp_name="test_data", + val_data = ( + get_torch_dataset( + dataset=dataset_val, + id_tag=id_tag, + atom_features=atom_features, + target=target, + target_atomwise=target_atomwise, + target_grad=target_grad, + target_stress=target_stress, + neighbor_strategy=neighbor_strategy, + use_canonize=use_canonize, + name=dataset, + line_graph=line_graph, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + sampler=val_sampler, + max_neighbors=max_neighbors, + classification=classification_threshold is not None, + output_dir=output_dir, + tmp_name="val_data", + ) + if len(dataset_val) > 0 + else None + ) + test_data = ( + get_torch_dataset( + dataset=dataset_test, + id_tag=id_tag, + atom_features=atom_features, + target=target, + target_atomwise=target_atomwise, + target_grad=target_grad, + target_stress=target_stress, + neighbor_strategy=neighbor_strategy, + use_canonize=use_canonize, + name=dataset, + line_graph=line_graph, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + max_neighbors=max_neighbors, + classification=classification_threshold is not None, + output_dir=output_dir, + tmp_name="test_data", + ) + if len(dataset_test) > 0 + else None ) - if 
len(dataset_test) > 0 - else None - ) - - collate_fn = train_data.collate - # print("line_graph,line_dih_graph", line_graph, line_dih_graph) - if line_graph: - collate_fn = train_data.collate_line_graph - # use a regular pytorch dataloader - train_loader = GraphDataLoader( - # train_loader = DataLoader( - train_data, - batch_size=batch_size, - shuffle=True, - collate_fn=collate_fn, - drop_last=True, - num_workers=workers, - pin_memory=pin_memory, - use_ddp=use_ddp, - ) + collate_fn = train_data.collate + # print("line_graph,line_dih_graph", line_graph, line_dih_graph) + if line_graph: + collate_fn = train_data.collate_line_graph - val_loader = GraphDataLoader( - # val_loader = DataLoader( - val_data, - batch_size=batch_size, - shuffle=False, - collate_fn=collate_fn, - drop_last=True, - num_workers=workers, - pin_memory=pin_memory, - use_ddp=use_ddp, - ) + # use a regular pytorch dataloader + train_loader = GraphDataLoader( + # train_loader = DataLoader( + train_data, + batch_size=batch_size, + shuffle=True, + collate_fn=collate_fn, + drop_last=True, + num_workers=workers, + pin_memory=pin_memory, + use_ddp=use_ddp, + ) - test_loader = ( - GraphDataLoader( - # DataLoader( - test_data, - batch_size=1, + val_loader = GraphDataLoader( + # val_loader = DataLoader( + val_data, + batch_size=batch_size, shuffle=False, collate_fn=collate_fn, - drop_last=False, + drop_last=True, num_workers=workers, pin_memory=pin_memory, use_ddp=use_ddp, ) - if len(dataset_test) > 0 - else None - ) - if save_dataloader: - torch.save(train_loader, train_sample) - if val_loader is not None: - torch.save(val_loader, val_sample) - if test_loader is not None: - torch.save(test_loader, test_sample) + test_loader = ( + GraphDataLoader( + # DataLoader( + test_data, + batch_size=1, + shuffle=False, + collate_fn=collate_fn, + drop_last=False, + num_workers=workers, + pin_memory=pin_memory, + use_ddp=use_ddp, + ) + if len(dataset_test) > 0 + else None + ) + + if save_dataloader: + torch.save(train_loader, train_sample) + if val_loader is not None: + torch.save(val_loader, val_sample) + if test_loader is not None: + torch.save(test_loader, test_sample) print("n_train:", len(train_loader.dataset)) print("n_val :", len(val_loader.dataset) if val_loader is not None else 0) From 12f664f1a93a16b8fa6e7a5a10234652daef56cd Mon Sep 17 00:00:00 2001 From: knc6 Date: Wed, 1 May 2024 16:41:41 -0400 Subject: [PATCH 15/22] Minor update. --- alignn/train_alignn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index 5f2b5016..f4b0b62b 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -22,11 +22,12 @@ device = torch.device("cuda") -def setup(rank, world_size): +def setup(rank=0, world_size=0, port="12356"): """Set up multi GPU rank.""" if world_size > 1: os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" + os.environ["MASTER_PORT"] = port + # os.environ["MASTER_PORT"] = "12355" # Initialize the distributed environment. 
dist.init_process_group("nccl", rank=rank, world_size=world_size) torch.cuda.set_device(rank) @@ -149,7 +150,7 @@ def train_for_folder( output_dir=None, ): """Train for a folder.""" - setup(rank, world_size) + setup(rank=rank, world_size=world_size) print("root_dir", root_dir) id_prop_json = os.path.join(root_dir, "id_prop.json") id_prop_json_zip = os.path.join(root_dir, "id_prop.json.zip") From 575216b0e2fbbf800946fe310905f34819ad9653 Mon Sep 17 00:00:00 2001 From: knc6 Date: Thu, 2 May 2024 18:25:23 -0400 Subject: [PATCH 16/22] Switch back to use all rank. --- alignn/data.py | 187 ++++++++++++++++++++++++------------------------- 1 file changed, 93 insertions(+), 94 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index 604487b3..3192c549 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -363,9 +363,29 @@ def get_train_val_loaders( use_ddp = False train_sampler = None val_sampler = None - if rank == 0: - train_data = get_torch_dataset( - dataset=dataset_train, + train_data = get_torch_dataset( + dataset=dataset_train, + id_tag=id_tag, + atom_features=atom_features, + target=target, + target_atomwise=target_atomwise, + target_grad=target_grad, + target_stress=target_stress, + neighbor_strategy=neighbor_strategy, + use_canonize=use_canonize, + name=dataset, + line_graph=line_graph, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + max_neighbors=max_neighbors, + classification=classification_threshold is not None, + output_dir=output_dir, + sampler=train_sampler, + tmp_name="train_data", + ) + val_data = ( + get_torch_dataset( + dataset=dataset_val, id_tag=id_tag, atom_features=atom_features, target=target, @@ -378,112 +398,91 @@ def get_train_val_loaders( line_graph=line_graph, cutoff=cutoff, cutoff_extra=cutoff_extra, + sampler=val_sampler, max_neighbors=max_neighbors, classification=classification_threshold is not None, output_dir=output_dir, - sampler=train_sampler, - tmp_name="train_data", - ) - val_data = ( - get_torch_dataset( - dataset=dataset_val, - id_tag=id_tag, - atom_features=atom_features, - target=target, - target_atomwise=target_atomwise, - target_grad=target_grad, - target_stress=target_stress, - neighbor_strategy=neighbor_strategy, - use_canonize=use_canonize, - name=dataset, - line_graph=line_graph, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - sampler=val_sampler, - max_neighbors=max_neighbors, - classification=classification_threshold is not None, - output_dir=output_dir, - tmp_name="val_data", - ) - if len(dataset_val) > 0 - else None + tmp_name="val_data", ) - test_data = ( - get_torch_dataset( - dataset=dataset_test, - id_tag=id_tag, - atom_features=atom_features, - target=target, - target_atomwise=target_atomwise, - target_grad=target_grad, - target_stress=target_stress, - neighbor_strategy=neighbor_strategy, - use_canonize=use_canonize, - name=dataset, - line_graph=line_graph, - cutoff=cutoff, - cutoff_extra=cutoff_extra, - max_neighbors=max_neighbors, - classification=classification_threshold is not None, - output_dir=output_dir, - tmp_name="test_data", - ) - if len(dataset_test) > 0 - else None + if len(dataset_val) > 0 + else None + ) + test_data = ( + get_torch_dataset( + dataset=dataset_test, + id_tag=id_tag, + atom_features=atom_features, + target=target, + target_atomwise=target_atomwise, + target_grad=target_grad, + target_stress=target_stress, + neighbor_strategy=neighbor_strategy, + use_canonize=use_canonize, + name=dataset, + line_graph=line_graph, + cutoff=cutoff, + cutoff_extra=cutoff_extra, + max_neighbors=max_neighbors, + 
classification=classification_threshold is not None, + output_dir=output_dir, + tmp_name="test_data", ) + if len(dataset_test) > 0 + else None + ) - collate_fn = train_data.collate - # print("line_graph,line_dih_graph", line_graph, line_dih_graph) - if line_graph: - collate_fn = train_data.collate_line_graph + collate_fn = train_data.collate + # print("line_graph,line_dih_graph", line_graph, line_dih_graph) + if line_graph: + collate_fn = train_data.collate_line_graph - # use a regular pytorch dataloader - train_loader = GraphDataLoader( - # train_loader = DataLoader( - train_data, - batch_size=batch_size, - shuffle=True, - collate_fn=collate_fn, - drop_last=True, - num_workers=workers, - pin_memory=pin_memory, - use_ddp=use_ddp, - ) + # use a regular pytorch dataloader + train_loader = GraphDataLoader( + # train_loader = DataLoader( + train_data, + batch_size=batch_size, + shuffle=True, + collate_fn=collate_fn, + drop_last=True, + num_workers=workers, + pin_memory=pin_memory, + use_ddp=use_ddp, + ) + + val_loader = GraphDataLoader( + # val_loader = DataLoader( + val_data, + batch_size=batch_size, + shuffle=False, + collate_fn=collate_fn, + drop_last=True, + num_workers=workers, + pin_memory=pin_memory, + use_ddp=use_ddp, + ) - val_loader = GraphDataLoader( - # val_loader = DataLoader( - val_data, - batch_size=batch_size, + test_loader = ( + GraphDataLoader( + # DataLoader( + test_data, + batch_size=1, shuffle=False, collate_fn=collate_fn, - drop_last=True, + drop_last=False, num_workers=workers, pin_memory=pin_memory, use_ddp=use_ddp, ) + if len(dataset_test) > 0 + else None + ) - test_loader = ( - GraphDataLoader( - # DataLoader( - test_data, - batch_size=1, - shuffle=False, - collate_fn=collate_fn, - drop_last=False, - num_workers=workers, - pin_memory=pin_memory, - use_ddp=use_ddp, - ) - if len(dataset_test) > 0 - else None - ) - - if save_dataloader: - torch.save(train_loader, train_sample) - if val_loader is not None: - torch.save(val_loader, val_sample) - if test_loader is not None: - torch.save(test_loader, test_sample) + if save_dataloader: + torch.save(train_loader, train_sample) + if val_loader is not None: + torch.save(val_loader, val_sample) + if test_loader is not None: + torch.save(test_loader, test_sample) print("n_train:", len(train_loader.dataset)) print("n_val :", len(val_loader.dataset) if val_loader is not None else 0) From e600009ecb34838a50dcace0c2b679ecded2ff57 Mon Sep 17 00:00:00 2001 From: knc6 Date: Sat, 4 May 2024 15:14:15 -0400 Subject: [PATCH 17/22] Read existing dataset to save time. 
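A minimal usage sketch of the fast path added here, for illustration only (not part of the patch): it assumes a "train_data" store written by an earlier call, the dft_2d dataset and formation_energy_peratom target follow the __main__ example in lmdb_dataset.py, and close() is the same method train.py calls on the loader datasets.

```
# Sketch: reuse an already-serialized LMDB store instead of re-featurizing.
from jarvis.db.figshare import data
from alignn.lmdb_dataset import get_torch_dataset

dataset = data("dft_2d")

# First run: no "train_data" store exists yet, so every structure is
# featurized into graphs and serialized into the LMDB file.
train_data = get_torch_dataset(
    dataset=dataset,
    target="formation_energy_peratom",
    tmp_name="train_data",
)
train_data.close()

# Later runs: with read_existing=True (the new default), only the id list
# is rebuilt from `dataset`; the existing store is re-opened and graph
# construction is skipped entirely.
train_data = get_torch_dataset(
    dataset=dataset,
    target="formation_energy_peratom",
    tmp_name="train_data",
    read_existing=True,
)
```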
--- alignn/lmdb_dataset.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/alignn/lmdb_dataset.py b/alignn/lmdb_dataset.py index 98ada8d6..328395c8 100644 --- a/alignn/lmdb_dataset.py +++ b/alignn/lmdb_dataset.py @@ -108,6 +108,7 @@ def get_torch_dataset( output_dir=".", tmp_name="dataset", map_size=1e12, + read_existing=True, ): """Get Torch Dataset with LMDB.""" vals = np.array([ii[target] for ii in dataset]) # df[target].values @@ -119,6 +120,13 @@ def get_torch_dataset( f.write(line) f.close() ids = [] + if os.path.exists(tmp_name) and read_existing: + for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)): + ids.append(d[id_tag]) + dat = TorchLMDBDataset(lmdb_path=tmp_name, ids=ids) + print("Reading dataset", tmp_name) + return dat + ids = [] env = lmdb.open(tmp_name, map_size=int(map_size)) with env.begin(write=True) as txn: for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)): From 60d55d7fa5f56e5e72b6f4fe7c5704c376bd9092 Mon Sep 17 00:00:00 2001 From: knc6 Date: Thu, 9 May 2024 18:18:31 -0400 Subject: [PATCH 18/22] Add rand num generator. --- alignn/train_alignn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index f4b0b62b..2e058951 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -16,6 +16,7 @@ import torch import time from jarvis.core.atoms import Atoms +import random device = "cpu" if torch.cuda.is_available(): @@ -24,6 +25,9 @@ def setup(rank=0, world_size=0, port="12356"): """Set up multi GPU rank.""" + # "12356" + if port == "": + port = str(random.randint(10000, 99999)) if world_size > 1: os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = port From f1f30b3d486fa97198c85d4b83296b80733bc4dd Mon Sep 17 00:00:00 2001 From: knc6 Date: Fri, 10 May 2024 00:16:10 -0400 Subject: [PATCH 19/22] Fix PyTest. 
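The idea of the fix, sketched below for illustration (the config path is hypothetical): each test now writes a distinct "filename" prefix into its config, so get_train_val_loaders derives per-test store names via tmp_name = filename + "train_data", and read_existing can never pick up an LMDB store left behind by a previous test.

```
# Sketch: namespace the on-disk LMDB stores per test via "filename".
from jarvis.db.jsonutils import loadjson, dumpjson

config = "config_example.json"  # hypothetical path, as in the tests below
tmp = loadjson(config)
tmp["filename"] = "AA"  # stores become AAtrain_data / AAval_data / AAtest_data
dumpjson(data=tmp, filename=config)
```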
--- alignn/data.py | 14 ++++++++++---- alignn/tests/test_prop.py | 26 +++++++++++++++++++++----- alignn/train.py | 1 + 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/alignn/data.py b/alignn/data.py index 3192c549..c52d0c74 100644 --- a/alignn/data.py +++ b/alignn/data.py @@ -137,7 +137,7 @@ def get_train_val_loaders( workers: int = 0, pin_memory: bool = True, save_dataloader: bool = False, - filename: str = "sample", + filename: str = "./", id_tag: str = "jid", use_canonize: bool = True, # use_ddp: bool = False, @@ -363,6 +363,7 @@ def get_train_val_loaders( use_ddp = False train_sampler = None val_sampler = None + tmp_name = filename + "train_data" train_data = get_torch_dataset( dataset=dataset_train, id_tag=id_tag, @@ -381,8 +382,10 @@ def get_train_val_loaders( classification=classification_threshold is not None, output_dir=output_dir, sampler=train_sampler, - tmp_name="train_data", + tmp_name=tmp_name, + # tmp_name="train_data", ) + tmp_name = filename + "val_data" val_data = ( get_torch_dataset( dataset=dataset_val, @@ -402,11 +405,13 @@ def get_train_val_loaders( max_neighbors=max_neighbors, classification=classification_threshold is not None, output_dir=output_dir, - tmp_name="val_data", + tmp_name=tmp_name, + # tmp_name="val_data", ) if len(dataset_val) > 0 else None ) + tmp_name = filename + "test_data" test_data = ( get_torch_dataset( dataset=dataset_test, @@ -425,7 +430,8 @@ def get_train_val_loaders( max_neighbors=max_neighbors, classification=classification_threshold is not None, output_dir=output_dir, - tmp_name="test_data", + tmp_name=tmp_name, + # tmp_name="test_data", ) if len(dataset_test) > 0 else None diff --git a/alignn/tests/test_prop.py b/alignn/tests/test_prop.py index c31f1c42..bcc34c64 100644 --- a/alignn/tests/test_prop.py +++ b/alignn/tests/test_prop.py @@ -13,6 +13,7 @@ from jarvis.db.figshare import get_jid_data from alignn.ff.ff import AlignnAtomwiseCalculator, default_path, revised_path import torch +from jarvis.db.jsonutils import loadjson, dumpjson plt.switch_backend("agg") @@ -63,6 +64,7 @@ def test_models(): config["write_predictions"] = True config["model"]["name"] = "alignn_atomwise" + config["filename"] = "X" t1 = time.time() result = train_dgl(config) t2 = time.time() @@ -74,6 +76,7 @@ def test_models(): print() config["model"]["name"] = "alignn_atomwise" + config["filename"] = "Y" config["classification_threshold"] = 0.0 t1 = time.time() result = train_dgl(config) @@ -133,7 +136,7 @@ def test_pretrained(): def test_alignn_train_regression(): # Regression - cmd = "rm -rf train_data test_data val_data" + cmd = "rm -rf *train_data *test_data *val_data" os.system(cmd) root_dir = os.path.abspath( os.path.join(os.path.dirname(__file__), "../examples/sample_data/") @@ -144,13 +147,16 @@ def test_alignn_train_regression(): "../examples/sample_data/config_example.json", ) ) + tmp = loadjson(config) + tmp["filename"] = "AA" + dumpjson(data=tmp, filename=config) train_for_folder( rank=0, world_size=world_size, root_dir=root_dir, config_name=config ) def test_alignn_train_regression_multi_out(): - cmd = "rm -rf train_data test_data val_data" + cmd = "rm -rf *train_data *test_data *val_data" os.system(cmd) # Regression multi-out root_dir = os.path.abspath( @@ -164,13 +170,16 @@ def test_alignn_train_regression_multi_out(): "../examples/sample_data/config_example.json", ) ) + tmp = loadjson(config) + tmp["filename"] = "BB" + dumpjson(data=tmp, filename=config) train_for_folder( rank=0, world_size=world_size, root_dir=root_dir, config_name=config 
) def test_alignn_train_classification(): - cmd = "rm -rf train_data test_data val_data" + cmd = "rm -rf *train_data *test_data *val_data" os.system(cmd) # Classification root_dir = os.path.abspath( @@ -182,6 +191,9 @@ def test_alignn_train_classification(): "../examples/sample_data/config_example.json", ) ) + tmp = loadjson(config) + tmp["filename"] = "A" + dumpjson(data=tmp, filename=config) train_for_folder( rank=0, world_size=world_size, @@ -192,7 +204,7 @@ def test_alignn_train_classification(): def test_alignn_train_ff(): - cmd = "rm -rf train_data test_data val_data" + cmd = "rm -rf *train_data *test_data *val_data" os.system(cmd) # FF root_dir = os.path.abspath( @@ -204,6 +216,9 @@ def test_alignn_train_ff(): "../examples/sample_data_ff/config_example_atomwise.json", ) ) + tmp = loadjson(config) + tmp["filename"] = "B" + dumpjson(data=tmp, filename=config) train_for_folder( rank=0, world_size=world_size, root_dir=root_dir, config_name=config ) @@ -266,7 +281,8 @@ def test_del_files(): for i in fnames: cmd = "rm -r " + i os.system(cmd) - + cmd="rm -r *train_data *val_data *test_data" + os.system(cmd) # test_alignn_train_ff() # test_alignn_train_classification() diff --git a/alignn/train.py b/alignn/train.py index a4822f84..dd3675f1 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -877,6 +877,7 @@ def get_batch_errors(dat=[]): predictions.append(jj) f.close() if config.use_lmdb: + print("Closing LMDB.") train_loader.dataset.close() val_loader.dataset.close() test_loader.dataset.close() From a637086fe1b06af205a1440535ef9b8436c9b53b Mon Sep 17 00:00:00 2001 From: Kamal Choudhary Date: Sat, 1 Jun 2024 18:09:58 -0400 Subject: [PATCH 20/22] Update README.md --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index a461dbc9..aacc461b 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ * [Installation](#install) * [Examples](#example) * [Pre-trained models](#pretrained) -* [Quick start using colab](#colab) * [JARVIS-ALIGNN webapp](#webapp) * [ALIGNN-FF & ASE Calculator](#alignnff) * [Peformances on a few datasets](#performances) @@ -111,6 +110,18 @@ pip install dgl==1.0.1+cu117 -f https://data.dgl.ai/wheels/cu117/repo.html Examples --------- + + + +| Notebooks | Google Colab | Descriptions | +| ---------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [Regression model](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/alignn_jarvis_leaderboard.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/alignn_jarvis_leaderboard.ipynb) | Examples for developing single output regression model for exfoliation energies of 2D materials. 
| +| [MLFF](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Train_ALIGNNFF_Mlearn.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Train_ALIGNNFF_Mlearn.ipynb) | Examples of training a machine learning force field for Silicon. | +| [Miscellaneous tasks](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) | Examples for developing single output (such as formation energy, bandgaps) or multi-output (such as phonon DOS, electron DOS) Regression or Classification (such as metal vs non-metal), sing several pretrained models. | + + +[Open in Google Colab]: https://colab.research.google.com/assets/colab-badge.svg + Here, we provide examples for property prediction tasks, development of machine-learning force-fields (MLFF), usage of pre-trained property predictor, MLFFs, webapps etc. #### Dataset preparation for property prediction tasks @@ -174,18 +185,7 @@ An example of prediction formation energy per atom using JARVIS-DFT dataset trai ``` pretrained.py --model_name jv_formation_energy_peratom_alignn --file_format poscar --file_path alignn/examples/sample_data/POSCAR-JVASP-10.vasp ``` - -Quick start using GoogleColab notebook example ------------------------------------------------ - -The following [notebook](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) provides an example of 1) installing ALIGNN model, 2) training the example data and 3) using the pretrained models. For this example, you don't need to install alignn package on your local computer/cluster, it requires a gmail account to login. Learn more about Google colab [here](https://colab.research.google.com/notebooks/intro.ipynb). - -[![name](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) - - -The following [notebook](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Train_ALIGNNFF_Mlearn.ipynb) provides an example of ALIGNN-FF model. 
-For additional notebooks, checkout [JARVIS-Tools-Notebooks](https://github.com/JARVIS-Materials-Design/jarvis-tools-notebooks?tab=readme-ov-file#artificial-intelligencemachine-learning) Web-app From 5f17b3bc03b809f4f4e33cddefcda9ccdd3069ae Mon Sep 17 00:00:00 2001 From: Kamal Choudhary Date: Sat, 1 Jun 2024 18:10:51 -0400 Subject: [PATCH 21/22] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index aacc461b..84b28022 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -[![name](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) ![alt text](https://github.com/usnistgov/alignn/actions/workflows/main.yml/badge.svg) [![codecov](https://codecov.io/gh/usnistgov/alignn/branch/main/graph/badge.svg?token=S5X4OYC80V)](https://codecov.io/gh/usnistgov/alignn) [![PyPI version](https://badge.fury.io/py/alignn.svg)](https://badge.fury.io/py/alignn) From 91799e6e93ff563a8b20ee6437268fd031e59389 Mon Sep 17 00:00:00 2001 From: Kamal Choudhary Date: Sat, 1 Jun 2024 18:12:57 -0400 Subject: [PATCH 22/22] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 84b28022..d5a47be9 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ Examples | ---------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | [Regression model](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/alignn_jarvis_leaderboard.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/alignn_jarvis_leaderboard.ipynb) | Examples for developing single output regression model for exfoliation energies of 2D materials. | | [MLFF](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Train_ALIGNNFF_Mlearn.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Train_ALIGNNFF_Mlearn.ipynb) | Examples of training a machine learning force field for Silicon. | -| [Miscellaneous tasks](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) | Examples for developing single output (such as formation energy, bandgaps) or multi-output (such as phonon DOS, electron DOS) Regression or Classification (such as metal vs non-metal), sing several pretrained models. 
| +| [Miscellaneous tasks](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) | [![Open in Google Colab]](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Training_ALIGNN_model_example.ipynb) | Examples for developing single output (such as formation energy, bandgaps) or multi-output (such as phonon DOS, electron DOS) Regression or Classification (such as metal vs non-metal), Using several pretrained models. | [Open in Google Colab]: https://colab.research.google.com/assets/colab-badge.svg
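Taken together, a single-node training run over a folder of structures now looks roughly like the sketch below. The paths are illustrative; the call mirrors the updated tests in test_prop.py, and setup() initializes the distributed process group only when world_size is greater than 1.

```
# Sketch: folder-based training with the DDP-aware entry point.
import torch
from alignn.train_alignn import train_for_folder

world_size = int(torch.cuda.device_count())
train_for_folder(
    rank=0,
    world_size=world_size,
    root_dir="alignn/examples/sample_data",  # folder with id_prop.csv + structures
    config_name="alignn/examples/sample_data/config_example.json",
)
```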