Experimental macOS/MPS support #200

Merged
10 changes: 4 additions & 6 deletions README.md
@@ -9,13 +9,11 @@ Primary use cases include
 This library is _not_ intended to serve as a place for clinical NLP applications to live. If you build something cool that uses transformer models that take advantage of our model definitions, the best practice is probably to rely on it as a library rather than treating it as your workspace. This library is also not intended as a deployment-ready tool for _scalable_ clinical NLP. There is a lot of interest in developing methods and tools that are smaller and can process millions of records, and this library can potentially be used for research along those lines. But it will probably never be extremely optimized or shrink-wrapped for applications. However, there should be plenty of examples and useful code for people who are interested in that type of deployment.
 
 ## Install
+> [!WARNING]
+macOS support is currently experimental. We recommend using Python 3.10 for macOS installations.
 
-**Note: due to some dependency issues, this package does not officially
-support macOS on Apple Silicon. If you want to install it on Apple Silicon,
-you are on your own; we unofficially recommend trying it with Python 3.10, or using
-the docker CPU image**
-
-**Note:** When installing the library's dependencies, `pip` will probably install
+> [!NOTE]
+When installing the library's dependencies, `pip` will probably install
 PyTorch with CUDA 10.2 support by default. If you would like to run the
 library in CPU-only mode or with a newer version of CUDA, [install PyTorch
 to your desired specifications](https://pytorch.org/get-started/locally/)
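After installing, a quick way to confirm which accelerator PyTorch can actually see is the check below; this is a minimal sketch that uses only the `torch` calls already referenced in this PR:

```python
import torch
import torch.backends.mps

# Probe each backend the way the new move_model_to_device helper does.
if torch.cuda.is_available():
    print("CUDA GPU detected")
elif torch.backends.mps.is_available():
    print("Apple Silicon (MPS) backend detected")
else:
    print("No accelerator detected -- CPU only")
```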
6 changes: 3 additions & 3 deletions setup.cfg
@@ -31,12 +31,12 @@ install_requires =
     numpy~=1.23.3
     pandas~=1.3.5
     pydantic~=1.10.8
-    PyRuSH~=1.0.3.6
+    PyRuSH~=1.0.8
     requests~=2.26.0
     scikit-learn~=1.0.2
     seqeval~=1.2.2
-    torch~=1.5
-    transformers[torch]~=4.15
+    torch>=1.5, <=2.3.1
+    transformers[torch]>=4.15, <4.42
     uvicorn[standard]~=0.16.0
 
 [options.entry_points]
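The relaxed pins can be sanity-checked against an existing environment; a small sketch (the `packaging` dependency is an assumption here, not something this PR adds):

```python
import torch
import transformers
from packaging.version import Version

# setup.cfg now allows torch >=1.5,<=2.3.1 and transformers >=4.15,<4.42.
torch_v = Version(Version(torch.__version__).base_version)  # strip local tags like +cu121
hf_v = Version(transformers.__version__)

assert Version("1.5") <= torch_v <= Version("2.3.1"), torch_v
assert Version("4.15") <= hf_v < Version("4.42"), hf_v
print("environment matches the new pins:", torch_v, hf_v)
```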
46 changes: 28 additions & 18 deletions src/cnlpt/api/cnlp_rest.py
@@ -4,7 +4,7 @@
 
 # FastAPI imports
 from pydantic import BaseModel
-from typing import List
+from typing import List, Literal
 
 # Modeling imports
 from transformers import (
@@ -17,6 +17,8 @@
 )
 from datasets import Dataset
 import torch
+import torch.backends
+import torch.backends.mps
 import logging
 
 # intra-library imports
@@ -61,7 +63,28 @@ def create_instance_string(doc_text: str, offsets : List[int]):
     raw_str = doc_text[start:offsets[0]] + ' <e> ' + doc_text[offsets[0]:offsets[1]] + ' </e> ' + doc_text[offsets[1]:end]
     return raw_str.replace('\n', ' ')
 
-def initialize_cnlpt_model(app, model_name, cuda=True, batch_size=8):
+def move_model_to_device(model, device: Literal["auto", "cuda", "mps", "cpu"]):
+    if (device == "cuda" and not torch.cuda.is_available()) or (device == "mps" and not torch.backends.mps.is_available()):
+        logging.warning(f"Device was set to '{device}' but {device} was not found -- using CPU instead. If you have a GPU you need to debug why pytorch cannot see it.")
+        device = "cpu"
+
+    if device == "auto":
+        if torch.cuda.is_available():
+            device = "cuda"
+        elif torch.backends.mps.is_available():
+            device = "mps"
+        else:
+            logging.warning("Device was set to 'auto' but neither cuda nor mps are available -- using CPU instead. If you have a GPU you need to debug why pytorch cannot see it.")
+            device = "cpu"
+
+    if device == "cuda":
+        return model.to("cuda")
+    elif device == "mps":
+        return model.to("mps")
+    else:
+        return model.to("cpu")
+
+def initialize_cnlpt_model(app, model_name, device: Literal["auto", "cuda", "mps", "cpu"] = "auto", batch_size=8):
     args = ['--output_dir', 'save_run/', '--per_device_eval_batch_size', str(batch_size), '--do_predict', '--report_to', 'none']
     parser = HfArgumentParser((TrainingArguments,))
     training_args, = parser.parse_args_into_dataclasses(args=args)
@@ -76,14 +99,8 @@ def initialize_cnlpt_model(app, model_name, cuda=True, batch_size=8):
     app.state.tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                         config=config)
     model = CnlpModelForClassification.from_pretrained(model_name, cache_dir=os.getenv('HF_CACHE'), config=config)
-    if cuda and not torch.cuda.is_available():
-        logging.warning('CUDA is set to True (probably a default) but was not available; setting to False and proceeding. If you have a GPU you need to debug why pytorch cannot see it.')
-        cuda = False
 
-    if cuda:
-        model = model.to('cuda')
-    else:
-        model = model.to('cpu')
+    model = move_model_to_device(model, device)
 
     app.state.model = model
     app.state.trainer = Trainer(
@@ -92,7 +109,7 @@ def initialize_cnlpt_model(app, model_name, cuda=True, batch_size=8):
         compute_metrics=None,
     )
 
-def initialize_hier_model(app, model_name, cuda=True, batch_size=1):
+def initialize_hier_model(app, model_name, device: Literal["auto", "cuda", "mps", "cpu"] = "auto", batch_size=1):
     AutoConfig.register("cnlpt", CnlpConfig)
     AutoModel.register(CnlpConfig, HierarchicalModel)
 
@@ -104,13 +121,6 @@ def initialize_hier_model(app, model_name, cuda=True, batch_size=1):
     model = AutoModel.from_pretrained(model_name, cache_dir=os.getenv('HF_CACHE'), config=config)
     model.train(False)
 
-    if cuda and not torch.cuda.is_available():
-        logging.warning('CUDA is set to True (probably a default) but was not available; setting to False and proceeding. If you have a GPU you need to debug why pytorch cannot see it.')
-        cuda = False
-
-    if cuda:
-        model = model.to('cuda')
-    else:
-        model = model.to('cpu')
+    model = move_model_to_device(model, device)
 
     app.state.model = model
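The new `move_model_to_device` helper centralizes the fallback logic that both initializers previously duplicated. A minimal usage sketch (the `Linear` module is just a hypothetical stand-in for a real cnlpt model):

```python
import torch
from cnlpt.api.cnlp_rest import move_model_to_device

model = torch.nn.Linear(4, 2)  # stand-in; any torch.nn.Module works
model = move_model_to_device(model, "auto")  # prefers CUDA, then MPS, then CPU
print(next(model.parameters()).device)
```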
15 changes: 12 additions & 3 deletions src/cnlpt/api/cnn_rest.py
@@ -24,6 +24,7 @@
 
 from transformers import AutoTokenizer, Trainer
 import torch
+import torch.backends.mps
 import numpy as np
 
 from .cnlp_rest import UnannotatedDocument, get_dataset
@@ -34,6 +35,14 @@
 if model_name is None:
     sys.stderr.write('This REST container requires a MODEL_PATH environment variable\n')
     sys.exit(-1)
+device = os.getenv('MODEL_DEVICE', 'auto')
+if device == 'auto':
+    if torch.cuda.is_available():
+        device = 'cuda'
+    elif torch.backends.mps.is_available():
+        device = 'mps'
+    else:
+        device = 'cpu'
 
 logger = logging.getLogger('CNN_REST_Processor')
 logger.setLevel(logging.DEBUG)
@@ -54,7 +63,7 @@ async def startup_event():
                 filters=conf_dict['filters'])
     model.load_state_dict(torch.load(join(model_name, 'pytorch_model.bin')))
 
-    app.state.model = model.to('cuda')
+    app.state.model = model.to(device)
     app.state.tokenizer = tokenizer
     app.state.conf_dict = conf_dict
 
@@ -63,8 +72,8 @@ async def process(doc: UnannotatedDocument):
     results = []
     instances = [doc.doc_text]
     dataset = get_dataset(instances, app.state.tokenizer, max_length=app.state.conf_dict['max_seq_length'])
-    _, logits = app.state.model.forward(input_ids=torch.LongTensor(dataset['input_ids']).to('cuda'),
-                                        attention_mask = torch.LongTensor(dataset['attention_mask']).to('cuda'),
+    _, logits = app.state.model.forward(input_ids=torch.LongTensor(dataset['input_ids']).to(device),
+                                        attention_mask = torch.LongTensor(dataset['attention_mask']).to(device),
                                         )
 
     prediction = int(np.argmax(logits[0].cpu().detach().numpy(), axis=1))
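Because `device` is resolved at module import time from the `MODEL_DEVICE` environment variable, it has to be set before the module loads; a minimal sketch (the model path is a placeholder):

```python
import os

# Both variables are read at import time by cnn_rest.py.
os.environ["MODEL_PATH"] = "/path/to/model"  # placeholder; required by the service
os.environ["MODEL_DEVICE"] = "cpu"           # or "cuda", "mps", "auto" (the default)

import cnlpt.api.cnn_rest  # device resolution happens during this import
```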
2 changes: 1 addition & 1 deletion src/cnlpt/api/dtr_rest.py
@@ -49,7 +49,7 @@ class DocTimeRelResults(BaseModel):
 
 @app.on_event("startup")
 async def startup_event():
-    initialize_cnlpt_model(app, model_name, cuda=False, batch_size=64)
+    initialize_cnlpt_model(app, model_name, device="cpu", batch_size=64)
 
 @app.post("/dtr/process")
 async def process(doc: EntityDocument):
22 changes: 16 additions & 6 deletions src/cnlpt/api/hier_rest.py
@@ -22,6 +22,7 @@
 from .cnlp_rest import UnannotatedDocument, create_instance_string, initialize_cnlpt_model, initialize_hier_model, get_dataset
 from ..CnlpModelForClassification import CnlpModelForClassification, CnlpConfig
 import torch
+import torch.backends.mps
 import numpy as np
 
 import logging
@@ -31,6 +32,15 @@
 app = FastAPI()
 model_name = os.getenv('MODEL_PATH')
 
+device = os.getenv('MODEL_DEVICE', 'auto')
+if device == 'auto':
+    if torch.cuda.is_available():
+        device = 'cuda'
+    elif torch.backends.mps.is_available():
+        device = 'mps'
+    else:
+        device = 'cpu'
+
 logger = logging.getLogger('HierRep_REST_Processor')
 logger.setLevel(logging.DEBUG)
 
@@ -42,9 +52,9 @@ async def startup_event():
 async def get_representation(doc: UnannotatedDocument):
     instances = [doc.doc_text]
     dataset = get_dataset(instances, app.state.tokenizer, max_length=16000, hier=True, chunk_len=200, num_chunks=80, insert_empty_chunk_at_beginning=False)
-    result = app.state.model.forward(input_ids=torch.LongTensor(dataset['input_ids']).to('cuda'),
-                                     token_type_ids=torch.LongTensor(dataset['token_type_ids']).to('cuda'),
-                                     attention_mask = torch.LongTensor(dataset['attention_mask']).to('cuda'),
+    result = app.state.model.forward(input_ids=torch.LongTensor(dataset['input_ids']).to(device),
+                                     token_type_ids=torch.LongTensor(dataset['token_type_ids']).to(device),
+                                     attention_mask = torch.LongTensor(dataset['attention_mask']).to(device),
                                      output_hidden_states=True)
 
     # Convert to a list so python can send it out
@@ -55,9 +65,9 @@ async def get_representation(doc: UnannotatedDocument):
 async def classify(doc: UnannotatedDocument):
     instances = [doc.doc_text]
     dataset = get_dataset(instances, app.state.tokenizer, max_length=16000, hier=True, chunk_len=200, num_chunks=80, insert_empty_chunk_at_beginning=False)
-    result = app.state.model.forward(input_ids=torch.LongTensor(dataset['input_ids']).to('cuda'),
-                                     token_type_ids=torch.LongTensor(dataset['token_type_ids']).to('cuda'),
-                                     attention_mask = torch.LongTensor(dataset['attention_mask']).to('cuda'),
+    result = app.state.model.forward(input_ids=torch.LongTensor(dataset['input_ids']).to(device),
+                                     token_type_ids=torch.LongTensor(dataset['token_type_ids']).to(device),
+                                     attention_mask = torch.LongTensor(dataset['attention_mask']).to(device),
                                      output_hidden_states=False)
 
     predictions = [int(torch.argmax(logits.to('cpu').detach()).numpy()) for logits in result['logits']]
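Once the hierarchical service is running, classification is a plain JSON POST. A hedged sketch with `requests` -- the route path and port are assumptions, since the `@app.post` decorators are collapsed in this diff view; only the `doc_text` field of `UnannotatedDocument` is taken from the code above:

```python
import requests

resp = requests.post(
    "http://localhost:8000/hier/classify",  # assumed route and port
    json={"doc_text": "Patient denies chest pain."},
)
print(resp.json())
```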
11 changes: 8 additions & 3 deletions src/cnlpt/notebooks/viz_negation.ipynb
@@ -55,10 +55,15 @@
 "AutoConfig.register(\"cnlpt\", CnlpConfig)\n",
 "AutoModel.register(CnlpConfig, CnlpModelForClassification)\n",
 "\n",
+"import torch\n",
+"import torch.backends.mps\n",
+"\n",
+"device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'\n",
+"\n",
 "config = AutoConfig.from_pretrained(model_name)\n",
 "tokenizer = AutoTokenizer.from_pretrained(model_name, config=config)\n",
 "model = CnlpModelForClassification.from_pretrained(model_name, config=config)\n",
-"model.to('cuda')\n",
+"model.to(device)\n",
 "trainer = Trainer(model=model, args=training_args, compute_metrics=None)"
 ]
 },
@@ -85,7 +90,7 @@
 "outputs": [],
 "source": [
 "print(inputs)\n",
-"outputs = model(inputs.to('cuda'), output_attentions=True)"
+"outputs = model(inputs.to(device), output_attentions=True)"
 ]
 },
 {
@@ -170,7 +175,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.12"
+"version": "3.10.14"
 }
 },
 "nbformat": 4,