From 55da325899170c20dacc9034cac6429225175db1 Mon Sep 17 00:00:00 2001
From: Vitaliy Chiley <6439018+vchiley@users.noreply.github.com>
Date: Wed, 21 Jun 2023 09:25:43 -0700
Subject: [PATCH 1/9] updt composer to 0.15.0 (#347)

* updt composer

* updt test

* Update test_tasks.yaml

* updt with jeremies updt

* updt eval tasks yamls

---------

Co-authored-by: Jeremy D <115047575+bmosaicml@users.noreply.github.com>
---
 scripts/eval/yamls/tasks.yaml       | 12 ++++++------
 scripts/eval/yamls/tasks_light.yaml |  8 ++++----
 setup.py                            |  4 ++--
 tests/test_tasks.yaml               |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/scripts/eval/yamls/tasks.yaml b/scripts/eval/yamls/tasks.yaml
index 0ffdb4dbd6..5be9fe269c 100644
--- a/scripts/eval/yamls/tasks.yaml
+++ b/scripts/eval/yamls/tasks.yaml
@@ -4,7 +4,7 @@ icl_tasks:
   dataset_uri: eval/local_data/jeopardy_all.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: language_modeling
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
   has_categories: true
 -
   label: lambada_openai
@@ -16,7 +16,7 @@ icl_tasks:
   dataset_uri: eval/local_data/piqa.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: hellaswag
   dataset_uri: eval/local_data/hellaswag.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
 -
   label: arc_easy
@@ -27,13 +27,13 @@ icl_tasks:
   dataset_uri: eval/local_data/arc_easy.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: arc_challenge
   dataset_uri: eval/local_data/arc_challenge.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: copa
   dataset_uri: eval/local_data/copa.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
@@ -44,13 +44,13 @@ icl_tasks:
   dataset_uri: eval/local_data/boolq.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: mmlu
   dataset_uri: eval/local_data/mmlu.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
   has_categories: true
 -
   label: winograd
diff --git a/scripts/eval/yamls/tasks_light.yaml b/scripts/eval/yamls/tasks_light.yaml
index 66621e1be6..54580727a1 100644
--- a/scripts/eval/yamls/tasks_light.yaml
+++ b/scripts/eval/yamls/tasks_light.yaml
@@ -9,7 +9,7 @@ icl_tasks:
   dataset_uri: eval/local_data/piqa.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: hellaswag
   dataset_uri: eval/local_data/hellaswag.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
@@ -20,13 +20,13 @@ icl_tasks:
   dataset_uri: eval/local_data/arc_easy.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: arc_challenge
   dataset_uri: eval/local_data/arc_challenge.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
 -
   label: copa
   dataset_uri: eval/local_data/copa.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
@@ -37,4 +37,4 @@ icl_tasks:
   dataset_uri: eval/local_data/boolq.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1, 5, 10]
   icl_task_type: multiple_choice
-  continuation_delimiter: 'Answer: ' # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
diff --git a/setup.py b/setup.py
index e8f670f762..3302030fa6 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@
 ]

 install_requires = [
-    'composer[libcloud,nlp,wandb]>=0.14.1,<0.15',
+    'composer[libcloud,nlp,wandb]>=0.15.0,<0.16',
     'accelerate>=0.19,<0.20',  # for HF inference `device_map`
     'mosaicml-streaming>=0.5.1,<0.6',
     'torch>=1.13.1,<=2.0.1',
@@ -77,7 +77,7 @@
 ]

 extra_deps['tensorboard'] = [
-    'composer[tensorboard]>=0.14.1,<0.15',
+    'composer[tensorboard]>=0.15.0,<0.16',
 ]

 extra_deps['gpu'] = [
diff --git a/tests/test_tasks.yaml b/tests/test_tasks.yaml
index dae4b470bd..4298b3939c 100644
--- a/tests/test_tasks.yaml
+++ b/tests/test_tasks.yaml
@@ -4,7 +4,7 @@ icl_tasks:
   dataset_uri: scripts/eval/local_data/jeopardy_all.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0, 1]
   icl_task_type: language_modeling
-  continuation_delimiter: "Answer: " # this separates questions from answers
+  continuation_delimiter: "\nAnswer: " # this separates questions from answers
   has_categories: true
 -
   label: copa
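The continuation_delimiter change in the patch above is behavioral, not cosmetic: the ICL evaluation harness joins each example's context and its continuation with this string, so moving the newline into the delimiter places "Answer:" at the start of its own line instead of appending the answer to the end of the question text. A minimal sketch of that assembly, assuming a hypothetical build_prompt helper (not llm-foundry's actual API) and toy data:

    # Sketch of how a continuation delimiter shapes a few-shot prompt.
    # `build_prompt` is a hypothetical helper for illustration only.
    def build_prompt(examples, query, continuation_delimiter):
        parts = [f'{ctx}{continuation_delimiter}{cont}' for ctx, cont in examples]
        parts.append(f'{query}{continuation_delimiter}')  # model completes from here
        return '\n\n'.join(parts)

    few_shot = [('Q: In what year did WWII end?', '1945')]
    print(build_prompt(few_shot, 'Q: In what year was the moon landing?', '\nAnswer: '))

With "\nAnswer: " each question is followed by a newline and then the answer label; with the old 'Answer: ' the label ran on at the end of the question line.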
From da7fa6b907462a03af051ee6f1fe54d9fbfcf0f3 Mon Sep 17 00:00:00 2001
From: Vitaliy Chiley <6439018+vchiley@users.noreply.github.com>
Date: Wed, 21 Jun 2023 09:43:19 -0700
Subject: [PATCH 2/9] updt yml (#349)

---
 mcli/mcli-1b-eval.yaml           | 2 +-
 mcli/mcli-1b-max-seq-len-8k.yaml | 2 +-
 mcli/mcli-1b.yaml                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mcli/mcli-1b-eval.yaml b/mcli/mcli-1b-eval.yaml
index cc5dd1cbb1..d5df9c902b 100644
--- a/mcli/mcli-1b-eval.yaml
+++ b/mcli/mcli-1b-eval.yaml
@@ -10,7 +10,7 @@ command: |
   cd llm-foundry/llmfoundry/icl_eval
   composer eval.py /mnt/config/parameters.yaml
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
-name: mosaic-gpt-1b-eval
+name: mpt-1b-eval

 compute:
   gpus: 8  # Number of GPUs to use
diff --git a/mcli/mcli-1b-max-seq-len-8k.yaml b/mcli/mcli-1b-max-seq-len-8k.yaml
index d6b1ea2a9c..177aaee7aa 100644
--- a/mcli/mcli-1b-max-seq-len-8k.yaml
+++ b/mcli/mcli-1b-max-seq-len-8k.yaml
@@ -18,7 +18,7 @@ command: |
   --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
   composer train/train.py /mnt/config/parameters.yaml
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
-name: mosaic-gpt-1b-ctx-8k-gpus-8
+name: mpt-1b-ctx-8k-gpus-8

 compute:
   gpus: 8  # Number of GPUs to use
diff --git a/mcli/mcli-1b.yaml b/mcli/mcli-1b.yaml
index ccca8f26d7..1d7f4b0d13 100644
--- a/mcli/mcli-1b.yaml
+++ b/mcli/mcli-1b.yaml
@@ -22,7 +22,7 @@ command: |
     max_duration=100ba \
     eval_interval=0
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
-name: mosaic-gpt-1b-gpus-8
+name: mpt-1b-gpus-8

 compute:
   gpus: 8  # Number of GPUs to use
From 619400a22a0c2b89270a16b51e0fdc10acee257d Mon Sep 17 00:00:00 2001
From: Evan Racah
Date: Wed, 21 Jun 2023 12:45:29 -0700
Subject: [PATCH 3/9] Fix bug with saving optimizer states with
 MonolithicCheckpointSaver Callback (#310)

* Fix bug with saving optimizer states with mono ckpt saver

* lint

---------

Co-authored-by: Abhi Venigalla <77638579+abhi-mosaic@users.noreply.github.com>
Co-authored-by: Vitaliy Chiley <6439018+vchiley@users.noreply.github.com>
Co-authored-by: root
---
 .../callbacks/monolithic_ckpt_callback.py | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/llmfoundry/callbacks/monolithic_ckpt_callback.py b/llmfoundry/callbacks/monolithic_ckpt_callback.py
index 4e6bc4bf50..afca099832 100644
--- a/llmfoundry/callbacks/monolithic_ckpt_callback.py
+++ b/llmfoundry/callbacks/monolithic_ckpt_callback.py
@@ -8,7 +8,8 @@
 import torch
 from composer.core import Callback, State
-from composer.core.state import fsdp_state_dict_type_context
+from composer.core.state import (fsdp_get_optim_state_dict,
+                                 fsdp_state_dict_type_context)
 from composer.loggers import Logger
 from composer.loggers.remote_uploader_downloader import RemoteUploaderDownloader
 from composer.utils import (dist, format_name_with_dist_and_time, parse_uri,
@@ -79,13 +80,27 @@ def _save_checkpoint(self, state: State, logger: Logger):
             'state': state.state_dict(),
             'rng': reproducibility.get_rng_state()
         }
-        if not self.keep_optimizers:
-            state_dict['state'].pop('optimizers')
+        # Remove sharded model and optimizer state dicts
+        state_dict['state'].pop('optimizers')
+        state_dict['state'].pop('model')
+
+        # Add in unsharded model params.
         with fsdp_state_dict_type_context(state.model,
                                           state_dict_type='full'):
             state_dict['state']['model'] = state.model.state_dict()
-            if dist.get_global_rank() == 0:
-                torch.save(state_dict, save_path)
+
+        # Add in unsharded optimizer state dict.
+        if self.keep_optimizers:
+            optimizer = state.optimizers[0]
+            state_dict['state']['optimizers'] = {
+                type(optimizer).__qualname__:
+                    fsdp_get_optim_state_dict(state.model,
+                                              optimizer,
+                                              state_dict_type='full')
+            }
+        if dist.get_global_rank() == 0:
+            torch.save(state_dict, save_path)
+
         if self.upload_to_object_store and self.remote_ud is not None and dist.get_global_rank(
         ) == 0:
             remote_file_name = str(Path(save_dir) / Path(filename))
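The net effect of the fix above is a checkpoint in which both the model and optimizer entries hold full, unsharded state, with the optimizer state keyed by the optimizer class's __qualname__, and a single torch.save issued from global rank 0 outside the FSDP context. A runnable analogue of the resulting layout, with no FSDP or Composer involved (the toy model, optimizer, and file name are illustrative, and the 'rng' entry is omitted):

    import torch

    # Stand-ins for what the callback reads off Composer's `state`.
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

    # Same nesting the patched callback writes: full model state plus
    # optimizer state keyed by the optimizer's __qualname__ ('AdamW').
    state_dict = {
        'state': {
            'model': model.state_dict(),
            'optimizers': {
                type(optimizer).__qualname__: optimizer.state_dict(),
            },
        },
    }
    torch.save(state_dict, 'mono_checkpoint.pt')  # rank 0 only under FSDP

    loaded = torch.load('mono_checkpoint.pt')
    assert 'AdamW' in loaded['state']['optimizers']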
From d848d3daf3be1dc55fbfaf5b092e560ba00ae3db Mon Sep 17 00:00:00 2001
From: bandish-shah <86627118+bandish-shah@users.noreply.github.com>
Date: Wed, 21 Jun 2023 14:00:42 -0700
Subject: [PATCH 4/9] Add step to free up some disk space on the worker (#350)

---
 .github/workflows/docker.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 67ccd5ddd1..28084b7fb4 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -17,6 +17,14 @@ jobs:
             base_image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04

     steps:
+      - name: Maximize Build Space on Worker
+        uses: easimon/maximize-build-space@v4
+        with:
+          overprovision-lvm: true
+          remove-dotnet: true
+          remove-android: true
+          remove-haskell: true
+
       - name: Checkout
         uses: actions/checkout@v3

From 2167c0e6fd0a7b837c230f26e8be974bb2951be6 Mon Sep 17 00:00:00 2001
From: Daniel King <43149077+dakinggg@users.noreply.github.com>
Date: Wed, 21 Jun 2023 17:44:54 -0700
Subject: [PATCH 5/9] Filter out sequences where prompt is longer than max
 length, rather than dropping them on the fly later (#348)

* attempt fix for hf side

* fix

* fix cpu count

---
 llmfoundry/data/finetuning/dataloader.py |  5 ++++-
 llmfoundry/data/finetuning/tasks.py      | 20 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/llmfoundry/data/finetuning/dataloader.py b/llmfoundry/data/finetuning/dataloader.py
index 9b27f4f0d0..0db8d98b49 100644
--- a/llmfoundry/data/finetuning/dataloader.py
+++ b/llmfoundry/data/finetuning/dataloader.py
@@ -145,7 +145,10 @@ def build_finetuning_dataloader(cfg: DictConfig, tokenizer: Tokenizer,
         )

     else:
-        dataset = dataset_constructor.build_from_hf(cfg.dataset, tokenizer)
+        dataset = dataset_constructor.build_from_hf(
+            cfg.dataset,
+            max_seq_len=cfg.dataset.max_seq_len,
+            tokenizer=tokenizer)

     collate_fn, dataloader_batch_size = _build_collate_fn(
         cfg.dataset, tokenizer, device_batch_size)
diff --git a/llmfoundry/data/finetuning/tasks.py b/llmfoundry/data/finetuning/tasks.py
index 56be18532d..2ff151de0c 100644
--- a/llmfoundry/data/finetuning/tasks.py
+++ b/llmfoundry/data/finetuning/tasks.py
@@ -33,6 +33,7 @@ def preprocessing_fn(example: Dict) -> Dict[str, str]:

 import importlib
 import os
+import warnings
 from typing import Any, Callable, Dict, Optional, Union

 import datasets as hf_datasets
@@ -220,11 +221,15 @@ def get_preprocessing_fn_from_str(self,

         return preprocessing_fn

-    def build_from_hf(self, cfg: DictConfig, tokenizer: Tokenizer):
+    def build_from_hf(self, cfg: DictConfig, max_seq_len: int,
+                      tokenizer: Tokenizer):
         """Load a HuggingFace Datasets, preprocess, and tokenize.

+        Note: This function will drop examples where the prompt is longer than the max_seq_len
+
         Args:
             cfg (DictConfig): The dataset configuration.
+            max_seq_len (int): The maximum sequence length. Examples with prompts longer than this will be dropped.
             tokenizer (Tokenizer): The tokenizer to be used for tokenizing the dataset.

         Returns:
@@ -248,9 +253,20 @@ def dataset_mapper(example: Dict):
             dataset_mapper,
             batched=False,
             remove_columns=columns_to_remove,
+            num_proc=max(os.cpu_count() - 2, 1),
         )
+        prompt_length_filtered_dataset = tokenized_dataset.filter(
+            lambda example: len(example['input_ids']) < max_seq_len,
+            num_proc=max(os.cpu_count() - 2, 1))
+
+        examples_removed = len(tokenized_dataset) - len(
+            prompt_length_filtered_dataset)
+        if examples_removed > 0:
+            warnings.warn(
+                f'Dropped {examples_removed} examples where the prompt was longer than {max_seq_len}.'
+            )

-        return tokenized_dataset
+        return prompt_length_filtered_dataset

     def build_from_streaming(self, *args: Any, **kwargs: Any):
         return StreamingFinetuningDataset(*args, **kwargs)
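Although the next commit reverts this change, the mechanism is worth spelling out: the filter runs once over the tokenized dataset at preprocessing time, in parallel worker processes, rather than discarding over-length prompts batch by batch during training. A standalone sketch of the same idea against Hugging Face datasets directly, with toy data and an artificially small threshold:

    from datasets import Dataset

    max_seq_len = 4
    toy = Dataset.from_dict({'input_ids': [[1, 2], [9, 8, 7, 6, 5]]})

    # Keep only examples whose tokenized prompt fits under max_seq_len.
    filtered = toy.filter(lambda ex: len(ex['input_ids']) < max_seq_len)

    examples_removed = len(toy) - len(filtered)
    print(f'Dropped {examples_removed} of {len(toy)} examples at or over '
          f'{max_seq_len} tokens.')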
From 2f1bf410e5780fa4274dd1cbb66f44870ed3bc69 Mon Sep 17 00:00:00 2001
From: Cody Blakeney
Date: Wed, 21 Jun 2023 23:56:00 -0500
Subject: [PATCH 6/9] Revert "Filter out sequences where prompt is longer than
 max length, rather than dropping them on the fly later (#348)" (#354)

This reverts commit 2167c0e6fd0a7b837c230f26e8be974bb2951be6.

---
 llmfoundry/data/finetuning/dataloader.py |  5 +----
 llmfoundry/data/finetuning/tasks.py      | 20 ++------------------
 2 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/llmfoundry/data/finetuning/dataloader.py b/llmfoundry/data/finetuning/dataloader.py
index 0db8d98b49..9b27f4f0d0 100644
--- a/llmfoundry/data/finetuning/dataloader.py
+++ b/llmfoundry/data/finetuning/dataloader.py
@@ -145,10 +145,7 @@ def build_finetuning_dataloader(cfg: DictConfig, tokenizer: Tokenizer,
         )

     else:
-        dataset = dataset_constructor.build_from_hf(
-            cfg.dataset,
-            max_seq_len=cfg.dataset.max_seq_len,
-            tokenizer=tokenizer)
+        dataset = dataset_constructor.build_from_hf(cfg.dataset, tokenizer)

     collate_fn, dataloader_batch_size = _build_collate_fn(
         cfg.dataset, tokenizer, device_batch_size)
diff --git a/llmfoundry/data/finetuning/tasks.py b/llmfoundry/data/finetuning/tasks.py
index 2ff151de0c..56be18532d 100644
--- a/llmfoundry/data/finetuning/tasks.py
+++ b/llmfoundry/data/finetuning/tasks.py
@@ -33,7 +33,6 @@ def preprocessing_fn(example: Dict) -> Dict[str, str]:

 import importlib
 import os
-import warnings
 from typing import Any, Callable, Dict, Optional, Union

 import datasets as hf_datasets
@@ -221,15 +220,11 @@ def get_preprocessing_fn_from_str(self,

         return preprocessing_fn

-    def build_from_hf(self, cfg: DictConfig, max_seq_len: int,
-                      tokenizer: Tokenizer):
+    def build_from_hf(self, cfg: DictConfig, tokenizer: Tokenizer):
         """Load a HuggingFace Datasets, preprocess, and tokenize.

-        Note: This function will drop examples where the prompt is longer than the max_seq_len
-
         Args:
             cfg (DictConfig): The dataset configuration.
-            max_seq_len (int): The maximum sequence length. Examples with prompts longer than this will be dropped.
             tokenizer (Tokenizer): The tokenizer to be used for tokenizing the dataset.

         Returns:
@@ -253,20 +248,9 @@ def dataset_mapper(example: Dict):
             dataset_mapper,
             batched=False,
             remove_columns=columns_to_remove,
-            num_proc=max(os.cpu_count() - 2, 1),
         )
-        prompt_length_filtered_dataset = tokenized_dataset.filter(
-            lambda example: len(example['input_ids']) < max_seq_len,
-            num_proc=max(os.cpu_count() - 2, 1))
-
-        examples_removed = len(tokenized_dataset) - len(
-            prompt_length_filtered_dataset)
-        if examples_removed > 0:
-            warnings.warn(
-                f'Dropped {examples_removed} examples where the prompt was longer than {max_seq_len}.'
-            )
-        return prompt_length_filtered_dataset
+        return tokenized_dataset

     def build_from_streaming(self, *args: Any, **kwargs: Any):
         return StreamingFinetuningDataset(*args, **kwargs)

From af209b380cd1cd288b5c963cec04d1e20c3439c9 Mon Sep 17 00:00:00 2001
From: Sam Havens
Date: Wed, 21 Jun 2023 22:26:46 -0700
Subject: [PATCH 7/9] Remote JSONL IFT data (#275)

* support remote jsonl files for IFT datasets

* improve docstring

* add support for other extensions

* don't duplicate validation check

* build dataset before tmpdir deletes

* parse uri

* only rank 0 download

* only download rank 0

* better error

* break earlier

* log more

* more reasonable destination str

* use data files format

* name points to a preprocessing function I guess

* debugging

* always something with HF

* json vs jsonl [no-ci]

* if hf wants it local, make it local [no-ci]

* back to tempfile [no-ci]

* debug

* debug hfds [no-ci]

* ... [no-ci]

* don't rename file

* use tempfile again

* updt

---------

Co-authored-by: Vitaliy Chiley <6439018+vchiley@users.noreply.github.com>
Co-authored-by: root
---
 llmfoundry/data/finetuning/dataloader.py | 54 ++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/llmfoundry/data/finetuning/dataloader.py b/llmfoundry/data/finetuning/dataloader.py
index 9b27f4f0d0..4fcb7c4f25 100644
--- a/llmfoundry/data/finetuning/dataloader.py
+++ b/llmfoundry/data/finetuning/dataloader.py
@@ -2,10 +2,12 @@
 # SPDX-License-Identifier: Apache-2.0

 import logging
+import os
+import tempfile
 from typing import Union

 import torch
-from composer.utils import dist
+from composer.utils import dist, get_file, parse_uri
 from omegaconf import DictConfig
 from torch.utils.data import DataLoader
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
@@ -38,7 +40,9 @@ def build_finetuning_dataloader(cfg: DictConfig, tokenizer: Tokenizer,
     ---
     *** HuggingFace dataset config fields ***
     cfg.dataset.hf_name (str, optional): The name of the HuggingFace dataset
-        to use.
+        to use. Can also be a remote http(s) directory or object store bucket
+        containing the file {split}.jsonl in the format (prompt, response),
+        in which case the builder will create a HuggingFace dataset.
     cfg.dataset.hf_kwargs (DictConfig, optional): Additional kwargs to pass
         to `datasets.load_dataset`, which can be used to load a dataset
         from local files.
@@ -145,7 +149,51 @@ def build_finetuning_dataloader(cfg: DictConfig, tokenizer: Tokenizer,
         )

     else:
-        dataset = dataset_constructor.build_from_hf(cfg.dataset, tokenizer)
+        backend, _, _ = parse_uri(cfg.dataset.hf_name)
+        if backend not in ['', None]:
+            if cfg.dataset.get('split') is None:
+                raise ValueError(
+                    'When using a HuggingFace dataset from a URL, you must set the ' + \
+                    '`split` key in the dataset config.'
+                )
+            supported_extensions = ['jsonl', 'csv', 'parquet']
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                for extension in supported_extensions:
+                    name = f'{cfg.dataset.hf_name.strip("/")}/{cfg.dataset.split}.{extension}'
+                    destination = str(
+                        os.path.abspath(
+                            f'{tmp_dir}/{cfg.dataset.split}.{extension}'))
+                    try:
+                        with dist.run_local_rank_zero_first():
+                            get_file(name, destination, overwrite=True)
+                    except FileNotFoundError as e:
+                        if extension == supported_extensions[-1]:
+                            raise FileNotFoundError(
+                                f'Could not find a {cfg.dataset.split} file with any of ' + \
+                                f'the supported extensions: {supported_extensions}\n' + \
+                                f'at {cfg.dataset.hf_name}/{cfg.dataset.split}'
+                            ) from e
+                        else:
+                            print(
+                                f'Could not find {name}, looking for another extension'
+                            )
+                        continue
+                    # 'json' causes special behavior in the dataset constructor
+                    cfg.dataset.hf_name = extension if extension != 'jsonl' else 'json'
+                    kwargs = cfg.dataset.get('hf_kwargs', {})
+                    kwargs['data_files'] = destination
+                    cfg.dataset['hf_kwargs'] = kwargs
+                    print(cfg.dataset)
+                    dataset = dataset_constructor.build_from_hf(
+                        cfg.dataset,
+                        tokenizer=tokenizer,
+                    )
+                    break
+        else:
+            dataset = dataset_constructor.build_from_hf(
+                cfg.dataset,
+                tokenizer=tokenizer,
+            )

     collate_fn, dataloader_batch_size = _build_collate_fn(
         cfg.dataset, tokenizer, device_batch_size)
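In use, the new code path is driven entirely by the dataset config: when hf_name parses to a remote backend, split becomes mandatory, and the loader looks for {split}.jsonl, .csv, or .parquet in turn, downloading the first match into a temporary directory and loading it through data_files. A sketch of a config fragment that would exercise this path (the bucket URI is a placeholder, and the other dataloader fields a real config needs are omitted):

    from omegaconf import OmegaConf

    # Hypothetical dataset config; 's3://my-bucket/my-ift-data' is a placeholder
    # for a remote directory containing train.jsonl with prompt/response pairs.
    cfg = OmegaConf.create({
        'dataset': {
            'hf_name': 's3://my-bucket/my-ift-data',
            'split': 'train',  # required whenever hf_name is remote
            'max_seq_len': 2048,
        },
    })

    # Passed to build_finetuning_dataloader, this would fetch train.jsonl and
    # hand it to the dataset constructor as hf_name='json' with data_files set.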
From 7731106750ba3bef965ae3c5a32e8566aea5493c Mon Sep 17 00:00:00 2001
From: Abhi Venigalla <77638579+abhi-mosaic@users.noreply.github.com>
Date: Thu, 22 Jun 2023 09:04:25 -0600
Subject: [PATCH 8/9] Add MPT-30B to README (#356)

---
 README.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index aba62029cb..2acdcf882c 100644
--- a/README.md
+++ b/README.md
@@ -41,15 +41,18 @@ You'll find in this repo:

 # MPT

-MPT-7B is a GPT-style model, and the first in the MosaicML Foundation Series of models. Trained on 1T tokens of a MosaicML-curated dataset, MPT-7B is open-source, commercially usable, and equivalent to LLaMa 7B on evaluation metrics. The MPT architecture contains all the latest techniques on LLM modeling -- Flash Attention for efficiency, Alibi for context length extrapolation, and stability improvements to mitigate loss spikes. The base model and several variants, including a 64K context length fine-tuned model (!!) are all available:
+Mosaic Pretrained Transformers (MPT) are GPT-style models with some special features -- Flash Attention for efficiency, ALiBi for context length extrapolation, and stability improvements to mitigate loss spikes. As part of MosaicML's Foundation series, we have open-sourced several MPT models:

 | Model              | Context Length | Download                                           | Demo                                                              | Commercial use? |
 |--------------------|----------------|----------------------------------------------------|------------------------------------------------------------------|-----------------|
+| MPT-30B            | 8192           | https://huggingface.co/mosaicml/mpt-30b            |                                                                   | Yes             |
+| MPT-30B-Instruct   | 8192           | https://huggingface.co/mosaicml/mpt-30b-instruct   |                                                                   | Yes             |
+| MPT-30B-Chat       | 8192           | https://huggingface.co/mosaicml/mpt-30b-chat       | [Demo](https://huggingface.co/spaces/mosaicml/mpt-30b-chat)       | No              |
 | MPT-7B             | 2048           | https://huggingface.co/mosaicml/mpt-7b             |                                                                   | Yes             |
-| MPT-7B-Instruct    | 2048           | https://huggingface.co/mosaicml/mpt-7b-instruct    | [Demo](https://huggingface.co/spaces/mosaicml/mpt-7b-instruct)    | Yes             |
+| MPT-7B-Instruct    | 2048           | https://huggingface.co/mosaicml/mpt-7b-instruct    |                                                                   | Yes             |
 | MPT-7B-Chat        | 2048           | https://huggingface.co/mosaicml/mpt-7b-chat        | [Demo](https://huggingface.co/spaces/mosaicml/mpt-7b-chat)        | No              |
-| MPT-7B-StoryWriter | 65536          | https://huggingface.co/mosaicml/mpt-7b-storywriter | [Demo](https://huggingface.co/spaces/mosaicml/mpt-7b-storywriter) | Yes             |
+| MPT-7B-StoryWriter | 65536          | https://huggingface.co/mosaicml/mpt-7b-storywriter |                                                                   | Yes             |

 To try out these models locally, [follow the instructions](https://github.com/mosaicml/llm-foundry/tree/main/scripts/inference#interactive-generation-with-modelgenerate) in `scripts/inference/README.md` to prompt HF models using our [hf_generate.py](https://github.com/mosaicml/llm-foundry/blob/main/scripts/inference/hf_generate.py) or [hf_chat.py](https://github.com/mosaicml/llm-foundry/blob/main/scripts/inference/hf_chat.py) scripts.
@@ -71,6 +74,7 @@ Tutorial videos from the community:
 Something missing? Contribute with a PR!

 # Latest News
+* [Blog: MPT-30B: Raising the bar for open-source foundation models](https://www.mosaicml.com/blog/mpt-30b)
 * [Blog: Introducing MPT-7B](https://www.mosaicml.com/blog/mpt-7b)
 * [Blog: Benchmarking LLMs on H100](https://www.mosaicml.com/blog/coreweave-nvidia-h100-part-1)
 * [Blog: Blazingly Fast LLM Evaluation](https://www.mosaicml.com/blog/llm-evaluation-for-icl)
From 38361a6f56bf05a1d90d03b2d0bbf0a1cc28e7bc Mon Sep 17 00:00:00 2001
From: Mihir Patel
Date: Thu, 22 Jun 2023 09:34:36 -0700
Subject: [PATCH 9/9] Codeql on PRs (#352)

* Codeql on PRs

* allow empty

---
 .github/workflows/codeql-analysis.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 07197a82e1..7fb270db97 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -14,6 +14,9 @@ name: 'CodeQL'
 on:
   push:
     branches: [main]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [main]
   schedule:
     - cron: '0 9 * * 1'  # Every Monday at 09:00 (9:00 AM)