
Commit

clean up circular import
jacobfulano committed Aug 17, 2023
1 parent 20b3df1 commit 76a5c18
Showing 3 changed files with 12 additions and 11 deletions.
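All of the changes below follow one pattern: type annotations whose only purpose was static typing are dropped, so the annotated module no longer needs the import that created the cycle. As background, a minimal two-file sketch of the failure mode this avoids (module and class names here are hypothetical, not taken from llmfoundry):

```python
# models.py (hypothetical)
from registry import register   # runtime dependency on registry

class BaseModel:
    ...

# registry.py (hypothetical)
from models import BaseModel    # imported only to annotate a parameter

def register(model: BaseModel) -> None:
    ...

# Importing models fails with something like:
#   ImportError: cannot import name 'BaseModel' from partially initialized
#   module 'models' (most likely due to a circular import)
# Dropping the annotation lets registry.py drop its import of models,
# which is the same trade-off this commit makes.
```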
llmfoundry/data/finetuning/dataloader.py (2 changes: 1 addition & 1 deletion)
@@ -236,7 +236,7 @@ def _validate_config(dataset_cfg: DictConfig):
)


-def _build_hf_dataset_from_remote(cfg: DictConfig, tokenizer: Tokenizer):
+def _build_hf_dataset_from_remote(cfg: DictConfig, tokenizer):
"""Builds a dataset from a remote object store.
This function supports 'jsonl', 'csv', and 'parquet' file formats for the dataset. It will attempt to download
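Dropping the `Tokenizer` annotation removes the runtime import entirely. If keeping the annotation mattered, a common alternative (not what this commit does) is a `typing.TYPE_CHECKING` guard; a minimal sketch, with an illustrative import path rather than llmfoundry's actual one:

```python
from __future__ import annotations  # annotations are no longer evaluated at runtime

from typing import TYPE_CHECKING

from omegaconf import DictConfig

if TYPE_CHECKING:
    # Seen only by type checkers, so it cannot create a runtime import cycle.
    # Illustrative path; the real location of Tokenizer may differ.
    from llmfoundry.tokenizer_utils import Tokenizer


def _build_hf_dataset_from_remote(cfg: DictConfig, tokenizer: Tokenizer):
    """Builds a dataset from a remote object store."""
    ...
```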
llmfoundry/models/layers/mosaicbert_layers.py (20 changes: 10 additions & 10 deletions)
@@ -76,7 +76,7 @@ class BertEmbeddings(nn.Module):
This module ignores the `position_ids` input to the `forward` method.
"""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
self.word_embeddings = nn.Embedding(config.vocab_size,
config.hidden_size,
@@ -156,7 +156,7 @@ class BertUnpadSelfAttention(nn.Module):
See `forward` method for additional detail.
"""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
config, 'embedding_size'):
@@ -253,7 +253,7 @@ class BertSelfOutput(nn.Module):
BERT modules.
"""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
self.LayerNorm = nn.LayerNorm(config.hidden_size,
@@ -271,7 +271,7 @@ def forward(self, hidden_states: torch.Tensor,
class BertUnpadAttention(nn.Module):
"""Chains attention, Dropout, and LayerNorm for MosaicBERT."""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
self.self = BertUnpadSelfAttention(config)
self.output = BertSelfOutput(config)
@@ -322,7 +322,7 @@ class BertGatedLinearUnitMLP(nn.Module):
parameter size, MosaicBERT typically offers a net higher throughput than a Hugging Face BERT built from the same `config`.
"""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
self.config = config
self.gated_layers = nn.Linear(config.hidden_size,
@@ -358,7 +358,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class BertLayer(nn.Module):
"""Composes the MosaicBERT attention and FFN blocks into a single layer."""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super(BertLayer, self).__init__()
self.attention = BertUnpadAttention(config)
self.mlp = BertGatedLinearUnitMLP(config)
@@ -401,7 +401,7 @@ class BertEncoder(nn.Module):
at padded tokens, and pre-computes attention biases to implement ALiBi.
"""

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
layer = BertLayer(config)
self.layer = nn.ModuleList(
@@ -548,7 +548,7 @@ def forward(

class BertPooler(nn.Module):

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super(BertPooler, self).__init__()
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
self.activation = nn.Tanh()
@@ -566,7 +566,7 @@ def forward(self,

class BertPredictionHeadTransform(nn.Module):

-def __init__(self, config: BertConfig):
+def __init__(self, config):
super().__init__()
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
if isinstance(config.hidden_act, str):
@@ -587,7 +587,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
###################
class BertLMPredictionHead(nn.Module):

-def __init__(self, config: BertConfig, bert_model_embedding_weights):
+def __init__(self, config, bert_model_embedding_weights):
super().__init__()
self.transform = BertPredictionHeadTransform(config)
# The output weights are the same as the input embeddings, but there is
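The same approach would work for the `BertConfig` annotations above: with PEP 563 postponed evaluation, the name can stay in the signatures as a forward reference while the import is confined to type checking (again an alternative to what this commit does, not what it does). A minimal sketch reusing the `BertPooler` example from the diff; the source of `BertConfig` is shown from `transformers` only for illustration:

```python
from __future__ import annotations  # signature annotations become lazy strings

from typing import TYPE_CHECKING

import torch.nn as nn

if TYPE_CHECKING:
    # Followed only by type checkers; never imported at runtime, so it cannot
    # participate in an import cycle. Illustrative source for BertConfig; the
    # repository may get its config class from elsewhere.
    from transformers import BertConfig


class BertPooler(nn.Module):

    def __init__(self, config: BertConfig):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()
```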
llmfoundry/models/mpt/configuration_mpt.py (1 change: 1 addition & 0 deletions)
@@ -1,3 +1,4 @@
+
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

