Commit 7b52bcc

Merge pull request PaddlePaddle#663 from zhanghan1992/develop
change the path of ernie-gram and fix mrc
nbcc committed May 20, 2021
2 parents af81999 + cd47f55 commit 7b52bcc
Showing 20 changed files with 65 additions and 31 deletions.
File renamed without changes.
2 changes: 1 addition & 1 deletion ernie_gram/README.en.md → ernie-gram/README.en.md
@@ -36,7 +36,7 @@
 wget https://ernie-github.cdn.bcebos.com/data-xnli.tar.gz
 tar xf data-xnli.tar.gz
 cd ..
 #demo for NLI task
-sh ernie_gram/run_cls.sh ernie_gram/task_configs/xnli_conf
+sh run_cls.sh task_configs/xnli_conf
 ```

### Setup
File renamed without changes.
2 changes: 1 addition & 1 deletion ernie_gram/README.zh.md → ernie-gram/README.zh.md
@@ -38,7 +38,7 @@
 wget https://ernie-github.cdn.bcebos.com/data-xnli.tar.gz
 tar xf data-xnli.tar.gz
 cd ..
 #demo for NLI task
-sh ernie_gram/run_cls.sh ernie_gram/task_configs/xnli_conf
+sh run_cls.sh task_configs/xnli_conf
 ```


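Both README diffs make the same one-line fix: with the package directory renamed from `ernie_gram` to `ernie-gram`, the demo is launched from inside that directory, so the command drops the `ernie_gram/` path prefix (`sh run_cls.sh task_configs/xnli_conf`).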
File renamed without changes.
@@ -23,6 +23,8 @@
 import numpy as np
 import logging
 #from visualdl import LogWriter
+import sys
+sys.path.append("../")
 
 from pathlib import Path
 import paddle as P
@@ -32,8 +34,8 @@
 #from model.bert import BertConfig, BertModelLayer
 from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification
 from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
-from ernie_gram.optimization import AdamW
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from optimization import AdamW
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 
 log.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
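This hunk (and the matching ones in finetune_mrc.py, finetune_ner.py, and optimization.py below) swaps package-relative `ernie_gram.*` imports for plain top-level imports, while `sys.path.append("../")` keeps the shared `ernie` package importable. A minimal sketch of how the new layout resolves, assuming the scripts are launched with `ernie-gram/` as the working directory (as the run_*.sh launchers below now do); this sketch is not part of the commit:

```python
# Sketch only: import resolution assuming CWD is <repo>/ernie-gram/.
import sys
sys.path.append("../")  # adds <repo>/ so the `ernie` package resolves

from optimization import AdamW           # <repo>/ernie-gram/optimization.py
from utils import create_if_not_exists   # <repo>/ernie-gram/utils.py
from ernie.modeling_ernie import ErnieModel  # <repo>/ernie/modeling_ernie.py
```
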
56 changes: 42 additions & 14 deletions ernie_gram/finetune_mrc.py → ernie-gram/finetune_mrc.py
@@ -30,6 +30,8 @@
 import argparse
 from functools import partial
 from io import open
+import sys
+sys.path.append("../")
 
 import numpy as np
 import logging
@@ -38,22 +40,23 @@
 
 from propeller import log
 import propeller.paddle as propeller
-from ernie_gram.optimization import AdamW
+from optimization import AdamW
 
 from ernie.modeling_ernie import ErnieModel, ErnieModelForQuestionAnswering
 from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
 #from ernie.optimization import AdamW, LinearDecay
 
-from ernie_gram.mrc import mrc_reader
-from ernie_gram.mrc import mrc_metrics
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from mrc import mrc_reader
+from mrc import mrc_metrics
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 
 log.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
 
 
-def evaluate(model, ds, all_examples, all_features, tokenizer, args):
-    dev_file = json.loads(open(args.dev_file, encoding='utf8').read())
+def evaluate(model, ds, all_examples, all_features, tokenizer, args, is_test=False):
+    dev_file = args.dev_file if not is_test else args.test_file
+    dev_file = json.loads(open(dev_file, encoding='utf8').read())
     with P.no_grad():
         log.debug('start eval')
         model.eval()
@@ -84,8 +87,8 @@ def evaluate(model, ds, all_examples, all_features, tokenizer, args):
     return f1, em
 
 
-def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
-          tokenizer, args):
+def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
+          tokenizer, args, test_dataset=None, test_examples=None, test_features=None, do_test=False):
     model = P.DataParallel(model)
 
     max_steps = args.max_steps
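
With the reworked signatures, a single `evaluate()` serves both splits; the `is_test` flag only switches which file is read. A sketch of the two call forms, taken from the calls later in this diff:

```python
# Dev split (default): evaluate() reads args.dev_file.
f1, em = evaluate(model, dev_dataset, dev_examples, dev_features,
                  tokenizer, args)
# Test split: evaluate() reads args.test_file (only valid when --test_file was given).
f1, em = evaluate(model, test_dataset, test_examples, test_features,
                  tokenizer, args, True)
```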
@@ -142,10 +145,14 @@ def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
             log.debug(msg)
 
         if env.dev_id == 0 and step % 100==0 and step:
+            print(step)
             f1, em = evaluate(model, dev_dataset, dev_examples,
                               dev_features, tokenizer, args)
-            log.debug('[step %d] eval result: f1 %.5f em %.5f' %
+            log.debug('[step %d] dev eval result: f1 %.5f em %.5f' %
                       (step, f1, em))
+            if do_test:
+                f1, em = evaluate(model, test_dataset, test_examples,
+                                  test_features, tokenizer, args, True)
+                log.debug('[step %d] test eval result: f1 %.5f em %.5f' %
+                          (step, f1, em))
     if env.dev_id == 0 and args.save_dir is not None:
         P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
@@ -177,7 +184,12 @@ def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
         type=str,
         required=True,
         help='data directory includes train / develop data')
-    parser.add_argument('--warmup_proportion', type=float, default=0.0)
+    parser.add_argument(
+        '--test_file',
+        type=str,
+        default=None,
+        help='data directory includes train / develop data')
+    parser.add_argument('--warmup_proportion', type=float, default=0.1)
     parser.add_argument('--lr', type=float, default=3e-5, help='learning rate')
     parser.add_argument(
         '--save_dir', type=Path, required=True, help='model output directory')
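
Two details worth noting in this hunk: the new `--test_file` flag reuses the `--dev_file` help string verbatim, and the default `warmup_proportion` rises from 0.0 to 0.1. A hypothetical clearer declaration for the flag (reworded help only, not part of the commit; type and default match the diff):

```python
# Hypothetical rewording of the help text; behavior is identical to the commit.
parser.add_argument(
    '--test_file',
    type=str,
    default=None,
    help='path to a test-set file; enables test evaluation when set')
```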
@@ -216,6 +228,10 @@ def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
     dev_examples = mrc_reader.read_files(args.dev_file, is_training=False)
     dev_features = mrc_reader.convert_example_to_features(
         dev_examples, args.max_seqlen, tokenizer, is_training=False)
+    if args.test_file:
+        test_examples = mrc_reader.read_files(args.test_file, is_training=False)
+        test_features = mrc_reader.convert_example_to_features(
+            test_examples, args.max_seqlen, tokenizer, is_training=False)
 
     log.info('train examples: %d, features: %d' %
              (len(train_examples), len(train_features)))
@@ -235,16 +251,28 @@ def map_fn(unique_id, example_index, doc_span_index, tokens,
 
     dev_dataset = propeller.data.Dataset.from_list(dev_features).map(
         map_fn).padded_batch(args.bsz)
 
     model = ErnieModelForQuestionAnswering.from_pretrained(
         args.from_pretrained, name='')
 
-    train(model, train_dataset, dev_dataset, dev_examples, dev_features,
-          tokenizer, args)
+    if args.test_file:
+        test_dataset = propeller.data.Dataset.from_list(test_features).map(
+            map_fn).padded_batch(args.bsz)
+        train(model, train_dataset, dev_dataset, dev_examples, dev_features,
+              tokenizer, args, test_dataset, test_examples, test_features, True)
+    else:
+        train(model, train_dataset, dev_dataset, dev_examples, dev_features,
+              tokenizer, args)
+
+
 
     if env.dev_id == 0:
         f1, em = evaluate(model, dev_dataset, dev_examples, dev_features,
                           tokenizer, args)
-        log.debug('final eval result: f1 %.5f em %.5f' % (f1, em))
+        log.debug('final dev eval result: f1 %.5f em %.5f' % (f1, em))
+        if args.test_file:
+            f1, em = evaluate(model, test_dataset, test_examples, test_features,
+                              tokenizer, args, True)
+            log.debug('final test eval result: f1 %.5f em %.5f' % (f1, em))
     if env.dev_id == 0 and args.save_dir is not None:
         P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
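
Note that `run_mrc.sh` below is not extended to pass the new flag; a caller wanting test-set evaluation would supply `--test_file` to `finetune_mrc.py` directly, or add something like `--test_file $test_file` to the launcher with the variable defined in the sourced config (a hypothetical extension, not part of this diff).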
6 changes: 4 additions & 2 deletions ernie_gram/finetune_ner.py → ernie-gram/finetune_ner.py
@@ -29,6 +29,8 @@
 import multiprocessing
 import pickle
 import logging
+import sys
+sys.path.append("../")
 
 from sklearn.metrics import f1_score
 import paddle as P
@@ -39,10 +41,10 @@
 log.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
 
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification, ErnieModelForTokenClassification
 from ernie.tokenizing_ernie import ErnieTokenizer
-from ernie_gram.optimization import AdamW
+from optimization import AdamW
 
 parser = propeller.ArgumentParser('NER model with ERNIE')
 parser.add_argument('--max_seqlen', type=int, default=256)
File renamed without changes.
@@ -349,7 +349,10 @@ def make_results(vocab, all_examples, all_features, all_results, n_best_size,
 
     unique_id_to_result = {}
     for result in all_results:
-        unique_id_to_result[result.unique_id] = result
+        try:
+            unique_id_to_result[result.unique_id] = result
+        except:
+            continue
 
     _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
         "PrelimPrediction", [
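The bare `except` added above swallows any error raised while indexing results. If the intent is only to skip malformed entries, a narrower equivalent (a hypothetical variant, not in the commit) would be:

```python
# Skip results that lack a unique_id instead of catching every exception.
for result in all_results:
    unique_id = getattr(result, 'unique_id', None)
    if unique_id is None:
        continue
    unique_id_to_result[unique_id] = result
```
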
File renamed without changes.
4 changes: 3 additions & 1 deletion ernie_gram/optimization.py → ernie-gram/optimization.py
@@ -25,11 +25,13 @@
 import numpy as np
 import paddle as P
 import paddle.distributed.fleet as fleet
+import sys
+sys.path.append("../")
 from propeller.paddle.train.hooks import RunHook
 import paddle.fluid as F
 log = logging.getLogger(__name__)
 
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 
 class AdamW(P.optimizer.AdamW):
     """AdamW object for dygraph"""
2 changes: 1 addition & 1 deletion ernie_gram/run_cls.sh → ernie-gram/run_cls.sh
@@ -1,6 +1,6 @@
 source $1
 
-python3 -m paddle.distributed.launch ./ernie_gram/finetune_classifier_distributed.py \
+python3 -m paddle.distributed.launch ./finetune_classifier_distributed.py \
 --data_dir $data_dir \
 --max_steps $max_steps \
 --bsz $bsz \
2 changes: 1 addition & 1 deletion ernie_gram/run_mrc.sh → ernie-gram/run_mrc.sh
@@ -1,6 +1,6 @@
 source $1
 export CUDA_VISIBLE_DEVICES=0
-python3 -m paddle.distributed.launch ./ernie_gram/finetune_mrc.py \
+python3 -m paddle.distributed.launch ./finetune_mrc.py \
 --train_file $train_file \
 --dev_file $dev_file \
 --max_steps $max_steps \
2 changes: 1 addition & 1 deletion ernie_gram/run_ner.sh → ernie-gram/run_ner.sh
@@ -1,6 +1,6 @@
 source $1
 
-python3 -m paddle.distributed.launch ./ernie_gram/finetune_ner.py \
+python3 -m paddle.distributed.launch ./finetune_ner.py \
 --data_dir $data_dir \
 --max_steps $max_steps \
 --epoch $epoch \
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
9 changes: 3 additions & 6 deletions ernie/file_utils.py
@@ -68,12 +68,9 @@ def _fetch_from_remote(url,
         f = done_file.open('wb')
         f.close()
     else:
-        while True:
-            if done_file.exists():
-                break
-            else:
-                time.sleep(1)
+        while not done_file.exists():
+            time.sleep(1)
 
 
     log.debug('%s cached in %s' % (url, cached_dir))
     return cached_dir_model
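The rewritten wait in `_fetch_from_remote` is behavior-preserving: poll once a second until another process finishes writing the download marker. It still spins forever if the writer dies; a bounded variant could look like the sketch below (not part of this commit; `wait_for_done` and `timeout_s` are made-up names):

```python
import time
from pathlib import Path

def wait_for_done(done_file: Path, timeout_s: float = 600.0, poll_s: float = 1.0):
    """Block until done_file exists, or raise after timeout_s seconds."""
    deadline = time.time() + timeout_s
    while not done_file.exists():
        if time.time() > deadline:
            raise TimeoutError('gave up waiting for %s' % done_file)
        time.sleep(poll_s)
```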
