Merge pull request #89 from RWKV/main
pulling main updates to rwkv-x-playground
PicoCreator authored Apr 10, 2024
2 parents 41c7d95 + 2528086 commit b9f9093
Showing 146 changed files with 362,593 additions and 282 deletions.
38 changes: 19 additions & 19 deletions .github/workflows/docker-build.yml
@@ -1,15 +1,15 @@
-name: Docker Env Image (cuda-11-8)
+name: Docker Env Image (cuda-12-1)
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "rwkv-x-*" ]
     # Publish semver tags as releases.
     tags: [ 'v*.*.*' ]
     # Reduce build to only for the valid path
     paths:
       - docker/**
   pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "rwkv-x-*" ]
     paths:
       - docker/**

@@ -21,7 +21,7 @@ env:

 jobs:
   build_env:
-    name: Docker Env Image (cuda-11-8)
+    name: Docker Env Image (cuda-12-1)
 
     runs-on: ubuntu-latest
     permissions:
@@ -71,9 +71,9 @@ jobs:
       # https://github.com/sigstore/cosign-installer
       - name: Install cosign
         if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0
-        with:
-          cosign-release: 'v1.11.0'
+        uses: sigstore/cosign-installer@v3.3.0
+        # with:
+        #   cosign-release: 'v2.2.0'
 
       # Workaround: https://github.com/docker/build-push-action/issues/461
       - name: Setup Docker buildx
@@ -103,20 +103,20 @@ jobs:
       # Build and push Docker image with Buildx (don't push on PR)
       # https://github.com/docker/build-push-action
-      - name: Build and push Docker image (env-cuda-11-8)
+      - name: Build and push Docker image (env-cuda-12-1)
         id: build-and-push
         uses: docker/build-push-action@v4
         with:
-          context: "{{defaultContext}}:docker/env-cuda-11-8"
+          context: "{{defaultContext}}:docker/env-cuda-12-1"
           push: ${{ github.event_name != 'pull_request' }} # Don't push on PR
-          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:env-cuda-11-8
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:env-cuda-12-1
           # tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha,src=docker/env-cuda-11-8
+          cache-from: type=gha,src=docker/env-cuda-12-1
           cache-to: type=gha,mode=max
 
   build_runner:
-    name: Docker Env Image (github-worker-11-8)
+    name: Docker Env Image (github-worker-12-1)
 
     needs: build_env
     runs-on: ubuntu-latest
@@ -167,9 +167,9 @@ jobs:
       # https://github.com/sigstore/cosign-installer
       - name: Install cosign
         if: github.event_name != 'pull_request'
-        uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0
-        with:
-          cosign-release: 'v1.11.0'
+        uses: sigstore/cosign-installer@v3.3.0
+        # with:
+        #   cosign-release: 'v2.2.0'
 
       # Workaround: https://github.com/docker/build-push-action/issues/461
       - name: Setup Docker buildx
@@ -199,14 +199,14 @@ jobs:
       # Build and push Docker image with Buildx (don't push on PR)
       # https://github.com/docker/build-push-action
-      - name: Build and push Docker image (github-worker-cuda-11-8)
+      - name: Build and push Docker image (github-worker-cuda-12-1)
         id: build-and-push
         uses: docker/build-push-action@v4
         with:
-          context: "{{defaultContext}}:docker/github-worker-cuda-11-8"
+          context: "{{defaultContext}}:docker/github-worker-cuda-12-1"
           push: ${{ github.event_name != 'pull_request' }} # Don't push on PR
-          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:github-worker-cuda-11-8
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:github-worker-cuda-12-1
           # tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha,src=docker/github-worker-cuda-11-8
+          cache-from: type=gha,src=docker/github-worker-cuda-12-1
           cache-to: type=gha,mode=max
1 change: 1 addition & 0 deletions .gitignore
@@ -150,6 +150,7 @@ dmypy.json
 # and standard hidden files ignore. Including
 # example files generated via notebook tutorials
 .*
+scratch/
 model/
 dataset/
 datapath/
6 changes: 3 additions & 3 deletions README.md
@@ -42,9 +42,9 @@ conda update conda
 conda create -n rwkv-infctx python=3.11 pip
 conda activate rwkv-infctx
 
-# Install pytorch (>=2.0.1)
-conda install -y pytorch==2.0.1 torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
-python -m pip install lightning==2.0.5 deepspeed==0.10.0
+# Install pytorch (>=2.1.2)
+conda install -y pytorch==2.1.2 torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
+python -m pip install lightning==2.1.3 deepspeed==0.12.6
 
 # Currently for torch.compile + 3.11 to work, on some platforms you will need the nightly build
 # if so you may need to try the following instead - this is considered highly "unstable"
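
As a quick sanity check after following the updated README instructions above, the expected versions can be confirmed from Python. This is an illustrative aside rather than part of the commit:

import torch
import lightning
import deepspeed

# Expected after the updated setup: torch 2.1.2 built for CUDA 12.1,
# lightning 2.1.3 and deepspeed 0.12.6 (per the README change above)
print(torch.__version__)           # e.g. 2.1.2
print(torch.version.cuda)          # e.g. 12.1
print(torch.cuda.is_available())   # True on a machine with a working GPU driver
print(lightning.__version__)       # e.g. 2.1.3
print(deepspeed.__version__)       # e.g. 0.12.6
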
50 changes: 46 additions & 4 deletions RWKV-v5/config-example.yaml
@@ -325,6 +325,17 @@ model:
   # dim_att: null
   # dim_ffn: null
 data:
+  # Skip the datapath setup
+  #
+  # ignored if using the preload_datapath.py, useful for speeding up the trainer startup
+  # provided you have your datasets all properly preinitialized
+  # ---
+  # skip_datapath_setup: True
+
+  # Datapack config yaml to use instead, this overwrites all other settings below
+  # ---
+  # datapack_config_path: null
+
   # dataset_path for the prebuilt dataset, using HF `load_from_disk()`
   #
   # Use this if you have built your own dataset and saved it with `save_to_disk()`
@@ -334,6 +345,23 @@ data:
   # If using relative path, this should be relative to the trainer script path
   data_path: /path/to/store/your/data_path/
 
+  # Data path storage options, this is used to support cloud storage
+  # via the huggingface dataset API. See:
+  # https://huggingface.co/docs/datasets/v2.16.1/en/filesystems#amazon-s3
+  #
+  # Note: As of Jan 2023, these options have only been tested to work with AWS S3 and Backblaze. YMMV
+  # For S3 bucket support you will also need to install s3fs: `python3 -m pip install s3fs`
+  #
+  # If you want to reduce the risk of accidental key/secret commits, you can use the
+  # `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables instead
+  #
+  # For data_path, it should use the `s3://bucket-name/subpath` format
+  # ---
+  # data_path_storage_options:
+  #   key: <example S3 key>
+  #   secret: <example S3 secret>
+  #   endpoint_url: <example S3 endpoint>
+
   # Otherwise provide the source path, which is used as the huggingface dataset path
   # this will be used to populate the dataset_path
   #
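
For context on the new data_path_storage_options block above: the key / secret / endpoint_url entries correspond to the storage_options dict used by the Hugging Face datasets / fsspec APIs. A minimal sketch, not part of this commit, with placeholder bucket name and credentials:

from datasets import load_from_disk

# Placeholders mirroring data_path_storage_options in the config above
storage_options = {
    "key": "<S3 access key>",          # or set AWS_ACCESS_KEY_ID instead
    "secret": "<S3 secret key>",       # or set AWS_SECRET_ACCESS_KEY instead
    "endpoint_url": "<S3 endpoint>",   # needed for non-AWS providers such as Backblaze
}

# data_path uses the s3://bucket-name/subpath format
dataset = load_from_disk("s3://bucket-name/subpath", storage_options=storage_options)
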
@@ -349,6 +377,10 @@ data:
   # source: "teven/enwiki_00k" # Hugging face dataset
   # source: text # Text mode, used with source_data_dir
 
+  # Dataset split to use from HF dataset
+  # ---
+  # source_dataset_split: train
+
   # Additional source dataset params, used to grab subsets of the dataset
   # ---
   # source_dataset_params:
@@ -395,6 +427,7 @@ data:

   # Custom text column to use, useful for datasets with alternative training column labels
   # This is checked before multi column merging, default is null (disabled)
+  # If set, this takes priority
   # eg: 'code'
   # ---
   # custom_text_key: 'code'
@@ -407,19 +440,18 @@ data:
   # or throw an error if the default fallback is not found
   #
   # IMPORTANT NOTE: as newlines are commonly used for multi_column_suffix, etc.
-  # you should use single quotes to ensure such values dun get escaped.
-  # eg. multi_column_suffix: ['\n\n']
+  # you should use double quotes to ensure such values dun get escaped.
+  # eg. multi_column_suffix: ["\n\n"]
   #
   # See: https://github.com/RWKV/RWKV-infctx-trainer/issues/34
   # Need to use " or the new lines won't be tokenized properly
   # ---
   # multi_column_keys: ["instruction", "input", "output"]
   # multi_column_prefix: ["Instruction:\n", "Input:\n", "Output:\n"]
-  # multi_column_suffix: ["\n\n", "\n\n", "\n\n"]
+  # multi_column_suffix: ['', '', '']
   # multi_column_train_mask: [true, false, true]
   # multi_column_separator: "\n\n"
 
 
   # Conversation merging process
   # useful for merging full conversational datasets, into single documents
   # default is off, (or set conversation_key to [])
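
A short aside on the quoting change above, not taken from the repo: in YAML, a single-quoted '\n' stays as a literal backslash-n, while a double-quoted "\n" becomes a real newline, which is what the tokenizer needs (see issue #34 referenced above):

import yaml

single = yaml.safe_load(r"multi_column_suffix: ['\n\n']")
double = yaml.safe_load(r'multi_column_suffix: ["\n\n"]')

print(repr(single["multi_column_suffix"][0]))  # '\\n\\n' -> literal backslash-n pairs
print(repr(double["multi_column_suffix"][0]))  # '\n\n'   -> two real newline characters
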
@@ -504,6 +536,16 @@ data:
   # this can be used together with sort_by_length, otherwise a shuffle will be done
   packing_in_sequence: False
 
+  # ----------------------------
+  # Special use case flags
+  # ----------------------------
+
+  # Reverse the training dataset order before saving, this is useful for
+  # optimizing the dataset packing process, when using packing_in_sequence
+  # and sort_by_length desc order together
+  reverse_train_dataset_before_save: False
+
+
 # Path to the current checkpoint to continue training from
 # this should be the directory path, and ends with `.ckpt/`
 ckpt_path: null
(The remaining changed files are not shown.)
