Initial import
daniel-j-h committed Sep 27, 2019
1 parent ecfc96c commit a783234
Showing 11 changed files with 689 additions and 2 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
@@ -0,0 +1,6 @@
__pycache__
*.py[cod]

*.pth
*.pb
*.pkl
6 changes: 6 additions & 0 deletions .gitignore
@@ -0,0 +1,6 @@
__pycache__
*.py[cod]

*.pth
*.pb
*.pkl
2 changes: 2 additions & 0 deletions AUTHORS.md
@@ -0,0 +1,2 @@
Daniel J. Hofmann
Harsimrat Sandhawalia
17 changes: 17 additions & 0 deletions Dockerfile
@@ -0,0 +1,17 @@
FROM ubuntu:18.04

WORKDIR /usr/src/app

ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false"

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3 python3-pip python3-venv libglib2.0-0 && \
    rm -rf /var/lib/apt/lists/*

COPY requirements.txt .

RUN python3 -m venv /opt/venv && \
    python3 -m pip install pip==19.2.3 pip-tools==4.0.0 && \
    python3 -m piptools sync

COPY . .
File renamed without changes.
35 changes: 35 additions & 0 deletions Makefile
@@ -0,0 +1,35 @@
dockerimage ?= das/vmz
dockerfile ?= Dockerfile
srcdir ?= $(shell pwd)
datadir ?= $(shell pwd)

install:
	@docker build -t $(dockerimage) -f $(dockerfile) .

i: install


update:
	@docker build -t $(dockerimage) -f $(dockerfile) . --pull --no-cache

u: update


run:
	@docker run -it --rm -v $(srcdir):/usr/src/app/ \
	    -v $(datadir):/data \
	    --entrypoint=/bin/bash $(dockerimage)

r: run


webcam:
	@docker run -it --rm -v $(srcdir):/usr/src/app/ \
	    -v $(datadir):/data \
	    --device=/dev/video0 \
	    --entrypoint=/bin/bash $(dockerimage)

w: webcam


.PHONY: install i run r update u webcam w
39 changes: 37 additions & 2 deletions README.md
@@ -1,2 +1,37 @@
# video-resnet
ResNet 3D Conv Video models
# IG65-M PyTorch

Unofficial PyTorch (and ONNX) models and weights for IG65-M pre-trained 3D video architectures.

The official research Caffe2 models and weights are available at: https://github.com/facebookresearch/vmz


## Models

| Model | Weights | Input Size | pth | onnx |
| ------------- | ------------------ | ---------- | ----------------------------------------------- | --------------------------------------------- |
| r(2+1)d 34 | IG65-M | 8x112x112 | *r2plus1d_34_clip8_ig65m_from_scratch.pth* | *r2plus1d_34_clip8_ig65m_from_scratch.pb* |
| r(2+1)d 34 | IG65-M + Kinetics | 8x112x112 | *r2plus1d_34_clip8_ft_kinetics_from_ig65m.pth* | *r2plus1d_34_clip8_ft_kinetics_from_ig65m.pb* |
| r(2+1)d 34 | IG65-M | 32x112x112 | NA | NA |
| r(2+1)d 34    | IG65-M + Kinetics  | 32x112x112 | *r2plus1d_34_clip32_ft_kinetics_from_ig65m.pth* | *r2plus1d_34_clip32_ft_kinetics_from_ig65m.pb* |


## Usage

See
- `convert.py` for model conversion
- `extract.py` for feature extraction

We provide converted `.pth` PyTorch weights as artifacts in our GitHub releases.
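
As a rough sketch of how the converted weights can be used (this assumes the `r2plus1d_34` builder defined in `convert.py` and one of the `.pth` files from the table above; the file name and class count below are only illustrative):

```python
import torch

from convert import r2plus1d_34  # model builder defined in this repository

# 400 classes for the Kinetics fine-tuned checkpoints, 487 for the raw IG65-M ones
model = r2plus1d_34(num_classes=400)
model.load_state_dict(torch.load("r2plus1d_34_clip8_ft_kinetics_from_ig65m.pth"))
model.eval()

# Clips are NxCxTxHxW, e.g. a batch with one 8-frame 112x112 RGB clip
clip = torch.rand(1, 3, 8, 112, 112)

with torch.no_grad():
    scores = model(clip)  # logits over the classification classes

print(scores.shape)  # torch.Size([1, 400])
```

The NxCxTxHxW clip layout matches the dummy batch `convert.py` uses for the ONNX export.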


## References
- [VMZ: Model Zoo for Video Modeling](https://github.com/facebookresearch/vmz)
- [Kinetics](https://arxiv.org/abs/1705.06950)
- [IG65-M](https://arxiv.org/abs/1905.00561)


## License

Copyright © 2019 MoabitCoin

Distributed under the MIT License (MIT).
204 changes: 204 additions & 0 deletions convert.py
@@ -0,0 +1,204 @@
#!/usr/bin/env python3

import pickle
import argparse
from pathlib import Path

import torch
import torch.nn as nn

from torchvision.models.video.resnet import VideoResNet, BasicBlock, R2Plus1dStem, Conv2Plus1D


def r2plus1d_34(num_classes, pretrained=False, progress=False, **kwargs):
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[3, 4, 6, 3],
                        stem=R2Plus1dStem)

    model.fc = nn.Linear(model.fc.in_features, out_features=num_classes)

    # Fix difference in PyTorch vs Caffe2 architecture
    # https://github.com/facebookresearch/VMZ/issues/89
    model.layer2[0].conv2[0] = Conv2Plus1D(128, 128, 288)
    model.layer3[0].conv2[0] = Conv2Plus1D(256, 256, 576)
    model.layer4[0].conv2[0] = Conv2Plus1D(512, 512, 1152)

    # We need exact Caffe2 momentum for BatchNorm scaling
    for m in model.modules():
        if isinstance(m, nn.BatchNorm3d):
            m.eps = 1e-3
            m.momentum = 0.9

    return model


def blobs_from_pkl(path):
    with path.open(mode="rb") as f:
        pkl = pickle.load(f, encoding="latin1")
        return pkl["blobs"]


def copy_tensor(data, blobs, name):
    tensor = torch.from_numpy(blobs[name])

    del blobs[name]  # enforce: use at most once

    assert data.size() == tensor.size()
    assert data.dtype == tensor.dtype

    data.copy_(tensor)


def copy_conv(module, blobs, prefix):
    assert isinstance(module, nn.Conv3d)
    assert module.bias is None
    copy_tensor(module.weight.data, blobs, prefix + "_w")


def copy_bn(module, blobs, prefix):
    assert isinstance(module, nn.BatchNorm3d)
    copy_tensor(module.weight.data, blobs, prefix + "_s")
    copy_tensor(module.running_mean.data, blobs, prefix + "_rm")
    copy_tensor(module.running_var.data, blobs, prefix + "_riv")
    copy_tensor(module.bias.data, blobs, prefix + "_b")


def copy_fc(module, blobs):
    assert isinstance(module, nn.Linear)
    n = module.out_features
    copy_tensor(module.bias.data, blobs, "last_out_L" + str(n) + "_b")
    copy_tensor(module.weight.data, blobs, "last_out_L" + str(n) + "_w")


# https://github.com/pytorch/vision/blob/v0.4.0/torchvision/models/video/resnet.py#L174-L188
# https://github.com/facebookresearch/VMZ/blob/6c925c47b7d6545b64094a083f111258b37cbeca/lib/models/r3d_model.py#L233-L275
def copy_stem(module, blobs):
    assert isinstance(module, R2Plus1dStem)
    assert len(module) == 6
    copy_conv(module[0], blobs, "conv1_middle")
    copy_bn(module[1], blobs, "conv1_middle_spatbn_relu")
    assert isinstance(module[2], nn.ReLU)
    copy_conv(module[3], blobs, "conv1")
    copy_bn(module[4], blobs, "conv1_spatbn_relu")
    assert isinstance(module[5], nn.ReLU)


# https://github.com/pytorch/vision/blob/v0.4.0/torchvision/models/video/resnet.py#L82-L114
def copy_conv2plus1d(module, blobs, i, j):
    assert isinstance(module, Conv2Plus1D)
    assert len(module) == 4
    copy_conv(module[0], blobs, "comp_" + str(i) + "_conv_" + str(j) + "_middle")
    copy_bn(module[1], blobs, "comp_" + str(i) + "_spatbn_" + str(j) + "_middle")
    assert isinstance(module[2], nn.ReLU)
    copy_conv(module[3], blobs, "comp_" + str(i) + "_conv_" + str(j))


# https://github.com/pytorch/vision/blob/v0.4.0/torchvision/models/video/resnet.py#L82-L114
def copy_basicblock(module, blobs, i):
    assert isinstance(module, BasicBlock)

    assert len(module.conv1) == 3
    assert isinstance(module.conv1[0], Conv2Plus1D)
    copy_conv2plus1d(module.conv1[0], blobs, i, 1)
    assert isinstance(module.conv1[1], nn.BatchNorm3d)
    copy_bn(module.conv1[1], blobs, "comp_" + str(i) + "_spatbn_" + str(1))
    assert isinstance(module.conv1[2], nn.ReLU)

    assert len(module.conv2) == 2
    assert isinstance(module.conv2[0], Conv2Plus1D)
    copy_conv2plus1d(module.conv2[0], blobs, i, 2)
    assert isinstance(module.conv2[1], nn.BatchNorm3d)
    copy_bn(module.conv2[1], blobs, "comp_" + str(i) + "_spatbn_" + str(2))

    if module.downsample is not None:
        assert i in [3, 7, 13]
        assert len(module.downsample) == 2
        assert isinstance(module.downsample[0], nn.Conv3d)
        assert isinstance(module.downsample[1], nn.BatchNorm3d)
        copy_conv(module.downsample[0], blobs, "shortcut_projection_" + str(i))
        copy_bn(module.downsample[1], blobs, "shortcut_projection_" + str(i) + "_spatbn")


def copy_layer(module, blobs, i):
    assert {0: 3, 3: 4, 7: 6, 13: 3}[i] == len(module)

    for basicblock in module:
        copy_basicblock(basicblock, blobs, i)
        i += 1


def init_canary(model):
    nan = float("nan")

    for m in model.modules():
        if isinstance(m, nn.Conv3d):
            assert m.bias is None
            nn.init.constant_(m.weight, nan)
        elif isinstance(m, nn.BatchNorm3d):
            nn.init.constant_(m.weight, nan)
            nn.init.constant_(m.running_mean, nan)
            nn.init.constant_(m.running_var, nan)
            nn.init.constant_(m.bias, nan)
        elif isinstance(m, nn.Linear):
            nn.init.constant_(m.weight, nan)
            nn.init.constant_(m.bias, nan)


def check_canary(model):
    for m in model.modules():
        if isinstance(m, nn.Conv3d):
            assert m.bias is None
            assert not torch.isnan(m.weight).any()
        elif isinstance(m, nn.BatchNorm3d):
            assert not torch.isnan(m.weight).any()
            assert not torch.isnan(m.running_mean).any()
            assert not torch.isnan(m.running_var).any()
            assert not torch.isnan(m.bias).any()
        elif isinstance(m, nn.Linear):
            assert not torch.isnan(m.weight).any()
            assert not torch.isnan(m.bias).any()


def main(args):
    blobs = blobs_from_pkl(args.pkl)

    model = r2plus1d_34(num_classes=args.classes)

    init_canary(model)

    copy_stem(model.stem, blobs)

    layers = [model.layer1, model.layer2, model.layer3, model.layer4]
    blocks = [0, 3, 7, 13]

    for layer, i in zip(layers, blocks):
        copy_layer(layer, blobs, i)

    copy_fc(model.fc, blobs)

    assert not blobs
    check_canary(model)

    # Export to pytorch .pth and self-contained onnx .pb files

    batch = torch.rand(1, 3, args.frames, 112, 112)  # NxCxTxHxW
    torch.save(model.state_dict(), args.out.with_suffix(".pth"))
    torch.onnx.export(model, batch, args.out.with_suffix(".pb"))

    # Check pth roundtrip into fresh model

    model = r2plus1d_34(num_classes=args.classes)
    model.load_state_dict(torch.load(args.out.with_suffix(".pth")))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    arg = parser.add_argument

    arg("pkl", type=Path, help=".pkl file to read the R(2+1)D 34 layer weights from")
    arg("out", type=Path, help="prefix to save converted R(2+1)D 34 layer weights to")
    arg("--frames", type=int, choices=(8, 32), required=True, help="clip frames for video model")
    arg("--classes", type=int, choices=(400, 487), required=True, help="classes in last layer")

    main(parser.parse_args())
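
As a quick sanity check of the `.pb` file that `convert.py` exports, something along these lines should work. This is a minimal sketch, not part of the commit: it assumes `onnxruntime` and `numpy` are installed separately, and it looks up the input name from the graph rather than hard-coding whatever `torch.onnx.export` assigned.

```python
import numpy as np
import onnxruntime as ort

# Path produced by convert.py via args.out.with_suffix(".pb")
session = ort.InferenceSession("r2plus1d_34_clip8_ft_kinetics_from_ig65m.pb")

# Query the graph for its input name instead of guessing it
name = session.get_inputs()[0].name

# Same NxCxTxHxW layout as the dummy batch used during export
clip = np.random.rand(1, 3, 8, 112, 112).astype(np.float32)

scores, = session.run(None, {name: clip})
print(scores.shape)  # (1, num_classes)
```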