Instructions to use hpcai-tech/vqvae with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use hpcai-tech/vqvae with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("feature-extraction", model="hpcai-tech/vqvae", trust_remote_code=True)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("hpcai-tech/vqvae", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| """ | |
| MIT License | |
| Copyright (c) 2021 Wilson Yan | |
| Permission is hereby granted, free of charge, to any person obtaining a copy | |
| of this software and associated documentation files (the "Software"), to deal | |
| in the Software without restriction, including without limitation the rights | |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| copies of the Software, and to permit persons to whom the Software is | |
| furnished to do so, subject to the following conditions: | |
| The above copyright notice and this permission notice shall be included in all | |
| copies or substantial portions of the Software. | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| SOFTWARE. | |
| This file is copied from https://github.com/wilson1yan/VideoGPT/blob/master/videogpt/utils.py | |
| We adapted it to Hugging Face AutoModel for easier model loading. | |
| """ | |
def shift_dim(x, src_dim=-1, dest_dim=-1, make_contiguous=True):
    """Move dimension ``src_dim`` of ``x`` to position ``dest_dim``.

    The remaining dimensions keep their relative order, e.g.
    ``shift_dim(x, 1, -1)`` turns (b, c, t, h, w) into (b, t, h, w, c).

    Negative ``src_dim`` / ``dest_dim`` are counted from the last dimension.
    When ``make_contiguous`` is true, ``.contiguous()`` is called on the
    permuted result before it is returned.
    """
    rank = len(x.shape)
    src_dim = src_dim + rank if src_dim < 0 else src_dim
    dest_dim = dest_dim + rank if dest_dim < 0 else dest_dim
    assert 0 <= src_dim < rank and 0 <= dest_dim < rank

    # Every dimension except the source, in original order; the source is
    # then slotted back in at its destination index.
    order = [d for d in range(rank) if d != src_dim]
    order.insert(dest_dim, src_dim)

    shifted = x.permute(order)
    return shifted.contiguous() if make_contiguous else shifted
def view_range(x, i, j, shape):
    """Reshape dimensions ``i`` (inclusive) to ``j`` (exclusive) of ``x``.

    The dimensions in ``[i, j)`` are replaced by ``shape`` through
    ``x.view``; dimensions outside that range are left as they are.
    e.g. if x.shape = (b, thw, c) then view_range(x, 1, 2, (t, h, w))
    returns x viewed with shape (b, t, h, w, c).

    Negative ``i`` / ``j`` are counted from the last dimension, and
    ``j=None`` means "through the last dimension".
    """
    new_dims = tuple(shape)
    rank = len(x.shape)

    start = i + rank if i < 0 else i
    if j is None:
        stop = rank
    else:
        stop = j + rank if j < 0 else j
    assert 0 <= start < stop <= rank

    old_dims = x.shape
    return x.view(old_dims[:start] + new_dims + old_dims[stop:])
def tensor_slice(x, begin, size):
    """Return the sub-block of ``x`` that starts at ``begin`` with extent ``size``.

    ``begin`` and ``size`` are sequences holding one entry per leading
    dimension of ``x``. A ``size`` entry of -1 means "everything from
    ``begin`` to the end of that dimension".

    All ``begin`` entries and all resolved sizes must be non-negative;
    otherwise an ``AssertionError`` is raised.
    """
    assert all(b >= 0 for b in begin)
    # Resolve -1 ("to the end") into a concrete extent for each dimension.
    size = [dim - b if s == -1 else s
            for s, b, dim in zip(size, begin, x.shape)]
    assert all(s >= 0 for s in size)

    # Index with a tuple, not a list: a list of slices is interpreted as
    # fancy indexing and is rejected by current NumPy releases.
    index = tuple(slice(b, b + s) for b, s in zip(begin, size))
    return x[index]
| import math | |
| import numpy as np | |
| import skvideo.io | |
def save_video_grid(video, fname, nrow=None):
    """Tile a batch of videos into one grid video and write it to ``fname``.

    ``video`` is unpacked as (b, c, t, h, w). It is permuted to
    channels-last, moved to CPU, multiplied by 255 and cast to uint8
    (values are not clamped first). ``nrow`` is the number of grid rows;
    when omitted it defaults to ceil(sqrt(b)). The number of columns is
    derived from it.

    The grid is written with ``skvideo.io.vwrite`` using
    ``inputdict={'-r': '5'}`` (presumably the input frame rate; confirm
    against the scikit-video docs), and a confirmation line is printed.
    """
    batch, channels, frames, height, width = video.shape
    clips = video.permute(0, 2, 3, 4, 1)
    clips = (clips.cpu().numpy() * 255).astype('uint8')

    if nrow is None:
        nrow = math.ceil(math.sqrt(batch))
    ncol = math.ceil(batch / nrow)

    padding = 1
    cell_h = padding + height
    cell_w = padding + width
    video_grid = np.zeros(
        (frames, cell_h * nrow + padding, cell_w * ncol + padding, channels),
        dtype='uint8',
    )

    for idx in range(batch):
        row, col = divmod(idx, ncol)
        # Tiles are anchored at multiples of the cell size starting from 0,
        # so the black padding falls to the bottom/right of each tile.
        top = cell_h * row
        left = cell_w * col
        video_grid[:, top:top + height, left:left + width] = clips[idx]

    skvideo.io.vwrite(fname, video_grid, inputdict={'-r': '5'})
    print('saved videos to', fname)