titok pytorch
0.0.5
Implementation of TiTok, proposed by ByteDance in "An Image is Worth 32 Tokens for Reconstruction and Generation".
$ pip install titok-pytorch
import torch
from titok_pytorch import TiTokTokenizer
# Random tensor of shape (2, 3, 256, 256) standing in for a batch of images.
images = torch.randn(2, 3, 256, 256)

# Tokenizer hyperparameters, gathered in one place before construction.
tokenizer_kwargs = dict(
    dim=1024,
    patch_size=32,
    num_latent_tokens=32,  # the paper's claim: only 32 tokens are needed
    codebook_size=4096,
)
titok = TiTokTokenizer(**tokenizer_kwargs)

# Calling the tokenizer on the images gives the value to backpropagate.
loss = titok(images)
loss.backward()

# After much training, extract discrete codes for a downstream model
# (gpt, maskgit, whatever).
codes = titok.tokenize(images)  # (2, 32)

# Map the codes back to images; the result must match the input shape.
recon_images = titok.codebook_ids_to_images(codes)
assert recon_images.shape == images.shape
@article{yu2024an,
author = {Qihang Yu and Mark Weber and Xueqing Deng and Xiaohui Shen and Daniel Cremers and Liang-Chieh Chen},
title = {An Image is Worth 32 Tokens for Reconstruction and Generation},
journal = {arXiv preprint arXiv:2406.07550},
year = {2024}
}