meshgpt pytorch 다운로드 - meshgpt pytorch 소스 코드 다운로드

또한 궁극적으로 텍스트로부터 3D 자산을 생성할 수 있도록 텍스트 조건화(text conditioning)도 추가할 예정입니다.

이 연구를 재현하기 위해 다른 사람들과 협업하는 데 관심이 있다면 커뮤니티에 참여하세요.

$ pip install meshgpt-pytorch

import torch

from meshgpt_pytorch import (
    MeshAutoencoder ,
    MeshTransformer
)

# autoencoder

autoencoder = MeshAutoencoder (
    num_discrete_coors = 128
)

# mock inputs

vertices = torch . randn (( 2 , 121 , 3 ))            # (batch, num vertices, coor (3))
faces = torch . randint ( 0 , 121 , ( 2 , 64 , 3 ))      # (batch, num faces, vertices (3))

# make sure faces are padded with `-1` for variable lengthed meshes

# forward in the faces

loss = autoencoder (
    vertices = vertices ,
    faces = faces
)

loss . backward ()

# after much training...
# you can pass in the raw face data above to train a transformer to model this sequence of face vertices

transformer = MeshTransformer (
    autoencoder ,
    dim = 512 ,
    max_seq_len = 768
)

loss = transformer (
    vertices = vertices ,
    faces = faces
)

loss . backward ()

# after much training of transformer, you can now sample novel 3d assets

faces_coordinates , face_mask = transformer . generate ()

# (batch, num faces, vertices (3), coordinates (3)), (batch, num faces)
# now post process for the generated 3d asset

텍스트 조건이 적용된 3D 모양 합성의 경우 MeshTransformer 에서 condition_on_text = True 로 설정한 다음 설명 목록을 texts 키워드 인수로 전달하면 됩니다.

transformer = MeshTransformer (
    autoencoder ,
    dim = 512 ,
    max_seq_len = 768 ,
    condition_on_text = True
)


loss = transformer (
    vertices = vertices ,
    faces = faces ,
    texts = [ 'a high chair' , 'a small teapot' ],
)

loss . backward ()

# after much training of transformer, you can now sample novel 3d assets conditioned on text

faces_coordinates , face_mask = transformer . generate (
    texts = [ 'a long table' ],
    cond_scale = 8. ,  # a cond_scale > 1. will enable classifier free guidance - can be placed anywhere from 3. - 10.
    remove_parallel_component = True # from https://arxiv.org/abs/2410.02416
)

멀티모달 트랜스포머에서 사용할 수 있도록 메시를 토큰화하려면 오토인코더에서 `.tokenize` 를 호출하기만 하면 됩니다(지수 평활화된 모델을 사용하려면 오토인코더 트레이너 인스턴스에서 동일한 메서드를 호출하세요).

mesh_token_ids = autoencoder . tokenize (
    vertices = vertices ,
    faces = faces
)

# (batch, num face vertices, residual quantized layer)

$ cp .env.sample .env

 @inproceedings { Siddiqui2023MeshGPTGT ,
    title   = { MeshGPT: Generating Triangle Meshes with Decoder-Only Transformers } ,
    author  = { Yawar Siddiqui and Antonio Alliegro and Alexey Artemov and Tatiana Tommasi and Daniele Sirigatti and Vladislav Rosov and Angela Dai and Matthias Nie{\ss}ner } ,
    year    = { 2023 } ,
    url     = { https://api.semanticscholar.org/CorpusID:265457242 }
}

 @inproceedings { dao2022flashattention ,
    title   = { Flash{A}ttention: Fast and Memory-Efficient Exact Attention with {IO}-Awareness } ,
    author  = { Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher } ,
    booktitle = { Advances in Neural Information Processing Systems } ,
    year    = { 2022 }
}

 @inproceedings { Leviathan2022FastIF ,
    title   = { Fast Inference from Transformers via Speculative Decoding } ,
    author  = { Yaniv Leviathan and Matan Kalman and Y. Matias } ,
    booktitle = { International Conference on Machine Learning } ,
    year    = { 2022 } ,
    url     = { https://api.semanticscholar.org/CorpusID:254096365 }
}

 @misc { yu2023language ,
    title   = { Language Model Beats Diffusion -- Tokenizer is Key to Visual Generation } , 
    author  = { Lijun Yu and José Lezama and Nitesh B. Gundavarapu and Luca Versari and Kihyuk Sohn and David Minnen and Yong Cheng and Agrim Gupta and Xiuye Gu and Alexander G. Hauptmann and Boqing Gong and Ming-Hsuan Yang and Irfan Essa and David A. Ross and Lu Jiang } ,
    year    = { 2023 } ,
    eprint  = { 2310.05737 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.CV }
}

 @article { Lee2022AutoregressiveIG ,
    title   = { Autoregressive Image Generation using Residual Quantization } ,
    author  = { Doyup Lee and Chiheon Kim and Saehoon Kim and Minsu Cho and Wook-Shin Han } ,
    journal = { 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) } ,
    year    = { 2022 } ,
    pages   = { 11513-11522 } ,
    url     = { https://api.semanticscholar.org/CorpusID:247244535 }
}

 @inproceedings { Katsch2023GateLoopFD ,
    title   = { GateLoop: Fully Data-Controlled Linear Recurrence for Sequence Modeling } ,
    author  = { Tobias Katsch } ,
    year    = { 2023 } ,
    url     = { https://api.semanticscholar.org/CorpusID:265018962 }
}