se3 transformer pytorch 다운로드 - se3 transformer pytorch 소스 코드 다운로드

SE3 트랜스포머 - 파이토치

Pytorch에서 Equivariant Self-Attention을 위한 SE3-Transformer 구현. Alphafold2 결과 복제 및 기타 약물 발견 애플리케이션에 필요할 수 있습니다.

등변성(equivariance)의 예

버전 0.6.0 이전의 SE3 Transformers를 사용하고 있었다면 업데이트하세요. 인접 행렬 기반 희소 이웃(adjacency sparse neighbors) 설정을 사용하지 않고 최근접 이웃 기능에 의존하는 경우에 발생하는 큰 버그를 @MattMcPartlon이 발견했습니다.

업데이트: 대신 Equiformer를 사용하는 것이 좋습니다.

설치

$ pip install se3-transformer-pytorch

용법

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 512 ,
    heads = 8 ,
    depth = 6 ,
    dim_head = 64 ,
    num_degrees = 4 ,
    valid_radius = 10
)

feats = torch . randn ( 1 , 1024 , 512 )
coors = torch . randn ( 1 , 1024 , 3 )
mask  = torch . ones ( 1 , 1024 ). bool ()

out = model ( feats , coors , mask ) # (1, 1024, 512)

여기에 설명된 대로 Alphafold2의 잠재적인 사용 예시

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    reduce_dim_out = True ,
    differentiable_coors = True
)

atom_feats = torch . randn ( 2 , 32 , 64 )
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

refined_coors = coors + model ( atom_feats , coors , mask , return_type = 1 ) # (2, 32, 3)

또한 전달되는 유형 0 특징(feature)의 임베딩을 기본 트랜스포머 클래스가 처리하도록 할 수도 있습니다. 아래 예시는 그 특징이 원자라고 가정합니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 28 ,       # 28 unique atoms
    dim = 64 ,
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    reduce_dim_out = True
)

atoms = torch . randint ( 0 , 28 , ( 2 , 32 ))
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

refined_coors = coors + model ( atoms , coors , mask , return_type = 1 ) # (2, 32, 3)

위치 인코딩을 통해 네트가 더 많은 이점을 얻을 수 있다고 생각한다면 공간에서의 위치를 특징화하고 다음과 같이 전달할 수 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 2 ,
    input_degrees = 2 ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    reduce_dim_out = True  # reduce out the final dimension
)

atom_feats  = torch . randn ( 2 , 32 , 64 , 1 ) # b x n x d x type0
coors_feats = torch . randn ( 2 , 32 , 64 , 3 ) # b x n x d x type1

# atom features are type 0, predicted coordinates are type 1
features = { '0' : atom_feats , '1' : coors_feats }
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

refined_coors = coors + model ( features , coors , mask , return_type = 1 ) # (2, 32, 3) - equivariant to input type 1 features and coordinates

에지(Edges)

SE3 Transformers에 에지 정보(예: 원자 간 결합 유형)를 제공하려면 초기화 시 두 개의 키워드 인수를 더 전달하면 됩니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 28 ,
    dim = 64 ,
    num_edge_tokens = 4 ,       # number of edge type, say 4 bond types
    edge_dim = 16 ,             # dimension of edge embedding
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 3 ,
    output_degrees = 1 ,
    reduce_dim_out = True
)

atoms = torch . randint ( 0 , 28 , ( 2 , 32 ))
bonds = torch . randint ( 0 , 4 , ( 2 , 32 , 32 ))
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

pred = model ( atoms , coors , mask , edges = bonds , return_type = 0 ) # (2, 32, 1)

에지에 연속 값을 전달하려는 경우 num_edge_tokens 를 설정하지 않고, 이산(discrete) 결합 유형을 직접 인코딩한 다음 이를 연속 값의 푸리에 특징(fourier features)에 이어 붙일(concatenate) 수 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer
from se3_transformer_pytorch . utils import fourier_encode

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    attend_self = True ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    edge_dim = 34           # edge dimension must match the final dimension of the edges being passed in
)

feats = torch . randn ( 1 , 32 , 64 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()

pairwise_continuous_values = torch . randint ( 0 , 4 , ( 1 , 32 , 32 , 2 ))  # say there are 2

edges = fourier_encode (
    pairwise_continuous_values ,
    num_encodings = 8 ,
    include_self = True
) # (1, 32, 32, 34) - {2 * (2 * 8 + 1)}

out = model ( feats , coors , mask , edges = edges , return_type = 1 )

희소 이웃(Sparse Neighbors)

점의 연결성을 알고 있는 경우(예: 분자로 작업 중) 부울 마스크 형태로 인접 행렬을 전달할 수 있습니다(여기서 True 는 연결되어 있음을 나타냄).

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 32 ,
    heads = 8 ,
    depth = 1 ,
    dim_head = 64 ,
    num_degrees = 2 ,
    valid_radius = 10 ,
    attend_sparse_neighbors = True ,  # this must be set to true, in which case it will assert that you pass in the adjacency matrix
    num_neighbors = 0 ,               # if you set this to 0, it will only consider the connected neighbors as defined by the adjacency matrix. but if you set a value greater than 0, it will continue to fetch the closest points up to this many, excluding the ones already specified by the adjacency matrix
    max_sparse_neighbors = 8         # you can cap the number of neighbors, sampled from within your sparse set of neighbors as defined by the adjacency matrix, if specified
)

feats = torch . randn ( 1 , 128 , 32 )
coors = torch . randn ( 1 , 128 , 3 )
mask  = torch . ones ( 1 , 128 ). bool ()

# placeholder adjacency matrix
# naively assuming the sequence is one long chain (128, 128)

i = torch . arange ( 128 )
adj_mat = ( i [:, None ] <= ( i [ None , :] + 1 )) & ( i [:, None ] >= ( i [ None , :] - 1 ))

out = model ( feats , coors , mask , adj_mat = adj_mat ) # (1, 128, 32)

또한 하나의 추가 키워드 num_adj_degrees 를 사용하여 네트워크가 N차 이웃을 자동으로 도출하도록 할 수도 있습니다. 시스템이 이웃의 차수를 에지 정보로 구별하도록 하려면 0이 아닌 adj_dim 을 추가로 전달하세요.

import torch
from se3_transformer_pytorch . se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    attend_self = True ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    num_neighbors = 0 ,
    attend_sparse_neighbors = True ,
    num_adj_degrees = 2 ,    # automatically derive 2nd degree neighbors
    adj_dim = 4             # embed 1st and 2nd degree neighbors (as well as null neighbors) with edge embeddings of this dimension
)

feats = torch . randn ( 1 , 32 , 64 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()

# placeholder adjacency matrix
# naively assuming the sequence is one long chain (32, 32)

i = torch . arange ( 32 )
adj_mat = ( i [:, None ] <= ( i [ None , :] + 1 )) & ( i [:, None ] >= ( i [ None , :] - 1 ))

out = model ( feats , coors , mask , adj_mat = adj_mat , return_type = 1 )

각 유형의 차원을 세밀하게 제어하려면 hidden_fiber_dict 및 out_fiber_dict 키워드를 사용하여, 차수(degree)를 키로, 차원(dimension)을 값으로 하는 딕셔너리를 전달할 수 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 28 ,
    dim = 64 ,
    num_edge_tokens = 4 ,
    edge_dim = 16 ,
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 3 ,
    output_degrees = 1 ,
    hidden_fiber_dict = { 0 : 16 , 1 : 8 , 2 : 4 },
    out_fiber_dict = { 0 : 16 , 1 : 1 },
    reduce_dim_out = False
)

atoms = torch . randint ( 0 , 28 , ( 2 , 32 ))
bonds = torch . randint ( 0 , 4 , ( 2 , 32 , 32 ))
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

pred = model ( atoms , coors , mask , edges = bonds )

pred [ '0' ] # (2, 32, 16)
pred [ '1' ] # (2, 32, 1, 3)

이웃

이웃 마스크를 전달하여 고려할 수 있는 노드를 추가로 제어할 수 있습니다. 모든 False 값은 고려 대상에서 제외됩니다.

import torch
from se3_transformer_pytorch . se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 16 ,
    dim_head = 16 ,
    attend_self = True ,
    num_degrees = 4 ,
    output_degrees = 2 ,
    num_edge_tokens = 4 ,
    num_neighbors = 8 ,      # make sure you set this value as the maximum number of neighbors set by your neighbor_mask, or it will throw a warning
    edge_dim = 2 ,
    depth = 3
)

feats = torch . randn ( 1 , 32 , 16 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()
bonds = torch . randint ( 0 , 4 , ( 1 , 32 , 32 ))

neighbor_mask = torch . ones ( 1 , 32 , 32 ). bool () # set the nodes you wish to be masked out as False

out = model (
    feats ,
    coors ,
    mask ,
    edges = bonds ,
    neighbor_mask = neighbor_mask ,
    return_type = 1
)

글로벌 노드

이 기능을 사용하면 다른 모든 노드가 볼 수 있는 전역 노드로 간주되는 벡터를 전달할 수 있습니다. 아이디어는 그래프를 몇 개의 특징 벡터로 풀링하고, 이 벡터들을 네트워크의 모든 어텐션 계층에서 키/값으로 투영하는 것입니다. 모든 노드는 최근접 이웃 또는 인접성 계산에 관계없이 전역 노드 정보에 완전히 접근할 수 있습니다.

import torch
from torch import nn
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    num_degrees = 2 ,
    num_neighbors = 4 ,
    valid_radius = 10 ,
    global_feats_dim = 32 # this must be set to the dimension of the global features, in this example, 32
)

feats = torch . randn ( 1 , 32 , 64 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()

# naively derive global features
# by pooling features and projecting
global_feats = nn . Linear ( 64 , 32 )( feats . mean ( dim = 1 , keepdim = True )) # (1, 1, 32)

out = model ( feats , coors , mask , return_type = 0 , global_feats = global_feats )

할 일:

글로벌 노드가 다른 모든 노드에 어텐션을 수행(attend)하도록 허용하여 네트워크에 전역적인 정보 통로를 제공합니다. (BigBird, ETC, Longformer 등과 유사)

자기회귀

하나의 추가 플래그만 사용하여 SE3 Transformers를 자기회귀적으로 사용할 수 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 512 ,
    heads = 8 ,
    depth = 6 ,
    dim_head = 64 ,
    num_degrees = 4 ,
    valid_radius = 10 ,
    causal = True          # set this to True
)

feats = torch . randn ( 1 , 1024 , 512 )
coors = torch . randn ( 1 , 1024 , 3 )
mask  = torch . ones ( 1 , 1024 ). bool ()

out = model ( feats , coors , mask ) # (1, 1024, 512)

실험적 기능

비쌍 컨벌루션 키

저는 쌍별(pairwise) 컨볼루션 대신 선형으로 투영된 키를 사용해도 간단한(toy) 노이즈 제거 작업에서는 괜찮게 동작하는 것 같다는 점을 발견했습니다. 이렇게 하면 메모리가 25% 절약됩니다. linear_proj_keys = True 로 설정하여 이 기능을 사용해 볼 수 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    num_degrees = 4 ,
    num_neighbors = 8 ,
    valid_radius = 10 ,
    splits = 4 ,
    linear_proj_keys = True # set this to True
). cuda ()

feats = torch . randn ( 1 , 32 , 64 ). cuda ()
coors = torch . randn ( 1 , 32 , 3 ). cuda ()
mask  = torch . ones ( 1 , 32 ). bool (). cuda ()

out = model ( feats , coors , mask , return_type = 0 )

모든 헤드에서 공유 키/값

트랜스포머에는 쿼리의 모든 헤드가 하나의 키/값 헤드를 공유하도록 하는, 상대적으로 잘 알려지지 않은 기법이 있습니다. NLP에서의 제 경험에 따르면 이는 일반적으로 성능 저하로 이어지지만, 더 깊은 네트워크나 더 많은 차수(degrees)를 위해 메모리를 절약해야 하는 경우에는 좋은 선택일 수 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 8 ,
    num_degrees = 4 ,
    num_neighbors = 8 ,
    valid_radius = 10 ,
    splits = 4 ,
    one_headed_key_values = True  # one head of key / values shared across all heads of the queries
). cuda ()

feats = torch . randn ( 1 , 32 , 64 ). cuda ()
coors = torch . randn ( 1 , 32 , 3 ). cuda ()
mask  = torch . ones ( 1 , 32 ). bool (). cuda ()

out = model ( feats , coors , mask , return_type = 0 )

묶인 키/값

메모리를 절반으로 절약하기 위해 키/값을 동일하게 묶을 수도 있습니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 8 ,
    num_degrees = 4 ,
    num_neighbors = 8 ,
    valid_radius = 10 ,
    splits = 4 ,
    tie_key_values = True # set this to True
). cuda ()

feats = torch . randn ( 1 , 32 , 64 ). cuda ()
coors = torch . randn ( 1 , 32 , 3 ). cuda ()
mask  = torch . ones ( 1 , 32 ). bool (). cuda ()

out = model ( feats , coors , mask , return_type = 0 )

EGNN 사용

이는 1(좌표의 경우)보다 더 높은 유형과 더 큰 차원에 대해 작동하는 EGNN의 실험적 버전입니다. 클래스 이름은 기존 로직을 재사용하기 때문에 여전히 SE3Transformer 이므로 나중에 정리할 때까지 지금은 무시하세요.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 32 ,
    num_neighbors = 8 ,
    num_edge_tokens = 4 ,
    edge_dim = 4 ,
    num_degrees = 4 ,       # number of higher order types - will use basis on a TCN to project to these dimensions
    use_egnn = True ,       # set this to true to use EGNN instead of equivariant attention layers
    egnn_hidden_dim = 64 ,  # egnn hidden dimension
    depth = 4 ,             # depth of EGNN
    reduce_dim_out = True  # will project the dimension of the higher types to 1
). cuda ()

feats = torch . randn ( 2 , 32 , 32 ). cuda ()
coors = torch . randn ( 2 , 32 , 3 ). cuda ()
bonds = torch . randint ( 0 , 4 , ( 2 , 32 , 32 )). cuda ()
mask  = torch . ones ( 2 , 32 ). bool (). cuda ()

refinement = model ( feats , coors , mask , edges = bonds , return_type = 1 ) # (2, 32, 3)

coors = coors + refinement  # update coors with refinement

각각의 상위 유형에 대해 개별 차원을 지정하려면 num_degrees 대신 {<degree>:<dim>} 형식의 딕셔너리인 hidden_fiber_dict 를 전달하면 됩니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 32 ,
    num_neighbors = 8 ,
    hidden_fiber_dict = { 0 : 32 , 1 : 16 , 2 : 8 , 3 : 4 },
    use_egnn = True ,
    depth = 4 ,
    egnn_hidden_dim = 64 ,
    egnn_weights_clamp_value = 2 , 
    reduce_dim_out = True
). cuda ()

feats = torch . randn ( 2 , 32 , 32 ). cuda ()
coors = torch . randn ( 2 , 32 , 3 ). cuda ()
mask  = torch . ones ( 2 , 32 ). bool (). cuda ()

refinement = model ( feats , coors , mask , return_type = 1 ) # (2, 32, 3)

coors = coors + refinement  # update coors with refinement

스케일링(wip)

이 섹션에서는 SE3 Transformer의 확장성을 좀 더 향상시키기 위한 지속적인 노력을 나열합니다.

먼저 가역적 네트워크를 추가했습니다. 이를 통해 일반적인 메모리 장애물에 부딪히기 전에 좀 더 깊이를 추가할 수 있습니다. 등변성(equivariance) 보존은 테스트에서 입증됩니다.

import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 20 ,
    dim = 32 ,
    dim_head = 32 ,
    heads = 4 ,
    depth = 12 ,             # 12 layers
    input_degrees = 1 ,
    num_degrees = 3 ,
    output_degrees = 1 ,
    reduce_dim_out = True ,
    reversible = True       # set reversible to True
). cuda ()

atoms = torch . randint ( 0 , 4 , ( 2 , 32 )). cuda ()
coors = torch . randn ( 2 , 32 , 3 ). cuda ()
mask  = torch . ones ( 2 , 32 ). bool (). cuda ()

pred = model ( atoms , coors , mask = mask , return_type = 0 )

loss = pred . sum ()
loss . backward ()

예

먼저 sidechainnet 을 설치하세요

$ pip install sidechainnet

그런 다음 단백질 백본 노이즈 제거 작업을 실행합니다.

$ python denoise.py

캐싱

기본적으로 기저 벡터(basis vectors)는 캐시됩니다. 그러나 캐시를 지워야 하는 경우에는 스크립트를 시작할 때 CLEAR_CACHE 환경 플래그를 임의의 값으로 설정하기만 하면 됩니다.

$ CLEAR_CACHE=1 python train.py

또는 다음 위치에 있어야 하는 캐시 디렉터리를 삭제해 볼 수 있습니다.

$ rm -rf ~/.cache.equivariant_attention

기본 디렉터리에 권한 문제가 있을 수 있는 경우 캐시를 저장할 디렉터리를 직접 지정할 수도 있습니다.

CACHE_PATH=./path/to/my/cache python train.py

테스트

$ python setup.py pytest

크레딧

이 라이브러리는 대체로 Fabian의 공식 저장소를 포팅한 것이지만, DGL 라이브러리를 사용하지 않습니다.

인용

 @misc { fuchs2020se3transformers ,
    title   = { SE(3)-Transformers: 3D Roto-Translation Equivariant Attention Networks } , 
    author  = { Fabian B. Fuchs and Daniel E. Worrall and Volker Fischer and Max Welling } ,
    year    = { 2020 } ,
    eprint  = { 2006.10503 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.LG }
}

 @misc { satorras2021en ,
    title   = { E(n) Equivariant Graph Neural Networks } ,
    author  = { Victor Garcia Satorras and Emiel Hoogeboom and Max Welling } ,
    year    = { 2021 } ,
    eprint  = { 2102.09844 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.LG }
}

 @misc { gomez2017reversible ,
    title     = { The Reversible Residual Network: Backpropagation Without Storing Activations } ,
    author    = { Aidan N. Gomez and Mengye Ren and Raquel Urtasun and Roger B. Grosse } ,
    year      = { 2017 } ,
    eprint    = { 1707.04585 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.CV }
}

 @misc { shazeer2019fast ,
    title   = { Fast Transformer Decoding: One Write-Head is All You Need } ,
    author  = { Noam Shazeer } ,
    year    = { 2019 } ,
    eprint  = { 1911.02150 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.NE }
}