تحميل se3 transformer pytorch - تحميل كود المصدر se3 transformer pytorch

محول SE3 - Pytorch

تنفيذ لمحولات SE3 (SE3-Transformers) للانتباه الذاتي متساوي التغاير (equivariant) في Pytorch. قد يكون ضروريًا لإعادة إنتاج نتائج Alphafold2 ولتطبيقات أخرى في اكتشاف الأدوية.

مثال على تساوي التغاير (equivariance)

إذا كنت تستخدم أي إصدار من SE3 Transformers قبل الإصدار 0.6.0، فيرجى التحديث. فقد اكتشف @MattMcPartlon خطأً جسيمًا يؤثر عليك إذا كنت لا تستخدم إعدادات الجيران المتفرقين المعتمدة على مصفوفة التجاور وتعتمد على وظيفة أقرب الجيران.

تحديث: يوصى باستخدام Equiformer بدلاً من ذلك

التثبيت

$ pip install se3-transformer-pytorch

الاستخدام

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 512 ,
    heads = 8 ,
    depth = 6 ,
    dim_head = 64 ,
    num_degrees = 4 ,
    valid_radius = 10
)

feats = torch . randn ( 1 , 1024 , 512 )
coors = torch . randn ( 1 , 1024 , 3 )
mask  = torch . ones ( 1 , 1024 ). bool ()

out = model ( feats , coors , mask ) # (1, 1024, 512)

مثال محتمل للاستخدام في Alphafold2، كما هو موضح هنا

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    reduce_dim_out = True ,
    differentiable_coors = True
)

atom_feats = torch . randn ( 2 , 32 , 64 )
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

refined_coors = coors + model ( atom_feats , coors , mask , return_type = 1 ) # (2, 32, 3)

يمكنك أيضًا السماح لفئة المحولات الأساسية بالاعتناء بتضمين ميزات النوع 0 التي يتم تمريرها. بافتراض أنها ذرات

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 28 ,       # 28 unique atoms
    dim = 64 ,
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    reduce_dim_out = True
)

atoms = torch . randint ( 0 , 28 , ( 2 , 32 ))
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

refined_coors = coors + model ( atoms , coors , mask , return_type = 1 ) # (2, 32, 3)

إذا كنت تعتقد أن الشبكة يمكن أن تستفيد أكثر من الترميز الموضعي، فيمكنك تحويل مواقعك في الفضاء إلى ميزات (featurize) وتمريرها على النحو التالي.

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 2 ,
    input_degrees = 2 ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    reduce_dim_out = True  # reduce out the final dimension
)

atom_feats  = torch . randn ( 2 , 32 , 64 , 1 ) # b x n x d x type0
coors_feats = torch . randn ( 2 , 32 , 64 , 3 ) # b x n x d x type1

# atom features are type 0, predicted coordinates are type 1
features = { '0' : atom_feats , '1' : coors_feats }
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

refined_coors = coors + model ( features , coors , mask , return_type = 1 ) # (2, 32, 3) - equivariant to input type 1 features and coordinates

الحواف

لتقديم معلومات الحواف إلى محولات SE3 (على سبيل المثال أنواع الروابط بين الذرات)، عليك فقط تمرير وسيطين مسمّيين إضافيين (keyword arguments) عند التهيئة.

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 28 ,
    dim = 64 ,
    num_edge_tokens = 4 ,       # number of edge type, say 4 bond types
    edge_dim = 16 ,             # dimension of edge embedding
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 3 ,
    output_degrees = 1 ,
    reduce_dim_out = True
)

atoms = torch . randint ( 0 , 28 , ( 2 , 32 ))
bonds = torch . randint ( 0 , 4 , ( 2 , 32 , 32 ))
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

pred = model ( atoms , coors , mask , edges = bonds , return_type = 0 ) # (2, 32, 1)

إذا كنت ترغب في تمرير قيم مستمرة لحوافك، فيمكنك اختيار عدم تعيين num_edge_tokens، وترميز أنواع الروابط المتقطعة الخاصة بك، ثم ضمّها (concatenate) إلى ميزات فورييه لهذه القيم المستمرة.

 import torch
from se3_transformer_pytorch import SE3Transformer
from se3_transformer_pytorch . utils import fourier_encode

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    attend_self = True ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    edge_dim = 34           # edge dimension must match the final dimension of the edges being passed in
)

feats = torch . randn ( 1 , 32 , 64 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()

pairwise_continuous_values = torch . randint ( 0 , 4 , ( 1 , 32 , 32 , 2 ))  # say there are 2

edges = fourier_encode (
    pairwise_continuous_values ,
    num_encodings = 8 ,
    include_self = True
) # (1, 32, 32, 34) - {2 * (2 * 8 + 1)}

out = model ( feats , coors , mask , edges = edges , return_type = 1 )

الجيران المتفرقون

إذا كنت تعرف كيفية اتصال نقاطك ببعضها (لنفترض أنك تعمل مع الجزيئات)، فيمكنك تمرير مصفوفة تجاور (adjacency matrix) في شكل قناع منطقي (حيث يشير True إلى وجود اتصال).

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 32 ,
    heads = 8 ,
    depth = 1 ,
    dim_head = 64 ,
    num_degrees = 2 ,
    valid_radius = 10 ,
    attend_sparse_neighbors = True ,  # this must be set to true, in which case it will assert that you pass in the adjacency matrix
    num_neighbors = 0 ,               # if you set this to 0, it will only consider the connected neighbors as defined by the adjacency matrix. but if you set a value greater than 0, it will continue to fetch the closest points up to this many, excluding the ones already specified by the adjacency matrix
    max_sparse_neighbors = 8         # you can cap the number of neighbors, sampled from within your sparse set of neighbors as defined by the adjacency matrix, if specified
)

feats = torch . randn ( 1 , 128 , 32 )
coors = torch . randn ( 1 , 128 , 3 )
mask  = torch . ones ( 1 , 128 ). bool ()

# placeholder adjacency matrix
# naively assuming the sequence is one long chain (128, 128)

i = torch . arange ( 128 )
adj_mat = ( i [:, None ] <= ( i [ None , :] + 1 )) & ( i [:, None ] >= ( i [ None , :] - 1 ))

out = model ( feats , coors , mask , adj_mat = adj_mat ) # (1, 128, 32)

يمكنك أيضًا جعل الشبكة تشتق لك تلقائيًا الجيران من الدرجة N باستخدام وسيط مسمّى إضافي واحد هو num_adj_degrees. إذا كنت ترغب في أن يميّز النظام بين درجات الجيران كمعلومات حواف، فمرّر أيضًا قيمة غير صفرية للوسيط adj_dim.

 import torch
from se3_transformer_pytorch . se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    attend_self = True ,
    num_degrees = 2 ,
    output_degrees = 2 ,
    num_neighbors = 0 ,
    attend_sparse_neighbors = True ,
    num_adj_degrees = 2 ,    # automatically derive 2nd degree neighbors
    adj_dim = 4             # embed 1st and 2nd degree neighbors (as well as null neighbors) with edge embeddings of this dimension
)

feats = torch . randn ( 1 , 32 , 64 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()

# placeholder adjacency matrix
# naively assuming the sequence is one long chain (128, 128)

i = torch . arange ( 128 )
adj_mat = ( i [:, None ] <= ( i [ None , :] + 1 )) & ( i [:, None ] >= ( i [ None , :] - 1 ))

out = model ( feats , coors , mask , adj_mat = adj_mat , return_type = 1 )

للحصول على تحكم دقيق في أبعاد كل نوع، يمكنك استخدام الوسيطين المسمّيين hidden_fiber_dict و out_fiber_dict لتمرير قاموس تكون مفاتيحه الدرجات وقيمه الأبعاد المقابلة لها.

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 28 ,
    dim = 64 ,
    num_edge_tokens = 4 ,
    edge_dim = 16 ,
    depth = 2 ,
    input_degrees = 1 ,
    num_degrees = 3 ,
    output_degrees = 1 ,
    hidden_fiber_dict = { 0 : 16 , 1 : 8 , 2 : 4 },
    out_fiber_dict = { 0 : 16 , 1 : 1 },
    reduce_dim_out = False
)

atoms = torch . randint ( 0 , 28 , ( 2 , 32 ))
bonds = torch . randint ( 0 , 4 , ( 2 , 32 , 32 ))
coors = torch . randn ( 2 , 32 , 3 )
mask  = torch . ones ( 2 , 32 ). bool ()

pred = model ( atoms , coors , mask , edges = bonds )

pred [ '0' ] # (2, 32, 16)
pred [ '1' ] # (2, 32, 1, 3)

الجيران

يمكنك أيضًا التحكم في العقد التي يمكن اعتبارها عن طريق تمرير قناع الجوار. سيتم حجب كافة القيم False بعيدًا عن الاعتبار.

 import torch
from se3_transformer_pytorch . se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 16 ,
    dim_head = 16 ,
    attend_self = True ,
    num_degrees = 4 ,
    output_degrees = 2 ,
    num_edge_tokens = 4 ,
    num_neighbors = 8 ,      # make sure you set this value as the maximum number of neighbors set by your neighbor_mask, or it will throw a warning
    edge_dim = 2 ,
    depth = 3
)

feats = torch . randn ( 1 , 32 , 16 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()
bonds = torch . randint ( 0 , 4 , ( 1 , 32 , 32 ))

neighbor_mask = torch . ones ( 1 , 32 , 32 ). bool () # set the nodes you wish to be masked out as False

out = model (
    feats ,
    coors ,
    mask ,
    edges = bonds ,
    neighbor_mask = neighbor_mask ,
    return_type = 1
)

العقد العامة

تسمح لك هذه الميزة بتمرير متجهات يمكن اعتبارها عقدًا عامة تراها جميع العقد الأخرى. تتمثل الفكرة في تجميع (pooling) الرسم البياني الخاص بك في عدد قليل من متجهات الميزات، والتي سيتم إسقاطها إلى مفاتيح/قيم عبر جميع طبقات الانتباه في الشبكة. سيكون لجميع العقد حق الوصول الكامل إلى معلومات العقد العامة، بغض النظر عن أقرب الجيران أو حساب التجاور.

 import torch
from torch import nn
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    num_degrees = 2 ,
    num_neighbors = 4 ,
    valid_radius = 10 ,
    global_feats_dim = 32 # this must be set to the dimension of the global features, in this example, 32
)

feats = torch . randn ( 1 , 32 , 64 )
coors = torch . randn ( 1 , 32 , 3 )
mask  = torch . ones ( 1 , 32 ). bool ()

# naively derive global features
# by pooling features and projecting
global_feats = nn . Linear ( 64 , 32 )( feats . mean ( dim = 1 , keepdim = True )) # (1, 1, 32)

out = model ( feats , coors , mask , return_type = 0 , global_feats = global_feats )

ما يجب القيام به:

السماح للعقد العامة بالانتباه (attend) إلى جميع العقد الأخرى، لمنح الشبكة قناة عامة للمعلومات. (على غرار BigBird، ETC، Longformer، إلخ)

الانحدار الذاتي

يمكنك استخدام محولات SE3 بأسلوب الانحدار الذاتي بخيار إضافي واحد فقط

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 512 ,
    heads = 8 ,
    depth = 6 ,
    dim_head = 64 ,
    num_degrees = 4 ,
    valid_radius = 10 ,
    causal = True          # set this to True
)

feats = torch . randn ( 1 , 1024 , 512 )
coors = torch . randn ( 1 , 1024 , 3 )
mask  = torch . ones ( 1 , 1024 ). bool ()

out = model ( feats , coors , mask ) # (1, 1024, 512)

الميزات التجريبية

مفاتيح دون التفاف زوجي

لقد اكتشفت أن استخدام المفاتيح المسقطة خطيًا (بدلاً من الالتفاف الزوجي) يعطي نتائج مقبولة في مهمة تجريبية مبسطة لإزالة الضوضاء. وهذا يؤدي إلى توفير الذاكرة بنسبة 25%. يمكنك تجربة هذه الميزة عن طريق ضبط linear_proj_keys = True

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 1 ,
    num_degrees = 4 ,
    num_neighbors = 8 ,
    valid_radius = 10 ,
    splits = 4 ,
    linear_proj_keys = True # set this to True
). cuda ()

feats = torch . randn ( 1 , 32 , 64 ). cuda ()
coors = torch . randn ( 1 , 32 , 3 ). cuda ()
mask  = torch . ones ( 1 , 32 ). bool (). cuda ()

out = model ( feats , coors , mask , return_type = 0 )

المفتاح/القيم المشتركة عبر جميع الرؤوس

هناك تقنية غير معروفة نسبيًا للمحولات حيث يمكن للمرء مشاركة رأس مفتاح/قيمة واحد عبر جميع رؤوس الاستعلامات. في تجربتي مع معالجة اللغات الطبيعية (NLP)، يؤدي هذا عادةً إلى أداء أسوأ، ولكن إذا كنت حقًا بحاجة إلى مقايضة الذاكرة بمزيد من العمق أو بعدد أكبر من الدرجات، فقد يكون هذا خيارًا جيدًا.

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 8 ,
    num_degrees = 4 ,
    num_neighbors = 8 ,
    valid_radius = 10 ,
    splits = 4 ,
    one_headed_key_values = True  # one head of key / values shared across all heads of the queries
). cuda ()

feats = torch . randn ( 1 , 32 , 64 ). cuda ()
coors = torch . randn ( 1 , 32 , 3 ). cuda ()
mask  = torch . ones ( 1 , 32 ). bool (). cuda ()

out = model ( feats , coors , mask , return_type = 0 )

ربط المفتاح / القيم

يمكنك أيضًا ربط المفتاح/القيم (جعلها متماثلة) لتوفير نصف الذاكرة

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 64 ,
    depth = 8 ,
    num_degrees = 4 ,
    num_neighbors = 8 ,
    valid_radius = 10 ,
    splits = 4 ,
    tie_key_values = True # set this to True
). cuda ()

feats = torch . randn ( 1 , 32 , 64 ). cuda ()
coors = torch . randn ( 1 , 32 , 3 ). cuda ()
mask  = torch . ones ( 1 , 32 ). bool (). cuda ()

out = model ( feats , coors , mask , return_type = 0 )

استخدام EGNN

هذه نسخة تجريبية من EGNN تعمل مع الأنواع الأعلى، وبأبعاد تتجاوز البُعد الواحد (الخاص بالإحداثيات). لا يزال اسم الفئة هو SE3Transformer لأنه يعيد استخدام بعض المنطق الموجود مسبقًا، لذا تجاهل ذلك الآن حتى أقوم بتنظيفه لاحقًا.

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 32 ,
    num_neighbors = 8 ,
    num_edge_tokens = 4 ,
    edge_dim = 4 ,
    num_degrees = 4 ,       # number of higher order types - will use basis on a TCN to project to these dimensions
    use_egnn = True ,       # set this to true to use EGNN instead of equivariant attention layers
    egnn_hidden_dim = 64 ,  # egnn hidden dimension
    depth = 4 ,             # depth of EGNN
    reduce_dim_out = True  # will project the dimension of the higher types to 1
). cuda ()

feats = torch . randn ( 2 , 32 , 32 ). cuda ()
coors = torch . randn ( 2 , 32 , 3 ). cuda ()
bonds = torch . randint ( 0 , 4 , ( 2 , 32 , 32 )). cuda ()
mask  = torch . ones ( 2 , 32 ). bool (). cuda ()

refinement = model ( feats , coors , mask , edges = bonds , return_type = 1 ) # (2, 32, 3)

coors = coors + refinement  # update coors with refinement

إذا كنت ترغب في تحديد أبعاد فردية لكل نوع من الأنواع الأعلى، فما عليك سوى تمرير hidden_fiber_dict حيث يكون القاموس بالتنسيق {<degree>:<dim>} بدلاً من num_degrees

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    dim = 32 ,
    num_neighbors = 8 ,
    hidden_fiber_dict = { 0 : 32 , 1 : 16 , 2 : 8 , 3 : 4 },
    use_egnn = True ,
    depth = 4 ,
    egnn_hidden_dim = 64 ,
    egnn_weights_clamp_value = 2 , 
    reduce_dim_out = True
). cuda ()

feats = torch . randn ( 2 , 32 , 32 ). cuda ()
coors = torch . randn ( 2 , 32 , 3 ). cuda ()
mask  = torch . ones ( 2 , 32 ). bool (). cuda ()

refinement = model ( feats , coors , mask , return_type = 1 ) # (2, 32, 3)

coors = coors + refinement  # update coors with refinement

التحجيم (قيد العمل)

سيدرج هذا القسم الجهود المستمرة لجعل SE3 Transformer قابلاً للتوسع بشكل أفضل قليلاً.

أولاً، أضفت شبكات قابلة للعكس. هذا يسمح لي بإضافة المزيد من العمق قبل أن أصطدم بحواجز الذاكرة المعتادة. ويظهر الحفاظ على التكافؤ في الاختبارات.

 import torch
from se3_transformer_pytorch import SE3Transformer

model = SE3Transformer (
    num_tokens = 20 ,
    dim = 32 ,
    dim_head = 32 ,
    heads = 4 ,
    depth = 12 ,             # 12 layers
    input_degrees = 1 ,
    num_degrees = 3 ,
    output_degrees = 1 ,
    reduce_dim_out = True ,
    reversible = True       # set reversible to True
). cuda ()

atoms = torch . randint ( 0 , 4 , ( 2 , 32 )). cuda ()
coors = torch . randn ( 2 , 32 , 3 ). cuda ()
mask  = torch . ones ( 2 , 32 ). bool (). cuda ()

pred = model ( atoms , coors , mask = mask , return_type = 0 )

loss = pred . sum ()
loss . backward ()

أمثلة

قم أولاً بتثبيت sidechainnet

$ pip install sidechainnet

ثم قم بتشغيل مهمة تقليل الضوضاء في العمود الفقري للبروتين

$ python denoise.py

التخزين المؤقت

بشكل افتراضي، يتم تخزين متجهات الأساس (basis vectors) مؤقتًا. ومع ذلك، إذا كانت هناك حاجة لمسح ذاكرة التخزين المؤقت، فما عليك سوى تعيين متغير البيئة CLEAR_CACHE إلى قيمة ما عند بدء تشغيل البرنامج النصي

$ CLEAR_CACHE=1 python train.py

أو يمكنك محاولة حذف دليل ذاكرة التخزين المؤقت، الذي يجب أن يكون موجودًا في

$ rm -rf ~/.cache.equivariant_attention

يمكنك أيضًا تعيين الدليل الخاص بك حيث تريد تخزين ذاكرة التخزين المؤقت، في حالة وجود مشكلات في الأذونات للدليل الافتراضي

CACHE_PATH=./path/to/my/cache python train.py

اختبار

$ python setup.py pytest

شكر وتقدير

تعد هذه المكتبة إلى حد كبير نقلاً (port) لمستودع Fabian الرسمي، ولكن بدون مكتبة DGL.

الاستشهادات

 @misc { fuchs2020se3transformers ,
    title   = { SE(3)-Transformers: 3D Roto-Translation Equivariant Attention Networks } , 
    author  = { Fabian B. Fuchs and Daniel E. Worrall and Volker Fischer and Max Welling } ,
    year    = { 2020 } ,
    eprint  = { 2006.10503 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.LG }
}

 @misc { satorras2021en ,
    title   = { E(n) Equivariant Graph Neural Networks } ,
    author  = { Victor Garcia Satorras and Emiel Hoogeboom and Max Welling } ,
    year    = { 2021 } ,
    eprint  = { 2102.09844 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.LG }
}

 @misc { gomez2017reversible ,
    title     = { The Reversible Residual Network: Backpropagation Without Storing Activations } ,
    author    = { Aidan N. Gomez and Mengye Ren and Raquel Urtasun and Roger B. Grosse } ,
    year      = { 2017 } ,
    eprint    = { 1707.04585 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.CV }
}

 @misc { shazeer2019fast ,
    title   = { Fast Transformer Decoding: One Write-Head is All You Need } ,
    author  = { Noam Shazeer } ,
    year    = { 2019 } ,
    eprint  = { 1911.02150 } ,
    archivePrefix = { arXiv } ,
    primaryClass = { cs.NE }
}