surgeon pytorch 다운로드 - surgeon pytorch 소스 코드 다운로드

surgeon pytorch

AI 소스 코드

0.0.4

다운로드

PyTorch 모델의 중간 레이어를 검사하고 추출하는 라이브러리입니다.

왜?

코드를 수정하지 않고 PyTorch 모델의 중간 레이어를 검사하려는 경우가 종종 있습니다. 이는 언어 모델의 어텐션 행렬을 얻거나, 레이어 임베딩을 시각화하거나, 중간 레이어에 손실 함수를 적용하는 데 유용할 수 있습니다. 때로는 모델의 하위 부분을 추출하여 독립적으로 실행하면서 디버깅하거나 별도로 학습시키고 싶을 때도 있습니다. Surgeon을 사용하면 원본 모델의 코드를 한 줄도 변경하지 않고 이 모든 작업을 수행할 수 있습니다.

설치

$ pip install surgeon-pytorch

용법

검사

PyTorch 모델이 주어지면 get_layers를 사용하여 모든 레이어를 표시할 수 있습니다.

 import torch
import torch . nn as nn

from surgeon_pytorch import Inspect , get_layers

class SomeModel(nn.Module):
    """Minimal example network: three linear layers applied one after another.

    Feature sizes go 5 -> 3 -> 2 -> 1, with no activation in between.
    """

    def __init__(self):
        super().__init__()
        # The attribute names are the layer names reported for this model.
        self.layer1 = nn.Linear(in_features=5, out_features=3)
        self.layer2 = nn.Linear(in_features=3, out_features=2)
        self.layer3 = nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Feed ``x`` through layer1, layer2 and layer3 and return the result."""
        hidden = self.layer1(x)
        hidden = self.layer2(hidden)
        return self.layer3(hidden)


# Instantiate the example model and print the names of its layers.
model = SomeModel ()
print ( get_layers ( model )) # ['layer1', 'layer2', 'layer3']

그런 다음 Inspect를 사용하여 검사할 model을 래핑할 수 있으며, 래핑된 새 모델은 순방향 호출을 할 때마다 지정한 레이어의 출력도 두 번째 반환 값으로 함께 반환합니다.

 model_wrapped = Inspect ( model , layer = 'layer2' )
x = torch . rand ( 1 , 5 )
y , x2 = model_wrapped ( x )
print ( x2 ) # tensor([[-0.2726,  0.0910]], grad_fn=<AddmmBackward0>)

여러 레이어 검사

레이어 목록을 제공할 수 있습니다.

 model_wrapped = Inspect ( model , layer = [ 'layer1' , 'layer2' ])
x = torch . rand ( 1 , 5 )
y , [ x1 , x2 ] = model_wrapped ( x )
print ( x1 ) # tensor([[ 0.1739,  0.3844, -0.4724]], grad_fn=<AddmmBackward0>)
print ( x2 ) # tensor([[-0.2238,  0.0107]], grad_fn=<AddmmBackward0>)

검사된 레이어 출력 이름 지정

명명된 출력을 얻기 위해 사전을 제공할 수 있습니다.

 model_wrapped = Inspect ( model , layer = { 'layer1' : 'x1' , 'layer2' : 'x2' })
x = torch . rand ( 1 , 5 )
y , layers = model_wrapped ( x )
print ( layers )
"""
{
    'x1': tensor([[ 0.3707,  0.6584, -0.2970]], grad_fn=<AddmmBackward0>),
    'x2': tensor([[-0.1953, -0.3408]], grad_fn=<AddmmBackward0>)
}
"""

API

 model = Inspect (
    model : nn . Module ,
    layer : Union [ str , Sequence [ str ], Dict [ str , str ]],
    keep_output : bool = True ,
)

추출

PyTorch 모델이 주어지면 get_nodes를 사용하여 그래프의 모든 중간 노드를 표시할 수 있습니다.

 import torch
import torch . nn as nn
from surgeon_pytorch import Extract , get_nodes

class SomeModel(nn.Module):
    """Example network mixing module layers with functional activations.

    The forward pass applies layer1 -> relu -> layer2 -> sigmoid -> layer3
    -> tanh, so the traced graph contains both module nodes and function or
    method nodes.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(5, 3)
        self.layer2 = nn.Linear(3, 2)
        # layer2 emits 2 features, so layer3 must accept 2 inputs; with
        # in_features=1 a full forward pass fails on a shape mismatch.
        self.layer3 = nn.Linear(2, 1)

    def forward(self, x):
        """Return tanh(layer3(sigmoid(layer2(relu(layer1(x))))))."""
        x1 = torch.relu(self.layer1(x))
        x2 = torch.sigmoid(self.layer2(x1))
        y = self.layer3(x2).tanh()
        return y

# Instantiate the example model and print the names of its graph nodes.
model = SomeModel ()
print ( get_nodes ( model )) # ['x', 'layer1', 'relu', 'layer2', 'sigmoid', 'layer3', 'tanh']

그런 다음 Extract를 사용하여 출력을 추출할 수 있습니다. Extract는 요청된 출력 노드를 반환하는 새 모델을 생성합니다.

 model_ext = Extract ( model , node_out = 'sigmoid' )
x = torch . rand ( 1 , 5 )
sigmoid = model_ext ( x )
print ( sigmoid ) # tensor([[0.5570, 0.3652]], grad_fn=<SigmoidBackward0>)

새로운 입력 노드를 사용하여 모델을 추출할 수도 있습니다.

 model_ext = Extract ( model , node_in = 'layer1' , node_out = 'sigmoid' )
layer1 = torch . rand ( 1 , 3 )
sigmoid = model_ext ( layer1 )
print ( sigmoid ) # tensor([[0.5444, 0.3965]], grad_fn=<SigmoidBackward0>)

다중 노드

또한 여러 입력과 출력을 제공하고 이름을 지정할 수도 있습니다.

 model_ext = Extract ( model , node_in = { 'layer1' : 'x' }, node_out = { 'sigmoid' : 'y1' , 'relu' : 'y2' })
out = model_ext ( x = torch . rand ( 1 , 3 ))
print ( out )
"""
{
    'y1': tensor([[0.4437, 0.7152]], grad_fn=<SigmoidBackward0>),
    'y2': tensor([[0.0555, 0.9014, 0.8297]]),
}
"""

그래프 입력/출력 요약

입력 노드를 변경하는 것만으로는 그래프를 자르기에 충분하지 않을 수 있습니다(이전 입력에 연결된 다른 종속성이 남아 있을 수 있음). 새 그래프에 필요한 모든 입력을 보려면 model_ext.summary를 확인하면 됩니다. 여기에는 필요한 모든 입력과 반환되는 출력의 개요가 담겨 있습니다.

 import torch
import torch . nn as nn
from surgeon_pytorch import Extract , get_nodes

class SomeModel(nn.Module):
    """Example network with two parallel branches that are summed.

    ``layer1a`` and ``layer1b`` both consume the same input; their outputs
    are added with ``torch.add`` and the sum is passed to ``layer2``.
    """

    def __init__(self):
        super().__init__()
        self.layer1a = nn.Linear(in_features=2, out_features=2)
        self.layer1b = nn.Linear(in_features=2, out_features=2)
        self.layer2 = nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Return layer2(layer1a(x) + layer1b(x))."""
        branch_a = self.layer1a(x)
        branch_b = self.layer1b(x)
        return self.layer2(torch.add(branch_a, branch_b))

# Instantiate the two-branch example model and print its graph node names.
model = SomeModel ()
print ( get_nodes ( model )) # ['x', 'layer1a', 'layer1b', 'add', 'layer2']

# Use the 'layer1a' node as a new input named 'my_input' and return the
# 'add' node under the name 'my_add'.
model_ext = Extract ( model , node_in = { 'layer1a' : 'my_input' }, node_out = { 'add' : 'my_add' })
# The summary lists 'x' as a required input next to 'my_input': layer1b
# still consumes the original input.
print ( model_ext . summary ) # {'input': ('x', 'my_input'), 'output': {'my_add': add}}

# Both inputs named in the summary are passed by keyword.
out = model_ext ( x = torch . rand ( 1 , 2 ), my_input = torch . rand ( 1 , 2 ))
print ( out ) # {'my_add': tensor([[ 0.3722, -0.6843]], grad_fn=<AddBackward0>)}

API

 model = Extract (
    model : nn . Module ,
    node_in : Optional [ Union [ str , Sequence [ str ], Dict [ str , str ]]] = None ,
    node_out : Optional [ Union [ str , Sequence [ str ], Dict [ str , str ]]] = None ,
    tracer : Optional [ Type [ Tracer ]] = None ,          # Tracer class used, default: torch.fx.Tracer
    concrete_args : Optional [ Dict [ str , Any ]] = None , # Tracer concrete_args, default: None
    keep_output : bool = None ,                       # Set to `True` to return original outputs as first argument, default: True except if node_out are provided
    share_modules : bool = False ,                    # Set to true if you want to share module weights with original model
)

검사와 추출

Inspect 클래스는 항상 입력으로 제공된 전체 모델을 실행하며, 특수한 후크를 사용하여 텐서 값이 흘러가는 대로 기록합니다. 이 접근 방식은 (1) 새 모듈을 만들지 않고 (2) 동적 실행 그래프(예: 입력에 의존하는 for 루프 및 if 문)를 허용한다는 장점이 있습니다. Inspect의 단점은 (1) 모델의 일부만 실행하면 되는 경우에도 전체 모델을 실행하므로 일부 계산이 낭비되고 (2) nn.Module 레이어의 값만 출력할 수 있으며 중간 함수의 값은 얻을 수 없다는 것입니다.

Extract 클래스는 기호 추적(symbolic tracing)을 사용하여 완전히 새로운 모델을 구축합니다. 이 접근 방식의 장점은 (1) 그래프를 어디에서나 자르고 해당 부분만 계산하는 새 모델을 얻을 수 있고, (2) 레이어뿐만 아니라 중간 함수에서도 값을 추출할 수 있으며, (3) 입력 텐서도 변경할 수 있다는 것입니다. Extract의 단점은 정적 그래프만 허용된다는 것입니다(대부분의 모델은 정적 그래프를 가집니다).