plip
1.0.0
Pra-Pelatihan Bahasa dan Gambar Patologi (PLIP) adalah model dasar visi dan bahasa pertama untuk AI Patologi. PLIP adalah model terlatih berskala besar yang dapat digunakan untuk mengekstrak fitur visual dan bahasa dari gambar patologi dan deskripsi teks. Model ini merupakan versi yang disempurnakan dari model CLIP asli.
from plip.plip import PLIP
import numpy as np

plip = PLIP('vinid/plip')

# Embed both modalities in batches.
image_embeddings = plip.encode_images(images, batch_size=32)
text_embeddings = plip.encode_text(texts, batch_size=32)

# Scale every embedding to unit L2 norm so that a plain dot product
# between vectors equals their cosine similarity.
image_embeddings /= np.linalg.norm(image_embeddings, ord=2, axis=-1, keepdims=True)
text_embeddings /= np.linalg.norm(text_embeddings, ord=2, axis=-1, keepdims=True)
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Load the PLIP checkpoint through the standard CLIP classes.
model = CLIPModel.from_pretrained("vinid/plip")
processor = CLIPProcessor.from_pretrained("vinid/plip")

image = Image.open("images/image1.jpg")
inputs = processor(
    text=["a photo of label 1", "a photo of label 2"],
    images=image,
    return_tensors="pt",
    padding=True,
)
outputs = model(**inputs)
logits_per_image = outputs.logits_per_image  # image-text similarity scores
probs = logits_per_image.softmax(dim=1)  # per-image probabilities over the labels
print(probs)

# FIX: Image.resize returns a NEW image rather than resizing in place, so the
# original bare `image.resize((224, 224))` statement had no effect. Bind the
# result to make the resize take effect.
image = image.resize((224, 224))
Jika Anda menggunakan PLIP dalam penelitian Anda, harap mengutip makalah berikut:
@article{huang2023visual,
  title={A visual--language foundation model for pathology image analysis using medical Twitter},
  author={Huang, Zhi and Bianchi, Federico and Yuksekgonul, Mert and Montine, Thomas J and Zou, James},
  journal={Nature Medicine},
  pages={1--10},
  year={2023},
  publisher={Nature Publishing Group US New York}
}
API internal telah disalin dari FashionCLIP.