MLX-Embeddings is a package for running vision and language embedding models locally on your Mac using MLX.
You can install mlx-embeddings using pip:
pip install mlx-embeddings
To generate an embedding for a single piece of text:
import mlx.core as mx
from mlx_embeddings.utils import load
# Sentence to embed
text = "I like reading"

# Fetch the model together with its tokenizer
model, tokenizer = load("sentence-transformers/all-MiniLM-L6-v2")

# Encode the sentence as MLX-format ids and run the model on them
input_ids = tokenizer.encode(text, return_tensors="mlx")
outputs = model(input_ids)

# Keep index 0 along the second axis of the first model output
# (presumably the first token's vector for each batch entry -- confirm
# against the model's output layout).
first_output = outputs[0]
embeddings = first_output[:, 0, :]
To compare multiple texts using their embeddings:
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
import mlx.core as mx
from mlx_embeddings.utils import load
# A single load() call yields both the model and its tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model, tokenizer = load(model_name)
def get_embedding(text, model, tokenizer):
    """Return the embedding vector for a single text.

    Args:
        text: The text to embed.
        model: Callable that accepts the encoded ids and returns an indexable
            result whose first element can be indexed as
            ``[batch, position, feature]``.
        tokenizer: Object exposing ``encode(text, return_tensors=..., ...)``.

    Returns:
        The vector found at position 0 of the first (and, for a single text,
        only) batch entry of the model's first output.
    """
    input_ids = tokenizer.encode(
        text,
        return_tensors="mlx",
        padding=True,
        truncation=True,
        max_length=512,
    )
    outputs = model(input_ids)
    # Position 0 along the second axis for every batch entry
    # (presumably the leading special token -- confirm for the model in use).
    first_position = outputs[0][:, 0, :]
    # A single text was encoded, so only batch entry 0 is returned.
    return first_position[0]
# Sentences to compare
texts = [
    "I like grapes",
    "I like fruits",
    "The slow green turtle crawls under the busy ant.",
]

# Embed each sentence on its own, keeping the order of `texts`
embeddings = [
    get_embedding(sentence, model, tokenizer)
    for sentence in texts
]

# Pairwise cosine similarity between all sentence embeddings
similarity_matrix = cosine_similarity(embeddings)
# Visualize results
def plot_similarity_matrix(similarity_matrix, labels):
    """Display a similarity matrix as an annotated heatmap.

    Args:
        similarity_matrix: 2-D grid of similarity values to draw.
        labels: Tick labels applied to both the x and y axes.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    plt.figure(figsize=(5, 4))
    sns.heatmap(
        similarity_matrix,
        annot=True,  # write each cell's value inside the cell
        cmap="coolwarm",
        xticklabels=labels,
        yticklabels=labels,
    )
    plt.title("Similarity Matrix Heatmap")
    plt.tight_layout()
    plt.show()
# One axis label per input text: "Text 1", "Text 2", ...
labels = [f"Text {i+1}" for i, _ in enumerate(texts)]
plot_similarity_matrix(similarity_matrix, labels)
To process multiple texts at once in a single batch:
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
import mlx.core as mx
from mlx_embeddings.utils import load
# A single load() call yields both the model and its tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model, tokenizer = load(model_name)
def get_embedding(texts, model, tokenizer):
    """Encode a batch of texts together and return the model's first output.

    Args:
        texts: The strings to encode as one batch.
        model: Callable taking the input ids positionally and an
            ``attention_mask`` keyword argument; its result must be indexable.
        tokenizer: Object exposing ``batch_encode_plus``; the value it returns
            must provide the keys ``"input_ids"`` and ``"attention_mask"``.

    Returns:
        ``outputs[0]`` exactly as produced by the model; no pooling or
        slicing is applied here.
    """
    inputs = tokenizer.batch_encode_plus(
        texts,
        return_tensors="mlx",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # The mask is forwarded alongside the ids because the batch is padded.
    outputs = model(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )
    return outputs[0]
def compute_and_print_similarity(embeddings):
    """Print and return pairwise cosine similarities between batch entries.

    Args:
        embeddings: Array with a ``shape`` attribute and a ``reshape`` method
            whose first axis indexes the sequences. All remaining axes are
            flattened into one vector per sequence, so both 3-D and 2-D
            inputs are accepted.

    Returns:
        The matrix produced by ``cosine_similarity`` for the flattened rows.
    """
    batch_size = embeddings.shape[0]
    # Collapse every trailing axis so each sequence becomes a single row.
    # NOTE(review): if the input is per-token output of a padded batch, the
    # padded positions are part of these rows -- confirm that is intended.
    flattened = embeddings.reshape(batch_size, -1)
    similarity_matrix = cosine_similarity(flattened)

    print("Similarity matrix between sequences:")
    print(similarity_matrix)
    # Blank separator before the per-pair report.
    print("\n")

    # Report each unordered pair once (upper triangle, diagonal excluded).
    for i in range(batch_size):
        for j in range(i + 1, batch_size):
            print(f"Similarity between sequence {i+1} and sequence {j+1}: {similarity_matrix[i][j]:.4f}")

    return similarity_matrix
# Sentences to embed together as one batch
texts = [
    "I like grapes",
    "I like fruits",
    "The slow green turtle crawls under the busy ant.",
]

# Run the whole batch through the model, then print and keep the
# pairwise similarities
embeddings = get_embedding(texts, model, tokenizer)
similarity_matrix = compute_and_print_similarity(embeddings)

# Draw the heatmap. plot_similarity_matrix is the helper defined in the
# earlier "compare multiple texts" example and must already be defined.
labels = [f"Text {i+1}" for i, _ in enumerate(texts)]
plot_similarity_matrix(similarity_matrix, labels)
MLX-Embeddings supports a variety of model architectures for text embedding tasks. Here's a breakdown of the currently supported architectures:
We're continuously working to expand our support for additional model architectures. Check our GitHub repository or documentation for the most up-to-date list of supported models and their specific versions.
Contributions to MLX-Embeddings are welcome! Please refer to our contribution guidelines for more information.
This project is licensed under the GNU General Public License v3.
For any questions or issues, please open an issue on the GitHub repository.