
Table of Contents

  1. Related Projects
  2. Installation
  3. Docker
  4. Usage
    • llama_cli
    • Launch Files
    • LoRA Adapters
    • ROS 2 Clients
    • LangChain
  5. Demos

Related Projects

Installation

To run llama_ros with CUDA, you first need to install the CUDA Toolkit. You can then compile llama_ros with --cmake-args -DGGML_CUDA=ON to enable CUDA support.

cd ~/ros2_ws/src
git clone https://github.com/mgonzs13/llama_ros.git
pip3 install -r llama_ros/requirements.txt
cd ~/ros2_ws
rosdep install --from-paths src --ignore-src -r -y
colcon build --cmake-args -DGGML_CUDA=ON # add this for CUDA

Docker

Build the llama_ros Docker image, or download an image from DockerHub. You can choose whether to build llama_ros with CUDA (USE_CUDA) and select the CUDA version (CUDA_VERSION). Remember that you must set DOCKER_BUILDKIT=0 when building the image in order to compile llama_ros with CUDA.

DOCKER_BUILDKIT=0 docker build -t llama_ros --build-arg USE_CUDA=1 --build-arg CUDA_VERSION=12-6 .

Run the Docker container. If you want to use CUDA, you must install the NVIDIA Container Toolkit and add --gpus all:

docker run -it --rm --gpus all llama_ros

Usage

llama_cli

llama_ros includes commands that speed up testing GGUF-based LLMs within the ROS 2 ecosystem. The following commands are integrated into the ROS 2 CLI:

launch

Use this command to launch an LLM from a YAML file. The YAML configuration is used to launch the LLM in the same way as a regular launch file. Here is an example of how to use it:

ros2 llama launch ~/ros2_ws/src/llama_ros/llama_bringup/models/StableLM-Zephyr.yaml

prompt

Use this command to send a prompt to an already launched LLM. The command takes a string (the prompt) and accepts additional arguments, such as -t to set the sampling temperature, as in the example below.

Here is an example of how to use it:

ros2 llama prompt "Do you know ROS 2?" -t 0.0

Launch Files

First, you need to create a launch file to use llama_ros or llava_ros. This launch file contains the main parameters to download the model from HuggingFace and configure it. Take a look at the following examples and the predefined launch files.

llama_ros (Python Launch)

from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch


def generate_launch_description():

    return LaunchDescription([
        create_llama_launch(
            n_ctx=2048,  # context of the LLM in tokens
            n_batch=8,  # batch size in tokens
            n_gpu_layers=0,  # layers to load in GPU
            n_threads=1,  # threads
            n_predict=2048,  # max tokens, -1 == inf

            model_repo="TheBloke/Marcoroni-7B-v3-GGUF",  # Hugging Face repo
            model_filename="marcoroni-7b-v3.Q4_K_M.gguf",  # model file in repo

            system_prompt_type="Alpaca"  # system prompt type
        )
    ])
ros2 launch llama_bringup marcoroni.launch.py

llama_ros (YAML Config)

n_ctx: 2048  # context of the LLM in tokens
n_batch: 8  # batch size in tokens
n_gpu_layers: 0  # layers to load in GPU
n_threads: 1  # threads
n_predict: 2048  # max tokens, -1 == inf

model_repo: "cstr/Spaetzle-v60-7b-GGUF"  # Hugging Face repo
model_filename: "Spaetzle-v60-7b-q4-k-m.gguf"  # model file in repo

system_prompt_type: "Alpaca"  # system prompt type

import os
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch_from_yaml
from ament_index_python.packages import get_package_share_directory


def generate_launch_description():
    return LaunchDescription([
        create_llama_launch_from_yaml(os.path.join(
            get_package_share_directory("llama_bringup"), "models", "Spaetzle.yaml"))
    ])
ros2 launch llama_bringup spaetzle.launch.py

llama_ros (YAML Config + Model Shards)

n_ctx: 2048  # context of the LLM in tokens
n_batch: 8  # batch size in tokens
n_gpu_layers: 0  # layers to load in GPU
n_threads: 1  # threads
n_predict: 2048  # max tokens, -1 == inf

model_repo: "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF"  # Hugging Face repo
model_filename: "qwen2.5-coder-7b-instruct-q4_k_m-00001-of-00002.gguf"  # model shard file in repo

system_prompt_type: "ChatML"  # system prompt type
ros2 llama launch Qwen2.yaml

llava_ros (Python Launch)

from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch


def generate_launch_description():

    return LaunchDescription([
        create_llama_launch(
            use_llava=True,  # enable llava

            n_ctx=8192,  # context of the LLM in tokens, use a huge context size to load images
            n_batch=512,  # batch size in tokens
            n_gpu_layers=33,  # layers to load in GPU
            n_threads=1,  # threads
            n_predict=8192,  # max tokens, -1 == inf

            model_repo="cjpais/llava-1.6-mistral-7b-gguf",  # Hugging Face repo
            model_filename="llava-v1.6-mistral-7b.Q4_K_M.gguf",  # model file in repo

            mmproj_repo="cjpais/llava-1.6-mistral-7b-gguf",  # Hugging Face repo
            mmproj_filename="mmproj-model-f16.gguf",  # mmproj file in repo

            system_prompt_type="Mistral"  # system prompt type
        )
    ])
ros2 launch llama_bringup llava.launch.py

llava_ros (YAML Config)

use_llava: True  # enable llava

n_ctx: 8192  # context of the LLM in tokens, use a huge context size to load images
n_batch: 512  # batch size in tokens
n_gpu_layers: 33  # layers to load in GPU
n_threads: 1  # threads
n_predict: 8192  # max tokens, -1 == inf

model_repo: "cjpais/llava-1.6-mistral-7b-gguf"  # Hugging Face repo
model_filename: "llava-v1.6-mistral-7b.Q4_K_M.gguf"  # model file in repo

mmproj_repo: "cjpais/llava-1.6-mistral-7b-gguf"  # Hugging Face repo
mmproj_filename: "mmproj-model-f16.gguf"  # mmproj file in repo

system_prompt_type: "mistral"  # system prompt type

import os
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch_from_yaml
from ament_index_python.packages import get_package_share_directory


def generate_launch_description():
    return LaunchDescription([
        create_llama_launch_from_yaml(os.path.join(
            get_package_share_directory("llama_bringup"),
            "models", "llava-1.6-mistral-7b-gguf.yaml"))
    ])
ros2 launch llama_bringup llava.launch.py

LoRA Adapters

You can use LoRA adapters when launching an LLM. Thanks to llama.cpp features, you can load multiple adapters and choose the scale to apply to each one. Here is an example of using LoRA adapters with Phi-3. You can list the LoRAs with the /llama/list_loras service and modify their scale values with the /llama/update_loras service; a scale value of 0.0 means that the LoRA is not used. A client sketch for these two services is shown after the config below.

n_ctx: 2048
n_batch: 8
n_gpu_layers: 0
n_threads: 1
n_predict: 2048

model_repo: "bartowski/Phi-3.5-mini-instruct-GGUF"
model_filename: "Phi-3.5-mini-instruct-Q4_K_M.gguf"

lora_adapters:
  - repo: "zhhan/adapter-Phi-3-mini-4k-instruct_code_writing"
    filename: "Phi-3-mini-4k-instruct-adaptor-f16-code_writer.gguf"
    scale: 0.5
  - repo: "zhhan/adapter-Phi-3-mini-4k-instruct_summarization"
    filename: "Phi-3-mini-4k-instruct-adaptor-f16-summarization.gguf"
    scale: 0.5

system_prompt_type: "Phi-3"
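
The two LoRA services can be called like any other ROS 2 service. Below is a minimal client sketch, assuming the llama_msgs package exposes ListLoRAs and UpdateLoRAs service types whose responses and requests carry a loras list with a per-adapter scale field; these names are assumptions based on the services mentioned above, so check llama_msgs for the exact definitions.

import rclpy
from rclpy.node import Node
# NOTE: ListLoRAs / UpdateLoRAs and their fields are assumed names here;
# verify them against the llama_msgs package of your llama_ros version.
from llama_msgs.srv import ListLoRAs, UpdateLoRAs


class LoRAClientNode(Node):
    def __init__(self) -> None:
        super().__init__("lora_client_node")

        # clients for the LoRA services
        self.list_client = self.create_client(ListLoRAs, "/llama/list_loras")
        self.update_client = self.create_client(UpdateLoRAs, "/llama/update_loras")

        # list the currently loaded LoRA adapters
        self.list_client.wait_for_service()
        loras = self.list_client.call(ListLoRAs.Request()).loras

        # disable the first adapter by setting its scale to 0.0
        if loras:
            loras[0].scale = 0.0
            req = UpdateLoRAs.Request()
            req.loras = loras
            self.update_client.wait_for_service()
            self.update_client.call(req)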

ROS 2 Clients

Both llama_ros and llava_ros provide ROS 2 interfaces to access the main features of the models. Here are some examples of how to use them inside ROS 2 nodes. In addition, take a look at the llama_demo_node.py and llava_demo_node.py demos.

Tokenize

from rclpy.node import Node
from llama_msgs.srv import Tokenize


class ExampleNode(Node):
    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.srv_client = self.create_client(Tokenize, "/llama/tokenize")

        # create the request
        req = Tokenize.Request()
        req.text = "Example text"

        # call the tokenize service
        self.srv_client.wait_for_service()
        tokens = self.srv_client.call(req).tokens

Detokenize

from rclpy.node import Node
from llama_msgs.srv import Detokenize


class ExampleNode(Node):
    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.srv_client = self.create_client(Detokenize, "/llama/detokenize")

        # create the request
        req = Detokenize.Request()
        req.tokens = [123, 123]

        # call the detokenize service
        self.srv_client.wait_for_service()
        text = self.srv_client.call(req).text

Embeddings


Remember to launch llama_ros with embedding set to true in order to generate embeddings with your LLM.
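
For reference, here is a minimal launch sketch for an embeddings model. It reuses the bge-base-en-v1.5.yaml config referenced in the demos section and assumes that this config sets the embedding parameter to true and is installed with the llama_bringup package like the other examples; adapt the file name to your own setup.

import os
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch_from_yaml
from ament_index_python.packages import get_package_share_directory


def generate_launch_description():
    # bge-base-en-v1.5.yaml is assumed to set embedding to true, which is
    # required to use the /llama/generate_embeddings service shown below
    return LaunchDescription([
        create_llama_launch_from_yaml(os.path.join(
            get_package_share_directory("llama_bringup"),
            "models", "bge-base-en-v1.5.yaml"))
    ])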

from rclpy.node import Node
from llama_msgs.srv import Embeddings


class ExampleNode(Node):
    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.srv_client = self.create_client(Embeddings, "/llama/generate_embeddings")

        # create the request
        req = Embeddings.Request()
        req.prompt = "Example text"
        req.normalize = True

        # call the embedding service
        self.srv_client.wait_for_service()
        embeddings = self.srv_client.call(req).embeddings

Generate Response

import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from llama_msgs.action import GenerateResponse


class ExampleNode(Node):
    def __init__(self) -> None:
        super().__init__("example_node")

        # create the client
        self.action_client = ActionClient(
            self, GenerateResponse, "/llama/generate_response")

        # create the goal and set the sampling config
        goal = GenerateResponse.Goal()
        goal.prompt = self.prompt
        goal.sampling_config.temp = 0.2

        # wait for the server and send the goal
        self.action_client.wait_for_server()
        send_goal_future = self.action_client.send_goal_async(goal)

        # wait for the server
        rclpy.spin_until_future_complete(self, send_goal_future)
        get_result_future = send_goal_future.result().get_result_async()

        # wait again and take the result
        rclpy.spin_until_future_complete(self, get_result_future)
        result: GenerateResponse.Result = get_result_future.result().result

Generate Response (llava)

import cv2
from cv_bridge import CvBridge

import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from llama_msgs.action import GenerateResponse


class ExampleNode(Node):
    def __init__(self) -> None:
        super().__init__("example_node")

        # create a cv bridge for the image
        self.cv_bridge = CvBridge()

        # create the client
        self.action_client = ActionClient(
            self, GenerateResponse, "/llama/generate_response")

        # create the goal and set the sampling config
        goal = GenerateResponse.Goal()
        goal.prompt = self.prompt
        goal.sampling_config.temp = 0.2

        # add your image to the goal
        image = cv2.imread("/path/to/your/image", cv2.IMREAD_COLOR)
        goal.image = self.cv_bridge.cv2_to_imgmsg(image)

        # wait for the server and send the goal
        self.action_client.wait_for_server()
        send_goal_future = self.action_client.send_goal_async(goal)

        # wait for the server
        rclpy.spin_until_future_complete(self, send_goal_future)
        get_result_future = send_goal_future.result().get_result_async()

        # wait again and take the result
        rclpy.spin_until_future_complete(self, get_result_future)
        result: GenerateResponse.Result = get_result_future.result().result

LangChain

llama_ros provides a LangChain integration, so prompt engineering techniques can be applied. Here are some examples of using it.

llama_ros (Chain)

import rclpy
from llama_ros.langchain import LlamaROS
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


rclpy.init()

# create the llama_ros llm for langchain
llm = LlamaROS()

# create a prompt template
prompt_template = "tell me a joke about {topic}"
prompt = PromptTemplate(
    input_variables=["topic"],
    template=prompt_template
)

# create a chain with the llm and the prompt template
chain = prompt | llm | StrOutputParser()

# run the chain
text = chain.invoke({"topic": "bears"})
print(text)

rclpy.shutdown()

llama_ros (Stream)

import rclpy
from llama_ros.langchain import LlamaROS
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


rclpy.init()

# create the llama_ros llm for langchain
llm = LlamaROS()

# create a prompt template
prompt_template = "tell me a joke about {topic}"
prompt = PromptTemplate(
    input_variables=["topic"],
    template=prompt_template
)

# create a chain with the llm and the prompt template
chain = prompt | llm | StrOutputParser()

# run the chain
for c in chain.stream({"topic": "bears"}):
    print(c, flush=True, end="")

rclpy.shutdown()

llava_ros

import rclpy
from llama_ros.langchain import LlamaROS

rclpy.init()

# create the llama_ros llm for langchain
llm = LlamaROS()

# bind the image_url and stream the response
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
llm = llm.bind(image_url=image_url).stream("Describe the image")

# run the llm
for c in llm:
    print(c, flush=True, end="")

rclpy.shutdown()

llama_ros_embeddings (RAG)

import rclpy
from langchain_chroma import Chroma
from llama_ros.langchain import LlamaROSEmbeddings


rclpy.init()

# create the llama_ros embeddings for langchain
embeddings = LlamaROSEmbeddings()

# create a vector database and assign it
db = Chroma(embedding_function=embeddings)

# create the retriever
retriever = db.as_retriever(search_kwargs={"k": 5})

# add your texts
db.add_texts(texts=["your_texts"])

# retrieve documents
documents = retriever.invoke("your_query")
print(documents)

rclpy.shutdown()

llama_ros (Reranker)

import rclpy
from llama_ros.langchain import LlamaROSReranker
from llama_ros.langchain import LlamaROSEmbeddings

from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.retrievers import ContextualCompressionRetriever


rclpy.init()

# load the documents
documents = TextLoader("../state_of_the_union.txt").load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# create the llama_ros embeddings
embeddings = LlamaROSEmbeddings()

# create the VD and the retriever
retriever = FAISS.from_documents(
    texts, embeddings).as_retriever(search_kwargs={"k": 20})

# create the compressor using the llama_ros reranker
compressor = LlamaROSReranker()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

# retrieve the documents
compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Jackson Brown"
)

for doc in compressed_docs:
    print("-" * 50)
    print(doc.page_content)
    print("\n")

rclpy.shutdown()

llama_ros (LLM + RAG + Reranking)

import bs4
import rclpy
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from llama_ros.langchain import LlamaROS, LlamaROSEmbeddings, LlamaROSReranker
from langchain.retrievers import ContextualCompressionRetriever


rclpy.init()

# load, chunk and index the contents of the blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
    documents=splits, embedding=LlamaROSEmbeddings())

# retrieve and generate using the relevant snippets of the blog
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
prompt = hub.pull("rlm/rag-prompt")

compressor = LlamaROSReranker(top_n=5)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# create and use the chain
rag_chain = (
    {"context": compression_retriever | format_docs,
        "question": RunnablePassthrough()}
    | prompt
    | LlamaROS(temp=0.0)
    | StrOutputParser()
)

print(rag_chain.invoke("What is Task Decomposition?"))

rclpy.shutdown()

chat_llama_ros

import rclpy
from llama_ros.langchain import ChatLlamaROS
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser


rclpy.init()

# create chat
chat = ChatLlamaROS(
    temp=0.2,
    penalty_last_n=8,
)

# create prompt template with messages
prompt = ChatPromptTemplate.from_messages([
    SystemMessage("You are a IA that just answer with a single word."),
    HumanMessagePromptTemplate.from_template(template=[
        {"type": "text", "text": "Who is the character in the middle of the image?"},
        {"type": "image_url", "image_url": "{image_url}"}
    ])
])

# create the chain
chain = prompt | chat | StrOutputParser()

# stream and print the LLM output
for text in chain.stream({"image_url": "https://pics.filmaffinity.com/Dragon_Ball_Bola_de_Dragaon_Serie_de_TV-973171538-large.jpg"}):
    print(text, end="", flush=True)

print("", end="\n", flush=True)

rclpy.shutdown()

Demos

LLM Demo

ros2 launch llama_bringup spaetzle.launch.py
ros2 run llama_demos llama_demo_node --ros-args -p prompt:="your prompt"
llama_ros_gpu_new_1.mp4

Embeddings Generation Demo

ros2 llama launch ~/ros2_ws/src/llama_ros/llama_bringup/models/bge-base-en-v1.5.yaml
ros2 run llama_demos llama_embeddings_demo_node
llama_embeddings_1.mp4

Reranking Demo

ros2 llama launch ~/ros2_ws/src/llama_ros/llama_bringup/models/jina-reranker.yaml
ros2 run llama_demos llama_rerank_demo_node
rerank_1.mp4

VLM Demo

ros2 launch llama_bringup minicpm-2.6.launch.py
ros2 run llama_demos llava_demo_node --ros-args -p prompt:="your prompt" -p image_url:="url of the image" -p use_image:="whether to send the image"
frieren_1.mp4

Chat Template Demo

ros2 llama launch MiniCPM-2.6.yaml
MiniCPM-2.6.yaml:
use_llava: True

n_ctx: 8192
n_batch: 512
n_gpu_layers: 20
n_threads: 1
n_predict: 8192

image_prefix: "  "
image_suffix: "  "

model_repo: "openbmb/MiniCPM-V-2_6-gguf"
model_filename: "ggml-model-Q4_K_M.gguf"

mmproj_repo: "openbmb/MiniCPM-V-2_6-gguf"
mmproj_filename: "mmproj-model-f16.gguf"

stopping_words: ["<|im_end|>"]
ros2 run llama_demos chatllama_demo_node
Screencast.from.08-30-2024.10.00.41.AM.webm

Full Demo (LLM + Chat Template + RAG + Reranking + Stream)

ros2 llama launch ~/ros2_ws/src/llama_ros/llama_bringup/models/bge-base-en-v1.5.yaml
ros2 llama launch ~/ros2_ws/src/llama_ros/llama_bringup/models/jina-reranker.yaml
ros2 llama launch Llama-3.yaml
Llama-3.yaml:
n_ctx: 4096
n_batch: 256
n_gpu_layers: 33
n_threads: -1
n_predict: -1

model_repo: "lmstudio-community/Llama-3.2-1B-Instruct-GGUF"
model_filename: "Llama-3.2-1B-Instruct-Q8_0.gguf"

stopping_words: ["<|eot_id|>"]
ros2 run llama_demos llama_rag_demo_node
llama_rag_2.mp4