ดาวน์โหลด outlines - outlines ดาวน์โหลดซอร์สโค้ด

โลโก้ Outlines

ทำให้ LLM พูดภาษาของทุกแอปพลิเคชันได้

สร้างด้วย ❤️ โดยทีมงานที่ .txt

ช่องยูทูป | บล็อก .txt | ทวิตเตอร์

pip install outlines

ครั้งแรกที่นี่? ไปที่คู่มือการตั้งค่าของเรา

คุณสมบัติ

Outlines มีรุ่นใหม่และฟีเจอร์ใหม่ๆ ออกมาทุกสัปดาห์ อย่าลืมกดดาว (star) และกดติดตาม (watch) พื้นที่เก็บข้อมูลนี้ และติดตาม @dottxtai เพื่อรับข่าวสารล่าสุด!

เหตุใดฉันจึงควรใช้การสร้างแบบมีโครงสร้าง

มันไม่ได้เพิ่มค่าใช้จ่ายใด ๆ ในระหว่างการอนุมาน (ฟรีต้นทุน)
ช่วยให้โมเดลโอเพ่นซอร์สสามารถเอาชนะโมเดลแบบปิด (closed source) ได้ (Mistral, GPT-4)
มันเร่งการอนุมาน
ปรับปรุงประสิทธิภาพของรุ่นพื้นฐาน (GSM8K)
ปรับปรุงประสิทธิภาพของโมเดลที่ได้รับการปรับแต่ง (CoNLL)
ปรับปรุงประสิทธิภาพของโมเดล (ต้องการตัวอย่างน้อยลง)

บริษัท .txt

เราก่อตั้งบริษัทเพื่อก้าวข้ามขีดจำกัดของการสร้างแบบมีโครงสร้าง เรียนรู้เพิ่มเติมเกี่ยวกับ .txt และลองใช้ .json API ของเรา หากคุณต้องการโซลูชันแบบโฮสต์

การสร้างแบบมีโครงสร้าง

ขั้นตอนแรกสู่ความน่าเชื่อถือของระบบที่มีโมเดลภาษาขนาดใหญ่คือต้องแน่ใจว่ามีส่วนต่อประสานที่กำหนดไว้อย่างดีระหว่างเอาต์พุตและโค้ดที่ผู้ใช้กำหนด Outlines จัดเตรียมวิธีการควบคุมการสร้างโมเดลภาษาเพื่อให้สามารถคาดเดาเอาต์พุตได้มากขึ้น

มีหลายทางเลือก

คุณสามารถลดความสมบูรณ์ลงเหลือเพียงตัวเลือกระหว่างความเป็นไปได้หลายประการ:

 import outlines

# Load the model through the transformers backend.
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")

prompt = """You are a sentiment-labelling assistant.
Is the following review positive or negative?

Review: This restaurant is just awesome!
"""

# Restrict the completion to a choice between the listed labels.
generator = outlines.generate.choice(model, ["Positive", "Negative"])
answer = generator(prompt)

ข้อจำกัดประเภท

คุณสามารถสั่งให้โมเดลส่งคืนเฉพาะจำนวนเต็มหรือจำนวนทศนิยม:

 import outlines

model = outlines.models.transformers("WizardLM/WizardMath-7B-V1.1")

# Integer-typed output: build the generator first, then call it on the prompt.
prompt = "<s>result of 9 + 9 = 18</s><s>result of 1 + 2 = "
int_generator = outlines.generate.format(model, int)
answer = int_generator(prompt)
print(answer)
# 3

# Float-typed output, with the generation length capped via max_tokens.
prompt = "sqrt(2)="
generator = outlines.generate.format(model, float)
answer = generator(prompt, max_tokens=10)
print(answer)
# 1.41421356

การสร้างโครงสร้าง regex ที่มีประสิทธิภาพ

Outlines ยังมาพร้อมกับการสร้างแบบมีโครงสร้างด้วย regex ที่รวดเร็วอีกด้วย อันที่จริง ฟังก์ชัน choice และ format ข้างต้นล้วนใช้การสร้างแบบมีโครงสร้างด้วย regex อยู่เบื้องหลัง:

 import outlines

model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")

prompt = "What is the IP address of the Google DNS servers? "

# Baseline: plain text generation with no constraint on the output.
generator = outlines.generate.text(model)
unstructured = generator(prompt, max_tokens=30)

# Dotted-quad IPv4 pattern: `\d` matches one digit and `\.` a literal dot,
# and each of the four octets is limited to the range 0-255. Without the
# backslashes the pattern would match the letter "d" and any character.
generator = outlines.generate.regex(
    model,
    r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)",
)
structured = generator(prompt, max_tokens=30)

print(unstructured)
# What is the IP address of the Google DNS servers?
#
# Passive DNS servers are at DNS servers that are private.
# In other words, both IP servers are private. The database
# does not contain Chelsea Manning

print(structured)
# What is the IP address of the Google DNS servers?
# 2.2.6.1

การสร้างแบบมีโครงสร้าง regex ใน Outlines ต่างจากไลบรารีอื่นๆ ตรงที่เกือบจะเร็วพอๆ กับการสร้างแบบไม่มีโครงสร้าง

การสร้าง JSON ที่มีประสิทธิภาพตามแบบจำลอง Pydantic

Outlines ช่วยกำกับกระบวนการสร้าง เพื่อให้เอาต์พุตได้รับการ รับประกัน ว่าจะเป็นไปตาม JSON Schema หรือโมเดล Pydantic:

 from enum import Enum
from pydantic import BaseModel , constr

import outlines
import torch


# Closed set of weapons; used as the type of Character.weapon.
# Mixing in `str` makes every member a string equal to its value.
# NOTE(review): documented with comments rather than a docstring because a
# class docstring may end up in the generated schema's "description".
class Weapon ( str , Enum ):
    sword = "sword"
    axe = "axe"
    mace = "mace"
    spear = "spear"
    bow = "bow"
    crossbow = "crossbow"


# Closed set of armor kinds; used as the type of Character.armor.
# Mixing in `str` makes every member a string equal to its value.
class Armor ( str , Enum ):
    leather = "leather"
    chainmail = "chainmail"
    plate = "plate"


# Pydantic model describing the structure the generated JSON must follow.
# NOTE(review): kept as comments on purpose -- Pydantic uses a model's
# docstring as the schema description, so adding one would change the schema.
class Character ( BaseModel ):
    name : constr ( max_length = 10 )  # string of at most 10 characters
    age : int
    armor : Armor  # one of the Armor members
    weapon : Weapon  # one of the Weapon members
    strength : int


model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")

# Generator whose output follows the Character model.
generator = outlines.generate.json(model, Character)

# First draw: pass an explicit seed.
seed = 789001

character = generator("Give me a character description", seed=seed)

print(repr(character))
# Character(name='Anderson', age=28, armor=<Armor.chainmail: 'chainmail'>, weapon=<Weapon.sword: 'sword'>, strength=8)

# Second draw: same generator, different prompt, no seed.
character = generator("Give me an interesting character description")

print(repr(character))
# Character(name='Vivian Thr', age=44, armor=<Armor.plate: 'plate'>, weapon=<Weapon.crossbow: 'crossbow'>, strength=125)

วิธีการนี้ใช้ได้กับประเภทยูเนียน ประเภททางเลือก อาร์เรย์ สคีมาแบบซ้อน ฯลฯ ข้อจำกัดบางประการของฟิลด์ยังไม่ได้รับการสนับสนุน แต่อย่างอื่นควรใช้งานได้ทั้งหมด

การสร้าง JSON ที่มีประสิทธิภาพตาม JSON Schema

บางครั้งคุณเพียงต้องการที่จะส่งผ่าน JSON Schema แทนที่จะเป็นโมเดล Pydantic เราช่วยคุณได้:

 import outlines

# JSON Schema, kept as a string, for a "Character" object: name (string,
# maxLength 10), age, armor, weapon and strength are all required; armor and
# weapon point at the enum definitions under "definitions".
schema = '''{
    "title": "Character",
    "type": "object",
    "properties": {
        "name": {
            "title": "Name",
            "maxLength": 10,
            "type": "string"
        },
        "age": {
            "title": "Age",
            "type": "integer"
        },
        "armor": {"$ref": "#/definitions/Armor"},
        "weapon": {"$ref": "#/definitions/Weapon"},
        "strength": {
            "title": "Strength",
            "type": "integer"
        }
    },
    "required": ["name", "age", "armor", "weapon", "strength"],
    "definitions": {
        "Armor": {
            "title": "Armor",
            "description": "An enumeration.",
            "enum": ["leather", "chainmail", "plate"],
            "type": "string"
        },
        "Weapon": {
            "title": "Weapon",
            "description": "An enumeration.",
            "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"],
            "type": "string"
        }
    }
}'''

model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# The schema is handed over as a JSON string instead of a Pydantic model.
generator = outlines.generate.json(model, schema)
character = generator("Give me a character description")

การใช้ไวยากรณ์ที่ไม่มีบริบทเพื่อเป็นแนวทางในการสร้าง

ไวยากรณ์ที่เป็นทางการครองโลก และ Outlines ก็ทำให้พวกเขาปกครอง LLM เช่นกัน คุณสามารถส่งผ่านไวยากรณ์ที่ไม่มีบริบทในรูปแบบ EBNF และ Outlines จะสร้างผลลัพธ์ที่ถูกต้องสำหรับไวยากรณ์นี้:

 import outlines

# Context-free grammar (EBNF form) for arithmetic expressions: terms joined
# by + or -, factors joined by * or /, with unary minus and parentheses.
arithmetic_grammar = """
    ?start: expression

    ?expression: term (("+" | "-") term)*

    ?term: factor (("*" | "/") factor)*

    ?factor: NUMBER
           | "-" factor
           | "(" expression ")"

    %import common.NUMBER
"""

model = outlines.models.transformers("WizardLM/WizardMath-7B-V1.1")
# Generation is guided by the grammar defined in arithmetic_grammar.
generator = outlines.generate.cfg(model, arithmetic_grammar)
sequence = generator(
    "Alice had 4 apples and Bob ate 2. Write an expression for Alice's apples:"
)

print(sequence)
# (8-2)

นี่เป็นไวยากรณ์ที่เรียบง่ายมากและคุณสามารถใช้ outlines.generate.cfg เพื่อสร้าง Python, SQL ที่ถูกต้องตามหลักไวยากรณ์ และอื่นๆ อีกมากมาย ข้อความที่มีโครงสร้างทุกประเภทจริงๆ สิ่งที่คุณต้องทำคือค้นหา "ไวยากรณ์ X EBNF" บนเว็บ และดูที่โมดูล grammars Outlines

เปิดฟังก์ชั่น

Outlines สามารถอนุมานโครงสร้างของผลลัพธ์จากลายเซ็น (signature) ของฟังก์ชันได้ ผลลัพธ์ที่ได้คือพจนานุกรม และสามารถส่งผ่านไปยังฟังก์ชันได้โดยตรงโดยใช้ไวยากรณ์การขยายพจนานุกรมตามปกติ ** :

 import outlines


def add(a: int, b: int):
    # Sum of the two arguments; the signature (a, b) is what the example
    # hands to the generator as the output structure.
    total = a + b
    return total

model = outlines.models.transformers("WizardLM/WizardMath-7B-V1.1")
# The function itself is passed as the description of the output structure.
generator = outlines.generate.json(model, add)
result = generator(
    "Return json with two integers named a and b respectively. a is odd and b even."
)

# The result is a dictionary, so it unpacks directly into add's arguments.
print(add(**result))
# 3

ข้อได้เปรียบที่ดีของการส่งผ่านฟังก์ชันโดยตรงเพื่อระบุโครงสร้างคือโครงสร้างของ LLM จะเปลี่ยนไปตามคำจำกัดความของฟังก์ชัน ไม่ต้องเปลี่ยนโค้ดหลายที่!

คุณยังสามารถฝังฟังก์ชันต่างๆ ลงใน enum เพื่อสร้างพารามิเตอร์ได้:

 from enum import Enum
from functools import partial

import outlines


def add(a: int, b: int) -> int:
    # Sum of the two integer arguments.
    total = a + b
    return total

def mul(c: float, d: float) -> float:
    # Product of the two float arguments.
    product = c * d
    return product

# Enum that groups the callable operations defined above.
# Each function is wrapped in functools.partial so that it is stored as an
# enum member; a bare function assigned in an Enum body is treated as a
# method rather than a member.
class Operation ( Enum ):
    add = partial ( add )
    mul = partial ( mul )

model = outlines.models.transformers("WizardLM/WizardMath-7B-V1.1")
# Pass the Operation enum rather than the single function `add`: the prompt
# asks for the floats c and d, which are mul's parameters, and only the enum
# covers both embedded functions.
generator = outlines.generate.json(model, Operation)
result = generator(
    "Return json with two float named c and d respectively. c is negative and d greater than 1.0."
)

print(result)
# {'c': -3.14, 'd': 1.5}

พร้อมท์

การสร้างพรอมต์อาจยุ่งเหยิงได้ Outlines ช่วยให้เขียนและจัดการพรอมต์ได้ง่ายขึ้นโดยการห่อหุ้มเทมเพลตไว้ภายใน "ฟังก์ชันเทมเพลต"

ฟังก์ชันเหล่านี้ทำให้สามารถแยกตรรกะพร้อมต์ออกจากตรรกะโปรแกรมทั่วไปได้อย่างสวยงาม สามารถนำเข้าจากโมดูลและไลบรารีอื่นได้

ฟังก์ชันเทมเพลตไม่จำเป็นต้องมีนามธรรมมากเกินไป โดยใช้เครื่องมือสร้างเทมเพลต Jinja2 เพื่อช่วยสร้างพรอมต์ที่ซับซ้อนในลักษณะที่กระชับ:

 import outlines

# Few-shot examples as (text, label) pairs for the sentiment prompt.
examples = [
    ("The food was disgusting", "Negative"),
    ("We had a fantastic night", "Positive"),
    ("Recommended", "Positive"),
    ("The waiter was rude", "Negative"),
]

# Template function: the docstring below is the prompt template itself
# (Jinja2 loop and placeholder syntax), not ordinary documentation, so its
# text must stay exactly as written. `to_label` is the text to classify and
# `examples` is a sequence of (text, label) pairs.
@ outlines . prompt
def labelling ( to_label , examples ):
    """You are a sentiment-labelling assistant.

    {% for example in examples %}
    {{ example[0] }} // {{ example[1] }}
    {% endfor %}
    {{ to_label }} //
    """

model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# Build the prompt from the text to label and the few-shot examples.
prompt = labelling("Just awesome", examples)
text_generator = outlines.generate.text(model)
answer = text_generator(prompt, max_tokens=100)

เข้าร่วมกับเรา

- มีไอเดียไหม? มาคุยกับเราได้ที่ Discord
- อยากมีส่วนร่วมไหม? ศึกษาคู่มือการมีส่วนร่วมของเรา
- พบข้อผิดพลาด (bug) หรือไม่? เปิด issue ได้เลย

การอ้างอิง Outlines

 @article{willard2023efficient,
  title={Efficient Guided Generation for LLMs},
  author={Willard, Brandon T and Louf, R{\'e}mi},
  journal={arXiv preprint arXiv:2307.09702},
  year={2023}
}

ขยาย