use whisper

useWhisper

React Hook สำหรับ OpenAI Whisper API พร้อมเครื่องบันทึกเสียงพูด การถอดความแบบเรียลไทม์ และการกำจัดความเงียบในตัว

การสาธิต
ตัวอย่างการถอดความแบบเรียลไทม์

use-whisper-real-time-transcription.mp4

ประกาศ
useWhisper สำหรับ React Native กำลังอยู่ระหว่างการพัฒนา

ที่เก็บ: https://github.com/chengsokdara/use-whisper-native

ความคืบหน้า: chengsokdara/use-whisper-native#1

ติดตั้ง

 npm i @chengsokdara/use-whisper

 yarn add @chengsokdara/use-whisper

การใช้งาน

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Basic usage example: records speech with useWhisper and shows the
 * recorder status flags alongside the transcribed text.
 */
const App = () => {
  const {
    recording,
    speaking,
    transcribing,
    transcript,
    pauseRecording,
    startRecording,
    stopRecording,
  } = useWhisper({
    apiKey: process.env.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
  })

  return (
    <div>
      {/* React renders nothing for bare booleans, so stringify the status flags */}
      <p>Recording: {String(recording)}</p>
      <p>Speaking: {String(speaking)}</p>
      <p>Transcribing: {String(transcribing)}</p>
      <p>Transcribed Text: {transcript.text}</p>
      {/* wrap in arrow functions so the click event is not passed as an argument */}
      <button onClick={() => startRecording()}>Start</button>
      <button onClick={() => pauseRecording()}>Pause</button>
      <button onClick={() => stopRecording()}>Stop</button>
    </div>
  )
}

เซิร์ฟเวอร์ที่กำหนดเอง (เก็บโทเค็น OpenAI API ให้ปลอดภัย)

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Custom-server example: instead of giving the hook an API key, the
 * recorded speech is posted to your own endpoint, which keeps the
 * OpenAI API token off the client.
 */
const App = () => {
  /**
   * Handles transcription on a custom server.
   *
   * Encodes the recorded blob as a data URL, posts it to `/api/whisper`
   * and hands the server's answer back to useWhisper.
   *
   * Must be `async` because the body awaits the file read, the dynamic
   * axios import and the HTTP request.
   *
   * @param blob recorded speech supplied by useWhisper
   * @returns the result in Transcript format: the original blob plus the text
   */
  const onTranscribe = async (blob: Blob) => {
    const base64 = await new Promise<string | ArrayBuffer | null>(
      (resolve) => {
        const reader = new FileReader()
        // resolve with whatever the reader holds once the read has finished
        reader.onloadend = () => resolve(reader.result)
        reader.readAsDataURL(blob)
      }
    )
    const body = JSON.stringify({ file: base64, model: 'whisper-1' })
    const headers = { 'Content-Type': 'application/json' }
    // axios is imported on demand, only when a transcription is requested
    const { default: axios } = await import('axios')
    const response = await axios.post('/api/whisper', body, {
      headers,
    })
    // response.data is already resolved at this point; no extra await needed
    const { text } = response.data
    // you must return result from your server in Transcript format
    return {
      blob,
      text,
    }
  }

  const { transcript } = useWhisper({
    // callback to handle transcription with custom server
    onTranscribe,
  })

  return (
    <div>
      <p>{transcript.text}</p>
    </div>
  )
}

ตัวอย่าง
การถอดความแบบสตรีมมิ่งเรียลไทม์

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Streaming example: transcribes speech while it is being recorded,
 * one time slice at a time.
 */
const App = () => {
  const whisper = useWhisper({
    apiKey: process.env.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
    streaming: true,
    timeSlice: 1_000, // 1 second
    whisperConfig: {
      language: 'en',
    },
  })

  // show the text transcribed so far
  return (
    <div>
      <p>{whisper.transcript.text}</p>
    </div>
  )
}

ลบความเงียบก่อนส่งไปยัง Whisper เพื่อประหยัดค่าใช้จ่าย

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Silence-removal example: silence is stripped from the recorded
 * speech before it is sent for transcription.
 */
const App = () => {
  const whisper = useWhisper({
    apiKey: process.env.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
    // silence is removed from the recorded speech with ffmpeg (@ffmpeg/ffmpeg)
    removeSilence: true,
  })

  return (
    <div>
      <p>{whisper.transcript.text}</p>
    </div>
  )
}

เริ่มบันทึกอัตโนมัติเมื่อคอมโพเนนต์ถูกเมานต์

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Auto-start example: recording begins as soon as the component
 * mounts, with no button press required.
 */
const App = () => {
  const whisper = useWhisper({
    apiKey: process.env.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
    // start recording speech automatically once the component is mounted
    autoStart: true,
  })

  return (
    <div>
      <p>{whisper.transcript.text}</p>
    </div>
  )
}

บันทึกต่อไปตราบเท่าที่ผู้ใช้ยังพูดอยู่

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Non-stop example: the recorder keeps going while the user is
 * speaking and stops by itself after the configured timeout.
 */
const App = () => {
  const whisper = useWhisper({
    apiKey: process.env.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
    // keep recording as long as the user is speaking
    nonStop: true,
    // auto stop after 5 seconds
    stopTimeout: 5000,
  })

  return (
    <div>
      <p>{whisper.transcript.text}</p>
    </div>
  )
}

ปรับแต่งการกำหนดค่า Whisper API เมื่อ autoTranscribe เป็น true

 import { useWhisper } from '@chengsokdara/use-whisper'

/**
 * Whisper configuration example: with automatic transcription on,
 * whisperConfig tunes the request sent to the Whisper API.
 */
const App = () => {
  const whisper = useWhisper({
    apiKey: process.env.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
    autoTranscribe: true,
    whisperConfig: {
      // you can pass previous conversation for context
      prompt: 'previous conversation',
      // output text instead of json
      response_format: 'text',
      // higher temperature gives more random output
      temperature: 0.8,
      // Spanish
      language: 'es',
    },
  })

  return (
    <div>
      <p>{whisper.transcript.text}</p>
    </div>
  )
}

ไลบรารีที่ต้องพึ่งพา (Dependencies)
- @chengsokdara/react-hooks-async: React hooks แบบอะซิงโครนัส
- recordrtc: เครื่องบันทึกเสียงแบบ cross-browser
- lamejs: เข้ารหัส wav เป็น mp3 เพื่อรองรับการใช้งานแบบ cross-browser
- @ffmpeg/ffmpeg: สำหรับคุณสมบัติการกำจัดความเงียบ
- hark: สำหรับการตรวจจับการพูด
- axios: เนื่องจาก fetch ใช้งานกับ endpoint ของ Whisper ไม่ได้

ไลบรารีเหล่านี้ส่วนใหญ่ถูกโหลดแบบ lazy load ดังนั้นจึงถูกนำเข้าเฉพาะเมื่อจำเป็นต้องใช้เท่านั้น

API
ออบเจ็กต์การกำหนดค่า (Config Object)

ชื่อ	ชนิด	ค่าเริ่มต้น	คำอธิบาย
apiKey	string	-	โทเค็น OpenAI API ของคุณ
autoStart	boolean	false	เริ่มบันทึกเสียงพูดอัตโนมัติเมื่อคอมโพเนนต์ถูกเมานต์
autoTranscribe	boolean	true	ถอดความอัตโนมัติหลังจากหยุดการบันทึก
mode	string	transcriptions	ควบคุมโหมดของ Whisper ว่าจะเป็นการถอดความ (transcriptions) หรือการแปล (translations) ปัจจุบันรองรับการแปลเป็นภาษาอังกฤษเท่านั้น
nonStop	boolean	false	หากเป็น true การบันทึกจะหยุดอัตโนมัติหลังจากครบ stopTimeout อย่างไรก็ตาม หากผู้ใช้ยังพูดอยู่ เครื่องบันทึกจะบันทึกต่อไป
removeSilence	boolean	false	ลบความเงียบก่อนส่งไฟล์ไปยัง OpenAI API
stopTimeout	number	5,000 มิลลิวินาที	หาก nonStop เป็น true ต้องกำหนดค่านี้ ใช้ควบคุมว่าเครื่องบันทึกจะหยุดอัตโนมัติเมื่อใด
streaming	boolean	false	ถอดความคำพูดแบบเรียลไทม์ตาม timeSlice
timeSlice	number	1,000 มิลลิวินาที	ช่วงเวลาระหว่างเหตุการณ์ onDataAvailable แต่ละครั้ง
whisperConfig	WhisperApiConfig	undefined	การกำหนดค่าการถอดความของ Whisper API
onDataAvailable	(blob: Blob) => void	undefined	ฟังก์ชัน callback สำหรับรับ blob ที่บันทึกได้ในแต่ละช่วง timeSlice
onTranscribe	(blob: Blob) => Promise<Transcript>	undefined	ฟังก์ชัน callback สำหรับจัดการการถอดความบนเซิร์ฟเวอร์ที่คุณกำหนดเอง

WhisperApiConfig

ชื่อ	ชนิด	ค่าเริ่มต้น	คำอธิบาย
prompt	string	undefined	ข้อความเสริม (ไม่บังคับ) เพื่อกำหนดแนวทางสไตล์ของโมเดล หรือเพื่อต่อเนื่องจากส่วนเสียงก่อนหน้า พรอมต์ควรเป็นภาษาเดียวกับเสียง
response_format	string	json	รูปแบบของผลลัพธ์การถอดความ โดยเลือกได้จาก: json, text, srt, verbose_json หรือ vtt
temperature	number	0	อุณหภูมิการสุ่มตัวอย่าง (sampling temperature) ระหว่าง 0 ถึง 1 ค่าที่สูงขึ้น เช่น 0.8 จะทำให้เอาต์พุตสุ่มมากขึ้น ในขณะที่ค่าที่ต่ำกว่า เช่น 0.2 จะทำให้เอาต์พุตมีจุดโฟกัสและคาดเดาได้มากขึ้น หากตั้งค่าเป็น 0 โมเดลจะใช้ log probability เพื่อเพิ่มอุณหภูมิโดยอัตโนมัติจนกว่าจะถึงเกณฑ์ที่กำหนด
language	string	en	ภาษาของเสียงอินพุต การระบุภาษาอินพุตในรูปแบบ ISO-639-1 จะช่วยปรับปรุงความแม่นยำและลดเวลาแฝง

ออบเจ็กต์ที่ส่งคืน (Return Object)

ชื่อ	ชนิด	คำอธิบาย
recording	boolean	สถานะการบันทึกเสียงพูด
speaking	boolean	ตรวจจับว่าผู้ใช้กำลังพูดอยู่หรือไม่
transcribing	boolean	เป็น true ขณะกำลังลบความเงียบออกจากเสียงพูดและส่งคำขอไปยัง OpenAI Whisper API
transcript	Transcript	ออบเจ็กต์ที่ส่งคืนหลังจาก Whisper ถอดความเสร็จสมบูรณ์
pauseRecording	Promise	หยุดการบันทึกเสียงพูดชั่วคราว
startRecording	Promise	เริ่มบันทึกเสียงพูด
stopRecording	Promise	หยุดการบันทึกเสียงพูด

Transcript

ชื่อ	ชนิด	คำอธิบาย
blob	Blob	เสียงพูดที่บันทึกไว้ในรูปแบบ JavaScript Blob
text	string	ข้อความที่ถอดความได้จาก Whisper API