-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #30 from Azure-Samples/speech
Adding speech input via custom element
- Loading branch information
Showing
4 changed files
with
276 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/**
 * `<speech-input-button>` custom element: a microphone toggle button that
 * captures speech through the browser's SpeechRecognition API.
 *
 * Events dispatched on the element:
 * - `speech-input-result` (CustomEvent): `detail.transcript` holds the recognized text.
 * - `speech-input-end` (Event): recognition has stopped.
 * - `speech-input-error` (CustomEvent): `detail.error` holds a human-readable message.
 */
class SpeechInputButton extends HTMLElement {
  constructor() {
    super();
    this.isRecording = false;
    const SpeechRecognition =
      window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      // Leave this.speechRecognition unset; startRecording() reports the same
      // error again whenever the user actually tries to record.
      this.dispatchError("SpeechRecognition not supported");
      return;
    }
    this.speechRecognition = new SpeechRecognition();
    this.speechRecognition.lang = navigator.language || navigator.userLanguage;
    this.speechRecognition.interimResults = false;
    this.speechRecognition.maxAlternatives = 1;
  }

  /** Renders the microphone button and wires its click handler. */
  connectedCallback() {
    this.innerHTML = `
        <button class="btn btn-outline-secondary" type="button">
          <i class="bi bi-mic"></i>
        </button>`;
    this.recordButton = this.querySelector("button");
    this.recordButton.addEventListener("click", () => this.toggleRecording());
  }

  /** Switches the button to its "recording" appearance. */
  renderButtonOn() {
    this.recordButton.classList.add("speech-input-active");
    this.recordButton.innerHTML = '<i class="bi bi-mic-fill"></i>';
  }

  /** Switches the button back to its idle appearance. */
  renderButtonOff() {
    this.recordButton.classList.remove("speech-input-active");
    this.recordButton.innerHTML = '<i class="bi bi-mic"></i>';
  }

  /**
   * Dispatches a `speech-input-error` event on this element.
   * @param {string} message - Text placed in `detail.error`.
   */
  dispatchError(message) {
    this.dispatchEvent(
      new CustomEvent("speech-input-error", {
        detail: { error: message },
      })
    );
  }

  /**
   * Installs the recognition handlers and starts listening.
   * When speech recognition is unavailable, only a `speech-input-error`
   * event is dispatched and nothing else happens.
   */
  startRecording() {
    if (this.speechRecognition == null) {
      this.dispatchError("SpeechRecognition not supported");
      // Without this return the handler assignments below would throw a
      // TypeError on the missing recognizer.
      return;
    }

    this.speechRecognition.onresult = (event) => {
      let input = "";
      for (const result of event.results) {
        // Only the first alternative of each result is used.
        input += result[0].transcript;
      }
      this.dispatchEvent(
        new CustomEvent("speech-input-result", {
          detail: { transcript: input },
        })
      );
    };

    this.speechRecognition.onend = () => {
      // NOTE: In some browsers (e.g. Chrome), the recording will stop automatically after a few seconds of silence.
      this.isRecording = false;
      this.renderButtonOff();
      this.dispatchEvent(new Event("speech-input-end"));
    };

    this.speechRecognition.onerror = (event) => {
      this.speechRecognition.stop();
      switch (event.error) {
        case "no-speech":
          this.dispatchError(
            "No speech was detected. Please check your system audio settings and try again."
          );
          break;
        case "language-not-supported":
          this.dispatchError(
            "The selected language is not supported. Please try a different language."
          );
          break;
        default:
          this.dispatchError(
            "An error occurred while recording. Please try again."
          );
      }
    };

    this.speechRecognition.start();
    this.isRecording = true;
    this.renderButtonOn();
  }

  /** Stops an active recording, or starts a new one when idle. */
  toggleRecording() {
    if (this.isRecording) {
      // isRecording is reset by the onend handler once the recognizer stops.
      this.speechRecognition.stop();
    } else {
      this.startRecording();
    }
  }
}
|
||
// Register the SpeechInputButton class under the "speech-input-button" tag name.
customElements.define("speech-input-button", SpeechInputButton); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/**
 * `<speech-output-button>` custom element: a speaker toggle button that reads
 * the element's `text` attribute aloud through the browser's speech synthesis.
 *
 * Events dispatched on the element:
 * - `speech-output-error` (CustomEvent): `detail.error` holds a message.
 */
class SpeechOutputButton extends HTMLElement {
  static observedAttributes = ["text"];

  constructor() {
    super();
    this.isPlaying = false;
    const SpeechSynthesis =
      window.speechSynthesis || window.webkitSpeechSynthesis;
    if (!SpeechSynthesis) {
      // this.synth stays unset, which turns toggleSpeechOutput() into a no-op.
      this.dispatchEvent(
        new CustomEvent("speech-output-error", {
          detail: { error: "SpeechSynthesis not supported" },
        })
      );
      return;
    }
    this.synth = SpeechSynthesis;
    this.lngCode = navigator.language || navigator.userLanguage;
  }

  /** Renders the speaker button and wires its click handler. */
  connectedCallback() {
    this.innerHTML = `
            <button class="btn btn-outline-secondary" type="button">
                <i class="bi bi-volume-up"></i>
            </button>`;
    this.speechButton = this.querySelector("button");
    this.speechButton.addEventListener("click", () =>
      this.toggleSpeechOutput()
    );
  }

  /** Switches the button to its "speaking" appearance. */
  renderButtonOn() {
    this.speechButton.classList.add("speech-output-active");
    this.speechButton.innerHTML = '<i class="bi bi-volume-up-fill"></i>';
  }

  /** Switches the button back to its idle appearance. */
  renderButtonOff() {
    this.speechButton.classList.remove("speech-output-active");
    this.speechButton.innerHTML = '<i class="bi bi-volume-up"></i>';
  }

  /**
   * Starts speaking the `text` attribute, or cancels speech when this button
   * is already playing or has no text to speak.
   */
  toggleSpeechOutput() {
    if (!this.isConnected || this.synth == null) {
      return;
    }

    const text = this.getAttribute("text");
    // getAttribute() returns null for a missing attribute; treating that like
    // the empty string avoids speaking the literal word "null".
    if (this.isPlaying || !text) {
      this.synth.cancel(); // removes all utterances from the utterance queue.
      this.isPlaying = false;
      this.renderButtonOff();
      return;
    }

    // Create a new utterance and play it.
    const utterance = new SpeechSynthesisUtterance(text);
    utterance.lang = this.lngCode;
    utterance.volume = 1;
    utterance.rate = 1;
    utterance.pitch = 1;

    // Prefer a voice for the user's language, then fall back to en-US. If
    // neither exists the utterance keeps the browser's default voice.
    const voices = this.synth.getVoices();
    const voice =
      voices.find((candidate) => candidate.lang === this.lngCode) ??
      voices.find((candidate) => candidate.lang === "en-US");
    if (voice) {
      utterance.voice = voice;
    }

    // Handlers are attached before speak() so no event can be missed.
    utterance.onstart = () => {
      this.isPlaying = true;
      this.renderButtonOn();
    };

    utterance.onend = () => {
      this.isPlaying = false;
      this.renderButtonOff();
    };

    utterance.onerror = (event) => {
      // An utterance that fails or is cut off may never fire "end", so the
      // playing state has to be reset here as well.
      this.isPlaying = false;
      this.renderButtonOff();
      // "canceled" and "interrupted" result from cancel() calls, not failures.
      if (event.error !== "canceled" && event.error !== "interrupted") {
        this.dispatchEvent(
          new CustomEvent("speech-output-error", {
            detail: { error: `Speech synthesis failed: ${event.error}` },
          })
        );
      }
    };

    this.synth.speak(utterance);
  }
}
|
||
// Register the SpeechOutputButton class under the "speech-output-button" tag name.
customElements.define("speech-output-button", SpeechOutputButton); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,72 @@ | ||
* { | ||
box-sizing: border-box; | ||
box-sizing: border-box; | ||
} | ||
|
||
html, body { | ||
height: 100%; | ||
html, | ||
body { | ||
height: 100%; | ||
background-color: #f8f9fa; | ||
} | ||
|
||
#messages .toast-container { | ||
margin-bottom: 12px; | ||
margin-bottom: 12px; | ||
} | ||
|
||
#messages .message-file img { | ||
max-width: 100%; | ||
max-height: 400px; | ||
max-width: 100%; | ||
max-height: 400px; | ||
} | ||
|
||
.background-user { | ||
background-color: #4f28b9; | ||
background-color: #4f28b9; | ||
} | ||
|
||
.background-assistant { | ||
background-color: #0080ff; | ||
background-color: #0080ff; | ||
} | ||
|
||
#image-preview { | ||
max-height: 150px; | ||
float: right; | ||
margin-left: 20px; | ||
max-height: 150px; | ||
float: right; | ||
margin-left: 20px; | ||
} | ||
|
||
#no-messages-heading { | ||
margin-top: 20%; | ||
margin-top: 20%; | ||
} | ||
|
||
#chat-area { | ||
background-color: white; | ||
border: 1px solid #c4cad0; | ||
} | ||
|
||
/* Speech input/output buttons */ | ||
|
||
/* Square off the right-hand corners of the speech input button inside #chat-form. */
#chat-form speech-input-button button { | ||
border-top-right-radius: 0; | ||
border-bottom-right-radius: 0; | ||
} | ||
|
||
/* Active (recording / speaking) state: blue border and text plus the repeating "pulse" animation. */
speech-input-button button.speech-input-active, | ||
speech-output-button button.speech-output-active { | ||
border: 1px solid blue; | ||
color: blue; | ||
animation: pulse 1s infinite; | ||
} | ||
|
||
/* Tint the icon inside an active speech input/output button. Both selectors
   target the nested <i>; the output selector previously omitted it and so
   merely repeated the button rule above. */
speech-input-button button.speech-input-active i,
speech-output-button button.speech-output-active i {
  color: blue;
}
|
||
/* "pulse" animation: a box-shadow ring that starts at zero spread and 0.7 alpha,
   grows to a 6px spread while becoming fully transparent at 70%, then returns
   to zero spread (still transparent) at 100%. */
@keyframes pulse { | ||
0% { | ||
box-shadow: 0 0 0 0 rgba(0, 123, 255, 0.7); | ||
} | ||
70% { | ||
box-shadow: 0 0 0 6px rgba(0, 123, 255, 0); | ||
} | ||
100% { | ||
box-shadow: 0 0 0 0 rgba(0, 123, 255, 0); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,7 +50,7 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images< | |
</div> | ||
</template> | ||
</div> | ||
<div id="chat-area" class="text-light px-4 py-2 rounded-top-5 text-dark d-flex flex-column justify-content-center background-user"> | ||
<div id="chat-area" class="px-4 py-2 rounded-top-5 text-dark d-flex flex-column justify-content-center"> | ||
<form id="chat-form"> | ||
<div class="d-flex"> | ||
<div class="flex-grow-1"> | ||
|
@@ -62,11 +62,12 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images< | |
<input id="file" name="file" class="form-control form-control-sm" type="file" accept=".png, .jpg" aria-label="Upload File"></input> | ||
</div> | ||
|
||
|
||
<label for="message" class="form-label bi" style="color:white">Ask question about image:</label> | ||
<div class="input-group"> | ||
<i class="bi bi-body-text input-group-text" aria-hidden="true"></i> | ||
<speech-input-button></speech-input-button> | ||
<input id="message" name="message" class="form-control form-control-sm" type="text" rows="1" placeholder="<Your Message>" aria-label="Ask ChatGPT"></input> | ||
<button type="submit" class="btn btn-outline-light"> | ||
<button type="submit" class="btn btn-primary"> | ||
Send | ||
<i class="bi bi-send-fill" aria-hidden="true"></i> | ||
</button> | ||
|
@@ -79,7 +80,8 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images< | |
</main> | ||
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/showdown.min.js"></script> | ||
<script src="https://cdn.jsdelivr.net/npm/@microsoft/[email protected]/dist/iife/index.js"></script> | ||
|
||
<script src="/static/speech-input.js?v=2"></script> | ||
<script src="/static/speech-output.js?v=2"></script> | ||
<script> | ||
const form = document.getElementById("chat-form"); | ||
const messageInput = document.getElementById("message"); | ||
|
@@ -112,6 +114,15 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images< | |
} | ||
}); | ||
|
||
// Wire the <speech-input-button> element to the chat message input.
const speechInputButton = document.querySelector("speech-input-button");
speechInputButton.addEventListener("speech-input-result", (event) => {
  // Append the recognized text to whatever is already typed. The separating
  // space is added only when there is existing text, so dictating into an
  // empty input no longer produces a leading space.
  const transcript = event.detail.transcript.trim();
  messageInput.value = messageInput.value
    ? `${messageInput.value} ${transcript}`
    : transcript;
  messageInput.focus();
});
speechInputButton.addEventListener("speech-input-error", (event) => {
  // Show recognition problems to the user directly.
  alert(event.detail.error);
});
|
||
form.addEventListener("submit", async function(e) { | ||
e.preventDefault(); | ||
|
||
|
@@ -174,6 +185,11 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images< | |
}); | ||
|
||
messageInput.value = ""; | ||
|
||
const speechOutput = document.createElement("speech-output-button"); | ||
speechOutput.setAttribute("text", answer); | ||
messageDiv.appendChild(speechOutput); | ||
messageDiv.focus(); | ||
} catch (error) { | ||
messageDiv.innerHTML = "Error: " + error; | ||
} | ||
|