Skip to content

Commit

Permalink
Merge pull request #30 from Azure-Samples/speech
Browse files Browse the repository at this point in the history
Adding speech input via custom element
  • Loading branch information
pamelafox authored Dec 17, 2024
2 parents b1d7e34 + 6a1fa89 commit 0e2f386
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 16 deletions.
115 changes: 115 additions & 0 deletions src/quartapp/static/speech-input.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/**
 * `<speech-input-button>`: a microphone toggle button that records speech with
 * the browser's SpeechRecognition API and reports the outcome through events
 * dispatched on the element itself:
 *
 * - `speech-input-result` (CustomEvent): `detail.transcript` is the recognized text.
 * - `speech-input-error` (CustomEvent): `detail.error` is a user-facing message.
 * - `speech-input-end` (Event): recognition has stopped.
 */
class SpeechInputButton extends HTMLElement {
  constructor() {
    super();
    this.isRecording = false;
    // Stays null when the browser offers no SpeechRecognition implementation.
    // The problem is reported from startRecording(), i.e. when the user
    // actually clicks, because listeners cannot be attached yet at
    // construction time.
    this.speechRecognition = null;
    const SpeechRecognition =
      window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      return;
    }
    this.speechRecognition = new SpeechRecognition();
    this.speechRecognition.lang = navigator.language || navigator.userLanguage;
    this.speechRecognition.interimResults = false;
    this.speechRecognition.maxAlternatives = 1;
  }

  /** Renders the button and wires its click handler. */
  connectedCallback() {
    this.innerHTML = `
<button class="btn btn-outline-secondary" type="button">
<i class="bi bi-mic"></i>
</button>`;
    this.recordButton = this.querySelector("button");
    this.recordButton.addEventListener("click", () => this.toggleRecording());
  }

  /** Shows the "recording" state (filled icon plus active class). */
  renderButtonOn() {
    this.recordButton.classList.add("speech-input-active");
    this.recordButton.innerHTML = '<i class="bi bi-mic-fill"></i>';
  }

  /** Shows the idle state. */
  renderButtonOff() {
    this.recordButton.classList.remove("speech-input-active");
    this.recordButton.innerHTML = '<i class="bi bi-mic"></i>';
  }

  /**
   * Dispatches a `speech-input-error` event on this element.
   * @param {string} message - Text placed in `detail.error`.
   */
  dispatchSpeechError(message) {
    this.dispatchEvent(
      new CustomEvent("speech-input-error", { detail: { error: message } })
    );
  }

  /**
   * Installs the recognition handlers and starts listening. When speech
   * recognition is unavailable, only a `speech-input-error` event is dispatched.
   */
  startRecording() {
    if (this.speechRecognition == null) {
      this.dispatchSpeechError("SpeechRecognition not supported");
      // Without this return the handler assignments below throw a TypeError.
      return;
    }

    this.speechRecognition.onresult = (event) => {
      let input = "";
      for (const result of event.results) {
        // maxAlternatives is 1, so the first alternative is the only one.
        input += result[0].transcript;
      }
      this.dispatchEvent(
        new CustomEvent("speech-input-result", {
          detail: { transcript: input },
        })
      );
    };

    this.speechRecognition.onend = () => {
      // NOTE: In some browsers (e.g. Chrome), the recording will stop automatically after a few seconds of silence.
      this.isRecording = false;
      this.renderButtonOff();
      this.dispatchEvent(new Event("speech-input-end"));
    };

    this.speechRecognition.onerror = (event) => {
      this.speechRecognition.stop();
      const messages = {
        "no-speech":
          "No speech was detected. Please check your system audio settings and try again.",
        "language-not-supported":
          "The selected language is not supported. Please try a different language.",
      };
      const message = Object.hasOwn(messages, event.error)
        ? messages[event.error]
        : "An error occurred while recording. Please try again.";
      this.dispatchSpeechError(message);
    };

    this.speechRecognition.start();
    this.isRecording = true;
    this.renderButtonOn();
  }

  /** Click handler: stops an active recording, otherwise starts one. */
  toggleRecording() {
    if (this.isRecording) {
      // isRecording is reset by onend once the recognizer has really stopped.
      this.speechRecognition.stop();
    } else {
      this.startRecording();
    }
  }
}

// Register the class so <speech-input-button> tags are backed by SpeechInputButton.
customElements.define("speech-input-button", SpeechInputButton);
91 changes: 91 additions & 0 deletions src/quartapp/static/speech-output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
 * `<speech-output-button>`: a speaker toggle button that reads the element's
 * `text` attribute aloud with the browser's SpeechSynthesis API.
 *
 * Dispatches `speech-output-error` (CustomEvent, `detail.error` is a message)
 * when synthesis is unavailable or playback fails.
 */
class SpeechOutputButton extends HTMLElement {
  static observedAttributes = ["text"];

  constructor() {
    super();
    this.isPlaying = false;
    // null when the browser has no speech synthesis; reported on click by
    // toggleSpeechOutput(), since no listener can be attached yet here.
    this.synth = window.speechSynthesis || window.webkitSpeechSynthesis || null;
    this.lngCode = navigator.language || navigator.userLanguage;
  }

  /** Renders the button and wires its click handler. */
  connectedCallback() {
    this.innerHTML = `
<button class="btn btn-outline-secondary" type="button">
<i class="bi bi-volume-up"></i>
</button>`;
    this.speechButton = this.querySelector("button");
    this.speechButton.addEventListener("click", () =>
      this.toggleSpeechOutput()
    );
  }

  /** Shows the "speaking" state (filled icon plus active class). */
  renderButtonOn() {
    this.speechButton.classList.add("speech-output-active");
    this.speechButton.innerHTML = '<i class="bi bi-volume-up-fill"></i>';
  }

  /** Shows the idle state. */
  renderButtonOff() {
    this.speechButton.classList.remove("speech-output-active");
    this.speechButton.innerHTML = '<i class="bi bi-volume-up"></i>';
  }

  /**
   * Click handler: cancels playback when already speaking (or when there is
   * nothing to say), otherwise speaks the `text` attribute.
   */
  toggleSpeechOutput() {
    if (!this.isConnected) {
      return;
    }
    if (this.synth == null) {
      this.dispatchEvent(
        new CustomEvent("speech-output-error", {
          detail: { error: "SpeechSynthesis not supported" },
        })
      );
      return;
    }

    // getAttribute() returns null when the attribute is absent; treat that
    // like an empty string so the literal word "null" is never spoken.
    const text = this.getAttribute("text");
    if (this.isPlaying || !text) {
      this.synth.cancel(); // removes all utterances from the utterance queue.
      this.isPlaying = false;
      this.renderButtonOff();
      return;
    }

    // Create a new utterance and play it.
    const utterance = new SpeechSynthesisUtterance(text);
    utterance.lang = this.lngCode;
    utterance.volume = 1;
    utterance.rate = 1;
    utterance.pitch = 1;

    // Prefer a voice for the user's locale and fall back to en-US; with
    // neither available the utterance keeps the browser's default voice.
    const voices = this.synth.getVoices();
    const voice =
      voices.find((candidate) => candidate.lang === this.lngCode) ??
      voices.find((candidate) => candidate.lang === "en-US");
    if (voice) {
      utterance.voice = voice;
    }

    // Attach handlers before speak() so no event can be missed.
    utterance.onstart = () => {
      this.isPlaying = true;
      this.renderButtonOn();
    };

    utterance.onend = () => {
      this.isPlaying = false;
      this.renderButtonOff();
    };

    utterance.onerror = (event) => {
      // A failed utterance does not necessarily fire "end"; reset here so
      // the button cannot stay stuck in the speaking state.
      this.isPlaying = false;
      this.renderButtonOff();
      // "canceled"/"interrupted" result from cancel(), not from a failure.
      if (event.error !== "canceled" && event.error !== "interrupted") {
        this.dispatchEvent(
          new CustomEvent("speech-output-error", {
            detail: {
              error: "An error occurred while playing the audio. Please try again.",
            },
          })
        );
      }
    };

    this.synth.speak(utterance);
  }
}

// Register the class so <speech-output-button> elements are backed by SpeechOutputButton.
customElements.define("speech-output-button", SpeechOutputButton);
62 changes: 50 additions & 12 deletions src/quartapp/static/styles.css
Original file line number Diff line number Diff line change
@@ -1,34 +1,72 @@
* {
box-sizing: border-box;
box-sizing: border-box;
}

html, body {
height: 100%;
html,
body {
height: 100%;
background-color: #f8f9fa;
}

#messages .toast-container {
margin-bottom: 12px;
margin-bottom: 12px;
}

#messages .message-file img {
max-width: 100%;
max-height: 400px;
max-width: 100%;
max-height: 400px;
}

.background-user {
background-color: #4f28b9;
background-color: #4f28b9;
}

.background-assistant {
background-color: #0080ff;
background-color: #0080ff;
}

#image-preview {
max-height: 150px;
float: right;
margin-left: 20px;
max-height: 150px;
float: right;
margin-left: 20px;
}

#no-messages-heading {
margin-top: 20%;
margin-top: 20%;
}

#chat-area {
background-color: white;
border: 1px solid #c4cad0;
}

/* Speech input/output buttons */

#chat-form speech-input-button button {
border-top-right-radius: 0;
border-bottom-right-radius: 0;
}

speech-input-button button.speech-input-active,
speech-output-button button.speech-output-active {
border: 1px solid blue;
color: blue;
animation: pulse 1s infinite;
}

/* Tint the icon inside either speech button while that button is active. */
speech-input-button button.speech-input-active i,
speech-output-button button.speech-output-active i {
  color: blue;
}

@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(0, 123, 255, 0.7);
}
70% {
box-shadow: 0 0 0 6px rgba(0, 123, 255, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(0, 123, 255, 0);
}
}
24 changes: 20 additions & 4 deletions src/quartapp/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images<
</div>
</template>
</div>
<div id="chat-area" class="text-light px-4 py-2 rounded-top-5 text-dark d-flex flex-column justify-content-center background-user">
<div id="chat-area" class="px-4 py-2 rounded-top-5 text-dark d-flex flex-column justify-content-center">
<form id="chat-form">
<div class="d-flex">
<div class="flex-grow-1">
Expand All @@ -62,11 +62,12 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images<
<input id="file" name="file" class="form-control form-control-sm" type="file" accept=".png, .jpg" aria-label="Upload File"></input>
</div>


<label for="message" class="form-label bi" style="color:white">Ask question about image:</label>
<div class="input-group">
<i class="bi bi-body-text input-group-text" aria-hidden="true"></i>
<speech-input-button></speech-input-button>
<input id="message" name="message" class="form-control form-control-sm" type="text" rows="1" placeholder="<Your Message>" aria-label="Ask ChatGPT"></input>
<button type="submit" class="btn btn-outline-light">
<button type="submit" class="btn btn-primary">
Send
<i class="bi bi-send-fill" aria-hidden="true"></i>
</button>
Expand All @@ -79,7 +80,8 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images<
</main>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/showdown.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@microsoft/[email protected]/dist/iife/index.js"></script>

<script src="/static/speech-input.js?v=2"></script>
<script src="/static/speech-output.js?v=2"></script>
<script>
const form = document.getElementById("chat-form");
const messageInput = document.getElementById("message");
Expand Down Expand Up @@ -112,6 +114,15 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images<
}
});

// Append recognized speech to the message box and surface recognition errors.
const speechInputButton = document.querySelector("speech-input-button");
speechInputButton.addEventListener("speech-input-result", (event) => {
  const transcript = event.detail.transcript.trim();
  if (transcript !== "") {
    // Separate from existing text with one space, but never start the
    // message with a space or double up existing trailing whitespace.
    const needsSeparator =
      messageInput.value !== "" && !/\s$/.test(messageInput.value);
    messageInput.value += (needsSeparator ? " " : "") + transcript;
  }
  messageInput.focus();
});
speechInputButton.addEventListener("speech-input-error", (event) => {
  alert(event.detail.error);
});

form.addEventListener("submit", async function(e) {
e.preventDefault();

Expand Down Expand Up @@ -174,6 +185,11 @@ <h2 id="no-messages-heading" class="text-center">Chat with your uploaded images<
});

messageInput.value = "";

const speechOutput = document.createElement("speech-output-button");
speechOutput.setAttribute("text", answer);
messageDiv.appendChild(speechOutput);
messageDiv.focus();
} catch (error) {
messageDiv.innerHTML = "Error: " + error;
}
Expand Down

0 comments on commit 0e2f386

Please sign in to comment.