From 93344ed5a68a116af68ce1ef20f5712bcbb47121 Mon Sep 17 00:00:00 2001
From: DevEmperor <56255079+devemperor@users.noreply.github.com>
Date: Wed, 2 Oct 2024 21:05:22 +0200
Subject: [PATCH] feature: transcribe voice as input using Whisper AI
---
.idea/deploymentTargetSelector.xml | 10 -
app/src/main/AndroidManifest.xml | 7 +-
...utActivity.java => InputTypeActivity.java} | 43 +--
.../activities/InputWhisperActivity.java | 262 ++++++++++++++++++
.../wristassist/util/InputIntentBuilder.java | 11 +-
.../main/res/drawable/twotone_error_24.xml | 7 +
app/src/main/res/drawable/twotone_mic_24.xml | 9 +
app/src/main/res/drawable/twotone_send_24.xml | 7 +
.../res/layout/activity_input_whisper.xml | 110 ++++++++
app/src/main/res/values/colors.xml | 2 +-
10 files changed, 417 insertions(+), 51 deletions(-)
delete mode 100644 .idea/deploymentTargetSelector.xml
rename app/src/main/java/net/devemperor/wristassist/activities/{InputActivity.java => InputTypeActivity.java} (63%)
create mode 100644 app/src/main/java/net/devemperor/wristassist/activities/InputWhisperActivity.java
create mode 100644 app/src/main/res/drawable/twotone_error_24.xml
create mode 100644 app/src/main/res/drawable/twotone_mic_24.xml
create mode 100644 app/src/main/res/drawable/twotone_send_24.xml
create mode 100644 app/src/main/res/layout/activity_input_whisper.xml
diff --git a/.idea/deploymentTargetSelector.xml b/.idea/deploymentTargetSelector.xml
deleted file mode 100644
index b268ef3..0000000
--- a/.idea/deploymentTargetSelector.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index 4aa139d..ee5a3ae 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -4,6 +4,7 @@
+
@@ -85,7 +86,11 @@
android:exported="false"
android:taskAffinity="" />
+
{
+ if (recorder != null) {
+ stopRecording();
+ } else {
+ startRecording();
+ }
+ });
+
+ startRecording();
+ }
+
+ @Override
+ protected void onDestroy() {
+ super.onDestroy();
+
+ if (recorder != null) {
+ try {
+ recorder.stop();
+ } catch (RuntimeException ignored) { }
+ recorder.release();
+ recorder = null;
+
+ if (recordTimeRunnable != null) {
+ recordTimeHandler.removeCallbacks(recordTimeRunnable);
+ }
+ }
+
+ if (speechApiThread != null) speechApiThread.shutdownNow();
+ }
+
+    /**
+     * Handles the answer to the microphone permission request (request code 1337): starts
+     * recording when the permission was granted, otherwise switches to the error state.
+     * Results for any other request code are only forwarded to the superclass.
+     */
+    @Override
+    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
+        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
+        if (requestCode != 1337) {
+            return;
+        }
+
+        // An empty result array is treated as a denial.
+        final boolean micGranted = grantResults.length > 0
+                && grantResults[0] == PackageManager.PERMISSION_GRANTED;
+        if (micGranted) {
+            startRecording();
+        } else {
+            showError();
+        }
+    }
+
+    /**
+     * Starts a new microphone recording into the cached audio file and switches the UI into its
+     * recording state (blinking indicators, visible timer, "send" icon on the button).
+     *
+     * If the RECORD_AUDIO permission is missing it is requested with request code 1337 and the
+     * method returns; onRequestPermissionsResult calls this method again once it is granted.
+     *
+     * If the recorder cannot be configured, prepared or started, it is released, the
+     * {@code recorder} field stays {@code null} and the error state is shown. No timer or
+     * animation is started in that case, so the next button press retries the recording instead
+     * of trying to stop a recorder that never ran.
+     */
+    private void startRecording() {
+        if (checkSelfPermission(android.Manifest.permission.RECORD_AUDIO) != android.content.pm.PackageManager.PERMISSION_GRANTED) {
+            requestPermissions(new String[]{android.Manifest.permission.RECORD_AUDIO}, 1337);
+            return;
+        }
+
+        // Bring the recorder up before touching the UI so a failure never leaves a half-started
+        // recording screen behind. The field is only assigned once the recorder really runs.
+        MediaRecorder newRecorder = new MediaRecorder();
+        try {
+            newRecorder.setAudioSource(MediaRecorder.AudioSource.MIC);
+            newRecorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4);
+            newRecorder.setAudioEncoder(MediaRecorder.AudioEncoder.AAC);
+            newRecorder.setAudioEncodingBitRate(64000);
+            newRecorder.setAudioSamplingRate(44100);
+            // NOTE(review): the container is MPEG-4/AAC although the file is named ".mp3"; the
+            // name is kept because startWhisperApiRequest() reads the very same path.
+            newRecorder.setOutputFile(new File(getCacheDir(), "whisper_input_audio.mp3"));
+
+            newRecorder.prepare();
+            newRecorder.start();
+        } catch (IOException | RuntimeException e) {
+            // prepare() reports I/O problems as IOException; the other calls signal failures
+            // with unchecked exceptions.
+            newRecorder.release();
+            showError();
+            return;
+        }
+        recorder = newRecorder;
+
+        sendBtn.setImageDrawable(AppCompatResources.getDrawable(this, R.drawable.twotone_send_24));
+        errorIv.setVisibility(View.GONE);
+        errorIv2.setVisibility(View.GONE);
+        recordingIv.setVisibility(View.VISIBLE);
+        recordingIv2.setVisibility(View.INVISIBLE);
+
+        // The two indicators fade in opposite directions.
+        startBlinking(recordingIv, 1f, 0f);
+        startBlinking(recordingIv2, 0f, 1f);
+
+        timeTv.setVisibility(View.VISIBLE);
+
+        elapsedTime = 0;
+        recordTimeHandler.post(recordTimeRunnable);
+    }
+
+    /** Starts an endlessly reversing one-second alpha fade from {@code fromAlpha} to {@code toAlpha} on the view. */
+    private void startBlinking(View target, float fromAlpha, float toAlpha) {
+        AlphaAnimation blink = new AlphaAnimation(fromAlpha, toAlpha);
+        blink.setDuration(1000);
+        blink.setRepeatCount(AlphaAnimation.INFINITE);
+        blink.setRepeatMode(AlphaAnimation.REVERSE);
+        target.startAnimation(blink);
+    }
+
+    /**
+     * Stops the active recording, releases the recorder, cancels the record-time updates and
+     * then uploads the recorded audio for transcription. Does nothing when no recording is active.
+     *
+     * If the recorder fails to stop, the recording is treated as unusable: nothing is uploaded
+     * and the error state is shown so the user can retry.
+     */
+    private void stopRecording() {
+        if (recorder == null) {
+            return;
+        }
+
+        boolean recordingUsable = true;
+        try {
+            recorder.stop();
+        } catch (RuntimeException e) {
+            // stop() failed, so the output file cannot be trusted (MediaRecorder raises this
+            // e.g. when no valid audio was captured); do not send it to the API.
+            recordingUsable = false;
+        }
+        recorder.release();
+        recorder = null;
+
+        if (recordTimeRunnable != null) {
+            recordTimeHandler.removeCallbacks(recordTimeRunnable);
+        }
+
+        if (!recordingUsable) {
+            // The indicators may still be blinking; clear the animations so hiding them in
+            // showError() takes effect.
+            recordingIv.clearAnimation();
+            recordingIv2.clearAnimation();
+            showError();
+            return;
+        }
+
+        startWhisperApiRequest();
+    }
+
+ private void startWhisperApiRequest() {
+ sendBtn.setEnabled(false);
+ progressBar.setVisibility(View.VISIBLE);
+ timeTv.setVisibility(View.GONE);
+ recordingIv.setVisibility(View.GONE);
+ recordingIv.clearAnimation();
+ recordingIv2.setVisibility(View.GONE);
+ recordingIv2.clearAnimation();
+ errorIv.setVisibility(View.GONE);
+ errorIv2.setVisibility(View.GONE);
+
+ String apiKey = sp.getString("net.devemperor.wristassist.api_key", "noApiKey");
+ Retrofit retrofit = new Retrofit.Builder()
+ .baseUrl("https://api.openai.com/")
+ .client(defaultClient(apiKey.replaceAll("[^ -~]", ""), Duration.ofSeconds(120)).newBuilder().build())
+ .addConverterFactory(JacksonConverterFactory.create(defaultObjectMapper()))
+ .addCallAdapterFactory(RxJava2CallAdapterFactory.create())
+ .build();
+ OpenAiService service = new OpenAiService(retrofit.create(OpenAiApi.class));
+
+ speechApiThread = Executors.newSingleThreadExecutor();
+ speechApiThread.execute(() -> {
+ try {
+ CreateTranscriptionRequest request = CreateTranscriptionRequest.builder()
+ .model("whisper-1")
+ .responseFormat("verbose_json")
+ .build();
+ TranscriptionResult result = service.createTranscription(request, new File(getCacheDir(), "whisper_input_audio.mp3"));
+
+ // TODO: add usage to db
+
+ Intent data = new Intent();
+ data.putExtra("net.devemperor.wristassist.input.content", result.getText());
+ setResult(RESULT_OK, data);
+ finish();
+
+ } catch (RuntimeException e) {
+ if (!(e.getCause() instanceof InterruptedIOException)) {
+ FirebaseCrashlytics fc = FirebaseCrashlytics.getInstance();
+ fc.setCustomKey("settings", sp.getAll().toString());
+ fc.setUserId(sp.getString("net.devemperor.wristassist.userid", "null"));
+ fc.recordException(e);
+ fc.sendUnsentReports();
+
+ showError();
+ }
+ }
+ });
+ }
+
+    /**
+     * Switches the UI into the error state on the UI thread: optionally vibrates (preference
+     * "net.devemperor.wristassist.vibrate", default on), hides progress, timer and recording
+     * indicators, shows the error icons and re-enables the button with a "replay" icon.
+     * Safe to call from a background thread.
+     */
+    private void showError() {
+        runOnUiThread(() -> {
+            if (sp.getBoolean("net.devemperor.wristassist.vibrate", true)) {
+                Vibrator vibrator = (Vibrator) getSystemService(VIBRATOR_SERVICE);
+                if (vibrator != null) {
+                    vibrator.vibrate(VibrationEffect.createWaveform(new long[]{50, 50, 50, 50, 50}, new int[]{-1, 0, -1, 0, -1}, -1));
+                }
+            }
+
+            progressBar.setVisibility(View.GONE);
+            timeTv.setVisibility(View.GONE);
+            errorIv.setVisibility(View.VISIBLE);
+            errorIv2.setVisibility(View.VISIBLE);
+            // Clear a still-running blink animation as well, as startWhisperApiRequest() does;
+            // otherwise the indicators can keep being drawn although they are set to GONE.
+            recordingIv.clearAnimation();
+            recordingIv.setVisibility(View.GONE);
+            recordingIv2.clearAnimation();
+            recordingIv2.setVisibility(View.GONE);
+            sendBtn.setEnabled(true);
+            sendBtn.setImageDrawable(AppCompatResources.getDrawable(this, R.drawable.twotone_replay_24));
+        });
+    }
+}
\ No newline at end of file
diff --git a/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java b/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java
index 708592b..32f1be2 100644
--- a/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java
+++ b/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java
@@ -3,7 +3,8 @@
import android.content.Context;
import android.content.Intent;
-import net.devemperor.wristassist.activities.InputActivity;
+import net.devemperor.wristassist.activities.InputTypeActivity;
+import net.devemperor.wristassist.activities.InputWhisperActivity;
public class InputIntentBuilder {
@@ -56,14 +57,18 @@ public InputIntentBuilder setHandsFree(boolean handsFree) {
}
public Intent build() {
- Intent intent = new Intent(context, InputActivity.class);
+ Intent intent;
+ if (handsFree) {
+ intent = new Intent(context, InputWhisperActivity.class);
+ } else {
+ intent = new Intent(context, InputTypeActivity.class);
+ }
intent.putExtra("net.devemperor.wristassist.input.title", title);
intent.putExtra("net.devemperor.wristassist.input.content", content);
intent.putExtra("net.devemperor.wristassist.input.hint", hint);
intent.putExtra("net.devemperor.wristassist.input.title2", title2);
intent.putExtra("net.devemperor.wristassist.input.content2", content2);
intent.putExtra("net.devemperor.wristassist.input.hint2", hint2);
- intent.putExtra("net.devemperor.wristassist.input.hands_free", handsFree);
return intent;
}
}
diff --git a/app/src/main/res/drawable/twotone_error_24.xml b/app/src/main/res/drawable/twotone_error_24.xml
new file mode 100644
index 0000000..7ee408f
--- /dev/null
+++ b/app/src/main/res/drawable/twotone_error_24.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/app/src/main/res/drawable/twotone_mic_24.xml b/app/src/main/res/drawable/twotone_mic_24.xml
new file mode 100644
index 0000000..5e93857
--- /dev/null
+++ b/app/src/main/res/drawable/twotone_mic_24.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/app/src/main/res/drawable/twotone_send_24.xml b/app/src/main/res/drawable/twotone_send_24.xml
new file mode 100644
index 0000000..5752268
--- /dev/null
+++ b/app/src/main/res/drawable/twotone_send_24.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/app/src/main/res/layout/activity_input_whisper.xml b/app/src/main/res/layout/activity_input_whisper.xml
new file mode 100644
index 0000000..d51160c
--- /dev/null
+++ b/app/src/main/res/layout/activity_input_whisper.xml
@@ -0,0 +1,110 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/app/src/main/res/values/colors.xml b/app/src/main/res/values/colors.xml
index cea6744..2accb20 100644
--- a/app/src/main/res/values/colors.xml
+++ b/app/src/main/res/values/colors.xml
@@ -1,7 +1,7 @@
#7B1FA2
- #667B1FA2
+ #737B1FA2
#ffffff
#000000
#A8A8A8