From 93344ed5a68a116af68ce1ef20f5712bcbb47121 Mon Sep 17 00:00:00 2001 From: DevEmperor <56255079+devemperor@users.noreply.github.com> Date: Wed, 2 Oct 2024 21:05:22 +0200 Subject: [PATCH] feature: transcribe voice as input using Whisper AI --- .idea/deploymentTargetSelector.xml | 10 - app/src/main/AndroidManifest.xml | 7 +- ...utActivity.java => InputTypeActivity.java} | 43 +-- .../activities/InputWhisperActivity.java | 262 ++++++++++++++++++ .../wristassist/util/InputIntentBuilder.java | 11 +- .../main/res/drawable/twotone_error_24.xml | 7 + app/src/main/res/drawable/twotone_mic_24.xml | 9 + app/src/main/res/drawable/twotone_send_24.xml | 7 + .../res/layout/activity_input_whisper.xml | 110 ++++++++ app/src/main/res/values/colors.xml | 2 +- 10 files changed, 417 insertions(+), 51 deletions(-) delete mode 100644 .idea/deploymentTargetSelector.xml rename app/src/main/java/net/devemperor/wristassist/activities/{InputActivity.java => InputTypeActivity.java} (63%) create mode 100644 app/src/main/java/net/devemperor/wristassist/activities/InputWhisperActivity.java create mode 100644 app/src/main/res/drawable/twotone_error_24.xml create mode 100644 app/src/main/res/drawable/twotone_mic_24.xml create mode 100644 app/src/main/res/drawable/twotone_send_24.xml create mode 100644 app/src/main/res/layout/activity_input_whisper.xml diff --git a/.idea/deploymentTargetSelector.xml b/.idea/deploymentTargetSelector.xml deleted file mode 100644 index b268ef3..0000000 --- a/.idea/deploymentTargetSelector.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 4aa139d..ee5a3ae 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -4,6 +4,7 @@ + @@ -85,7 +86,11 @@ android:exported="false" android:taskAffinity="" /> + { + if (recorder != null) { + stopRecording(); + } else { + startRecording(); + } + }); + + startRecording(); + } + + @Override + protected void onDestroy() { + super.onDestroy(); + + if (recorder != null) { + try { + recorder.stop(); + } catch (RuntimeException ignored) { } + recorder.release(); + recorder = null; + + if (recordTimeRunnable != null) { + recordTimeHandler.removeCallbacks(recordTimeRunnable); + } + } + + if (speechApiThread != null) speechApiThread.shutdownNow(); + } + + @Override + public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults); + if (requestCode == 1337) { + if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) { + startRecording(); + } else { + showError(); + } + } + } + + private void startRecording() { + if (checkSelfPermission(android.Manifest.permission.RECORD_AUDIO) != android.content.pm.PackageManager.PERMISSION_GRANTED) { + requestPermissions(new String[]{android.Manifest.permission.RECORD_AUDIO}, 1337); + return; + } + + sendBtn.setImageDrawable(AppCompatResources.getDrawable(this, R.drawable.twotone_send_24)); + errorIv.setVisibility(View.GONE); + errorIv2.setVisibility(View.GONE); + recordingIv.setVisibility(View.VISIBLE); + recordingIv2.setVisibility(View.INVISIBLE); + + AlphaAnimation alphaAnimation = new AlphaAnimation(1f, 0f); + alphaAnimation.setDuration(1000); + alphaAnimation.setRepeatCount(AlphaAnimation.INFINITE); + alphaAnimation.setRepeatMode(AlphaAnimation.REVERSE); + recordingIv.startAnimation(alphaAnimation); + + AlphaAnimation alphaAnimation2 = new AlphaAnimation(0f, 1f); + alphaAnimation2.setDuration(1000); + alphaAnimation2.setRepeatCount(AlphaAnimation.INFINITE); + alphaAnimation2.setRepeatMode(AlphaAnimation.REVERSE); + recordingIv2.startAnimation(alphaAnimation2); + + timeTv.setVisibility(View.VISIBLE); + + recorder = new MediaRecorder(); + recorder.setAudioSource(MediaRecorder.AudioSource.MIC); + recorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4); + recorder.setAudioEncoder(MediaRecorder.AudioEncoder.AAC); + recorder.setAudioEncodingBitRate(64000); + recorder.setAudioSamplingRate(44100); + recorder.setOutputFile(new File(getCacheDir(), "whisper_input_audio.mp3")); + + try { + recorder.prepare(); + recorder.start(); + } catch (IOException e) { + showError(); + } + + elapsedTime = 0; + recordTimeHandler.post(recordTimeRunnable); + } + + private void stopRecording() { + if (recorder != null) { + try { + recorder.stop(); + } catch (RuntimeException ignored) { } + recorder.release(); + recorder = null; + + if (recordTimeRunnable != null) { + recordTimeHandler.removeCallbacks(recordTimeRunnable); + } + + startWhisperApiRequest(); + } + } + + private void startWhisperApiRequest() { + sendBtn.setEnabled(false); + progressBar.setVisibility(View.VISIBLE); + timeTv.setVisibility(View.GONE); + recordingIv.setVisibility(View.GONE); + recordingIv.clearAnimation(); + recordingIv2.setVisibility(View.GONE); + recordingIv2.clearAnimation(); + errorIv.setVisibility(View.GONE); + errorIv2.setVisibility(View.GONE); + + String apiKey = sp.getString("net.devemperor.wristassist.api_key", "noApiKey"); + Retrofit retrofit = new Retrofit.Builder() + .baseUrl("https://api.openai.com/") + .client(defaultClient(apiKey.replaceAll("[^ -~]", ""), Duration.ofSeconds(120)).newBuilder().build()) + .addConverterFactory(JacksonConverterFactory.create(defaultObjectMapper())) + .addCallAdapterFactory(RxJava2CallAdapterFactory.create()) + .build(); + OpenAiService service = new OpenAiService(retrofit.create(OpenAiApi.class)); + + speechApiThread = Executors.newSingleThreadExecutor(); + speechApiThread.execute(() -> { + try { + CreateTranscriptionRequest request = CreateTranscriptionRequest.builder() + .model("whisper-1") + .responseFormat("verbose_json") + .build(); + TranscriptionResult result = service.createTranscription(request, new File(getCacheDir(), "whisper_input_audio.mp3")); + + // TODO: add usage to db + + Intent data = new Intent(); + data.putExtra("net.devemperor.wristassist.input.content", result.getText()); + setResult(RESULT_OK, data); + finish(); + + } catch (RuntimeException e) { + if (!(e.getCause() instanceof InterruptedIOException)) { + FirebaseCrashlytics fc = FirebaseCrashlytics.getInstance(); + fc.setCustomKey("settings", sp.getAll().toString()); + fc.setUserId(sp.getString("net.devemperor.wristassist.userid", "null")); + fc.recordException(e); + fc.sendUnsentReports(); + + showError(); + } + } + }); + } + + private void showError() { + runOnUiThread(() -> { + if (sp.getBoolean("net.devemperor.wristassist.vibrate", true)) { + ((Vibrator) getSystemService(VIBRATOR_SERVICE)) + .vibrate(VibrationEffect.createWaveform(new long[]{50, 50, 50, 50, 50}, new int[]{-1, 0, -1, 0, -1}, -1)); + } + + progressBar.setVisibility(View.GONE); + timeTv.setVisibility(View.GONE); + errorIv.setVisibility(View.VISIBLE); + errorIv2.setVisibility(View.VISIBLE); + recordingIv.setVisibility(View.GONE); + recordingIv2.setVisibility(View.GONE); + sendBtn.setEnabled(true); + sendBtn.setImageDrawable(AppCompatResources.getDrawable(this, R.drawable.twotone_replay_24)); + }); + } +} \ No newline at end of file diff --git a/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java b/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java index 708592b..32f1be2 100644 --- a/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java +++ b/app/src/main/java/net/devemperor/wristassist/util/InputIntentBuilder.java @@ -3,7 +3,8 @@ import android.content.Context; import android.content.Intent; -import net.devemperor.wristassist.activities.InputActivity; +import net.devemperor.wristassist.activities.InputTypeActivity; +import net.devemperor.wristassist.activities.InputWhisperActivity; public class InputIntentBuilder { @@ -56,14 +57,18 @@ public InputIntentBuilder setHandsFree(boolean handsFree) { } public Intent build() { - Intent intent = new Intent(context, InputActivity.class); + Intent intent; + if (handsFree) { + intent = new Intent(context, InputWhisperActivity.class); + } else { + intent = new Intent(context, InputTypeActivity.class); + } intent.putExtra("net.devemperor.wristassist.input.title", title); intent.putExtra("net.devemperor.wristassist.input.content", content); intent.putExtra("net.devemperor.wristassist.input.hint", hint); intent.putExtra("net.devemperor.wristassist.input.title2", title2); intent.putExtra("net.devemperor.wristassist.input.content2", content2); intent.putExtra("net.devemperor.wristassist.input.hint2", hint2); - intent.putExtra("net.devemperor.wristassist.input.hands_free", handsFree); return intent; } } diff --git a/app/src/main/res/drawable/twotone_error_24.xml b/app/src/main/res/drawable/twotone_error_24.xml new file mode 100644 index 0000000..7ee408f --- /dev/null +++ b/app/src/main/res/drawable/twotone_error_24.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/app/src/main/res/drawable/twotone_mic_24.xml b/app/src/main/res/drawable/twotone_mic_24.xml new file mode 100644 index 0000000..5e93857 --- /dev/null +++ b/app/src/main/res/drawable/twotone_mic_24.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/app/src/main/res/drawable/twotone_send_24.xml b/app/src/main/res/drawable/twotone_send_24.xml new file mode 100644 index 0000000..5752268 --- /dev/null +++ b/app/src/main/res/drawable/twotone_send_24.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/app/src/main/res/layout/activity_input_whisper.xml b/app/src/main/res/layout/activity_input_whisper.xml new file mode 100644 index 0000000..d51160c --- /dev/null +++ b/app/src/main/res/layout/activity_input_whisper.xml @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/src/main/res/values/colors.xml b/app/src/main/res/values/colors.xml index cea6744..2accb20 100644 --- a/app/src/main/res/values/colors.xml +++ b/app/src/main/res/values/colors.xml @@ -1,7 +1,7 @@ #7B1FA2 - #667B1FA2 + #737B1FA2 #ffffff #000000 #A8A8A8