Skip to content

Commit

Permalink
feature: transcribe voice as input using Whisper AI
Browse files Browse the repository at this point in the history
  • Loading branch information
DevEmperor committed Oct 2, 2024
1 parent f9d6208 commit 93344ed
Show file tree
Hide file tree
Showing 10 changed files with 417 additions and 51 deletions.
10 changes: 0 additions & 10 deletions .idea/deploymentTargetSelector.xml

This file was deleted.

7 changes: 6 additions & 1 deletion app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<uses-permission android:name="android.permission.VIBRATE" />
<uses-permission android:name="android.permission.WAKE_LOCK" />
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />

<uses-feature android:name="android.hardware.type.watch" />

Expand Down Expand Up @@ -85,7 +86,11 @@
android:exported="false"
android:taskAffinity="" />
<activity
android:name=".activities.InputActivity"
android:name=".activities.InputTypeActivity"
android:exported="false"
android:taskAffinity="" />
<activity
android:name=".activities.InputWhisperActivity"
android:exported="false"
android:taskAffinity="" />
<activity
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
package net.devemperor.wristassist.activities;

import android.content.ActivityNotFoundException;
import android.content.Intent;
import android.os.Bundle;
import android.speech.RecognizerIntent;
import android.view.KeyEvent;
import android.view.View;
import android.widget.EditText;
import android.widget.ScrollView;
import android.widget.TextView;
import android.widget.Toast;

import androidx.appcompat.app.AppCompatActivity;

import net.devemperor.wristassist.R;

import java.util.Objects;

public class InputActivity extends AppCompatActivity {
public class InputTypeActivity extends AppCompatActivity {

ScrollView inputSv;
TextView inputTitleTv;
Expand All @@ -28,21 +23,20 @@ public class InputActivity extends AppCompatActivity {
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_input);
setContentView(R.layout.activity_input_type);

inputSv = findViewById(R.id.activity_input_sv);
inputSv = findViewById(R.id.activity_input_type_sv);

String title = getIntent().getStringExtra("net.devemperor.wristassist.input.title");
String content = getIntent().getStringExtra("net.devemperor.wristassist.input.content");
String hint = getIntent().getStringExtra("net.devemperor.wristassist.input.hint");
String title2 = getIntent().getStringExtra("net.devemperor.wristassist.input.title2");
String content2 = getIntent().getStringExtra("net.devemperor.wristassist.input.content2");
String hint2 = getIntent().getStringExtra("net.devemperor.wristassist.input.hint2");
boolean handsFree = getIntent().getBooleanExtra("net.devemperor.wristassist.input.hands_free", false);
inputTitleTv = findViewById(R.id.activity_input_title_tv);
inputContentEt = findViewById(R.id.activity_input_content_et);
inputTitle2Tv = findViewById(R.id.activity_input_title2_tv);
inputContent2Et = findViewById(R.id.activity_input_content2_et);
inputTitleTv = findViewById(R.id.activity_input_type_title_tv);
inputContentEt = findViewById(R.id.activity_input_type_content_et);
inputTitle2Tv = findViewById(R.id.activity_input_type_title2_tv);
inputContent2Et = findViewById(R.id.activity_input_type_content2_et);
inputTitleTv.setText(title);
inputContentEt.setText(content);
inputContentEt.setHint(hint);
Expand Down Expand Up @@ -71,29 +65,6 @@ protected void onCreate(Bundle savedInstanceState) {
}

inputSv.requestFocus();

if (handsFree) {
try {
Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
startActivityForResult(intent, 1337);
} catch (ActivityNotFoundException e) {
e.printStackTrace();
Toast.makeText(this, R.string.wristassist_no_speech_recognition, Toast.LENGTH_SHORT).show();
}
}
}

@Override
public void onActivityResult(int requestCode, int resultCode, Intent data) {
if (requestCode == 1337 && resultCode == RESULT_OK) {
String result = Objects.requireNonNull(data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)).get(0);
Intent intent = new Intent();
intent.putExtra("net.devemperor.wristassist.input.content", result);
setResult(RESULT_OK, intent);
finish();
}
super.onActivityResult(requestCode, resultCode, data);
}

public void cancel(View view) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
package net.devemperor.wristassist.activities;

import static com.theokanning.openai.service.OpenAiService.defaultClient;
import static com.theokanning.openai.service.OpenAiService.defaultObjectMapper;

import android.content.Intent;
import android.content.SharedPreferences;
import android.content.pm.PackageManager;
import android.media.MediaRecorder;
import android.os.Bundle;
import android.os.Handler;
import android.os.Looper;
import android.os.VibrationEffect;
import android.os.Vibrator;
import android.view.View;
import android.view.animation.AlphaAnimation;
import android.widget.ImageView;
import android.widget.TextView;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.appcompat.content.res.AppCompatResources;

import com.google.android.material.floatingactionbutton.FloatingActionButton;
import com.google.android.material.progressindicator.LinearProgressIndicator;
import com.google.firebase.crashlytics.FirebaseCrashlytics;
import com.theokanning.openai.audio.CreateTranscriptionRequest;
import com.theokanning.openai.audio.TranscriptionResult;
import com.theokanning.openai.client.OpenAiApi;
import com.theokanning.openai.service.OpenAiService;

import net.devemperor.wristassist.R;

import java.io.File;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.time.Duration;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import retrofit2.Retrofit;
import retrofit2.adapter.rxjava2.RxJava2CallAdapterFactory;
import retrofit2.converter.jackson.JacksonConverterFactory;

/**
 * Records a voice message from the microphone, sends it to the OpenAI Whisper
 * transcription endpoint and returns the transcribed text to the caller.
 *
 * <p>Recording starts as soon as the activity is created (after the
 * {@code RECORD_AUDIO} permission has been granted). Pressing the send button
 * stops the recording and starts the transcription request. On success the
 * activity finishes with {@code RESULT_OK} and the text stored in the
 * {@code net.devemperor.wristassist.input.content} extra. On failure an error
 * state is shown and the send button turns into a retry button.
 */
public class InputWhisperActivity extends AppCompatActivity {

    /** Request code used for the RECORD_AUDIO runtime permission dialog. */
    private static final int REQUEST_RECORD_AUDIO = 1337;

    /** Name of the recording inside the cache directory. */
    private static final String AUDIO_FILE_NAME = "whisper_input_audio.mp3";

    /** Preference key holding the OpenAI API key; must never leave the device in reports. */
    private static final String PREF_API_KEY = "net.devemperor.wristassist.api_key";

    /** Interval in milliseconds between two updates of the recording timer. */
    private static final long TIMER_TICK_MS = 100;

    TextView titleTv;
    FloatingActionButton sendBtn;
    LinearProgressIndicator progressBar;
    TextView timeTv;
    ImageView recordingIv;
    ImageView recordingIv2;
    ImageView errorIv;
    ImageView errorIv2;

    SharedPreferences sp;
    MediaRecorder recorder;
    Runnable recordTimeRunnable;
    Handler recordTimeHandler;
    ExecutorService speechApiThread;
    long elapsedTime;

    /**
     * Binds the views, sets up the recording timer and immediately starts recording.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_input_whisper);

        titleTv = findViewById(R.id.activity_input_whisper_title_tv);
        sendBtn = findViewById(R.id.activity_input_whisper_send_btn);
        progressBar = findViewById(R.id.activity_input_whisper_pb);
        timeTv = findViewById(R.id.activity_input_whisper_time_tv);
        recordingIv = findViewById(R.id.activity_input_whisper_recording_iv);
        recordingIv2 = findViewById(R.id.activity_input_whisper_recording_iv2);
        errorIv = findViewById(R.id.activity_input_whisper_error_iv);
        errorIv2 = findViewById(R.id.activity_input_whisper_error_iv2);

        sp = getSharedPreferences("net.devemperor.wristassist", MODE_PRIVATE);

        recordTimeHandler = new Handler(Looper.getMainLooper());
        // Self-rescheduling tick that advances and renders the mm:ss recording timer.
        recordTimeRunnable = new Runnable() {
            @Override
            public void run() {
                elapsedTime += TIMER_TICK_MS;
                timeTv.setText(String.format(Locale.getDefault(), "%02d:%02d", (int) (elapsedTime / 60000), (int) (elapsedTime / 1000) % 60));
                recordTimeHandler.postDelayed(this, TIMER_TICK_MS);
            }
        };

        titleTv.setText(getIntent().getStringExtra("net.devemperor.wristassist.input.title"));

        // While recording the button sends; in the error state it restarts the recording.
        sendBtn.setOnClickListener(v -> {
            if (recorder != null) {
                stopRecording();
            } else {
                startRecording();
            }
        });

        startRecording();
    }

    /**
     * Releases the recorder, stops the timer and cancels a pending transcription request.
     */
    @Override
    protected void onDestroy() {
        super.onDestroy();

        releaseRecorder();

        // Interrupting the worker surfaces as an InterruptedIOException, which the task ignores.
        if (speechApiThread != null) {
            speechApiThread.shutdownNow();
        }
    }

    /**
     * Starts recording once the microphone permission was granted, otherwise shows the error state.
     */
    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode == REQUEST_RECORD_AUDIO) {
            if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                startRecording();
            } else {
                showError();
            }
        }
    }

    /**
     * Requests the microphone permission if necessary, then starts a new recording
     * into the cache file and switches the UI into the recording state.
     *
     * <p>If the recorder cannot be prepared or started it is released again and
     * the error state is shown, so a later button press retries cleanly.
     */
    private void startRecording() {
        if (checkSelfPermission(android.Manifest.permission.RECORD_AUDIO) != android.content.pm.PackageManager.PERMISSION_GRANTED) {
            requestPermissions(new String[]{android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
            return;
        }

        MediaRecorder newRecorder = new MediaRecorder();
        try {
            newRecorder.setAudioSource(MediaRecorder.AudioSource.MIC);
            newRecorder.setOutputFormat(MediaRecorder.OutputFormat.MPEG_4);
            newRecorder.setAudioEncoder(MediaRecorder.AudioEncoder.AAC);
            newRecorder.setAudioEncodingBitRate(64000);
            newRecorder.setAudioSamplingRate(44100);
            newRecorder.setOutputFile(new File(getCacheDir(), AUDIO_FILE_NAME));
            newRecorder.prepare();
            newRecorder.start();
        } catch (IOException | RuntimeException e) {
            // Never keep a half-initialised recorder around: the send button would
            // otherwise "stop" it and upload an empty or stale file.
            newRecorder.release();
            showError();
            return;
        }
        recorder = newRecorder;

        sendBtn.setImageDrawable(AppCompatResources.getDrawable(this, R.drawable.twotone_send_24));
        errorIv.setVisibility(View.GONE);
        errorIv2.setVisibility(View.GONE);
        recordingIv.setVisibility(View.VISIBLE);
        recordingIv2.setVisibility(View.INVISIBLE);

        // The two icons cross-fade in opposite phase to indicate an active recording.
        AlphaAnimation alphaAnimation = new AlphaAnimation(1f, 0f);
        alphaAnimation.setDuration(1000);
        alphaAnimation.setRepeatCount(AlphaAnimation.INFINITE);
        alphaAnimation.setRepeatMode(AlphaAnimation.REVERSE);
        recordingIv.startAnimation(alphaAnimation);

        AlphaAnimation alphaAnimation2 = new AlphaAnimation(0f, 1f);
        alphaAnimation2.setDuration(1000);
        alphaAnimation2.setRepeatCount(AlphaAnimation.INFINITE);
        alphaAnimation2.setRepeatMode(AlphaAnimation.REVERSE);
        recordingIv2.startAnimation(alphaAnimation2);

        timeTv.setVisibility(View.VISIBLE);

        elapsedTime = 0;
        // Guard against a second timer chain if a previous one is still queued.
        recordTimeHandler.removeCallbacks(recordTimeRunnable);
        recordTimeHandler.post(recordTimeRunnable);
    }

    /**
     * Stops the running recording and hands the recorded file to the Whisper API.
     * Does nothing when no recording is active.
     */
    private void stopRecording() {
        if (recorder == null) {
            return;
        }
        releaseRecorder();
        startWhisperApiRequest();
    }

    /**
     * Stops the timer and, if a recorder exists, stops and releases it.
     * Safe to call repeatedly and when nothing is being recorded.
     */
    private void releaseRecorder() {
        if (recordTimeHandler != null && recordTimeRunnable != null) {
            recordTimeHandler.removeCallbacks(recordTimeRunnable);
        }
        if (recorder == null) {
            return;
        }
        try {
            recorder.stop();
        } catch (RuntimeException ignored) {
            // stop() throws when no valid audio was captured; the recorder is released regardless.
        }
        recorder.release();
        recorder = null;
    }

    /**
     * Switches the UI into the loading state and transcribes the recorded file on
     * a background thread. On success the activity finishes with the transcript as
     * result; on failure the exception is reported and the error state is shown.
     */
    private void startWhisperApiRequest() {
        sendBtn.setEnabled(false);
        progressBar.setVisibility(View.VISIBLE);
        timeTv.setVisibility(View.GONE);
        recordingIv.setVisibility(View.GONE);
        recordingIv.clearAnimation();
        recordingIv2.setVisibility(View.GONE);
        recordingIv2.clearAnimation();
        errorIv.setVisibility(View.GONE);
        errorIv2.setVisibility(View.GONE);

        String apiKey = sp.getString(PREF_API_KEY, "noApiKey");
        Retrofit retrofit = new Retrofit.Builder()
                .baseUrl("https://api.openai.com/")
                // Strip everything outside printable ASCII so the key is a valid HTTP header value.
                .client(defaultClient(apiKey.replaceAll("[^ -~]", ""), Duration.ofSeconds(120)).newBuilder().build())
                .addConverterFactory(JacksonConverterFactory.create(defaultObjectMapper()))
                .addCallAdapterFactory(RxJava2CallAdapterFactory.create())
                .build();
        OpenAiService service = new OpenAiService(retrofit.create(OpenAiApi.class));
        File audioFile = new File(getCacheDir(), AUDIO_FILE_NAME);

        // Reuse one worker across retries instead of leaking a new thread per request.
        if (speechApiThread == null) {
            speechApiThread = Executors.newSingleThreadExecutor();
        }
        speechApiThread.execute(() -> {
            try {
                CreateTranscriptionRequest request = CreateTranscriptionRequest.builder()
                        .model("whisper-1")
                        .responseFormat("verbose_json")
                        .build();
                TranscriptionResult result = service.createTranscription(request, audioFile);

                // TODO: add usage to db

                Intent data = new Intent();
                data.putExtra("net.devemperor.wristassist.input.content", result.getText());
                // Activity state must only be touched from the UI thread.
                runOnUiThread(() -> {
                    setResult(RESULT_OK, data);
                    finish();
                });
            } catch (RuntimeException e) {
                if (e.getCause() instanceof InterruptedIOException) {
                    // The request was cancelled (see onDestroy); nothing to report.
                    return;
                }
                reportException(e);
                showError();
            }
        });
    }

    /**
     * Sends the exception to Crashlytics together with the app settings.
     * The API key is redacted so the secret never leaves the device.
     */
    private void reportException(RuntimeException e) {
        Map<String, Object> settings = new HashMap<>(sp.getAll());
        if (settings.containsKey(PREF_API_KEY)) {
            settings.put(PREF_API_KEY, "<redacted>");
        }

        FirebaseCrashlytics fc = FirebaseCrashlytics.getInstance();
        fc.setCustomKey("settings", settings.toString());
        fc.setUserId(sp.getString("net.devemperor.wristassist.userid", "null"));
        fc.recordException(e);
        fc.sendUnsentReports();
    }

    /**
     * Shows the error state on the UI thread: vibrates (if enabled in the
     * settings), hides the progress and recording indicators and turns the send
     * button into a retry button. May be called from any thread.
     */
    private void showError() {
        runOnUiThread(() -> {
            if (sp.getBoolean("net.devemperor.wristassist.vibrate", true)) {
                Vibrator vibrator = (Vibrator) getSystemService(VIBRATOR_SERVICE);
                if (vibrator != null) {
                    vibrator.vibrate(VibrationEffect.createWaveform(new long[]{50, 50, 50, 50, 50}, new int[]{-1, 0, -1, 0, -1}, -1));
                }
            }

            progressBar.setVisibility(View.GONE);
            timeTv.setVisibility(View.GONE);
            errorIv.setVisibility(View.VISIBLE);
            errorIv2.setVisibility(View.VISIBLE);
            // Stop the infinite cross-fade so the hidden icons are not kept animating.
            recordingIv.clearAnimation();
            recordingIv.setVisibility(View.GONE);
            recordingIv2.clearAnimation();
            recordingIv2.setVisibility(View.GONE);
            sendBtn.setEnabled(true);
            sendBtn.setImageDrawable(AppCompatResources.getDrawable(this, R.drawable.twotone_replay_24));
        });
    }
}
Loading

0 comments on commit 93344ed

Please sign in to comment.