Skip to content

Commit

Permalink
Merge pull request #44 from louis030195/d-#32
Browse files Browse the repository at this point in the history
#32

- use tokio in screenpipe-vision
- use n parallel tasks (should be the same behaviour as before)
- (new) skip frame if cpu overloaded for OCR (configurable)
- can now turn on/off devices from screenpipe-audio using API
- can now turn on/off vision using API
- disconnecting an audio device that is in use stops listening. The user needs to make an API request to enable any new device (could be improved later)
  • Loading branch information
louis030195 authored Jul 15, 2024
2 parents 6dac6cd + 0a6b547 commit 8043f71
Show file tree
Hide file tree
Showing 19 changed files with 1,219 additions and 699 deletions.
3 changes: 3 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build]
# for console_subscriber
rustflags = ["--cfg", "tokio_unstable"]
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ candle-nn = { package = "candle-nn", version = "0.6.0" }
candle-transformers = { package = "candle-transformers", version = "0.6.0" }
tokenizers = "0.19.1"
tracing = "0.1.37"
tokio = { version = "1.15", features = ["full", "tracing"] }

4 changes: 4 additions & 0 deletions screenpipe-audio/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,13 @@ crossbeam = "0.8"
# Bytes
bytemuck = "1.16.1"

# Async
tokio = { workspace = true }

[dev-dependencies]
tempfile = "3.3.0"
infer = "0.15"
tokio = { version = "1.0", features = ["full"] }

[features]
metal = ["candle/metal", "candle-nn/metal", "candle-transformers/metal"]
Expand Down
28 changes: 23 additions & 5 deletions screenpipe-audio/src/bin/screenpipe-audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ use screenpipe_audio::create_whisper_channel;
use screenpipe_audio::default_input_device;
use screenpipe_audio::default_output_device;
use screenpipe_audio::list_audio_devices;
use screenpipe_audio::parse_device_spec;
use screenpipe_audio::parse_audio_device;
use screenpipe_audio::record_and_transcribe;
use screenpipe_audio::AudioDevice;
use std::path::PathBuf;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::thread;
use std::time::Duration;

Expand All @@ -33,6 +35,8 @@ fn print_devices(devices: &[AudioDevice]) {
}
}

// TODO - kinda bad cli here

fn main() -> Result<()> {
use env_logger::Builder;
use log::LevelFilter;
Expand All @@ -56,27 +60,41 @@ fn main() -> Result<()> {
} else {
args.audio_device
.iter()
.map(|d| parse_device_spec(d))
.map(|d| parse_audio_device(d))
.collect::<Result<Vec<_>>>()?
};

if devices.is_empty() {
return Err(anyhow!("No audio input devices found"));
}

let chunk_duration = Duration::from_secs(30);
// delete .mp3 files (output*.mp3)
std::fs::remove_file("output_0.mp3").unwrap_or_default();
std::fs::remove_file("output_1.mp3").unwrap_or_default();

let chunk_duration = Duration::from_secs(5);
let output_path = PathBuf::from("output.mp3");
let (whisper_sender, whisper_receiver) = create_whisper_channel()?;

// Spawn threads for each device
let recording_threads: Vec<_> = devices
.into_iter()
.enumerate()
.map(|(i, device)| {
let whisper_sender = whisper_sender.clone();
let output_path = output_path.with_file_name(format!("output_{}.mp3", i));
let device_control = Arc::new(AtomicBool::new(true));
let device_clone = device.clone();

thread::spawn(move || {
record_and_transcribe(&device, chunk_duration, output_path, whisper_sender)
let device_control_clone = Arc::clone(&device_control);

record_and_transcribe(
&device_clone,
chunk_duration,
output_path,
whisper_sender,
device_control_clone,
)
})
})
.collect();
Expand Down
Loading

0 comments on commit 8043f71

Please sign in to comment.