Skip to content

Commit

Permalink
Offline Speech Recognition #2089 (#2258)
Browse files Browse the repository at this point in the history
  • Loading branch information
VladislavAntonyuk authored Oct 5, 2024
1 parent 67f44a3 commit 9b7e48d
Show file tree
Hide file tree
Showing 30 changed files with 1,199 additions and 377 deletions.
7 changes: 0 additions & 7 deletions global.json

This file was deleted.

1 change: 1 addition & 0 deletions samples/CommunityToolkit.Maui.Sample/AppShell.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ public partial class AppShell : Shell
CreateViewModelMapping<FileSaverPage, FileSaverViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<FolderPickerPage, FolderPickerViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<SpeechToTextPage, SpeechToTextViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<OfflineSpeechToTextPage, OfflineSpeechToTextViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),

// Add Extensions View Models
CreateViewModelMapping<ColorAnimationExtensionsPage, ColorAnimationExtensionsViewModel, ExtensionsGalleryPage, ExtensionsGalleryViewModel>(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@
<!--<EmbedAssembliesIntoApk>true</EmbedAssembliesIntoApk>-->
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)' == 'Release'
AND $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'tizen'
AND $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'android'
AND $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'windows'">
<PropertyGroup Condition="'$(Configuration)' == 'Release' &#xD;&#xA; AND $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'tizen'&#xD;&#xA; AND $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'android'&#xD;&#xA; AND $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'windows'">
<!-- Ahead-of-time compilation is not yet supported net8.0-tizen -->
<!-- Cross-OS native compilation is not supported net8.0-android -->
<!-- Only iOS + MacCatalyst support AOT in .NET 8 -->
Expand All @@ -61,7 +58,7 @@
<!-- Custom Fonts -->
<MauiFont Include="Resources\Fonts\*" />

<PackageReference Include="Microsoft.Maui.Controls" Version="*"/>
<PackageReference Include="Microsoft.Maui.Controls" Version="*" />
<PackageReference Include="CommunityToolkit.Maui.Markup" Version="4.1.0" />
<PackageReference Include="CommunityToolkit.Mvvm" Version="8.2.2" />
<PackageReference Include="Microsoft.Extensions.Http.Resilience" Version="8.8.0" />
Expand All @@ -87,6 +84,18 @@
<ProjectReference Include="..\..\src\CommunityToolkit.Maui.Maps\CommunityToolkit.Maui.Maps.csproj" />
</ItemGroup>

<ItemGroup>
<Compile Update="Pages\Essentials\OfflineSpeechToTextPage.xaml.cs">
<DependentUpon>OfflineSpeechToTextPage.xaml</DependentUpon>
</Compile>
</ItemGroup>

<ItemGroup>
<MauiXaml Update="Pages\Essentials\OfflineSpeechToTextPage.xaml">
<Generator>MSBuild:Compile</Generator>
</MauiXaml>
</ItemGroup>

<PropertyGroup Condition="$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)'))=='windows' and $(Configuration) == 'Release'">
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
</PropertyGroup>
Expand Down
1 change: 1 addition & 0 deletions samples/CommunityToolkit.Maui.Sample/MauiProgram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ static void RegisterViewsAndViewModels(in IServiceCollection services)
services.AddTransientWithShellRoute<FileSaverPage, FileSaverViewModel>();
services.AddTransientWithShellRoute<FolderPickerPage, FolderPickerViewModel>();
services.AddTransientWithShellRoute<SpeechToTextPage, SpeechToTextViewModel>();
services.AddTransientWithShellRoute<OfflineSpeechToTextPage, OfflineSpeechToTextViewModel>();

// Add Extensions Pages + ViewModels
services.AddTransientWithShellRoute<ColorAnimationExtensionsPage, ColorAnimationExtensionsViewModel>();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
<?xml version="1.0" encoding="utf-8" ?>
<pages:BasePage xmlns="http://schemas.microsoft.com/dotnet/2021/maui"
xmlns:x="http://schemas.microsoft.com/winfx/2009/xaml"
xmlns:pages="clr-namespace:CommunityToolkit.Maui.Sample.Pages"
x:Class="CommunityToolkit.Maui.Sample.Pages.Essentials.OfflineSpeechToTextPage"
xmlns:vm="clr-namespace:CommunityToolkit.Maui.Sample.ViewModels.Essentials"
xmlns:essentials="clr-namespace:CommunityToolkit.Maui.Sample.Pages.Essentials"
x:TypeArguments="vm:OfflineSpeechToTextViewModel"
x:DataType="vm:OfflineSpeechToTextViewModel"
Title="OfflineSpeechToText">

<!-- Sample page for offline speech recognition. All bindings below are compiled against
     OfflineSpeechToTextViewModel (see x:DataType on the root element). -->

<ContentPage.Resources>
<!-- Converter referenced by the locale Picker's ItemDisplayBinding below. -->
<essentials:PickerLocaleDisplayConverter x:Key="PickerLocaleDisplayConverter" />
</ContentPage.Resources>

<ScrollView>
<VerticalStackLayout
Spacing="20"
Padding="30,0">

<!-- Introductory description of the feature. -->
<Label
Text="SpeechToText allows the user to convert speech to text in real time"
HorizontalTextAlignment="Center"/>

<Label
Text="Locale"
FontAttributes="Bold"/>

<!-- Locale selection: items come from Locales, the selection is bound to CurrentLocale,
     and each item is rendered through PickerLocaleDisplayConverter. -->
<Picker
ItemsSource="{Binding Locales}"
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"/>

<Label
Text="State"
FontAttributes="Bold"/>

<!-- Shows the view model's State property. -->
<Label
Text="{Binding State}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100" />

<Label
Text="Language Output"
FontAttributes="Bold"/>

<!-- Shows the view model's RecognitionText property. -->
<Label
Text="{Binding RecognitionText}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100" />

<!-- Invokes PlayCommand on the view model. -->
<Button
Text="Play"
Command="{Binding PlayCommand}"
HorizontalOptions="Center" />

<!-- Bordered panel holding the start/stop buttons and a short API description. -->
<Border
StrokeThickness="2"
Stroke="#808080"
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>
<!-- Two rows: the buttons on row 0 (one per column), the description on row 1 spanning both columns. -->
<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
ColumnSpacing="12">

<!-- Invokes StartListenCommand on the view model. -->
<Button
Grid.Row="0"
Grid.Column="0"
Text="StartListenAsync"
Command="{Binding StartListenCommand}"
HorizontalOptions="End" />

<!-- Invokes StopListenCommand on the view model. -->
<Button
Grid.Row="0"
Grid.Column="1"
Text="StopListenAsync"
Command="{Binding StopListenCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `StartListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>

</Grid>
</Border.Content>
</Border>
</VerticalStackLayout>
</ScrollView>

</pages:BasePage>
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System.Globalization;
using CommunityToolkit.Maui.Converters;
using CommunityToolkit.Maui.Sample.ViewModels.Essentials;

namespace CommunityToolkit.Maui.Sample.Pages.Essentials;

/// <summary>
/// Code-behind for the offline speech-to-text sample page. The page is bound to an
/// <see cref="OfflineSpeechToTextViewModel"/> and runs the view model's
/// <c>SetLocalesCommand</c> each time it appears.
/// </summary>
public partial class OfflineSpeechToTextPage : BasePage<OfflineSpeechToTextViewModel>
{
	/// <summary>Creates the page and loads its XAML.</summary>
	/// <param name="viewModel">View model handed to the base page.</param>
	public OfflineSpeechToTextPage(OfflineSpeechToTextViewModel viewModel) : base(viewModel) => InitializeComponent();

	/// <summary>Runs the base appearing logic, then executes the set-locales command.</summary>
	protected override async void OnAppearing()
	{
		base.OnAppearing();

		// async void is unavoidable here: this overrides a void lifecycle method.
		var setLocalesCommand = BindingContext.SetLocalesCommand;
		await setLocalesCommand.ExecuteAsync(null);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,44 +94,7 @@
</Grid>
</Border.Content>
</Border>

<Border
StrokeThickness="2"
Stroke="#808080"
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>
<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
ColumnSpacing="12">

<Button
Grid.Row="0"
Grid.Column="0"
Text="StartOfflineListenAsync"
Command="{Binding StartOfflineListenCommand}"
HorizontalOptions="End" />

<Button
Grid.Row="0"
Grid.Column="1"
Text="StopOfflineListenAsync"
Command="{Binding StopOfflineListenCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `StartOfflineListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>

</Grid>
</Border.Content>
</Border>

</VerticalStackLayout>
</ScrollView>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ public class EssentialsGalleryViewModel() : BaseGalleryViewModel(
SectionModel.Create<BadgeViewModel>("Badge", "Allows the user to set app icon badge count on the home screen"),
SectionModel.Create<FileSaverViewModel>("FileSaver", "Allows the user to save files to the filesystem"),
SectionModel.Create<FolderPickerViewModel>("FolderPicker", "Allows picking folders from the file system"),
SectionModel.Create<SpeechToTextViewModel>("SpeechToText", "Converts speech to text")
SectionModel.Create<SpeechToTextViewModel>("SpeechToText", "Converts speech to text"),
SectionModel.Create<OfflineSpeechToTextViewModel>("OfflineSpeechToText", "Converts speech to text offline")
]);
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
using System.Collections.ObjectModel;
using System.Collections.Specialized;
using System.Globalization;
using CommunityToolkit.Maui.Alerts;
using CommunityToolkit.Maui.Media;
using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.Input;

namespace CommunityToolkit.Maui.Sample.ViewModels.Essentials;

/// <summary>
/// View model for the offline speech-to-text sample page. It exposes the available locales,
/// the recognizer's current state, the recognized text, and commands to play the text back
/// and to start/stop listening through <see cref="OfflineSpeechToText.Default"/>.
/// </summary>
public partial class OfflineSpeechToTextViewModel : BaseViewModel
{
	// Language codes tried, in addition to each other, when picking the initial locale.
	const string defaultLanguage = "en-US";
	const string defaultLanguage_android = "en";
	const string defaultLanguage_tizen = "en_US";

	readonly ITextToSpeech textToSpeech;
	readonly ISpeechToText speechToText;

	[ObservableProperty]
	Locale? currentLocale;

	/// <summary>Current state reported by the speech-to-text implementation.</summary>
	public SpeechToTextState? State => speechToText.CurrentState;

	[ObservableProperty]
	string? recognitionText = "Welcome to .NET MAUI Community Toolkit!";

	// Backs StartListenCommand's CanExecute; false while a listen session is active.
	[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StartListenCommand))]
	bool canStartListenExecute = true;

	// Backs StopListenCommand's CanExecute; true only while a listen session is active.
	[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StopListenCommand))]
	bool canStopListenExecute = false;

	/// <summary>
	/// Creates the view model and subscribes to locale-collection, state-changed and
	/// recognition-completed notifications.
	/// </summary>
	/// <param name="textToSpeech">Service used to enumerate locales and to play text back.</param>
	public OfflineSpeechToTextViewModel(ITextToSpeech textToSpeech)
	{
		this.textToSpeech = textToSpeech;
		this.speechToText = OfflineSpeechToText.Default;

		Locales.CollectionChanged += HandleLocalesCollectionChanged;
		this.speechToText.StateChanged += HandleSpeechToTextStateChanged;
		this.speechToText.RecognitionResultCompleted += HandleRecognitionResultCompleted;
	}

	/// <summary>Locales offered to the user, sorted by language and then by name.</summary>
	public ObservableCollection<Locale> Locales { get; } = [];

	/// <summary>
	/// Reloads <see cref="Locales"/> from the text-to-speech service and selects a default:
	/// the first locale whose language matches one of the default language codes, otherwise
	/// the first locale in the list (or <c>null</c> when the list is empty).
	/// </summary>
	/// <param name="token">Cancels the wait for the locale list.</param>
	[RelayCommand]
	async Task SetLocales(CancellationToken token)
	{
		Locales.Clear();

		var locales = await textToSpeech.GetLocalesAsync().WaitAsync(token);

		foreach (var locale in locales.OrderBy(x => x.Language).ThenBy(x => x.Name))
		{
			Locales.Add(locale);
		}

		CurrentLocale = Locales.FirstOrDefault(x => x.Language is defaultLanguage or defaultLanguage_android or defaultLanguage_tizen) ?? Locales.FirstOrDefault();
	}

	/// <summary>
	/// Speaks <see cref="RecognitionText"/> (or the welcome text when it is <c>null</c>) using
	/// <see cref="CurrentLocale"/>, giving up waiting after five seconds.
	/// </summary>
	/// <param name="cancellationToken">Cancels the playback request and the toasts.</param>
	[RelayCommand]
	async Task Play(CancellationToken cancellationToken)
	{
		// Disposed on exit so the timer behind the timeout is released promptly.
		using var timeoutCancellationTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(5));

		try
		{
			await textToSpeech.SpeakAsync(RecognitionText ?? "Welcome to .NET MAUI Community Toolkit!", new()
			{
				Locale = CurrentLocale,
				Pitch = 1,
				Volume = 1
			}, cancellationToken).WaitAsync(timeoutCancellationTokenSource.Token);
		}
		catch (TaskCanceledException)
		{
			await Toast.Make("Playback automatically stopped after 5 seconds").Show(cancellationToken);
#if IOS
			await Toast.Make("If you did not hear playback, test again on a physical iOS device").Show(cancellationToken);
#endif
		}
	}

	/// <summary>
	/// Requests permissions and starts listening with partial results enabled.
	/// Disables the start command while running and enables the stop command once
	/// permissions are granted.
	/// </summary>
	[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
	async Task StartListen()
	{
		CanStartListenExecute = false;

		var isGranted = await speechToText.RequestPermissions(CancellationToken.None);
		if (!isGranted)
		{
			// Nothing was started: re-enable the start command so the user can retry.
			CanStartListenExecute = true;
			await Toast.Make("Permission not granted").Show(CancellationToken.None);
			return;
		}

		// No connectivity check here: this sample exists to demonstrate recognition without Internet access.

		const string beginSpeakingPrompt = "Begin speaking...";

		RecognitionText = beginSpeakingPrompt;

		// Remove first so the handler is never attached twice if a previous session did not go through StopListen.
		speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;
		speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

		// A session is about to start, so the stop command must become available.
		CanStopListenExecute = true;

		await speechToText.StartListenAsync(new SpeechToTextOptions()
		{
			Culture = CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage),
			ShouldReportPartialResults = true
		}, CancellationToken.None);

		if (RecognitionText is beginSpeakingPrompt)
		{
			RecognitionText = string.Empty;
		}
	}

	/// <summary>
	/// Stops listening, detaches the partial-result handler and flips the start/stop
	/// command availability back to the idle state.
	/// </summary>
	[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
	Task StopListen()
	{
		CanStartListenExecute = true;
		CanStopListenExecute = false;

		speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;

		return speechToText.StopListenAsync(CancellationToken.None);
	}

	// Appends each partial recognition result to the displayed text.
	void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs e)
	{
		RecognitionText += e.RecognitionResult;
	}

	// Replaces the displayed text with the final result, or with the error message on failure.
	void HandleRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs e)
	{
		RecognitionText = e.RecognitionResult.IsSuccessful ? e.RecognitionResult.Text : e.RecognitionResult.Exception.Message;
	}

	// State is a computed property, so its change notification has to be raised manually.
	void HandleSpeechToTextStateChanged(object? sender, SpeechToTextStateChangedEventArgs e)
	{
		OnPropertyChanged(nameof(State));
	}

	// Raises a CurrentLocale change notification whenever the locale list changes.
	void HandleLocalesCollectionChanged(object? sender, NotifyCollectionChangedEventArgs e)
	{
		OnPropertyChanged(nameof(CurrentLocale));
	}
}
Loading

0 comments on commit 9b7e48d

Please sign in to comment.