pitch & rate done

This commit is contained in:
EnefFlow
2023-05-07 20:24:36 +03:00
committed by Aviu00
parent 0d8ee133f2
commit 21a2d3cdba
6 changed files with 111 additions and 44 deletions

View File

@@ -14,10 +14,10 @@ public sealed partial class HumanoidProfileEditor
private List<TTSVoicePrototype> _voiceList = default!; private List<TTSVoicePrototype> _voiceList = default!;
private readonly List<string> _sampleText = new() private readonly List<string> _sampleText = new()
{ {
"Съешь же ещё этих мягких французских булок, да выпей чаю.", "Помогите, клоун насилует в технических тоннелях!",
"Клоун, прекрати разбрасывать банановые кожурки офицерам под ноги!", "ХоС, ваши сотрудники украли у меня собаку и засунули ее в стиральную машину!",
"Капитан, вы уверены что хотите назначить клоуна на должность главы персонала?", "Агент синдиката украл пиво из бара и взорвался!",
"Эс Бэ! Тут человек в сером костюме, с тулбоксом и в маске! Помогите!!" "Врача! Позовите врача!"
}; };
private void InitializeVoice() private void InitializeVoice()

View File

@@ -55,7 +55,7 @@ public sealed class TTSManager
/// <param name="text">SSML formatted text</param> /// <param name="text">SSML formatted text</param>
/// <returns>OGG audio bytes</returns> /// <returns>OGG audio bytes</returns>
/// <exception cref="Exception">Throws if url or token CCVar not set or http request failed</exception> /// <exception cref="Exception">Throws if url or token CCVar not set or http request failed</exception>
public async Task<byte[]> ConvertTextToSpeech(string entityName, string speaker, string text) public async Task<byte[]> ConvertTextToSpeech(string speaker, string text, string pitch, string rate)
{ {
var url = _cfg.GetCVar(WhiteCVars.TTSApiUrl); var url = _cfg.GetCVar(WhiteCVars.TTSApiUrl);
var maxCacheSize = _cfg.GetCVar(WhiteCVars.TTSMaxCacheSize); var maxCacheSize = _cfg.GetCVar(WhiteCVars.TTSMaxCacheSize);
@@ -77,7 +77,8 @@ public sealed class TTSManager
{ {
Text = text, Text = text,
Speaker = speaker, Speaker = speaker,
Ckey = entityName, Pitch = pitch,
Rate = rate
}; };
var request = CreateRequestLink(url, body); var request = CreateRequestLink(url, body);
@@ -125,9 +126,10 @@ public sealed class TTSManager
{ {
var uriBuilder = new UriBuilder(url); var uriBuilder = new UriBuilder(url);
var query = HttpUtility.ParseQueryString(uriBuilder.Query); var query = HttpUtility.ParseQueryString(uriBuilder.Query);
query["ckey"] = body.Ckey;
query["speaker"] = body.Speaker; query["speaker"] = body.Speaker;
query["text"] = body.Text; query["text"] = body.Text;
query["pitch"] = body.Pitch;
query["rate"] = body.Rate;
query["file"] = "1"; query["file"] = "1";
uriBuilder.Query = query.ToString(); uriBuilder.Query = query.ToString();
return uriBuilder.ToString(); return uriBuilder.ToString();
@@ -156,8 +158,11 @@ public sealed class TTSManager
[JsonPropertyName("speaker")] [JsonPropertyName("speaker")]
public string Speaker { get; set; } = default!; public string Speaker { get; set; } = default!;
[JsonPropertyName("ckey")] [JsonPropertyName("pitch")]
public string Ckey { get; set; } = default!; public string Pitch { get; set; } = default!;
[JsonPropertyName("rate")]
public string Rate { get; set; } = default!;
} }
private struct GenerateVoiceResponse private struct GenerateVoiceResponse

View File

@@ -1,29 +0,0 @@
namespace Content.Server.White.TTS;
// ReSharper disable once InconsistentNaming
public sealed partial class TTSSystem
{
    /// <summary>
    /// Lookup from each <see cref="SpeechRate"/> member to the keyword placed in the
    /// <c>rate</c> attribute of the generated <c>prosody</c> element.
    /// </summary>
    private static readonly IReadOnlyDictionary<SpeechRate, string> SpeechRateMap =
        new Dictionary<SpeechRate, string>
        {
            [SpeechRate.VerySlow] = "x-slow",
            [SpeechRate.Slow] = "slow",
            [SpeechRate.Medium] = "medium",
            [SpeechRate.Fast] = "fast",
            [SpeechRate.VeryFast] = "x-fast",
        };

    /// <summary>
    /// Speaking speeds that <see cref="ToSsmlText"/> can request.
    /// </summary>
    private enum SpeechRate : byte
    {
        VerySlow,
        Slow,
        Medium,
        Fast,
        VeryFast,
    }

    /// <summary>
    /// Wraps <paramref name="text"/> in a <c>speak</c>/<c>prosody</c> envelope carrying the requested rate.
    /// </summary>
    /// <param name="text">Text to embed; it is inserted verbatim, without any escaping.</param>
    /// <param name="rate">Speaking speed; defaults to <see cref="SpeechRate.Medium"/>.</param>
    /// <returns>The SSML string.</returns>
    private string ToSsmlText(string text, SpeechRate rate = SpeechRate.Medium)
    {
        var rateKeyword = SpeechRateMap[rate];
        return $"<speak><prosody rate=\"{rateKeyword}\">{text}</prosody></speak>";
    }
}

View File

@@ -23,6 +23,7 @@ public sealed partial class TTSSystem : EntitySystem
[Dependency] private readonly IRobustRandom _random = default!; [Dependency] private readonly IRobustRandom _random = default!;
[Dependency] private readonly IServerNetManager _netMgr = default!; [Dependency] private readonly IServerNetManager _netMgr = default!;
[Dependency] private readonly IPlayerManager _playerManager = default!; [Dependency] private readonly IPlayerManager _playerManager = default!;
[Dependency] private readonly TTSPitchRateSystem _ttsPitchRateSystem = default!;
private const int MaxMessageChars = 100 * 2; // same as SingleBubbleCharLimit * 2 private const int MaxMessageChars = 100 * 2; // same as SingleBubbleCharLimit * 2
private bool _isEnabled = false; private bool _isEnabled = false;
@@ -120,15 +121,34 @@ public sealed partial class TTSSystem : EntitySystem
_ttsManager.ResetCache(); _ttsManager.ResetCache();
} }
/// <summary>
/// Sanitizes <paramref name="text"/> and requests speech audio for it from the TTS manager.
/// </summary>
/// <param name="uid">Entity that is speaking; used to look up default pitch and rate.</param>
/// <param name="text">Raw message text.</param>
/// <param name="speaker">Speaker identifier forwarded to the TTS manager.</param>
/// <param name="speechPitch">Explicit pitch; when null the entity's default is used.</param>
/// <param name="speechRate">Explicit rate; when null the entity's default is used.</param>
/// <returns>The bytes returned by the TTS manager, or null when the sanitized text is empty.</returns>
private async Task<byte[]?> GenerateTTS(EntityUid uid, string text, string speaker, string? speechPitch = null, string? speechRate = null)
{
    var textSanitized = Sanitize(text);
    if (textSanitized == "")
        return null;

    // Entity defaults are only looked up when at least one value was not supplied.
    var fallbackPitch = "medium";
    var fallbackRate = "medium";
    if ((speechPitch == null || speechRate == null) &&
        _ttsPitchRateSystem.TryGetPitchRate(uid, out var pitchRate))
    {
        fallbackPitch = pitchRate[0];
        fallbackRate = pitchRate[1];
    }

    // Resolve each value on its own so that an explicit pitch (or rate) is honoured
    // even when the other argument was left null.
    var pitch = speechPitch ?? fallbackPitch;
    var rate = speechRate ?? fallbackRate;

    return await _ttsManager.ConvertTextToSpeech(speaker, textSanitized, pitch, rate);
}
} }

View File

@@ -0,0 +1,71 @@
using Content.Shared.Humanoid;
using Content.Shared.Preferences;
namespace Content.Shared.White.TTS;
/// <summary>
/// Resolves the default TTS pitch and rate keywords for an entity from its humanoid species.
/// </summary>
public sealed class TTSPitchRateSystem : EntitySystem
{
    /// <summary>
    /// Looks up the default pitch and rate for <paramref name="uid"/>.
    /// </summary>
    /// <param name="uid">Entity to inspect.</param>
    /// <param name="pitchRate">
    /// On success a two-element list: index 0 is the pitch keyword, index 1 is the rate keyword.
    /// On failure an empty list.
    /// </param>
    /// <returns>
    /// True when the entity has a <see cref="HumanoidAppearanceComponent"/>; otherwise false.
    /// </returns>
    public bool TryGetPitchRate(EntityUid uid, out List<string> pitchRate)
    {
        if (!TryComp<HumanoidAppearanceComponent>(uid, out var humanoid))
        {
            pitchRate = new List<string>();
            return false;
        }

        var (pitch, rate) = GetPitchRateForSpecies(uid, humanoid);
        pitchRate = new List<string> {pitch, rate};
        return true;
    }

    /// <summary>
    /// Maps a species (and, for humans, the spawning prototype) to pitch and rate keywords.
    /// Only species and prototype are consulted; appearance traits such as skin colour
    /// deliberately play no part in the result.
    /// </summary>
    /// <param name="uid">Entity whose metadata is read to detect the dwarf prototype.</param>
    /// <param name="humanoid">Humanoid appearance data of <paramref name="uid"/>.</param>
    /// <returns>The pitch and rate keywords; medium/medium for any species without an override.</returns>
    private (string Pitch, string Rate) GetPitchRateForSpecies(EntityUid uid, HumanoidAppearanceComponent humanoid)
    {
        return humanoid.Species switch
        {
            "SlimePerson" => ("high", "medium"),
            "Arachnid" => ("x-high", "x-fast"),
            // Dwarfs share the Human species and are told apart by their prototype id.
            "Human" when MetaData(uid).EntityPrototype?.ID == "MobDwarf" => ("high", "slow"),
            "Diona" => ("x-low", "x-slow"),
            "Reptilian" => ("low", "slow"),
            // Human, Skrell, Skeleton and any unlisted species use the neutral defaults.
            _ => ("medium", "medium"),
        };
    }
}

View File

@@ -76,7 +76,7 @@ public sealed class WhiteCVars
/// URL of the TTS server API. /// URL of the TTS server API.
/// </summary> /// </summary>
public static readonly CVarDef<string> TTSApiUrl =
    // Empty by default: the endpoint is deployment-specific and must come from server
    // configuration rather than source. A hard-coded address would make every build send
    // chat text to that host over plain HTTP unless an operator overrides it.
    CVarDef.Create("tts.api_url", "", CVar.SERVERONLY);
/// <summary> /// <summary>
/// TTS Volume /// TTS Volume