snipt

Ctrl+h for KB shortcuts

C#

How to implement text-to-speech during a SIP voice call in C#?

using System;
using Ozeki.Media.MediaHandlers;
using Ozeki.VoIP;
using Ozeki.VoIP.SDK;

namespace Text_To_Speech
{
    /// <summary>
    /// Console sample that registers a SIP account, dials a fixed extension and,
    /// once the call is answered, plays a text-to-speech message into the call.
    /// </summary>
    class Program
    {
        static ISoftPhone softphone;
        static IPhoneLine phoneLine;
        static IPhoneCall call;
        static MediaConnector connector;
        static PhoneCallAudioSender mediaSender;

        /// <summary>
        /// Creates the softphone and the media objects, registers the SIP account
        /// and then keeps the process alive until the user presses Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            // NOTE(review): the two numbers are presumably the local port range the
            // softphone may use - confirm against the SDK documentation.
            softphone = SoftPhoneFactory.CreateSoftPhone(5000, 10000);

            // Create the media objects BEFORE starting registration. Registration
            // success triggers CreateCall(), and an answered call triggers
            // SetupTextToSpeech(), which dereferences both fields. The timing of
            // those callbacks is not controlled by this method, so the fields must
            // already be assigned when registration begins.
            mediaSender = new PhoneCallAudioSender();
            connector = new MediaConnector();

            // Placeholder account data: replace with the values of your own PBX
            // extension, and do not keep real credentials in source code.
            var registrationRequired = true;
            var userName = "858";
            var displayName = "858";
            var authenticationId = "858";
            var registerPassword = "858";
            var domainHost = "192.168.115.100";
            var domainPort = 5060;

            var account = new SIPAccount(registrationRequired, displayName, userName, authenticationId, registerPassword, domainHost, domainPort);

            RegisterAccount(account);

            // Block the main thread; the rest of the work happens in event handlers.
            Console.ReadLine();
        }

        /// <summary>
        /// Creates a phone line for the given account, subscribes to its
        /// registration state changes and asks the softphone to register it.
        /// Any exception is written to the console instead of being propagated.
        /// </summary>
        /// <param name="account">The SIP account to register.</param>
        static void RegisterAccount(SIPAccount account)
        {
            try
            {
                phoneLine = softphone.CreatePhoneLine(account);
                phoneLine.RegistrationStateChanged += line_RegStateChanged;
                softphone.RegisterPhoneLine(phoneLine);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error during SIP registration: " + ex);
            }
        }

        /// <summary>
        /// Reports the registration outcome and starts the outgoing call after the
        /// first successful registration.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Carries the new registration state.</param>
        static void line_RegStateChanged(object sender, RegistrationStateChangedArgs e)
        {
            if (e.State == RegState.NotRegistered || e.State == RegState.Error)
            {
                Console.WriteLine("Registration failed!");
                return;
            }

            if (e.State != RegState.RegistrationSucceeded)
            {
                return;
            }

            Console.WriteLine("Registration succeeded - Online!");

            // Dial only once. If the success state is reported again (for example
            // after the line is registered anew), dialing a second time would
            // overwrite the `call` field while the first call may still be in use.
            if (call == null)
            {
                CreateCall();
            }
        }

        /// <summary>
        /// Creates the call object for the hard-coded number, subscribes to its
        /// state changes and starts the call.
        /// </summary>
        private static void CreateCall()
        {
            var numberToDial = "853";
            call = softphone.CreateCallObject(phoneLine, numberToDial);
            call.CallStateChanged += call_CallStateChanged;
            call.Start();
        }

        /// <summary>
        /// Attaches the audio sender to the current call, connects a new
        /// text-to-speech source to it and starts speaking a fixed text.
        /// </summary>
        static void SetupTextToSpeech()
        {
            var textToSpeech = new TextToSpeech();

            mediaSender.AttachToCall(call);
            connector.Connect(textToSpeech, mediaSender);
            textToSpeech.AddAndStartText("Hello World!");

            Console.WriteLine("The text is converted to speech and being played into the call.");
        }

        /// <summary>
        /// Logs every call state change and starts the text-to-speech playback
        /// when the call is answered.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Carries the new call state.</param>
        static void call_CallStateChanged(object sender, CallStateChangedArgs e)
        {
            Console.WriteLine("Call state: {0}.", e.State);

            if (e.State == CallState.Answered)
            {
                SetupTextToSpeech();
            }
        }
    }
}

Description

I heard about this solution on Facebook, and I thought it was worth sharing my source code to help other developers interested in converting text to speech using C#. Text-to-speech refers to the ability of computers to read text aloud. This functionality is very useful during SIP communication in autodialer or IVR systems. The source code in this snippet is ready for use, so you only need to copy and paste it into Visual Studio, then modify the necessary fields. (Do not forget to add the DLL file that provides the VoIP functionality to your project references: http://www.voip-sip-sdk.com)

This solution assumes that you already have a PBX with some SIP extensions installed. After adding the necessary using directives and creating the media handler objects, you need to specify your PBX and provide the appropriate SIP account details so that your application can register with the phone system. Once you have created all the methods required for SIP calling, you can implement the text-to-speech feature by using the SetupTextToSpeech() method.

Have a good time!
https://snipt.net/embed/abad247013c5969b0b601ff1abfdfcb9/
/raw/abad247013c5969b0b601ff1abfdfcb9/
abad247013c5969b0b601ff1abfdfcb9
csharp
C#
92
2019-08-17T20:40:20
True
False
False
/api/public/snipt/138298/
how-to-implement-text-to-speech-during-a-sip-voice-call-in-c-54b4d951
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><a href="#L-1"> 1</a> <a href="#L-2"> 2</a> <a href="#L-3"> 3</a> <a href="#L-4"> 4</a> <a href="#L-5"> 5</a> <a href="#L-6"> 6</a> <a href="#L-7"> 7</a> <a href="#L-8"> 8</a> <a href="#L-9"> 9</a> <a href="#L-10">10</a> <a href="#L-11">11</a> <a href="#L-12">12</a> <a href="#L-13">13</a> <a href="#L-14">14</a> <a href="#L-15">15</a> <a href="#L-16">16</a> <a href="#L-17">17</a> <a href="#L-18">18</a> <a href="#L-19">19</a> <a href="#L-20">20</a> <a href="#L-21">21</a> <a href="#L-22">22</a> <a href="#L-23">23</a> <a href="#L-24">24</a> <a href="#L-25">25</a> <a href="#L-26">26</a> <a href="#L-27">27</a> <a href="#L-28">28</a> <a href="#L-29">29</a> <a href="#L-30">30</a> <a href="#L-31">31</a> <a href="#L-32">32</a> <a href="#L-33">33</a> <a href="#L-34">34</a> <a href="#L-35">35</a> <a href="#L-36">36</a> <a href="#L-37">37</a> <a href="#L-38">38</a> <a href="#L-39">39</a> <a href="#L-40">40</a> <a href="#L-41">41</a> <a href="#L-42">42</a> <a href="#L-43">43</a> <a href="#L-44">44</a> <a href="#L-45">45</a> <a href="#L-46">46</a> <a href="#L-47">47</a> <a href="#L-48">48</a> <a href="#L-49">49</a> <a href="#L-50">50</a> <a href="#L-51">51</a> <a href="#L-52">52</a> <a href="#L-53">53</a> <a href="#L-54">54</a> <a href="#L-55">55</a> <a href="#L-56">56</a> <a href="#L-57">57</a> <a href="#L-58">58</a> <a href="#L-59">59</a> <a href="#L-60">60</a> <a href="#L-61">61</a> <a href="#L-62">62</a> <a href="#L-63">63</a> <a href="#L-64">64</a> <a href="#L-65">65</a> <a href="#L-66">66</a> <a href="#L-67">67</a> <a href="#L-68">68</a> <a href="#L-69">69</a> <a href="#L-70">70</a> <a href="#L-71">71</a> <a href="#L-72">72</a> <a href="#L-73">73</a> <a href="#L-74">74</a> <a href="#L-75">75</a> <a href="#L-76">76</a> <a href="#L-77">77</a> <a href="#L-78">78</a> <a href="#L-79">79</a> <a href="#L-80">80</a> <a href="#L-81">81</a> <a href="#L-82">82</a> <a href="#L-83">83</a> <a 
href="#L-84">84</a> <a href="#L-85">85</a> <a href="#L-86">86</a> <a href="#L-87">87</a> <a href="#L-88">88</a> <a href="#L-89">89</a> <a href="#L-90">90</a> <a href="#L-91">91</a></pre></div></td><td class="code"><div class="highlight"><pre><span></span><span id="L-1"><a name="L-1"></a><span class="k">using</span> <span class="nn">System</span><span class="p">;</span> </span><span id="L-2"><a name="L-2"></a><span class="k">using</span> <span class="nn">Ozeki.Media.MediaHandlers</span><span class="p">;</span> </span><span id="L-3"><a name="L-3"></a><span class="k">using</span> <span class="nn">Ozeki.VoIP</span><span class="p">;</span> </span><span id="L-4"><a name="L-4"></a><span class="k">using</span> <span class="nn">Ozeki.VoIP.SDK</span><span class="p">;</span> </span><span id="L-5"><a name="L-5"></a> </span><span id="L-6"><a name="L-6"></a><span class="k">namespace</span> <span class="nn">Text_To_Speech</span> </span><span id="L-7"><a name="L-7"></a><span class="p">{</span> </span><span id="L-8"><a name="L-8"></a> <span class="k">class</span> <span class="nc">Program</span> </span><span id="L-9"><a name="L-9"></a> <span class="p">{</span> </span><span id="L-10"><a name="L-10"></a> <span class="k">static</span> <span class="n">ISoftPhone</span> <span class="n">softphone</span><span class="p">;</span> </span><span id="L-11"><a name="L-11"></a> <span class="k">static</span> <span class="n">IPhoneLine</span> <span class="n">phoneLine</span><span class="p">;</span> </span><span id="L-12"><a name="L-12"></a> <span class="k">static</span> <span class="n">IPhoneCall</span> <span class="n">call</span><span class="p">;</span> </span><span id="L-13"><a name="L-13"></a> <span class="k">static</span> <span class="n">MediaConnector</span> <span class="n">connector</span><span class="p">;</span> </span><span id="L-14"><a name="L-14"></a> <span class="k">static</span> <span class="n">PhoneCallAudioSender</span> <span class="n">mediaSender</span><span class="p">;</span> 
</span><span id="L-15"><a name="L-15"></a> </span><span id="L-16"><a name="L-16"></a> <span class="k">private</span> <span class="k">static</span> <span class="k">void</span> <span class="nf">Main</span><span class="p">(</span><span class="kt">string</span><span class="p">[]</span> <span class="n">args</span><span class="p">)</span> </span><span id="L-17"><a name="L-17"></a> <span class="p">{</span> </span><span id="L-18"><a name="L-18"></a> <span class="n">softphone</span> <span class="p">=</span> <span class="n">SoftPhoneFactory</span><span class="p">.</span><span class="n">CreateSoftPhone</span><span class="p">(</span><span class="m">5000</span><span class="p">,</span> <span class="m">10000</span><span class="p">);</span> </span><span id="L-19"><a name="L-19"></a> </span><span id="L-20"><a name="L-20"></a> <span class="kt">var</span> <span class="n">registrationRequired</span> <span class="p">=</span> <span class="k">true</span><span class="p">;</span> </span><span id="L-21"><a name="L-21"></a> <span class="kt">var</span> <span class="n">userName</span> <span class="p">=</span> <span class="s">&quot;858&quot;</span><span class="p">;</span> </span><span id="L-22"><a name="L-22"></a> <span class="kt">var</span> <span class="n">displayName</span> <span class="p">=</span> <span class="s">&quot;858&quot;</span><span class="p">;</span> </span><span id="L-23"><a name="L-23"></a> <span class="kt">var</span> <span class="n">authenticationId</span> <span class="p">=</span> <span class="s">&quot;858&quot;</span><span class="p">;</span> </span><span id="L-24"><a name="L-24"></a> <span class="kt">var</span> <span class="n">registerPassword</span> <span class="p">=</span> <span class="s">&quot;858&quot;</span><span class="p">;</span> </span><span id="L-25"><a name="L-25"></a> <span class="kt">var</span> <span class="n">domainHost</span> <span class="p">=</span> <span class="s">&quot;192.168.115.100&quot;</span><span class="p">;</span> </span><span id="L-26"><a 
name="L-26"></a> <span class="kt">var</span> <span class="n">domainPort</span> <span class="p">=</span> <span class="m">5060</span><span class="p">;</span> </span><span id="L-27"><a name="L-27"></a> </span><span id="L-28"><a name="L-28"></a> <span class="kt">var</span> <span class="n">account</span> <span class="p">=</span> <span class="k">new</span> <span class="n">SIPAccount</span><span class="p">(</span><span class="n">registrationRequired</span><span class="p">,</span> <span class="n">displayName</span><span class="p">,</span> <span class="n">userName</span><span class="p">,</span> <span class="n">authenticationId</span><span class="p">,</span> <span class="n">registerPassword</span><span class="p">,</span> <span class="n">domainHost</span><span class="p">,</span> <span class="n">domainPort</span><span class="p">);</span> </span><span id="L-29"><a name="L-29"></a> </span><span id="L-30"><a name="L-30"></a> <span class="n">RegisterAccount</span><span class="p">(</span><span class="n">account</span><span class="p">);</span> </span><span id="L-31"><a name="L-31"></a> </span><span id="L-32"><a name="L-32"></a> <span class="n">mediaSender</span> <span class="p">=</span> <span class="k">new</span> <span class="n">PhoneCallAudioSender</span><span class="p">();</span> </span><span id="L-33"><a name="L-33"></a> <span class="n">connector</span> <span class="p">=</span> <span class="k">new</span> <span class="n">MediaConnector</span><span class="p">();</span> </span><span id="L-34"><a name="L-34"></a> </span><span id="L-35"><a name="L-35"></a> <span class="n">Console</span><span class="p">.</span><span class="n">ReadLine</span><span class="p">();</span> </span><span id="L-36"><a name="L-36"></a> <span class="p">}</span> </span><span id="L-37"><a name="L-37"></a> </span><span id="L-38"><a name="L-38"></a> <span class="k">static</span> <span class="k">void</span> <span class="nf">RegisterAccount</span><span class="p">(</span><span class="n">SIPAccount</span> <span 
class="n">account</span><span class="p">)</span> </span><span id="L-39"><a name="L-39"></a> <span class="p">{</span> </span><span id="L-40"><a name="L-40"></a> <span class="k">try</span> </span><span id="L-41"><a name="L-41"></a> <span class="p">{</span> </span><span id="L-42"><a name="L-42"></a> <span class="n">phoneLine</span> <span class="p">=</span> <span class="n">softphone</span><span class="p">.</span><span class="n">CreatePhoneLine</span><span class="p">(</span><span class="n">account</span><span class="p">);</span> </span><span id="L-43"><a name="L-43"></a> <span class="n">phoneLine</span><span class="p">.</span><span class="n">RegistrationStateChanged</span> <span class="p">+=</span> <span class="n">line_RegStateChanged</span><span class="p">;</span> </span><span id="L-44"><a name="L-44"></a> <span class="n">softphone</span><span class="p">.</span><span class="n">RegisterPhoneLine</span><span class="p">(</span><span class="n">phoneLine</span><span class="p">);</span> </span><span id="L-45"><a name="L-45"></a> <span class="p">}</span> </span><span id="L-46"><a name="L-46"></a> <span class="k">catch</span> <span class="p">(</span><span class="n">Exception</span> <span class="n">ex</span><span class="p">)</span> </span><span id="L-47"><a name="L-47"></a> <span class="p">{</span> </span><span id="L-48"><a name="L-48"></a> <span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">&quot;Error during SIP registration: &quot;</span> <span class="p">+</span> <span class="n">ex</span><span class="p">);</span> </span><span id="L-49"><a name="L-49"></a> <span class="p">}</span> </span><span id="L-50"><a name="L-50"></a> <span class="p">}</span> </span><span id="L-51"><a name="L-51"></a> </span><span id="L-52"><a name="L-52"></a> <span class="k">static</span> <span class="k">void</span> <span class="nf">line_RegStateChanged</span><span class="p">(</span><span class="kt">object</span> <span 
class="n">sender</span><span class="p">,</span> <span class="n">RegistrationStateChangedArgs</span> <span class="n">e</span><span class="p">)</span> </span><span id="L-53"><a name="L-53"></a> <span class="p">{</span> </span><span id="L-54"><a name="L-54"></a> <span class="k">if</span> <span class="p">(</span><span class="n">e</span><span class="p">.</span><span class="n">State</span> <span class="p">==</span> <span class="n">RegState</span><span class="p">.</span><span class="n">NotRegistered</span> <span class="p">||</span> <span class="n">e</span><span class="p">.</span><span class="n">State</span> <span class="p">==</span> <span class="n">RegState</span><span class="p">.</span><span class="n">Error</span><span class="p">)</span> </span><span id="L-55"><a name="L-55"></a> <span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">&quot;Registration failed!&quot;</span><span class="p">);</span> </span><span id="L-56"><a name="L-56"></a> </span><span id="L-57"><a name="L-57"></a> <span class="k">if</span> <span class="p">(</span><span class="n">e</span><span class="p">.</span><span class="n">State</span> <span class="p">==</span> <span class="n">RegState</span><span class="p">.</span><span class="n">RegistrationSucceeded</span><span class="p">)</span> </span><span id="L-58"><a name="L-58"></a> <span class="p">{</span> </span><span id="L-59"><a name="L-59"></a> <span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">&quot;Registration succeeded - Online!&quot;</span><span class="p">);</span> </span><span id="L-60"><a name="L-60"></a> <span class="n">CreateCall</span><span class="p">();</span> </span><span id="L-61"><a name="L-61"></a> <span class="p">}</span> </span><span id="L-62"><a name="L-62"></a> <span class="p">}</span> </span><span id="L-63"><a name="L-63"></a> </span><span id="L-64"><a name="L-64"></a> <span 
class="k">private</span> <span class="k">static</span> <span class="k">void</span> <span class="nf">CreateCall</span><span class="p">()</span> </span><span id="L-65"><a name="L-65"></a> <span class="p">{</span> </span><span id="L-66"><a name="L-66"></a> <span class="kt">var</span> <span class="n">numberToDial</span> <span class="p">=</span> <span class="s">&quot;853&quot;</span><span class="p">;</span> </span><span id="L-67"><a name="L-67"></a> <span class="n">call</span> <span class="p">=</span> <span class="n">softphone</span><span class="p">.</span><span class="n">CreateCallObject</span><span class="p">(</span><span class="n">phoneLine</span><span class="p">,</span> <span class="n">numberToDial</span><span class="p">);</span> </span><span id="L-68"><a name="L-68"></a> <span class="n">call</span><span class="p">.</span><span class="n">CallStateChanged</span> <span class="p">+=</span> <span class="n">call_CallStateChanged</span><span class="p">;</span> </span><span id="L-69"><a name="L-69"></a> <span class="n">call</span><span class="p">.</span><span class="n">Start</span><span class="p">();</span> </span><span id="L-70"><a name="L-70"></a> <span class="p">}</span> </span><span id="L-71"><a name="L-71"></a> </span><span id="L-72"><a name="L-72"></a> <span class="k">static</span> <span class="k">void</span> <span class="nf">SetupTextToSpeech</span><span class="p">()</span> </span><span id="L-73"><a name="L-73"></a> <span class="p">{</span> </span><span id="L-74"><a name="L-74"></a> <span class="kt">var</span> <span class="n">textToSpeech</span> <span class="p">=</span> <span class="k">new</span> <span class="n">TextToSpeech</span><span class="p">();</span> </span><span id="L-75"><a name="L-75"></a> </span><span id="L-76"><a name="L-76"></a> <span class="n">mediaSender</span><span class="p">.</span><span class="n">AttachToCall</span><span class="p">(</span><span class="n">call</span><span class="p">);</span> </span><span id="L-77"><a name="L-77"></a> <span 
class="n">connector</span><span class="p">.</span><span class="n">Connect</span><span class="p">(</span><span class="n">textToSpeech</span><span class="p">,</span> <span class="n">mediaSender</span><span class="p">);</span> </span><span id="L-78"><a name="L-78"></a> <span class="n">textToSpeech</span><span class="p">.</span><span class="n">AddAndStartText</span><span class="p">(</span><span class="s">&quot;Hello World!&quot;</span><span class="p">);</span> </span><span id="L-79"><a name="L-79"></a> </span><span id="L-80"><a name="L-80"></a> <span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">&quot;The text is converted to speech and being played into the call.&quot;</span><span class="p">);</span> </span><span id="L-81"><a name="L-81"></a> <span class="p">}</span> </span><span id="L-82"><a name="L-82"></a> </span><span id="L-83"><a name="L-83"></a> <span class="k">static</span> <span class="k">void</span> <span class="nf">call_CallStateChanged</span><span class="p">(</span><span class="kt">object</span> <span class="n">sender</span><span class="p">,</span> <span class="n">CallStateChangedArgs</span> <span class="n">e</span><span class="p">)</span> </span><span id="L-84"><a name="L-84"></a> <span class="p">{</span> </span><span id="L-85"><a name="L-85"></a> <span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">&quot;Call state: {0}.&quot;</span><span class="p">,</span> <span class="n">e</span><span class="p">.</span><span class="n">State</span><span class="p">);</span> </span><span id="L-86"><a name="L-86"></a> </span><span id="L-87"><a name="L-87"></a> <span class="k">if</span> <span class="p">(</span><span class="n">e</span><span class="p">.</span><span class="n">State</span> <span class="p">==</span> <span class="n">CallState</span><span class="p">.</span><span class="n">Answered</span><span class="p">)</span> 
</span><span id="L-88"><a name="L-88"></a> <span class="n">SetupTextToSpeech</span><span class="p">();</span> </span><span id="L-89"><a name="L-89"></a> <span class="p">}</span> </span><span id="L-90"><a name="L-90"></a> <span class="p">}</span> </span><span id="L-91"><a name="L-91"></a><span class="p">}</span> </span></pre></div> </td></tr></table>
audio, autodialer, c#, call, convert, csharp, ivr, pbx, phone, recognition, sip, speech, text, text-to-speech, voice, voip