snipt

Ctrl+h for KB shortcuts
#35060

C#

Lucene Custom Tokenizer

    /// <summary>
    /// A <c>CharTokenizer</c> subclass whose token characters are letters and
    /// digits. This class only supplies the character test; the tokenizing
    /// mechanics themselves are inherited from the base class.
    /// </summary>
    public class AlphaNumbericTokenizer : Lucene.Net.Analysis.CharTokenizer
    {
        /// <summary>Creates a tokenizer that reads from <paramref name="input"/>.</summary>
        /// <param name="input">Text source, passed straight to the base constructor.</param>
        public AlphaNumbericTokenizer(System.IO.TextReader input) : base(input) { }

        /// <summary>Classifies a single character as token content or not.</summary>
        /// <param name="c">The character to classify.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="c"/> is a letter or a decimal digit;
        /// <c>false</c> for anything else.
        /// </returns>
        protected override bool IsTokenChar(char c)
        {
            // Letter-or-digit test, spelled out as its two component checks.
            return char.IsLetter(c) || char.IsDigit(c);
        }
    }
https://snipt.net/embed/075abb8c033d191e3207662c1b3078a3/
https://snipt.net/raw/075abb8c033d191e3207662c1b3078a3/
075abb8c033d191e3207662c1b3078a3
csharp
C#
13
2014-04-16T12:03:20
True
False
/api/public/snipt/35060/
blog-5
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><a href="#L-1"> 1</a> <a href="#L-2"> 2</a> <a href="#L-3"> 3</a> <a href="#L-4"> 4</a> <a href="#L-5"> 5</a> <a href="#L-6"> 6</a> <a href="#L-7"> 7</a> <a href="#L-8"> 8</a> <a href="#L-9"> 9</a> <a href="#L-10">10</a> <a href="#L-11">11</a> <a href="#L-12">12</a></pre></div></td><td class="code"><div class="highlight"><pre><span id="L-1"><a name="L-1"></a> <span class="k">public</span> <span class="k">class</span> <span class="nc">AlphaNumbericTokenizer</span> <span class="p">:</span> <span class="n">Lucene</span><span class="p">.</span><span class="n">Net</span><span class="p">.</span><span class="n">Analysis</span><span class="p">.</span><span class="n">CharTokenizer</span> </span><span id="L-2"><a name="L-2"></a> <span class="p">{</span> </span><span id="L-3"><a name="L-3"></a> <span class="k">public</span> <span class="nf">AlphaNumbericTokenizer</span> <span class="p">(</span><span class="n">System</span><span class="p">.</span><span class="n">IO</span><span class="p">.</span><span class="n">TextReader</span> <span class="n">input</span><span class="p">)</span> </span><span id="L-4"><a name="L-4"></a> <span class="p">:</span> <span class="k">base</span><span class="p">(</span><span class="n">input</span><span class="p">)</span> </span><span id="L-5"><a name="L-5"></a> <span class="p">{</span> </span><span id="L-6"><a name="L-6"></a> <span class="p">}</span> </span><span id="L-7"><a name="L-7"></a> <span class="k">protected</span> <span class="k">override</span> <span class="kt">bool</span> <span class="nf">IsTokenChar</span><span class="p">(</span><span class="kt">char</span> <span class="n">c</span><span class="p">)</span> </span><span id="L-8"><a name="L-8"></a> <span class="p">{</span> </span><span id="L-9"><a name="L-9"></a> <span class="c1">//TODO: Logic for identifying token or token separator</span> </span><span id="L-10"><a name="L-10"></a> <span class="k">return</span> 
<span class="kt">char</span><span class="p">.</span><span class="n">IsLetterOrDigit</span><span class="p">(</span><span class="n">c</span><span class="p">);</span> </span><span id="L-11"><a name="L-11"></a> <span class="p">}</span> </span><span id="L-12"><a name="L-12"></a> <span class="p">}</span> </span></pre></div> </td></tr></table>
lucene.net, tokenizer