AnalysisRequestHandler

<!> Solr1.3

The AnalysisRequestHandler is a RequestHandler that takes documents as input and returns, for each field of each document, the tokens produced by analysis.

It is available via https://issues.apache.org/jira/browse/SOLR-477

Input is very similar to UpdateXmlMessages in that a post can contain one or more <doc> elements, as in:

<docs>
  <doc>
    <field name="employeeId">05991</field>
    <field name="office">Bridgewater</field>
    <field name="skills">Perl</field>
    <field name="skills">Java</field>
  </doc>
  [<doc> ... </doc>[<doc> ... </doc>]]
</docs>

The name of the enclosing docs tag can actually be anything; it need not be docs. In fact, you could send an <add> message to the AnalysisRequestHandler and it should work just fine.

The output will look something like:

<?xml version="1.0" encoding="UTF-8"?>
<response>
  <lst name="responseHeader">
    <int name="status">0</int>
    <int name="QTime">1</int>
  </lst>
  <str name="WARNING">This response format is experimental. It is likely to change in the future.</str>
  <lst name="response">
    <!-- The name attribute contains the doc id of the document that was analyzed -->
    <lst name="TWINX2048-3200PRO">
      <!-- The name attribute contains the name of the Field -->
      <lst name="id">
        <!-- The token info -->
        <lst name="token">
          <str name="value">TWINX2048-3200PRO</str>
          <int name="start">0</int>
          <int name="end">17</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
      </lst>
      <lst name="name">
        <lst name="token">
          <str name="value">corsair</str>
          <int name="start">0</int>
          <int name="end">7</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">xms</str>
          <int name="start">9</int>
          <int name="end">12</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">2</str>
          <int name="start">13</int>
          <int name="end">14</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">gb</str>
          <int name="start">14</int>
          <int name="end">16</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">2</str>
          <int name="start">18</int>
          <int name="end">19</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">x</str>
          <int name="start">20</int>
          <int name="end">21</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">1</str>
          <int name="start">22</int>
          <int name="end">23</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">gb</str>
          <int name="start">23</int>
          <int name="end">25</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">184</str>
          <int name="start">27</int>
          <int name="end">30</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">pin</str>
          <int name="start">31</int>
          <int name="end">34</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">ddr</str>
          <int name="start">35</int>
          <int name="end">38</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">sdram</str>
          <int name="start">39</int>
          <int name="end">44</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">unbuff</str>
          <int name="start">45</int>
          <int name="end">55</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">ddr</str>
          <int name="start">56</int>
          <int name="end">59</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">400</str>
          <int name="start">60</int>
          <int name="end">63</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">pc</str>
          <int name="start">65</int>
          <int name="end">67</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">3200</str>
          <int name="start">68</int>
          <int name="end">72</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">dual</str>
          <int name="start">74</int>
          <int name="end">78</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">channel</str>
          <int name="start">79</int>
          <int name="end">86</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">kit</str>
          <int name="start">87</int>
          <int name="end">90</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">system</str>
          <int name="start">91</int>
          <int name="end">97</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">memori</str>
          <int name="start">98</int>
          <int name="end">104</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
        <lst name="token">
          <str name="value">retail</str>
          <int name="start">107</int>
          <int name="end">113</int>
          <int name="posInc">1</int>
          <str name="type">word</str>
        </lst>
      </lst>
    </lst>
  </lst>
</response>


See the inline comments in the sample output above for an explanation of each level of the response.


CategorySolrRequestHandler

last edited 2008-02-20 23:15:57 by GrantIngersoll