The AnalysisRequestHandler is a RequestHandler designed to take documents as input and return, as output, the tokens produced for each field of each document.
It is available via
https://issues.apache.org/jira/browse/SOLR-477
Input is very similar to UpdateXmlMessages in that a post can contain one or more <doc> elements, as in:
<docs>
<doc>
<field name="employeeId">05991</field>
<field name="office">Bridgewater</field>
<field name="skills">Perl</field>
<field name="skills">Java</field>
</doc>
[<doc> ... </doc>[<doc> ... </doc>]]
</docs>
The outer tag does not have to be named docs; any name will do. In fact, you could send an <add> to the AnalysisRequestHandler and it should work just fine.
The output will look something like:
<?xml version="1.0" encoding="UTF-8"?>
<response>
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">1</int>
</lst>
<str name="WARNING">This response format is experimental. It is likely to change in the future.</str>
<lst name="response">
<!-- The name attribute contains the doc id of the document indexed -->
<lst name="TWINX2048-3200PRO">
<!-- The name attribute contains the name of the Field -->
<lst name="id">
<!-- The token info -->
<lst name="token">
<str name="value">TWINX2048-3200PRO</str>
<int name="start">0</int>
<int name="end">17</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
</lst>
<lst name="name">
<lst name="token">
<str name="value">corsair</str>
<int name="start">0</int>
<int name="end">7</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">xms</str>
<int name="start">9</int>
<int name="end">12</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">2</str>
<int name="start">13</int>
<int name="end">14</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">gb</str>
<int name="start">14</int>
<int name="end">16</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">2</str>
<int name="start">18</int>
<int name="end">19</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">x</str>
<int name="start">20</int>
<int name="end">21</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">1</str>
<int name="start">22</int>
<int name="end">23</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">gb</str>
<int name="start">23</int>
<int name="end">25</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">184</str>
<int name="start">27</int>
<int name="end">30</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">pin</str>
<int name="start">31</int>
<int name="end">34</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">ddr</str>
<int name="start">35</int>
<int name="end">38</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">sdram</str>
<int name="start">39</int>
<int name="end">44</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">unbuff</str>
<int name="start">45</int>
<int name="end">55</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">ddr</str>
<int name="start">56</int>
<int name="end">59</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">400</str>
<int name="start">60</int>
<int name="end">63</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">pc</str>
<int name="start">65</int>
<int name="end">67</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">3200</str>
<int name="start">68</int>
<int name="end">72</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">dual</str>
<int name="start">74</int>
<int name="end">78</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">channel</str>
<int name="start">79</int>
<int name="end">86</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">kit</str>
<int name="start">87</int>
<int name="end">90</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">system</str>
<int name="start">91</int>
<int name="end">97</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">memori</str>
<int name="start">98</int>
<int name="end">104</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
<lst name="token">
<str name="value">retail</str>
<int name="start">107</int>
<int name="end">113</int>
<int name="posInc">1</int>
<str name="type">word</str>
</lst>
</lst>
</lst>
</lst>
</response>
See the inline comments in the output above for an explanation of each nesting level (document id, field name, token info).