<!> Solr1.3

The AnalysisRequestHandler is a RequestHandler that takes documents as input and returns, as output, the tokens produced by analyzing their fields, listed for each stage of the analysis chain.

The input is very similar to UpdateXmlMessages, in that a post can contain one or more <doc> elements, as in:

<docs>
  <doc>
    <field name="employeeId">05991</field>
    <field name="office">Bridgewater</field>
    <field name="skills">Perl</field>
    <field name="skills">Java</field>
  </doc>
  [<doc> ... </doc>[<doc> ... </doc>]]
</docs>

The enclosing docs tag can actually have any name; it need not be docs. This allows one to use the same Solr XML input that is used for indexing. For example:

$ cd example/exampledocs
$ curl 'http://localhost:8983/solr/analysis/document?wt=json&indent=true' --data-binary @monitor.xml -H 'Content-type:text/xml; charset=utf-8'

The output will look something like:

 {
  "responseHeader":{
    "status":0,
      "QTime":8},
    "analysis":{
      "3007WFP":{
        "name":{
          "index":{
            "Dell Widescreen UltraSharp 3007WFP":{
              "org.apache.lucene.analysis.WhitespaceTokenizer":[{
                "text":"Dell",
                "type":"word",
                "start":0,
                "end":4,
                "position":1},
              {
                "text":"Widescreen",
                "type":"word",
                "start":5,
                "end":15,
                "position":2},
              {
                "text":"UltraSharp",
                "type":"word",
                "start":16,
                "end":26,
                "position":3},
              {
                "text":"3007WFP",
                "type":"word",
                "start":27,
                "end":34,
                "position":4}],
              "org.apache.lucene.analysis.StopFilter":[{
                "text":"Dell",
                "type":"word",
                "start":0,
                "end":4,
                "position":1},
              {
                "text":"Widescreen",
                "type":"word",
                "start":5,
                "end":15,
                "position":2},
              {
                "text":"UltraSharp",
                "type":"word",
                "start":16,
                "end":26,
                "position":3},
              {
                "text":"3007WFP",
                "type":"word",
                "start":27,
                "end":34,
                "position":4}],
              "org.apache.solr.analysis.WordDelimiterFilter":[{
                "text":"Dell",
                "type":"word",
                "start":0,
                "end":4,
                "position":1},
              {
                "text":"Widescreen",
                "type":"word",
                "start":5,
                "end":15,
                "position":2},
              {
                "text":"UltraSharp",
                "type":"word",
                "start":16,
                "end":26,
                "position":3},
              {
                "text":"3007",
                "type":"word",
                "start":27,
                "end":31,
                "position":4},
              {
                "text":"WFP",
                "type":"word",
                "start":31,
                "end":34,
                "position":5}],
              "org.apache.lucene.analysis.LowerCaseFilter":[{
                "text":"dell",
                "type":"word",
                "start":0,
                "end":4,
                "position":1},
              {
                "text":"widescreen",
                "type":"word",
                "start":5,
                "end":15,
                "position":2},
              {
                "text":"ultrasharp",
                "type":"word",
                "start":16,
                "end":26,
                "position":3},
              {
                "text":"3007",
                "type":"word",
                "start":27,
                "end":31,
                "position":4},
              {
                "text":"wfp",
                "type":"word",
                "start":31,
                "end":34,
                "position":5}]} } },
[...]
  • No labels