MIT Information Extraction provides free state-of-the-art information extraction tools. The current release includes tools for performing named entity extraction and binary relation detection as well as tools for training custom extractors and relation detectors.
Tika supports MITIE through a runtime binding implemented by the class org.apache.tika.parser.ner.mitie.MITIENERecogniser.
git clone https://github.com/manalishah/mitie-resources
cd mitie-resources
# absolute path to mitie-resources folder
export NER_RES=$PWD
chmod a+x install.sh
./install.sh
For running MITIE, the following steps are essential:
export TIKA_APP={your/path/to/tika-app}/target/tika-app-1.13-SNAPSHOT.jar
java -Djava.library.path=$NER_RES/MITIE/mitielib \
  -Dner.mitie.model=$NER_RES/MITIE/MITIE-models/english/ner_model.dat \
  -Dner.impl.class=org.apache.tika.parser.ner.mitie.MITIENERecogniser \
  -classpath $NER_RES/MITIE/mitielib/javamitie.jar:$TIKA_APP \
  org.apache.tika.cli.TikaCLI --config=$NER_RES/tika-config.xml -m $NER_RES/sample.txt
export TIKA_APP={your/path/to/tika-app}/target/tika-app-1.13-SNAPSHOT.jar
java -Dner.mitie.model=$NER_RES/MITIE/MITIE-models/english/ner_model.dat \
  -Dner.impl.class=org.apache.tika.parser.ner.mitie.MITIENERecogniser \
  -classpath $NER_RES/MITIE/mitielib/javamitie.jar:$TIKA_APP \
  org.apache.tika.cli.TikaCLI --config=$NER_RES/tika-config.xml -m $NER_RES/sample.txt
Content-Length: 63
Content-Type: text/plain
NER_LOCATION: Los Angeles
NER_LOCATION: California
X-Parsed-By: org.apache.tika.parser.CompositeParser
X-Parsed-By: org.apache.tika.parser.ner.NamedEntityParser
resourceName: sample.txt
export TIKA_SERVER={your/path/to/tika-server}/target/tika-server-1.13-SNAPSHOT.jar
java -Djava.library.path=$NER_RES/MITIE/mitielib \
  -Dner.mitie.model=$NER_RES/MITIE/MITIE-models/english/ner_model.dat \
  -Dner.impl.class=org.apache.tika.parser.ner.mitie.MITIENERecogniser \
  -classpath $NER_RES/MITIE/mitielib/javamitie.jar:$TIKA_SERVER \
  org.apache.tika.server.TikaServerCli --config=$NER_RES/tika-config.xml -p 9998
export TIKA_SERVER={your/path/to/tika-server}/target/tika-server-1.13-SNAPSHOT.jar
java -Dner.mitie.model=$NER_RES/MITIE/MITIE-models/english/ner_model.dat \
  -Dner.impl.class=org.apache.tika.parser.ner.mitie.MITIENERecogniser \
  -classpath $NER_RES/MITIE/mitielib/javamitie.jar:$TIKA_SERVER \
  org.apache.tika.server.TikaServerCli --config=$NER_RES/tika-config.xml -p 9998
curl -T $NER_RES/sample.txt http://localhost:9998/meta -H "Accept: application/json" |
{ "Content-Type":"text/plain", "NER_LOCATION":["Los Angeles","California"], "X-Parsed-By":["org.apache.tika.parser.CompositeParser","org.apache.tika.parser.ner.NamedEntityParser"], "language":"sl" } |