List of Nutch Properties
This list of Nutch configuration properties is intended for development. It includes deprecated properties and properties used only "internally". The list is generated from nutch-default.xml and Java sources.
Legend:
Def.: defined in nutch-default.xml
Used: read or set from Java code
Temp.: temporarily used to pass settings (e.g., from command-line arguments) to map or reduce jobs
Depr.: deprecated
(owr.): overwritten – the property is defined in nutch-default.xml (and may be set in nutch-site.xml), but its value is overwritten by a command-line argument (overwrites that occur only in tests and benchmarks are not counted here)
(test): overwritten only in tests and benchmarks
1.x (master branch) | 2.x (deprecated codebase) | |||||||
Property | Def. | Used | Temp. | Depr. | Def. | Used | Temp. | Depr. |
CrawlDBScanner.regex | X | X | ||||||
CrawlDBScanner.status | X | X | ||||||
anchorIndexingFilter.deduplicate | X | X | (test) | X | X | (test) | ||
arc.url.version | X | |||||||
content.server.port | X | X | ||||||
crawl.gen.delay | X | X | X | X | ||||
crawldb.url.filters | X | X | X | |||||
crawldb.url.normalizers | X | |||||||
crawldb.url.normalizers.scope | X | |||||||
creativecommons.exclude.unlicensed | X | X | ||||||
db.default.fetch.interval | X | X | NUTCH-1409 | X | X | NUTCH-1409 | ||
db.fetch.interval.default | X | X | (test) | X | X | |||
db.fetch.interval.max | X | X | X | X | ||||
db.fetch.retry.max | X | X | X | X | ||||
db.fetch.schedule.adaptive.dec_rate | X | X | X | X | ||||
db.fetch.schedule.adaptive.inc_rate | X | X | X | X | ||||
db.fetch.schedule.adaptive.max_interval | X | X | X | X | ||||
db.fetch.schedule.adaptive.min_interval | X | X | X | X | ||||
db.fetch.schedule.adaptive.sync_delta | X | X | X | X | ||||
db.fetch.schedule.adaptive.sync_delta_rate | X | X | X | X | ||||
db.fetch.schedule.class | X | X | (test) | X | X | |||
db.fetch.schedule.mime.file | X | X | ||||||
db.ignore.external.links | X | X | X | X | ||||
db.ignore.internal.links | X | X | X | |||||
db.injector.overwrite | X | X | ||||||
db.injector.update | X | X | (test) | |||||
db.max.anchor.length | X | X | X | |||||
db.max.fetch.interval | X | NUTCH-1409 | X | NUTCH-1409 | ||||
db.max.inlinks | X | X | X | |||||
db.max.outlinks.per.page | X | X | X | X | ||||
db.parsemeta.to.crawldb | X | X | X | |||||
db.preserve.backup | X | X | ||||||
db.reader.stats.sort | X | X | X | X | ||||
db.reader.topn | X | X | ||||||
db.reader.topn.min | X | X | ||||||
db.score.count.filtered | X | X | X | X | ||||
db.score.injected | X | X | X | X | ||||
db.score.link.external | X | X | X | X | ||||
db.score.link.internal | X | X | X | X | ||||
db.signature.class | X | X | X | X | ||||
db.signature.text_profile.min_token_len | X | X | X | X | ||||
db.signature.text_profile.quant_rate | X | X | X | X | ||||
db.update.additions.allowed | X | X | X | X | ||||
db.update.max.inlinks | X | X | X | X | ||||
db.update.purge.404 | X | X | ||||||
dc.language | X | |||||||
domain.statistics.mode | X | X | X | X | ||||
elastic.index | X | |||||||
elastic.max.bulk.docs | X | |||||||
elastic.max.bulk.size | X | |||||||
encodingdetector.charset.min.confidence | X | X | ||||||
fail.on.job.failure | X | |||||||
fetcher.exit | X | |||||||
fetcher.follow.outlinks.depth | X | X | ||||||
fetcher.follow.outlinks.depth.divisor | X | X | ||||||
fetcher.follow.outlinks.ignore.external | X | X | ||||||
fetcher.follow.outlinks.num.links | X | X | ||||||
fetcher.job.resume | X | |||||||
fetcher.max.crawl.delay | X | X | X | X | ||||
fetcher.max.exceptions.per.queue | X | X | X | |||||
fetcher.parse | X | X | (test) | X | X | |||
fetcher.queue.depth.multiplier | X | X | X | X | ||||
fetcher.queue.mode | X | X | X | X | ||||
fetcher.queue.use.host.settings | X | X | ||||||
fetcher.server.delay | X | X | X | X | ||||
fetcher.server.min.delay | X | X | X | X | ||||
fetcher.store.content | X | X | X | X | ||||
fetcher.threads.fetch | X | X | (owr.) | X | X | |||
fetcher.threads.per.host | NUTCH-1409 | NUTCH-1409 | ||||||
fetcher.threads.per.host.by.ip | X | |||||||
fetcher.threads.per.queue | X | X | X | X | ||||
fetcher.threads.timeout.divisor | X | X | ||||||
fetcher.throughput.threshold.check.after | X | X | (owr.) | X | X | |||
fetcher.throughput.threshold.pages | X | X | X | X | ||||
fetcher.throughput.threshold.retries | X | X | ||||||
fetcher.throughput.threshold.sequence | X | X | ||||||
fetcher.timelimit | X | X | X | X | ||||
fetcher.timelimit.mins | X | X | X | X | ||||
fetcher.verbose | X | X | X | |||||
file.content.ignored | X | X | ||||||
file.content.limit | X | X | (test) | X | X | (test) | ||
file.crawl.parent | X | X | X | X | ||||
free.generator.filter | X | |||||||
free.generator.normalize | X | |||||||
ftp.content.limit | X | X | X | X | ||||
ftp.follow.talk | X | X | X | X | ||||
ftp.keep.connection | X | X | X | X | ||||
ftp.password | X | X | X | X | ||||
ftp.server.timeout | X | X | X | X | ||||
ftp.timeout | X | X | X | X | ||||
ftp.username | X | X | X | X | ||||
generate.batch.id | X | |||||||
generate.count.mode | X | X | X | X | ||||
generate.curTime | X | X | ||||||
generate.filter | X | X | ||||||
generate.max.count | X | X | X | X | ||||
generate.max.distance | X | X | ||||||
generate.max.num.segments | X | |||||||
generate.max.per.host | X | X | NUTCH-1409 | NUTCH-1409 | ||||
generate.max.per.host.by.ip | X | NUTCH-1409 | NUTCH-1409 | |||||
generate.min.interval | X | X | ||||||
generate.min.score | X | X | X | |||||
generate.normalise | X | X | ||||||
generate.partition.seed | X | |||||||
generate.restrict.status | X | |||||||
generate.topN | X | X | ||||||
generate.update.crawldb | X | X | X | X | ||||
hostdb.concurrency.level | X | |||||||
hostdb.lru.size | X | |||||||
htmlparsefilter.order | X | X | X | X | ||||
http.accept | X | X | X | X | ||||
http.accept.language | X | X | X | X | ||||
http.agent.description | X | X | X | X | ||||
http.agent.email | X | X | X | X | ||||
http.agent.host | X | X | X | X | ||||
http.agent.name | X | X | (test) | X | X | (test) | ||
http.agent.url | X | X | X | X | ||||
http.agent.version | X | X | X | X | ||||
http.auth.file | X | X | X | X | ||||
http.auth.verbose | X | X | ||||||
http.content.limit | X | X | X | X | ||||
http.max.delays | X | X | ||||||
http.proxy.host | X | X | (test) | X | X | (test) | ||
http.proxy.password | X | X | X | X | ||||
http.proxy.port | X | X | (test) | X | X | (test) | ||
http.proxy.realm | X | X | X | X | ||||
http.proxy.username | X | X | X | X | ||||
http.redirect.max | X | X | ||||||
http.robots.403.allow | X | X | X | X | ||||
http.robots.agents | X | X | (test) | X | X | (test) | ||
http.timeout | X | X | X | X | ||||
http.useHttp11 | X | X | X | X | ||||
http.verbose | X | X | X | X | ||||
index.content.md | X | X | ||||||
index.db.md | X | X | ||||||
index.parse.md | X | X | (test) | |||||
index.replace.regexp | X | X | ||||||
index.static | X | X | ||||||
indexer.add.domain | X | X | ||||||
indexer.delete | X | |||||||
indexer.delete.robots.noindex | X | |||||||
indexer.max.content.length | X | X | ||||||
indexer.max.title.length | X | X | X | X | (test) | |||
indexer.score.power | X | X | X | X | ||||
indexer.skip.notmodified | X | X | ||||||
indexer.url.filters | X | X | X | |||||
indexer.url.normalizers | X | |||||||
indexer.writer.classes | X | X | X | X | ||||
indexingfilter.order | X | X | X | X | ||||
injector.current.time | X | X | X | X | ||||
lang.analyze.max.length | X | X | X | |||||
lang.extraction.policy | X | X | X | X | ||||
lang.identification.only.certain | X | X | X | X | ||||
lang.ngram.max.length | X | |||||||
lang.ngram.min.length | X | |||||||
link.analyze.damping.factor | X | X | ||||||
link.analyze.initial.score | X | X | ||||||
link.analyze.iteration | X | X | ||||||
link.analyze.normalize.score | X | X | ||||||
link.analyze.num.iterations | X | X | ||||||
link.analyze.rank.one | X | X | ||||||
link.delete.gone | X | X | ||||||
link.ignore.internal.domain | X | X | ||||||
link.ignore.internal.host | X | X | ||||||
link.ignore.limit.domain | X | X | ||||||
link.ignore.limit.page | X | X | ||||||
link.loops.depth | X | X | ||||||
link.score.updater.clear.score | X | X | ||||||
linkdb.url.filters | X | X | X | |||||
linkdb.url.normalizer | X | |||||||
linkdb.url.normalizer.scope | X | |||||||
metatag.description | X | |||||||
metatag.keywords | X | |||||||
metatags.names | X | X | (test) | |||||
mime.type.magic | X | X | X | X | ||||
mime.types.file | X | X | X | X | ||||
moreIndexingFilter.indexMimeTypeParts | X | X | (test) | X | X | (test) | ||
moreIndexingFilter.mapMimeTypes | X | X | ||||||
nutch.conf.uuid | X | X | ||||||
parse.filter.urls | X | X | (owr.) | |||||
parse.job.force | X | |||||||
parse.job.resume | X | |||||||
parse.normalize.urls | X | X | (owr.) | |||||
parse.plugin.file | X | X | (test) | X | X | (test) | ||
parser.caching.forbidden.policy | X | X | X | X | ||||
parser.character.encoding.default | X | X | X | X | ||||
parser.fix.embeddedparams | X | |||||||
parser.html.form.use_action | X | X | (test) | X | X | (test) | ||
parser.html.impl | X | X | X | X | ||||
parser.html.outlinks.ignore_tags | X | X | X | X | ||||
parser.skip.truncated | X | X | X | X | ||||
parser.timeout | X | X | X | X | ||||
partition.url.mode | X | X | X | X | ||||
partition.url.seed | X | X | X | |||||
plugin.auto-activation | X | X | X | X | ||||
plugin.excludes | X | X | X | X | ||||
plugin.folders | X | X | X | X | ||||
plugin.includes | X | X | (test) | X | X | (test) | ||
schema.prefix | X | |||||||
scoring.filter.order | X | X | X | X | ||||
segment.dump.dir | X | |||||||
segment.merger.filter | X | X | ||||||
segment.merger.normalizer | X | X | ||||||
segment.merger.segmentName | X | X | ||||||
segment.merger.slice | X | X | ||||||
segment.proxy.port | X | X | ||||||
segment.reader.co | X | X | ||||||
segment.reader.fe | X | X | ||||||
segment.reader.ge | X | X | ||||||
segment.reader.pa | X | X | ||||||
segment.reader.pd | X | X | ||||||
segment.reader.pt | X | X | ||||||
sftp.password | X | |||||||
sftp.port | X | |||||||
sftp.server | X | |||||||
sftp.user | X | |||||||
solr.auth | X | X | ||||||
solr.auth.password | X | |||||||
solr.auth.username | X | |||||||
solr.commit.index | X | X | X | X | ||||
solr.commit.size | X | X | X | X | ||||
solr.mapping.file | X | X | X | X | ||||
solr.params | X | |||||||
solr.server.url | X | X | ||||||
storage.crawl.id | X | X | ||||||
storage.data.store.class | X | X | (test) | |||||
storage.schema.host | X | X | ||||||
storage.schema.webpage | X | X | ||||||
subcollection.default.field | X | |||||||
subcollection.default.fieldname | X | |||||||
subcollections.config | X | X | ||||||
subcollections.xml | X | X | ||||||
tika.config.file | X | |||||||
urlfilter.automaton.file | X | X | X | X | ||||
urlfilter.automaton.rules | X | X | ||||||
urlfilter.domain.file | X | X | X | X | ||||
urlfilter.domain.rules | X | X | ||||||
urlfilter.domainblacklist.file | X | |||||||
urlfilter.domainblacklist.rules | X | |||||||
urlfilter.order | X | X | X | X | ||||
urlfilter.prefix.file | X | X | X | X | ||||
urlfilter.prefix.rules | X | X | ||||||
urlfilter.regex.file | X | X | X | X | ||||
urlfilter.regex.rules | X | X | ||||||
urlfilter.suffix.file | X | X | (test) | X | X | (test) | ||
urlfilter.suffix.rules | X | X | ||||||
urlmeta.tags | X | X | ||||||
urlnormalizer.hosts.file | X | |||||||
urlnormalizer.hosts.rules | X | |||||||
urlnormalizer.loop.count | X | X | X | X | ||||
urlnormalizer.order | X | X | (test) | X | X | (test) | ||
urlnormalizer.regex.file | X | X | X | X | ||||
urlnormalizer.regex.rules | X | X | ||||||
webgraph.url.filters | X | X | X | |||||
webgraph.url.normalizers | X | |||||||
webtable.dump.content | X | |||||||
webtable.dump.headers | X | |||||||
webtable.dump.links | X | |||||||
webtable.dump.text | X | |||||||
webtable.url.regex | X |
back to FrontPage