public class CrawlDbReader extends AbstractChecker implements Closeable
| Modifier and Type | Class and Description |
|---|---|
| static class | CrawlDbReader.CrawlDatumCsvOutputFormat |
| static class | CrawlDbReader.CrawlDatumJsonOutputFormat |
| static class | CrawlDbReader.CrawlDbDumpMapper |
| static class | CrawlDbReader.CrawlDbStatMapper |
| static class | CrawlDbReader.CrawlDbStatReducer |
| static class | CrawlDbReader.CrawlDbTopNMapper |
| static class | CrawlDbReader.CrawlDbTopNReducer |
| static class | CrawlDbReader.JsonIndenter |
| Modifier and Type | Field and Description |
|---|---|
| protected String | crawlDb |
Inherited fields: keepClientCnxOpen, stdin, tcpPort, usage

| Constructor and Description |
|---|
| CrawlDbReader() |
| Modifier and Type | Method and Description |
|---|---|
| void | close() |
| CrawlDatum | get(String crawlDb, String url, Configuration config) |
| static void | main(String[] args) |
| protected int | process(String line, StringBuilder output) |
| void | processDumpJob(String crawlDb, String output, Configuration config, String format, String regex, String status, Integer retry, String expr, Float sample) |
| void | processStatJob(String crawlDb, Configuration config, boolean sort) |
| void | processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) |
| Object | query(Map<String,String> args, Configuration conf, String type, String crawlId) |
| void | readUrl(String crawlDb, String url, Configuration config, StringBuilder output) |
| int | run(String[] args) |
getProtocolOutput, parseArgs, processSingle, processStdin, processTCP, run
getConf, setConf
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getConf, setConf
protected String crawlDb
public void close()
close in interface Closeable
close in interface AutoCloseable
public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException, InterruptedException, ClassNotFoundException
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException
Throws: IOException
protected int process(String line, StringBuilder output) throws Exception
process in class AbstractChecker
Throws: Exception
public void readUrl(String crawlDb, String url, Configuration config, StringBuilder output) throws IOException
Throws: IOException
public void processDumpJob(String crawlDb, String output, Configuration config, String format, String regex, String status, Integer retry, String expr, Float sample) throws IOException, ClassNotFoundException, InterruptedException
public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException, ClassNotFoundException, InterruptedException
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, Exception
run in interface Tool
Throws: IOException, InterruptedException, ClassNotFoundException, Exception

Copyright © 2021 The Apache Software Foundation