Solr runs as a standalone service inside a servlet container such as Jetty or Tomcat. Indexing with Solr is straightforward: you POST an XML document describing fields and their contents to the Solr server, and Solr adds to, deletes from, or updates the index accordingly. Searching requires nothing more than an HTTP GET request; you then parse the results Solr returns (XML, JSON, and other formats are available) and lay out the page yourself. Solr provides no facilities for building a search UI, but it does ship with an admin interface for inspecting Solr's configuration and runtime state.
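To make the XML-over-HTTP indexing call concrete, here is a minimal sketch using only JDK classes. The host, port, and core name (collection1) are assumptions to adjust for your deployment; a search afterwards is just a GET such as http://localhost:8983/solr/collection1/select?q=title:hello&wt=json.

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

// Sketch: POST an <add> document to Solr's update handler, committing in the
// same request. URL and core name are assumptions, not part of the excerpt.
public class SolrPostExample {
  public static void main(String[] args) throws Exception {
    String xml = "<add><doc>"
        + "<field name=\"id\">1</field>"
        + "<field name=\"title\">hello solr</field>"
        + "</doc></add>";
    post("http://localhost:8983/solr/collection1/update?commit=true", xml);
  }

  static void post(String url, String body) throws Exception {
    HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
    conn.setRequestMethod("POST");
    conn.setDoOutput(true);
    conn.setRequestProperty("Content-Type", "text/xml;charset=UTF-8");
    try (OutputStream out = conn.getOutputStream()) {
      out.write(body.getBytes(StandardCharsets.UTF_8));
    }
    System.out.println("HTTP " + conn.getResponseCode()); // 200 on success
  }
}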
public static void main(String[] args) throws Exception {
  //System.setProperty("solr.solr.home", "../../../example/solr");
  Server server = new Server();
  ServerConnector connector = new ServerConnector(server, new HttpConnectionFactory());
  // Set some timeout options to make debugging easier.
  connector.setIdleTimeout(1000 * 60 * 60);
  connector.setSoLingerTime(-1);
  connector.setPort(8983);
  server.setConnectors(new Connector[] { connector });

  WebAppContext bb = new WebAppContext();
  bb.setServer(server);
  bb.setContextPath("/solr");
  bb.setWar("solr/webapp/web");

  // Standard embedded-Jetty finish (not shown in the original excerpt):
  // register the webapp and block until the server is stopped.
  server.setHandler(bb);
  server.start();
  server.join();
}
/**
 * Jetty HTTP Servlet Server.
 * This class is the main class for the Jetty HTTP Servlet server.
 * It aggregates Connectors (HTTP request receivers) and request Handlers.
 * The server is itself a handler and a ThreadPool. Connectors use the ThreadPool methods
 * to run jobs that will eventually call the handle method.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  if (needsScores) {
    return new SynonymWeight(this, searcher, boost);
  } else {
    // if scores are not needed, let BooleanWeight deal with optimizing that case.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    for (Term term : terms) {
      bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
    }
    return searcher.rewrite(bq.build()).createWeight(searcher, needsScores, boost);
  }
}
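To make the non-scoring rewrite concrete: the SynonymQuery and the BooleanQuery of SHOULD TermQuery clauses it rewrites to match exactly the same documents; they differ only in scoring. A small sketch, with the field and terms invented for illustration:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;

// Sketch: two equivalent match sets; "body"/"car"/"auto" are made-up examples.
static void equivalentForms() {
  Term car = new Term("body", "car");
  Term auto = new Term("body", "auto");

  // Scoring form: both terms are scored as if they were a single term.
  Query synonym = new SynonymQuery(car, auto);

  // The rewrite produced above when needsScores == false: a pure disjunction.
  Query disjunction = new BooleanQuery.Builder()
      .add(new TermQuery(car), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(auto), BooleanClause.Occur.SHOULD)
      .build();
}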
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  final IndexReaderContext context = searcher.getTopReaderContext();
  final TermContext termState;
  if (perReaderTermState == null || perReaderTermState.wasBuiltFor(context) == false) {
    if (needsScores) {
      // make TermQuery single-pass if we don't have a PRTS or if the context differs!
      termState = TermContext.build(context, term);
    } else {
      // do not compute the term state, this will help save seeks in the terms
      // dict on segments that have a cache entry for this query
      termState = null;
    }
  } else {
    // PRTS was pre-built for this IS
    termState = this.perReaderTermState;
  }

  return new TermWeight(searcher, needsScores, boost, termState);
}
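The perReaderTermState branch above pays off when the caller pre-builds the term state once. A minimal sketch, assuming Lucene 6.x and its public TermQuery(Term, TermContext) constructor, of building it and reusing it against the same reader:

import java.io.IOException;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

// Sketch: pre-build per-reader term state so createWeight() takes the
// "PRTS was pre-built for this IS" branch instead of seeking the terms
// dictionary again on every search.
static TermQuery cachedTermQuery(IndexSearcher searcher, Term term) throws IOException {
  IndexReaderContext context = searcher.getTopReaderContext();
  TermContext states = TermContext.build(context, term); // one pass over all segments
  return new TermQuery(term, states); // valid as long as the same reader is searched
}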
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates) throws IOException {
  super(TermQuery.this);
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.needsScores = needsScores;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity(needsScores);

  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (needsScores) {
    termStates.setQuery(this.getQuery().getKeyword());
    collectionStats = searcher.collectionStatistics(term.field());
    termStats = searcher.termStatistics(term, termStates);
  } else {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
    final int maxDoc = searcher.getIndexReader().maxDoc();
    collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
    termStats = new TermStatistics(term.bytes(), maxDoc, -1);
  }
  this.stats = similarity.computeWeight(boost, collectionStats, termStats);
}
  // TODO: should we make this
  // threaded...? the Collector could be sync'd?
  // always use single thread:
  for (LeafReaderContext ctx : leaves) { // search each subreader
    final LeafCollector leafCollector;
    try {
      leafCollector = collector.getLeafCollector(ctx); // 1
    } catch (CollectionTerminatedException e) {
      // there is no doc of interest in this reader context
      // continue with the following leaf
      continue;
    }
    BulkScorer scorer = weight.bulkScorer(ctx); // 2
    if (scorer != null) {
      try {
        scorer.score(leafCollector, ctx.reader().getLiveDocs()); // 3
      } catch (CollectionTerminatedException e) {
        // collection was terminated prematurely
        // continue with the following leaf
      }
    }
  }
}
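Both catch blocks in this loop exist so a collector can end collection early by throwing CollectionTerminatedException. A hypothetical collector (the class name and hit limit are made up for illustration) that exercises both call sites marked // 1 and // 3:

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.SimpleCollector;

// Sketch: stop collecting once maxHits documents have been seen. Throwing
// from doSetNextReader() is caught at call site // 1 (the leaf is skipped);
// throwing from collect() is caught at call site // 3 (the leaf stops early).
public class CountLimitCollector extends SimpleCollector {
  private final int maxHits; // assumption: caller-chosen limit
  private int count;

  public CountLimitCollector(int maxHits) {
    this.maxHits = maxHits;
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) {
    if (count >= maxHits) {
      throw new CollectionTerminatedException(); // caught at // 1
    }
  }

  @Override
  public void collect(int doc) {
    if (++count > maxHits) {
      throw new CollectionTerminatedException(); // caught at // 3
    }
  }

  @Override
  public boolean needsScores() {
    return false; // counting only, so the bulk scorer may skip score computation
  }

  public int getCount() {
    return count;
  }
}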
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final int docBase = context.docBase;
  return new ScorerLeafCollector() {
    @Override
    public void collect(int doc) throws IOException {
      float score = scorer.score();
      /* Document document = context.reader().document(doc); */

      // This collector cannot handle these scores:
      assert score != Float.NEGATIVE_INFINITY;
      assert !Float.isNaN(score);

      totalHits++;
      if (score <= pqTop.score) {
        // Since docs are returned in-order (i.e., increasing doc Id), a document
        // with equal score to pqTop.score cannot compete since HitQueue favors
        // documents with lower doc Ids. Therefore reject those docs too.
        return;
      }
      pqTop.doc = doc + docBase;
      pqTop.score = score;
      pqTop = pq.updateTop();
    }
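For context, a minimal usage sketch, assuming an already-opened IndexSearcher and a parsed Query, showing how this collect() method is normally driven and how docBase turns per-leaf doc IDs into index-wide ones:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;

// Sketch: `searcher` and `query` are assumed to be prepared elsewhere.
static void printTopHits(IndexSearcher searcher, Query query) throws IOException {
  TopScoreDocCollector collector = TopScoreDocCollector.create(10); // keep the 10 best hits
  searcher.search(query, collector); // runs the per-leaf loop shown earlier
  TopDocs topDocs = collector.topDocs();
  for (ScoreDoc hit : topDocs.scoreDocs) {
    System.out.println("doc=" + hit.doc + " score=" + hit.score); // global ID (docBase applied)
  }
}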
/**
 * Optional method, to return a {@link BulkScorer} to
 * score the query and send hits to a {@link Collector}.
 * Only queries that have a different top-level approach
 * need to override this; the default implementation
 * pulls a normal {@link Scorer} and iterates and
 * collects the resulting hits which are not marked as deleted.
 *
 * @param context
 *          the {@link org.apache.lucene.index.LeafReaderContext} for which to return the {@link Scorer}.
 *
 * @return a {@link BulkScorer} which scores documents and
 *         passes them to a collector.
 * @throws IOException if there is a low-level I/O error
 */
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {

  Scorer scorer = scorer(context);
  if (scorer == null) {
    // No docs match
    return null;
  }

  // This impl always scores docs in order, so we can
  // ignore scoreDocsInOrder:
  return new DefaultBulkScorer(scorer);
}
/** Just wraps a Scorer and performs top scoring using it.
 *  @lucene.internal */
protected static class DefaultBulkScorer extends BulkScorer {
  private final Scorer scorer;
  private final DocIdSetIterator iterator;
  private final TwoPhaseIterator twoPhase;

  /** Sole constructor. */
  public DefaultBulkScorer(Scorer scorer) {
    if (scorer == null) {
      throw new NullPointerException();
    }
    this.scorer = scorer;
    this.iterator = scorer.iterator();
    this.twoPhase = scorer.twoPhaseIterator();
  }

  @Override
  public long cost() {
    return iterator.cost();
  }
  @Override
  public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
    collector.setScorer(scorer);
    if (scorer.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
      scoreAll(collector, iterator, twoPhase, acceptDocs);
      return DocIdSetIterator.NO_MORE_DOCS;
    } else {
      int doc = scorer.docID();
      if (doc < min) {
        if (twoPhase == null) {
          doc = iterator.advance(min);
        } else {
          doc = twoPhase.approximation().advance(min);
        }
      }
      return scoreRange(collector, iterator, twoPhase, acceptDocs, doc, max);
    }
  }
  /** Specialized method to bulk-score all hits; we
   *  separate this from {@link #scoreRange} to help out
   *  hotspot.
   *  See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */
  static void scoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs) throws IOException {
    if (twoPhase == null) {
      for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        if (acceptDocs == null || acceptDocs.get(doc)) {
          collector.collect(doc);
        }
      }
    } else {
      // The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirm
      final DocIdSetIterator approximation = twoPhase.approximation();
      for (int doc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) {
        if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
          collector.collect(doc);
        }
      }
    }
  }
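The two branches of scoreAll() mirror the two-phase contract: a cheap approximation proposes candidate documents and matches() confirms each one. A contrived sketch of a custom TwoPhaseIterator, where the even-doc-ID check stands in for a genuinely expensive confirmation such as a phrase query's position check:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;

// Hypothetical example: the approximation accepts every doc the wrapped
// iterator produces; matches() then confirms only even doc IDs. Real queries
// use the same split, exactly as the second branch of scoreAll() assumes.
public class EvenDocsTwoPhase extends TwoPhaseIterator {
  public EvenDocsTwoPhase(DocIdSetIterator approximation) {
    super(approximation);
  }

  @Override
  public boolean matches() throws IOException {
    return approximation.docID() % 2 == 0; // the "expensive" confirmation step
  }

  @Override
  public float matchCost() {
    return 1; // rough per-doc cost estimate of matches(), used for ordering checks
  }
}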