org.apache.lucene.analysis.cn.smart.SentenceTokenizer.incrementToken()
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
termAtt.setEmpty().append(buffer);
offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
typeAtt.setType("sentence");
org.apache.solr.handler.AnalysisRequestHandlerBase.analyzeValue(String, AnalysisContext)
TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value)));
List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
for (final AttributeSource tok : tokens) {
tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
}
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
listBasedTokenStream = new ListBasedTokenStream(tokens);
}
org.apache.solr.handler.AnalysisRequestHandlerBase.analyzeTokenStream(TokenStream)
/**
 * Consumes the given token stream and captures one attribute snapshot per token.
 *
 * <p>The stream is driven through the full consumer workflow: {@code reset()},
 * {@code incrementToken()} until it returns {@code false}, {@code end()} and finally
 * {@code close()}. For every token the running position (the sum of the position
 * increments seen so far) is stored on the stream's {@link TokenTrackingAttribute}
 * before the attributes are cloned into the result list.
 *
 * @param tokenStream the stream to consume; it is ended and closed before this method returns
 * @return the cloned attribute states, in the order the tokens were produced
 * @throws RuntimeException wrapping any {@link IOException} raised while resetting,
 *         iterating over or ending the stream
 */
private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
  final List<AttributeSource> tokens = new ArrayList<AttributeSource>();
  final PositionIncrementAttribute posIncrAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  final TokenTrackingAttribute trackerAtt = tokenStream.addAttribute(TokenTrackingAttribute.class);
  // for backwards compatibility, add all "common" attributes
  tokenStream.addAttribute(OffsetAttribute.class);
  tokenStream.addAttribute(TypeAttribute.class);
  try {
    tokenStream.reset();
    int position = 0;
    while (tokenStream.incrementToken()) {
      position += posIncrAtt.getPositionIncrement();
      trackerAtt.setActPosition(position);
      // Clone, because the stream reuses the same attribute instances for every token.
      tokens.add(tokenStream.cloneAttributes());
    }
    // Signal end-of-stream so the chain can perform its end-of-stream operations.
    tokenStream.end();
  } catch (IOException ioe) {
    throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
  } finally {
    try {
      // Always release the stream's resources, even when iteration failed.
      tokenStream.close();
    } catch (IOException ignored) {
      // Best-effort close: the tokens are already captured (or a more relevant
      // exception is already propagating), so a close failure must not mask it.
    }
  }
  return tokens;
}
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
termAtt.setEmpty().append(buffer);
offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
typeAtt.setType("sentence");
org.apache.solr.handler.AnalysisRequestHandlerBase.analyzeValue(String, AnalysisContext)
TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value)));
List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
for (final AttributeSource tok : tokens) {
tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
}
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
listBasedTokenStream = new ListBasedTokenStream(tokens);
}
org.apache.solr.handler.AnalysisRequestHandlerBase.analyzeTokenStream(TokenStream)
/**
 * Consumes the given token stream and captures one attribute snapshot per token.
 *
 * <p>The stream is driven through the full consumer workflow: {@code reset()},
 * {@code incrementToken()} until it returns {@code false}, {@code end()} and finally
 * {@code close()}. For every token the running position (the sum of the position
 * increments seen so far) is stored on the stream's {@link TokenTrackingAttribute}
 * before the attributes are cloned into the result list.
 *
 * @param tokenStream the stream to consume; it is ended and closed before this method returns
 * @return the cloned attribute states, in the order the tokens were produced
 * @throws RuntimeException wrapping any {@link IOException} raised while resetting,
 *         iterating over or ending the stream
 */
private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
  final List<AttributeSource> tokens = new ArrayList<AttributeSource>();
  final PositionIncrementAttribute posIncrAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  final TokenTrackingAttribute trackerAtt = tokenStream.addAttribute(TokenTrackingAttribute.class);
  // for backwards compatibility, add all "common" attributes
  tokenStream.addAttribute(OffsetAttribute.class);
  tokenStream.addAttribute(TypeAttribute.class);
  try {
    tokenStream.reset();
    int position = 0;
    while (tokenStream.incrementToken()) {
      position += posIncrAtt.getPositionIncrement();
      trackerAtt.setActPosition(position);
      // Clone, because the stream reuses the same attribute instances for every token.
      tokens.add(tokenStream.cloneAttributes());
    }
    // Signal end-of-stream so the chain can perform its end-of-stream operations.
    tokenStream.end();
  } catch (IOException ioe) {
    throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
  } finally {
    try {
      // Always release the stream's resources, even when iteration failed.
      tokenStream.close();
    } catch (IOException ignored) {
      // Best-effort close: the tokens are already captured (or a more relevant
      // exception is already propagating), so a close failure must not mask it.
    }
  }
  return tokens;
}
No comments:
Post a Comment