mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-01-18 12:11:39 +00:00
add http search result highlight
This commit is contained in:
parent
2f8842a18d
commit
c0b383a7b7
@ -117,7 +117,8 @@ search_by_sphinx(Keyword, Page) ->
|
||||
[US, TotalFound, CostTime, DBTime div 1000])),
|
||||
Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds, db ~f seconds</h4>",
|
||||
[Keyword, TotalFound, CostTime / 1000, DBTime / 1000 / 1000]),
|
||||
BodyList = format_search_result(Rets),
|
||||
HRets = highlight_search_result(Keyword, Rets),
|
||||
BodyList = format_search_result(HRets),
|
||||
Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]),
|
||||
Tip ++ Body ++ append_page_nav(Keyword, Page, TotalFound).
|
||||
|
||||
@ -154,10 +155,10 @@ format_one_result({multi, Hash, {Name, Files}, Announce, CTime}, ShowAll) ->
|
||||
format_one_result(Hash, Name, Files, Announce, CTime, ShowAll).
|
||||
|
||||
format_one_result(Hash, Name, Files, Announce, CTime, ShowAll) ->
|
||||
SortedFiles = http_common:sort_file_by_size(Files),
|
||||
%SortedFiles = http_common:sort_file_by_size(Files),
|
||||
?TEXT("<li><p class=\"search-title\">
|
||||
<a target='_blank' href=\"/e/http_handler:index?q=~s\">~s</a></p><ul>~s</ul>",
|
||||
[Hash, Name, format_files(SortedFiles, ShowAll)]) ++
|
||||
[Hash, Name, format_files(Files, ShowAll)]) ++
|
||||
?TEXT("<p class=\"search-detail\">Index at: ~s | File count: ~p | Query count: ~p | Total Size: ~s
|
||||
<a href=\"~s\" class=\"download-tip\"> Download</a></p>",
|
||||
[format_time_string(CTime), length(Files), Announce, size_string(http_common:total_size(Files)), format_magnet(Hash)]).
|
||||
@ -211,3 +212,15 @@ format_time_string(Secs) ->
|
||||
%% Format a timestamp given in seconds as a "Y-MM-DD" style date string.
%% The seconds are converted with time_util:seconds_to_local_time/1 and only
%% the date part of its result is used; the time part is discarded.
%% NOTE(review): the ~2..0b directives presumably zero-pad month and day,
%% assuming ?TEXT forwards to an io_lib:format-style formatter -- confirm.
format_date_string(Secs) ->
    {{Year, Month, Day}, _TimeOfDay} = time_util:seconds_to_local_time(Secs),
    ?TEXT("~b-~2..0b-~2..0b", [Year, Month, Day]).
|
||||
|
||||
%% Highlight every entry of a search result list.
%% Each result is passed, together with the search key, through
%% highlight_one_result/2; the output list keeps the input order.
highlight_search_result(Key, RetList) ->
    Highlight = fun(Result) -> highlight_one_result(Key, Result) end,
    lists:map(Highlight, RetList).
|
||||
|
||||
%% Highlight a single search result.
%%   {single, ...} - only the torrent name is replaced by its highlighted form.
%%   {multi, ...}  - the name and the file list are both replaced, via
%%                   sphinx_search:highlight_title/2 and highlight_files/2.
%% All other tuple fields are passed through untouched.
highlight_one_result(Key, {single, Hash, {Name, Length}, Announce, CTime}) ->
    Title = sphinx_search:highlight_title(Key, Name),
    {single, Hash, {Title, Length}, Announce, CTime};
highlight_one_result(Key, {multi, Hash, {Name, Files}, Announce, CTime}) ->
    Title = sphinx_search:highlight_title(Key, Name),
    FileList = sphinx_search:highlight_files(Key, Files),
    {multi, Hash, {Title, FileList}, Announce, CTime}.
|
||||
|
||||
|
74
src/http_front/sphinx_excerpt.erl
Normal file
74
src/http_front/sphinx_excerpt.erl
Normal file
@ -0,0 +1,74 @@
|
||||
%%
|
||||
%% sphinx_excerpt.erl
|
||||
%% Kevin Lynx
|
||||
%% 08.24.2013
|
||||
%%
|
||||
-module(sphinx_excerpt).
|
||||
-export([build_excerpt/5, build_excerpt/3]).
|
||||
-compile(export_all).
|
||||
|
||||
%% Convenience form of build_excerpt/5 that talks to the server on
%% localhost, port 9312.
build_excerpt(Key, Docs, Index) ->
    Host = localhost,
    Port = 9312,
    build_excerpt(Host, Port, Key, Docs, Index).
|
||||
|
||||
%% Build excerpts for Docs against Index using the search key Key.
%%   IP, Port - address handed to connect/2.
%%   Key      - search keywords (binary).
%%   Docs     - list of documents to excerpt (see do_build_excerpt/4).
%%   Index    - index name (binary).
%% Returns whatever do_build_excerpt/4 returns on a successful connection,
%% or the non-{ok, _} value returned by connect/2 otherwise.
%% The socket is closed in an `after' section so it is released even when
%% the request or response handling raises; callers wrap this function in
%% `catch', so without this the socket would stay open in a live process.
build_excerpt(IP, Port, Key, Docs, Index)
        when is_binary(Key), is_binary(Index), is_list(Docs) ->
    case connect(IP, Port) of
        {ok, Sock} ->
            try
                do_build_excerpt(Sock, Key, Docs, Index)
            after
                %% Never let a failing close mask the real result/exception.
                catch gen_tcp:close(Sock)
            end;
        Error ->
            Error
    end.
|
||||
|
||||
%% Encode an excerpt request for Docs, write it to Sock and return the
%% parsed reply (see parse_excerpt_res/2).
%% The request body is a fixed sequence of option fields followed by the
%% document count and the documents themselves; the field order below must
%% not be changed.
%% NOTE(review): the 16-bit values 1 and 259 written ahead of the body are
%% presumably the searchd command id and command version -- confirm against
%% the Sphinx protocol.
do_build_excerpt(Sock, Key, Docs, Index) ->
    Flags = 257, % 1 | 256; the allow_empty flag does not work
    Options = [
        {32, 0},
        {32, Flags},
        {string, Index},
        {string, Key},
        {string, <<"<span class='highlight'>">>}, % before match
        {string, <<"</span>">>},                  % after match
        {string, <<"...">>},                      % chunk separator
        {32, 256},                                % limit
        {32, 5},                                  % around
        {32, 0},                                  % limit passages
        {32, 0},                                  % limit words
        {32, 1},                                  % start page id
        {string, <<"index">>},                    % html strip mode
        {string, <<"none">>}                      % passage boundary
    ],
    DocFields = [{string, Doc} || Doc <- Docs],
    Request = Options ++ [{32, length(Docs)} | DocFields],
    {Bytes, Size} = giza_protocol:commands_to_bytes(Request),
    giza_protocol:write_number(Sock, 1, 16),
    giza_protocol:write_number(Sock, 259, 16),
    giza_protocol:write_number(Sock, Size, 32),
    gen_tcp:send(Sock, Bytes),
    parse_excerpt_res(Sock, Docs).
|
||||
|
||||
%% Read the reply to an excerpt request from Sock.
%% An 8-byte header is read first: two 16-bit fields that are ignored and a
%% 32-bit length that must be positive (anything else crashes with badmatch).
%% Then one string is read per document, in document order, through
%% read_string_res/2.
parse_excerpt_res(Sock, Docs) ->
    {ok, <<_:16, _:16, RespLen:32>>} = gen_tcp:recv(Sock, 8),
    true = RespLen > 0,
    [read_string_res(Sock, OrigDoc) || OrigDoc <- Docs].
|
||||
|
||||
%% Read one length-prefixed string from Sock and compare its size with the
%% original document Doc.
%% A reply of exactly the same byte size is reported as <<>>; any other
%% reply is returned as is.
%% NOTE(review): the same-size check presumably detects "nothing was
%% highlighted" (the server echoing the document back) -- confirm.
read_string_res(Sock, Doc) ->
    Excerpt = giza_protocol:read_lp_string(Sock),
    case byte_size(Excerpt) =/= byte_size(Doc) of
        true -> Excerpt;
        false -> <<>>
    end.
|
||||
|
||||
%% Open a passive, raw, binary TCP connection to Host:Port and perform the
%% initial handshake: read 4 bytes from the server, then write the number 1
%% as a 32-bit value.
%% Returns {ok, Sock} on success and the atom `error' when the TCP connect
%% fails. A failed handshake read still raises {badmatch, RecvResult} as
%% before, but the socket is now closed first so it is not leaked in a
%% caller that catches the exception.
connect(Host, Port) ->
    Opts = [binary, {packet, raw}, {active, false}],
    case gen_tcp:connect(Host, Port, Opts) of
        {ok, Sock} ->
            case gen_tcp:recv(Sock, 4) of
                {ok, _RawVersion} ->
                    giza_protocol:write_number(Sock, 1, 32),
                    {ok, Sock};
                RecvFailure ->
                    gen_tcp:close(Sock),
                    erlang:error({badmatch, RecvFailure})
            end;
        _ ->
            error
    end.
|
||||
%
|
||||
%% Manual smoke test: requests excerpts for two sample documents from the
%% "xml" index on localhost:9312 using the key "avi hi".
test() ->
    Key = <<"avi hi">>,
    Docs = [<<"hello">>, <<"hi, a a a hello avi world">>],
    build_excerpt(localhost, 9312, Key, Docs, <<"xml">>).
|
||||
|
@ -5,7 +5,7 @@
|
||||
%%
|
||||
-module(sphinx_search).
|
||||
-include("vlog.hrl").
|
||||
-export([search/4, search_hash/3]).
|
||||
-export([search/4, search_hash/3, highlight_title/2, highlight_files/2]).
|
||||
-define(PORT, 9312).
|
||||
-define(INDEX, "xml").
|
||||
|
||||
@ -48,5 +48,41 @@ translate_hash({_DocID, Item}) ->
|
||||
40 = length(Hash),
|
||||
Hash.
|
||||
|
||||
%% Return Name with the search keywords highlighted, or Name unchanged when
%% no highlighted form is available.
%%   Key  - search keywords (string).
%%   Name - title to highlight (string).
%% Highlighting is best effort: an exception, an empty excerpt, or any
%% unexpected return value from sphinx_excerpt:build_excerpt/3 (for example
%% the bare atom `error' it passes through when the connection fails) is
%% logged where applicable and the plain Name is returned. Previously an
%% unexpected return value matched no clause and crashed the caller.
highlight_title(Key, Name) when is_list(Name) ->
    Excerpt = case catch sphinx_excerpt:build_excerpt(list_to_binary(Key),
                                                      [list_to_binary(Name)],
                                                      list_to_binary(?INDEX)) of
        {'EXIT', Reason} ->
            ?E(?FMT("highlight_title ~p", [Reason])),
            <<>>;
        [Ret] when is_binary(Ret) ->
            Ret;
        Other ->
            ?E(?FMT("highlight_title unexpected result ~p", [Other])),
            <<>>
    end,
    case Excerpt of
        <<>> -> Name;
        _ -> binary_to_list(Excerpt)
    end.
|
||||
|
||||
%% Highlight the search keywords in a list of {FileName, Length} pairs.
%% Files whose name got a highlighted excerpt are placed before the files
%% that did not; the Length of every file is kept.
%% Highlighting is best effort: if building the excerpts raises, or yields
%% anything other than one binary per file (for example `error' atoms when
%% the connection fails), the problem is logged and Files is returned
%% unchanged. Previously such a result crashed outside the `catch'.
highlight_files(Key, Files) when is_list(Files) ->
    {Names, Lens} = lists:unzip(Files),
    BNames = [list_to_binary(Name) || Name <- Names],
    case catch build_file_excerpts(list_to_binary(Key), BNames, 800) of
        {'EXIT', Reason} ->
            ?E(?FMT("highlight_files ~p", [Reason])),
            Files;
        Rets ->
            case is_excerpt_list(Rets, length(Names)) of
                true ->
                    merge_file_excerpts(Rets, Names, Lens);
                false ->
                    ?E(?FMT("highlight_files unexpected result ~p", [Rets])),
                    Files
            end
    end.

%% True when Rets is a list of exactly Count binaries.
is_excerpt_list(Rets, Count) when is_list(Rets), length(Rets) =:= Count ->
    lists:all(fun erlang:is_binary/1, Rets);
is_excerpt_list(_Rets, _Count) ->
    false.

%% Pair every excerpt with its file. An empty excerpt means "not
%% highlighted" and keeps the original name. Highlighted files come first;
%% within each group the fold leaves the files in reverse input order, as
%% the original implementation did.
merge_file_excerpts(Excerpts, Names, Lens) ->
    {Highlighted, Plain} = lists:foldl(
        fun({<<>>, Name, Len}, {HitAcc, MissAcc}) ->
                {HitAcc, [{Name, Len} | MissAcc]};
           ({Excerpt, _Name, Len}, {HitAcc, MissAcc}) ->
                {[{binary_to_list(Excerpt), Len} | HitAcc], MissAcc}
        end, {[], []}, lists:zip3(Excerpts, Names, Lens)),
    Highlighted ++ Plain.
|
||||
|
||||
%% Build excerpts for a list of file names in batches of at most Batch
%% names, because too many files in one request cause an error.
%%   BKey   - search keywords (binary).
%%   BNames - file names (binaries).
%%   Batch  - maximum number of names per request (positive integer).
%% Returns the excerpts of all batches concatenated in input order.
%% No request is sent for an empty batch: the previous `div Batch + 1'
%% count issued an extra, empty request when the number of names was 0 or
%% an exact multiple of Batch. A non-list result from a batch raises so the
%% caller's `catch' can fall back, instead of leaking into the result list.
build_file_excerpts(_BKey, [], _Batch) ->
    [];
build_file_excerpts(BKey, BNames, Batch) when is_integer(Batch), Batch > 0 ->
    BIndex = list_to_binary(?INDEX),
    lists:append([fetch_batch_excerpts(BKey, Chunk, BIndex) ||
                  Chunk <- split_batches(BNames, Batch)]).

%% Split a list into consecutive chunks of at most Batch elements.
split_batches([], _Batch) ->
    [];
split_batches(Names, Batch) ->
    Chunk = lists:sublist(Names, Batch),
    Rest = lists:nthtail(length(Chunk), Names),
    [Chunk | split_batches(Rest, Batch)].

%% Request the excerpts for one batch; anything but a list is a failure.
fetch_batch_excerpts(BKey, Chunk, BIndex) ->
    case sphinx_excerpt:build_excerpt(BKey, Chunk, BIndex) of
        Excerpts when is_list(Excerpts) ->
            Excerpts;
        Failure ->
            erlang:error({build_excerpt_failed, Failure})
    end.
|
||||
|
||||
|
@ -44,6 +44,10 @@ span.file-size {
|
||||
p.page-nav {
|
||||
text-align:center;
|
||||
}
|
||||
.highlight {
|
||||
color:blue;
|
||||
background:yellow;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
Loading…
Reference in New Issue
Block a user