add http search result highlight

This commit is contained in:
Kevin Lynx 2013-08-24 16:42:51 +08:00
parent 2f8842a18d
commit c0b383a7b7
4 changed files with 131 additions and 4 deletions

View File

@ -117,7 +117,8 @@ search_by_sphinx(Keyword, Page) ->
[US, TotalFound, CostTime, DBTime div 1000])), [US, TotalFound, CostTime, DBTime div 1000])),
Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds, db ~f seconds</h4>", Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds, db ~f seconds</h4>",
[Keyword, TotalFound, CostTime / 1000, DBTime / 1000 / 1000]), [Keyword, TotalFound, CostTime / 1000, DBTime / 1000 / 1000]),
BodyList = format_search_result(Rets), HRets = highlight_search_result(Keyword, Rets),
BodyList = format_search_result(HRets),
Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]), Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]),
Tip ++ Body ++ append_page_nav(Keyword, Page, TotalFound). Tip ++ Body ++ append_page_nav(Keyword, Page, TotalFound).
@ -154,10 +155,10 @@ format_one_result({multi, Hash, {Name, Files}, Announce, CTime}, ShowAll) ->
format_one_result(Hash, Name, Files, Announce, CTime, ShowAll). format_one_result(Hash, Name, Files, Announce, CTime, ShowAll).
format_one_result(Hash, Name, Files, Announce, CTime, ShowAll) -> format_one_result(Hash, Name, Files, Announce, CTime, ShowAll) ->
SortedFiles = http_common:sort_file_by_size(Files), %SortedFiles = http_common:sort_file_by_size(Files),
?TEXT("<li><p class=\"search-title\"> ?TEXT("<li><p class=\"search-title\">
<a target='_blank' href=\"/e/http_handler:index?q=~s\">~s</a></p><ul>~s</ul>", <a target='_blank' href=\"/e/http_handler:index?q=~s\">~s</a></p><ul>~s</ul>",
[Hash, Name, format_files(SortedFiles, ShowAll)]) ++ [Hash, Name, format_files(Files, ShowAll)]) ++
?TEXT("<p class=\"search-detail\">Index at: ~s | File count: ~p | Query count: ~p | Total Size: ~s ?TEXT("<p class=\"search-detail\">Index at: ~s | File count: ~p | Query count: ~p | Total Size: ~s
<a href=\"~s\" class=\"download-tip\"> Download</a></p>", <a href=\"~s\" class=\"download-tip\"> Download</a></p>",
[format_time_string(CTime), length(Files), Announce, size_string(http_common:total_size(Files)), format_magnet(Hash)]). [format_time_string(CTime), length(Files), Announce, size_string(http_common:total_size(Files)), format_magnet(Hash)]).
@ -211,3 +212,15 @@ format_time_string(Secs) ->
format_date_string(Secs) -> format_date_string(Secs) ->
{{Y, M, D}, _} = time_util:seconds_to_local_time(Secs), {{Y, M, D}, _} = time_util:seconds_to_local_time(Secs),
?TEXT("~b-~2..0b-~2..0b", [Y, M, D]). ?TEXT("~b-~2..0b-~2..0b", [Y, M, D]).
%% Run every search result through the sphinx excerpt highlighter,
%% keeping the results in their original order.
highlight_search_result(_Key, []) ->
    [];
highlight_search_result(Key, [Result | Rest]) ->
    [highlight_one_result(Key, Result) | highlight_search_result(Key, Rest)].
%% Replace the title (and, for multi-file results, the file names) of one
%% search result with the highlighted versions; all other fields are kept.
highlight_one_result(Key, {single, Hash, {Name, Length}, Announce, CTime}) ->
    Title = sphinx_search:highlight_title(Key, Name),
    {single, Hash, {Title, Length}, Announce, CTime};
highlight_one_result(Key, {multi, Hash, {Name, Files}, Announce, CTime}) ->
    Title = sphinx_search:highlight_title(Key, Name),
    FileList = sphinx_search:highlight_files(Key, Files),
    {multi, Hash, {Title, FileList}, Announce, CTime}.

View File

@ -0,0 +1,74 @@
%%
%% sphinx_excerpt.erl
%% Kevin Lynx
%% 08.24.2013
%%
-module(sphinx_excerpt).
-export([build_excerpt/5, build_excerpt/3]).
-compile(export_all).
%% Convenience wrapper: build excerpts using the server at localhost:9312.
build_excerpt(Key, Docs, Index) ->
    Host = localhost,
    Port = 9312,
    build_excerpt(Host, Port, Key, Docs, Index).
%% Connect to the server at IP:Port, request excerpts of Docs for the query
%% Key against Index, and close the connection again.
%%
%% Key and Index must be binaries, Docs a list (of binaries).
%% Returns the list produced by do_build_excerpt/4, or whatever connect/2
%% returned when the connection could not be established.
%% Exceptions raised while talking to the server propagate to the caller,
%% but the socket is always closed first.
build_excerpt(IP, Port, Key, Docs, Index)
  when is_binary(Key), is_binary(Index), is_list(Docs) ->
    case connect(IP, Port) of
        {ok, Sock} ->
            try
                do_build_excerpt(Sock, Key, Docs, Index)
            after
                %% runs on success and on crash, so the socket never leaks
                catch gen_tcp:close(Sock)
            end;
        Error ->
            Error
    end.
%% Send one excerpt (highlight) request over Sock and return the parsed reply.
%%
%% Sock  - connected socket, as returned by connect/2
%% Key   - query words to highlight (binary)
%% Docs  - list of binaries to build excerpts for
%% Index - index name (binary), sent to the server as a string
%%
%% Returns the result of parse_excerpt_res/2: one binary per entry of Docs.
%% The return values of the socket writes are not checked here.
do_build_excerpt(Sock, Key, Docs, Index) ->
Flag = 257, % 1 | 256; presumably remove_spaces | allow_empty in Sphinx's excerpt flags (TODO confirm). Author's note: allow_empty does not work
% markup placed around every matched word
BeforeMatch = <<"<span class='highlight'>">>,
AfterMatch = <<"</span>">>,
ChunkSep = <<"...">>,
Limit = 256,
Around = 5,
LimitPassages = 0,
LimitWords = 0,
StartPageId = 1,
HtmlStripMode = <<"index">>,
PassageBoundary = <<"none">>,
% request body: fixed option fields, then the document count, then one string per document
Commands = [{32, 0}, {32, Flag},
{string, Index}, {string, Key}, {string, BeforeMatch},
{string, AfterMatch}, {string, ChunkSep}, {32, Limit},
{32, Around}, {32, LimitPassages}, {32, LimitWords},
{32, StartPageId}, {string, HtmlStripMode}, {string, PassageBoundary}, {32, length(Docs)}] ++
[{string, Doc} || Doc <- Docs],
{Bytes, Size} = giza_protocol:commands_to_bytes(Commands),
% request header: two 16-bit numbers followed by the 32-bit body size.
% NOTE(review): 1 and 259 look like the searchd excerpt command id and its version (0x103) - confirm
giza_protocol:write_number(Sock, 1, 16),
giza_protocol:write_number(Sock, 259, 16),
giza_protocol:write_number(Sock, Size, 32),
gen_tcp:send(Sock, Bytes),
parse_excerpt_res(Sock, Docs).
%% Read the 8-byte reply header, require a non-empty body, then read one
%% length-prefixed string per requested document, in request order.
parse_excerpt_res(Sock, Docs) ->
    {ok, <<_:16, _:16, BodyLen:32>>} = gen_tcp:recv(Sock, 8),
    %% an empty body is treated as a failure (crashes with badmatch)
    true = (BodyLen > 0),
    [read_string_res(Sock, Doc) || Doc <- Docs].
%% Read one excerpt from the socket. An excerpt with the same byte size as
%% the source document is taken to mean "nothing highlighted" and is
%% reported as an empty binary.
read_string_res(Sock, Doc) ->
    Excerpt = giza_protocol:read_lp_string(Sock),
    case {byte_size(Excerpt), byte_size(Doc)} of
        {Same, Same} -> <<>>;
        _Different -> Excerpt
    end.
%% Open a passive, raw, binary TCP connection to Host:Port and perform the
%% initial exchange: read 4 bytes from the server, then write the number 1
%% as a 32-bit value.
%% Returns {ok, Sock}, or the atom error when the connection attempt fails.
connect(Host, Port) ->
    SockOpts = [binary, {packet, raw}, {active, false}],
    case gen_tcp:connect(Host, Port, SockOpts) of
        {ok, Sock} ->
            {ok, _ServerVersion} = gen_tcp:recv(Sock, 4),
            giza_protocol:write_number(Sock, 1, 32),
            {ok, Sock};
        _ ->
            error
    end.
%% Manual smoke test: requests excerpts for two sample documents from the
%% server on localhost:9312 using the "xml" index.
test() ->
    Key = <<"avi hi">>,
    Docs = [<<"hello">>, <<"hi, a a a hello avi world">>],
    build_excerpt(localhost, 9312, Key, Docs, <<"xml">>).

View File

@ -5,7 +5,7 @@
%% %%
-module(sphinx_search). -module(sphinx_search).
-include("vlog.hrl"). -include("vlog.hrl").
-export([search/4, search_hash/3]). -export([search/4, search_hash/3, highlight_title/2, highlight_files/2]).
-define(PORT, 9312). -define(PORT, 9312).
-define(INDEX, "xml"). -define(INDEX, "xml").
@ -48,5 +48,41 @@ translate_hash({_DocID, Item}) ->
40 = length(Hash), 40 = length(Hash),
Hash. Hash.
%% Highlight the query words of Key inside the title Name.
%%
%% Key and Name are strings (lists). Returns the highlighted title as a
%% string, or Name unchanged when nothing was highlighted or when the
%% excerpt could not be built (server unreachable, protocol error, ...).
%% Highlighting is best effort: failures are logged, never raised.
highlight_title(Key, Name) when is_list(Name) ->
    Excerpt =
        try sphinx_excerpt:build_excerpt(list_to_binary(Key),
                                         [list_to_binary(Name)],
                                         list_to_binary(?INDEX)) of
            [Ret] when is_binary(Ret) ->
                Ret;
            Other ->
                %% e.g. the atom 'error' when the connection failed
                ?E(?FMT("highlight_title unexpected result ~p", [Other])),
                <<>>
        catch
            _:Reason ->
                ?E(?FMT("highlight_title ~p", [Reason])),
                <<>>
        end,
    case Excerpt of
        <<>> -> Name;
        _ -> binary_to_list(Excerpt)
    end.
%% Highlight the query words of Key in the file names of a multi-file result.
%%
%% Files is a list of {Name, Length} tuples with Name a string.
%% Returns a list of the same tuples in which matching names are replaced by
%% their highlighted form; highlighted entries come first, followed by the
%% untouched ones. Highlighting is best effort: on any failure (server
%% unreachable, unexpected reply, name not convertible to a binary) the
%% error is logged and Files is returned unchanged.
highlight_files(Key, Files) when is_list(Files) ->
    {Names, Lens} = lists:unzip(Files),
    try
        BNames = [list_to_binary(Name) || Name <- Names],
        Rets = build_file_excerpts(list_to_binary(Key), BNames, 800),
        %% merging is protected as well: a short or malformed reply must
        %% not take the whole request down
        merge_file_excerpts(Rets, Names, Lens)
    catch
        _:Reason ->
            ?E(?FMT("highlight_files ~p", [Reason])),
            Files
    end.

%% Pair each excerpt with its original name and length; an empty excerpt
%% means "not highlighted", so the original name is kept for that entry.
%% Both groups are accumulated by prepending, i.e. in reverse input order.
merge_file_excerpts(Rets, Names, Lens) ->
    {Highlighted, Plain} = lists:foldl(
        fun({BName, Name, Len}, {HList, NHList}) ->
            case byte_size(BName) of
                0 -> {HList, [{Name, Len} | NHList]};
                _ -> {[{binary_to_list(BName), Len} | HList], NHList}
            end
        end, {[], []}, lists:zip3(Rets, Names, Lens)),
    Highlighted ++ Plain.
%% Build excerpts for BNames in batches of at most Batch names, because too
%% many files in one request make the server fail.
%%
%% BKey   - query (binary)
%% BNames - list of file names (binaries)
%% Batch  - maximum number of names per request (positive integer)
%%
%% Returns one binary per name, in the order of BNames. No request is sent
%% for an empty name list, and no empty batch is sent when the number of
%% names is an exact multiple of Batch. Raises an error if any batch does
%% not yield a list (e.g. the connection failed).
build_file_excerpts(_BKey, [], _Batch) ->
    [];
build_file_excerpts(BKey, BNames, Batch) when is_integer(Batch), Batch > 0 ->
    BIndex = list_to_binary(?INDEX),
    lists:append([excerpt_batch(BKey, Chunk, BIndex) ||
                  Chunk <- split_batches(BNames, Batch)]).

%% Request the excerpts for one batch, insisting on a list result.
excerpt_batch(BKey, Chunk, BIndex) ->
    case sphinx_excerpt:build_excerpt(BKey, Chunk, BIndex) of
        Excerpts when is_list(Excerpts) -> Excerpts;
        Other -> erlang:error({build_excerpt_failed, Other})
    end.

%% Split a list into consecutive non-empty chunks of at most Batch elements.
split_batches([], _Batch) ->
    [];
split_batches(Names, Batch) ->
    Chunk = lists:sublist(Names, Batch),
    Rest = lists:nthtail(length(Chunk), Names),
    [Chunk | split_batches(Rest, Batch)].

View File

@ -44,6 +44,10 @@ span.file-size {
p.page-nav { p.page-nav {
text-align:center; text-align:center;
} }
.highlight {
color:blue;
background:yellow;
}
</style> </style>
</head> </head>
<body> <body>