diff --git a/src/http_front/http_handler.erl b/src/http_front/http_handler.erl
index 98f3358..841474a 100644
--- a/src/http_front/http_handler.erl
+++ b/src/http_front/http_handler.erl
@@ -117,7 +117,8 @@ search_by_sphinx(Keyword, Page) ->
         [US, TotalFound, CostTime, DBTime div 1000])),
     Tip = ?TEXT("
Index at: ~s | File count: ~p | Query count: ~p | Total Size: ~s Download
", [format_time_string(CTime), length(Files), Announce, size_string(http_common:total_size(Files)), format_magnet(Hash)]).
@@ -211,3 +212,18 @@ format_time_string(Secs) ->
 format_date_string(Secs) ->
     {{Y, M, D}, _} = time_util:seconds_to_local_time(Secs),
     ?TEXT("~b-~2..0b-~2..0b", [Y, M, D]).
+
+% use sphinx excerpt to highlight every entry of a search result list
+highlight_search_result(Key, RetList) ->
+    [highlight_one_result(Key, Result) || Result <- RetList].
+
+% single results get a highlighted title; multi results additionally get
+% highlighted file names
+highlight_one_result(Key, {single, Hash, {Name, Length}, Announce, CTime}) ->
+    HighLName = sphinx_search:highlight_title(Key, Name),
+    {single, Hash, {HighLName, Length}, Announce, CTime};
+highlight_one_result(Key, {multi, Hash, {Name, Files}, Announce, CTime}) ->
+    HighLName = sphinx_search:highlight_title(Key, Name),
+    HighLFiles = sphinx_search:highlight_files(Key, Files),
+    {multi, Hash, {HighLName, HighLFiles}, Announce, CTime}.
+
diff --git a/src/http_front/sphinx_excerpt.erl b/src/http_front/sphinx_excerpt.erl
new file mode 100644
index 0000000..fa761b6
--- /dev/null
+++ b/src/http_front/sphinx_excerpt.erl
@@ -0,0 +1,103 @@
+%%
+%% sphinx_excerpt.erl
+%% Kevin Lynx
+%% 08.24.2013
+%%
+-module(sphinx_excerpt).
+-export([build_excerpt/5, build_excerpt/3]).
+-compile(export_all).
+
+% Build excerpts through the server at localhost:9312.
+build_excerpt(Key, Docs, Index) ->
+    build_excerpt(localhost, 9312, Key, Docs, Index).
+
+% Connect to IP:Port and request one excerpt per entry of Docs for the
+% query Key against Index.
+% Returns the list built by parse_excerpt_res/2, or the atom error when
+% connect/2 cannot open the connection.
+build_excerpt(IP, Port, Key, Docs, Index)
+when is_binary(Key), is_binary(Index), is_list(Docs) ->
+    case connect(IP, Port) of
+        {ok, Sock} ->
+            % close the socket even when the request crashes, so that a
+            % caller catching the crash does not leak the connection
+            try
+                do_build_excerpt(Sock, Key, Docs, Index)
+            after
+                catch gen_tcp:close(Sock)
+            end;
+        Error ->
+            Error
+    end.
+
+% Send one excerpt request over Sock and return the parsed reply.
+do_build_excerpt(Sock, Key, Docs, Index) ->
+    Flag = 257, % 1 | 256, allow_empty not work
+    % NOTE(review): both match markers are empty, so a match cannot be told
+    % apart from its surroundings - confirm the intended markup
+    BeforeMatch = <<"">>,
+    AfterMatch = <<"">>,
+    ChunkSep = <<"...">>,
+    Limit = 256,
+    Around = 5,
+    LimitPassages = 0,
+    LimitWords = 0,
+    StartPageId = 1,
+    HtmlStripMode = <<"index">>,
+    PassageBoundary = <<"none">>,
+    Commands = [{32, 0}, {32, Flag},
+        {string, Index}, {string, Key}, {string, BeforeMatch},
+        {string, AfterMatch}, {string, ChunkSep}, {32, Limit},
+        {32, Around}, {32, LimitPassages}, {32, LimitWords},
+        {32, StartPageId}, {string, HtmlStripMode}, {string, PassageBoundary},
+        {32, length(Docs)}] ++
+        [{string, Doc} || Doc <- Docs],
+    {Bytes, Size} = giza_protocol:commands_to_bytes(Commands),
+    giza_protocol:write_number(Sock, 1, 16),
+    giza_protocol:write_number(Sock, 259, 16),
+    giza_protocol:write_number(Sock, Size, 32),
+    % crash right here, not on the later recv, when the body cannot be sent
+    ok = gen_tcp:send(Sock, Bytes),
+    parse_excerpt_res(Sock, Docs).
+
+% Read the 8-byte reply header, then one reply string per entry of Docs.
+% Crashes with badmatch when the header cannot be read or its length is 0.
+parse_excerpt_res(Sock, Docs) ->
+    % the two leading 16-bit header fields are not inspected
+    {ok, <<_:16, _:16, Len:32>>} = gen_tcp:recv(Sock, 8),
+    true = Len > 0,
+    [read_string_res(Sock, Doc) || Doc <- Docs].
+
+% Read one reply string; a reply of the same size as Doc is returned as
+% <<>>, which the callers in sphinx_search treat as "not highlighted".
+read_string_res(Sock, Doc) ->
+    R = giza_protocol:read_lp_string(Sock),
+    case byte_size(R) == byte_size(Doc) of
+        true -> <<>>;
+        false -> R
+    end.
+
+% Open a TCP connection, read the 4 bytes the server sends first and answer
+% with the number 1 (written with size 32).
+% Returns {ok, Sock}, or the atom error when the connection cannot be opened.
+connect(Host, Port) ->
+    case gen_tcp:connect(Host, Port,
+                         [binary, {packet, raw},
+                          {active, false}]) of
+        {ok, Sock} ->
+            case gen_tcp:recv(Sock, 4) of
+                {ok, _RawVersion} ->
+                    giza_protocol:write_number(Sock, 1, 32),
+                    {ok, Sock};
+                RecvError ->
+                    % same badmatch crash as a failed assertive match, but
+                    % the socket is closed first instead of being leaked
+                    catch gen_tcp:close(Sock),
+                    erlang:error({badmatch, RecvError})
+            end;
+        _ -> error
+    end.
+%
+test() ->
+    build_excerpt(localhost, 9312, <<"avi hi">>, [<<"hello">>, <<"hi, a a a hello avi world">>], <<"xml">>).
+
diff --git a/src/http_front/sphinx_search.erl b/src/http_front/sphinx_search.erl
index f954a73..1062234 100644
--- a/src/http_front/sphinx_search.erl
+++ b/src/http_front/sphinx_search.erl
@@ -5,7 +5,7 @@
 %%
 -module(sphinx_search).
 -include("vlog.hrl").
--export([search/4, search_hash/3]).
+-export([search/4, search_hash/3, highlight_title/2, highlight_files/2]).
 -define(PORT, 9312).
 -define(INDEX, "xml").
 
@@ -48,5 +48,65 @@ translate_hash({_DocID, Item}) ->
     40 = length(Hash),
     Hash.
 
+% Highlight the words of Key inside the title Name through a sphinx excerpt.
+% Falls back to Name itself when the request crashes, when build_excerpt
+% does not return a one-element list, or when the excerpt comes back empty.
+highlight_title(Key, Name) when is_list(Name) ->
+    R = case catch sphinx_excerpt:build_excerpt(list_to_binary(Key),
+            [list_to_binary(Name)], list_to_binary(?INDEX)) of
+        {'EXIT', Reason} ->
+            ?E(?FMT("highlight_title ~p", [Reason])),
+            <<>>;
+        [Ret] when is_binary(Ret) ->
+            Ret;
+        Other ->
+            % build_excerpt returns the bare atom error when it cannot
+            % connect; treat every unexpected shape as "no highlight"
+            ?E(?FMT("highlight_title ~p", [Other])),
+            <<>>
+    end,
+    if byte_size(R) == 0 -> Name; true -> binary_to_list(R) end.
+
+% Highlight the names in Files, a list of {Name, Len} pairs.
+% Highlighted entries are placed before the untouched ones in the result.
+% Returns Files unchanged when building the excerpts fails.
+highlight_files(Key, Files) when is_list(Files) ->
+    {Names, Lens} = lists:unzip(Files),
+    BNames = [list_to_binary(Name) || Name <- Names],
+    case catch build_file_excerpts(list_to_binary(Key), BNames, 800) of
+        {'EXIT', Reason} ->
+            ?E(?FMT("highlight_files ~p", [Reason])),
+            Files;
+        Rets ->
+            {L1, L2} = lists:foldl(fun({BName, Name, Len}, Acc) ->
+                {HList, NHList} = Acc,
+                if byte_size(BName) == 0 ->
+                    {HList, [{Name, Len}|NHList]};
+                true ->
+                    {[{binary_to_list(BName), Len}|HList], NHList}
+                end
+            end, {[], []}, lists:zip3(Rets, Names, Lens)),
+            L1 ++ L2
+    end.
+
+% too many files in one batch will cause error, so the names are sent in
+% batches of at most Batch entries
+build_file_excerpts(BKey, BNames, Batch) ->
+    BIndex = list_to_binary(?INDEX),
+    % round up, so that no empty trailing batch is requested when the number
+    % of names is a multiple of Batch (including zero)
+    Cnt = (length(BNames) + Batch - 1) div Batch,
+    SubNamesList = [lists:sublist(BNames, 1 + I * Batch, Batch) ||
+        I <- lists:seq(0, Cnt - 1)],
+    lists:append([build_batch_excerpt(BKey, SubNames, BIndex) ||
+        SubNames <- SubNamesList]).
+
+% Build the excerpts of one batch; crash on a non-list result (the atom error
+% on connection failure) so that the catch in highlight_files/2 handles it
+build_batch_excerpt(BKey, SubNames, BIndex) ->
+    case sphinx_excerpt:build_excerpt(BKey, SubNames, BIndex) of
+        Rets when is_list(Rets) -> Rets;
+        Error -> erlang:error({build_excerpt_failed, Error})
+    end.
 
 
diff --git a/www/page.temp b/www/page.temp
index e633967..cda3977 100644
--- a/www/page.temp
+++ b/www/page.temp
@@ -44,6 +44,10 @@ span.file-size {
 p.page-nav {
     text-align:center;
 }
+.highlight {
+    color:blue;
+    background:yellow;
+}