diff --git a/src/common/string_util.erl b/src/common/string_util.erl
index 7a599dd..8a8da0f 100644
--- a/src/common/string_util.erl
+++ b/src/common/string_util.erl
@@ -3,8 +3,33 @@
 %% Kevin Lynx
 %%
 -module(string_util).
--export([format/2]).
+-compile(export_all).
+-export([format/2, strip_invalid_unicode/1]).
 
 format(Fmt, Arg) when is_list(Fmt), is_list(Arg) ->
 	lists:flatten(io_lib:format(Fmt, Arg)).
 
+% strip these unicode control characters
+strip_invalid_unicode(L) when is_list(L) ->
+	binary_to_list(strip_invalid_unicode(list_to_binary(L)));
+strip_invalid_unicode(<<>>) ->
+	<<>>;
+strip_invalid_unicode(<<C/utf8, R/binary>>) ->
+	case is_valid_unicode(C) of
+		true ->
+			RR = strip_invalid_unicode(R),
+			<<C/utf8, RR/binary>>;
+		false ->
+			strip_invalid_unicode(R)
+	end;
+strip_invalid_unicode(<<_, R/binary>>) ->
+	strip_invalid_unicode(R).
+
+is_valid_unicode(C) when C < 16#20 ->
+	false;
+is_valid_unicode(C) when C >= 16#7f, C =< 16#ff ->
+	false;
+is_valid_unicode(_) ->
+	true.
+
+
diff --git a/src/http_front/sphinx_search.erl b/src/http_front/sphinx_search.erl
index 9d9a41b..6b82b55 100644
--- a/src/http_front/sphinx_search.erl
+++ b/src/http_front/sphinx_search.erl
@@ -18,10 +18,9 @@ search(Conn, Key, Offset, Count) ->
 	{T2, TDocs} = case catch giza_request:send(Q4) of
 		{'EXIT', R} ->
 			?W(?FMT("sphinx search error ~p", [R])),
-			[];
+			{now(), []};
 		{ok, Ret} ->
-			T = now(),
-			{T, decode_search_ret(Conn, Ret)}
+			{now(), decode_search_ret(Conn, Ret)}
 	end,
 	T3 = now(),
 	Stats = {timer:now_diff(T2, T1), timer:now_diff(T3, T2)},
diff --git a/src/sphinx_builder/sphinx_cmd.erl b/src/sphinx_builder/sphinx_cmd.erl
index 147c8f1..f7ee856 100644
--- a/src/sphinx_builder/sphinx_cmd.erl
+++ b/src/sphinx_builder/sphinx_cmd.erl
@@ -27,7 +27,8 @@ do_build_init_index(MainFile, DeltaFile, CfgFile) ->
 build_delta_index(IndexFile, Delta, CfgFile, MinID, MaxID) ->
 	Cmd = "indexer -c " ++ CfgFile ++ " --rotate " ++ Delta,
 	Res = os:cmd(Cmd),
-	Dest = backup_delta_file(Delta, MinID, MaxID, IndexFile),
+	Success = check_cmd_success(Res),
+	Dest = backup_delta_file(Delta, MinID, MaxID, IndexFile, Success),
 	?I(?FMT("command `~s' result on ~s~n" ++ Res, [Cmd, Dest])).
 
 merge_index(Main, Delta, CfgFile) ->
@@ -36,9 +37,13 @@ merge_index(Main, Delta, CfgFile) ->
 	Res = os:cmd(Cmd),
 	?I(?FMT("command `~s' result~n" ++ Res, [Cmd])).
 
-backup_delta_file(Delta, MinID, MaxID, IndexFile) ->
+backup_delta_file(Delta, MinID, MaxID, IndexFile, Flag) ->
 	Path = filename:dirname(IndexFile),
 	Dest = string_util:format(Path ++ "/" ++ Delta ++ "[~b-~b]" ++ ".xml",
 		[MinID, MaxID]),
-	file:copy(IndexFile, Dest),
+	if not Flag -> file:copy(IndexFile, Dest); true -> skip end,
 	Dest.
+
+% too simple
+check_cmd_success(Res) ->
+	string:str(Res, "succesfully") > 0.
diff --git a/src/sphinx_builder/sphinx_xml.erl b/src/sphinx_builder/sphinx_xml.erl
index a8f68ef..08db1b3 100644
--- a/src/sphinx_builder/sphinx_xml.erl
+++ b/src/sphinx_builder/sphinx_xml.erl
@@ -5,6 +5,7 @@
 %%
 -module(sphinx_xml).
 -behaviour(gen_server).
+-compile(export_all).
 -include("vlog.hrl").
 -export([init/1,
 	handle_call/3,
@@ -48,9 +49,9 @@ handle_cast(save, #state{docs = Docs, ids = IDs} = State) when length(Docs) > 0
 handle_cast(stop, State) ->
 	{stop, normal, State}.
 
-handle_call({insert, {ID, Hash, Name, Files, Query, CreatedAt}}, _From, State) ->
+handle_call({insert, DocT}, _From, State) ->
 	#state{docs = Docs, ids = IDs, max = Max} = State,
-	Doc = sphinx_doc:element(Hash, Name, Files, ID, Query, CreatedAt),
+	{ID, Doc} = create_doc(DocT),
 	{NewDocs, NewIDs} = try_save([Doc|Docs], Max, [ID|IDs]),
 	{reply, ok, State#state{docs = NewDocs, ids = NewIDs}};
 
@@ -83,3 +84,20 @@ get_id_range([First|IDs]) ->
 	lists:foldl(fun(ID, {Min, Max}) ->
 		{min(ID, Min), max(ID, Max)}
 	end, {First, First}, IDs).
+
+create_doc({ID, Hash, Name, Files, Query, CreatedAt}) ->
+	ValidName = valid_name(Name),
+	ValidFiles = valid_file_names(Files),
+	Doc = sphinx_doc:element(Hash, ValidName, ValidFiles, ID, Query, CreatedAt),
+	{ID, Doc}.
+
+valid_file_names(Files) ->
+	[{valid_name(Name), Length} || {Name, Length} <- Files].
+
+valid_name(S) ->
+	ValidName = string_util:strip_invalid_unicode(S),
+	if length(ValidName) < length(S) ->
+		?I(?FMT("~s -> ~s", [S, ValidName]));
+		true -> ok
+	end,
+	ValidName.