diff --git a/src/db_daterange.erl b/src/db_daterange.erl
index 4774487..897ddac 100644
--- a/src/db_daterange.erl
+++ b/src/db_daterange.erl
@@ -42,18 +42,19 @@ insert(Conn, Hash) when is_list(Hash) ->
     gen_server:cast(srv_name(), insert),
     IRet.
 
-% [ListHash, ListHash]
+% [{ListHash, Req}, {ListHash, Req}]
 lookup(Conn, DaySecs, Count) ->
     Sel = {'$query', {date, DaySecs}, '$orderby', {reqs, -1}},
     List = mongo:do(safe, master, Conn, ?DBNAME, fun() ->
-        Cursor = mongo:find(?COLLNAME, Sel, {'_id', 1}, 0, Count),
+        Cursor = mongo:find(?COLLNAME, Sel, {'_id', 1, reqs, 1}, 0, Count),
         mongo_cursor:rest(Cursor)
     end),
     [decode_hash(Doc) || Doc <- List].
 
 decode_hash(Doc) ->
     {ID} = bson:lookup('_id', Doc),
-    binary_to_list(ID).
+    {Req} = bson:lookup(reqs, Doc),
+    {binary_to_list(ID), Req}.
 
 % delete all oldest hashes
 try_delete_oldest(Conn) ->
diff --git a/src/db_store_mongo.erl b/src/db_store_mongo.erl
index 7b5c979..2bfc699 100644
--- a/src/db_store_mongo.erl
+++ b/src/db_store_mongo.erl
@@ -91,9 +91,16 @@ search_newest_top(Conn, Count, DaySecs) ->
 % use hash_date to search, based on date index, so the search speed is very fast
 search_newest_top_by_date(Conn, Count, DaySecs) ->
     Hashes = db_daterange:lookup(Conn, DaySecs, Count),
-    Infos = [index(Conn, Hash) || Hash <- Hashes],
+    Infos = [day_info(Conn, Hash, Req) || {Hash, Req} <- Hashes],
     lists:filter(fun(E) -> E /= {} end, Infos).
 
+day_info(Conn, Hash, Req) ->
+    case index(Conn, Hash) of
+        {} -> {};
+        Info ->
+            setelement(4, Info, Req)
+    end.
+
 index(Conn, Hash) when is_list(Hash) ->
     Ret = mongo_do_slave(Conn, fun() ->
         mongo:find_one(?COLLNAME, {'_id', list_to_binary(Hash)})
@@ -214,6 +221,8 @@ decode_ret_item(Item) ->
     {Torrent} = bson:lookup(obj, Item),
     decode_torrent_item(Torrent).
 
+% {single, Hash, {Name, Length}, RequestCount, CreatedAt}
+% {multi, Hash, {Name, Files}, RequestCount, CreatedAt}
 decode_torrent_item(Torrent) ->
     {BinHash} = bson:lookup('_id', Torrent),
     Hash = binary_to_list(BinHash),
diff --git a/src/torrent/tor_download.erl b/src/torrent/tor_download.erl
index b44ca1a..f36a8aa 100644
--- a/src/torrent/tor_download.erl
+++ b/src/torrent/tor_download.erl
@@ -23,15 +23,15 @@
 -define(REQ_TIMEOUT, 60*1000).
 % when ibrowse crashed, it will not notify these requests timeout, that will
 % make these requests stay in the state forever
--define(REQ_ERROR_TIMEOUT, 2*?REQ_TIMEOUT).
+-define(REQ_ERROR_TIMEOUT, 5*60*1000).
 -define(IS_ERROR_TIMEOUT(T), (timer:now_diff(now(), T) div 1000 > ?REQ_ERROR_TIMEOUT)).
 -record(state, {start, hashSum = 0, reqSum = 0, totalTime = 0, reqs}).
 
 start_global() ->
     ibrowse:start(),
-    Options = [{max_sessions, ?HTTP_SESSION}, {max_pipeline_size, ?HTTP_PIPELINE}],
+    %Options = [{max_sessions, ?HTTP_SESSION}, {max_pipeline_size, ?HTTP_PIPELINE}], % does not work here?
-    [ibrowse:set_dest(Host, 80, Options) || Host <- get_req_hosts()],
+    %[ibrowse:set_dest(Host, 80, Options) || Host <- get_req_hosts()],
     ok.
 
 start_link() ->
@@ -48,14 +48,14 @@ stats(Pid) ->
     gen_server:call(Pid, get_stats, infinity).
 
 init([]) ->
-    {ok, #state{start = now(), reqs = gb_trees:empty()}}.
+    {ok, #state{start = now(), reqs = gb_trees:empty()}, 0}.
 
 handle_cast({download, MagHash, From}, State) ->
     #state{reqs = Reqs, hashSum = H, reqSum = R} = State,
     % remove these invalid requests
     UpdateReqs = Reqs, %check_error_timeout_reqs(Reqs),
     NewReqs = create_download(UpdateReqs, MagHash, From),
-    NewSum = R + 1 - gb_trees:size(Reqs) - gb_trees:size(UpdateReqs),
+    NewSum = R + 1 - (gb_trees:size(Reqs) - gb_trees:size(UpdateReqs)),
     {noreply, State#state{reqs = NewReqs, hashSum = H + 1, reqSum = NewSum}};
 
 handle_cast(stop, State) ->
@@ -93,6 +93,10 @@ handle_info({ibrowse_async_response, ReqID, Body}, State) ->
     end,
     {noreply, NewState};
 
+handle_info(timeout, State) ->
+    timer:send_interval(?REQ_ERROR_TIMEOUT div 2, check_error_timeout),
+    {noreply, State};
+
 handle_info(check_error_timeout, State) ->
     #state{reqs = Reqs} = State,
     NewReqs = check_error_timeout_reqs(Reqs),
@@ -164,19 +168,20 @@ unzip_content(_B) ->
     error.
 
 %% http stuff
-get_req_hosts() ->
-    ["http://bt.box.n0808.com",
-    "http://torcache.net",
-    "http:/torrange.com",
-    "http://zoink.it"].
+%get_req_hosts() ->
+%    ["http://bt.box.n0808.com",
+%    "http://torcache.net",
+%    "http:/torrange.com",
+%    "http://zoink.it"].
 
 create_req_urls(MagHash) when is_list(MagHash), length(MagHash) == 40 ->
     U1 = "http://torcache.net/torrent/" ++ MagHash ++ ".torrent",
     U2 = format_btbox_url(MagHash),
     U3 = "http://torrage.com/torrent/" ++ MagHash ++ ".torrent",
+    % zoink causes an ibrowse crash because of its error response
     % zoink.it support https, but the ssl library seems memory leak
-    U4 = "http://zoink.it/torrent/" ++ MagHash ++ ".torrent",
-    [U1, U2, U3, U4].
+    %U4 = "http://zoink.it/torrent/" ++ MagHash ++ ".torrent",
+    [U1, U2, U3].
 
 is_ssl_url(URL) when is_list(URL), length(URL) > 4 ->
     string:substr(URL, 1, 5) == "https".
@@ -187,16 +192,26 @@ format_btbox_url(MagHash) ->
     "http://bt.box.n0808.com/" ++ H ++ "/" ++ T ++ "/" ++ MagHash ++ ".torrent".
 
 check_error_timeout_reqs(Reqs) ->
+    Size = gb_trees:size(Reqs),
     ReqList = gb_trees:to_list(Reqs),
-    lists:foldl(fun(E, NewReqs) ->
+    NewReqs = lists:foldl(fun(E, NewReqs) ->
         check_error_timeout(NewReqs, E)
-    end, gb_trees:empty(), ReqList).
+    end, gb_trees:empty(), ReqList),
+    NewSize = gb_trees:size(NewReqs),
+    case NewSize < Size of
+        true ->
+            ?E(?FMT("remove ~p timeout download reqs, new size ~p",
+                [Size - NewSize, NewSize]));
+        false ->
+            ok
+    end,
+    NewReqs.
 
 check_error_timeout(Acc, {ReqID, {MagHash, _, From, _, Start} = Req}) ->
     case ?IS_ERROR_TIMEOUT(Start) of
         true ->
             From ! {got_torrent, failed, MagHash},
-            ?E(?FMT("download req error timeout ~s", [MagHash])),
+            ?E(?FMT("download req timeout ~p ~s", [ReqID, MagHash])),
             Acc;
         false ->
             gb_trees:insert(ReqID, Req, Acc)
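
How the db pieces fit together: db_daterange:lookup/3 now projects reqs alongside _id, so it returns {Hash, Req} pairs already ordered by daily request count, and day_info/3 overwrites element 4 (RequestCount, per the decode format documented above decode_torrent_item) of the decoded torrent tuple with that daily count. A worked example, runnable in an erl shell; the hash, name, and counts are made up:

    %% what index/2 might decode for one torrent:
    Info = {single, "0123456789abcdef0123456789abcdef01234567",
            {"ubuntu.iso", 123456789}, 3, 1371500000},
    %% db_daterange counted 12 requests for this hash today,
    %% so day_info/3 returns the tuple with slot 4 replaced:
    {single, _, _, 12, _} = setelement(4, Info, 12).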
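
The reqSum change in handle_cast fixes an operator-precedence slip: `-` is left-associative, so the old expression subtracted both tree sizes, while the intent is to subtract only the number of requests the sweep dropped. With the inline sweep currently disabled (UpdateReqs = Reqs), the difference is zero, but the formula is now correct for when it is re-enabled:

    %% old: ((R + 1) - gb_trees:size(Reqs)) - gb_trees:size(UpdateReqs)
    %% new:  (R + 1) - (gb_trees:size(Reqs) - gb_trees:size(UpdateReqs))
    %%                  ^ count of requests removed by the sweep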
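
The tor_download change also moves the stuck-request sweep onto a periodic timer: init/1 now returns {ok, State, 0}, so the fresh gen_server receives a timeout message right away, and that first message installs timer:send_interval/2 for the recurring check_error_timeout. On units: timer:now_diff/2 returns microseconds, so ?IS_ERROR_TIMEOUT divides by 1000 before comparing against ?REQ_ERROR_TIMEOUT, which is 5*60*1000 ms, i.e. five minutes. A minimal self-contained sketch of the same pattern; the module and message names are illustrative, not from the patch:

    -module(periodic_sweep).
    -behaviour(gen_server).
    -export([start_link/0]).
    -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
             terminate/2, code_change/3]).

    -define(SWEEP_INTERVAL, 5*60*1000). % milliseconds, like ?REQ_ERROR_TIMEOUT

    start_link() ->
        gen_server:start_link(?MODULE, [], []).

    %% the 0 timeout makes `timeout` arrive as the first message,
    %% after init/1 has returned
    init([]) ->
        {ok, [], 0}.

    %% first tick: install the recurring sweep message
    handle_info(timeout, State) ->
        timer:send_interval(?SWEEP_INTERVAL div 2, sweep),
        {noreply, State};
    %% every later tick: do the cleanup, as check_error_timeout_reqs/1 does
    handle_info(sweep, State) ->
        {noreply, State}.

    handle_call(_Req, _From, State) -> {reply, ok, State}.
    handle_cast(_Msg, State) -> {noreply, State}.
    terminate(_Reason, _State) -> ok.
    code_change(_Old, State, _Extra) -> {ok, State}.

One caveat with this pattern: the 0 timeout is skipped if any other message reaches the server before it fires, so a guaranteed first tick needs self() ! timeout in init/1 (or handle_continue on OTP 21+).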