mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-02-24 06:09:05 +00:00
add progress displaying for cache_indexer
This commit is contained in:
parent
062130cbd9
commit
a0e1be291d
@ -6,6 +6,7 @@
|
|||||||
-module(index_file).
|
-module(index_file).
|
||||||
-export([start/2]).
|
-export([start/2]).
|
||||||
-export([worker_run/3]).
|
-export([worker_run/3]).
|
||||||
|
-define(PRINT_STEP, 5000*41).
|
||||||
|
|
||||||
start(Conn, FileName) ->
|
start(Conn, FileName) ->
|
||||||
spawn_link(?MODULE, worker_run, [self(), Conn, FileName]).
|
spawn_link(?MODULE, worker_run, [self(), Conn, FileName]).
|
||||||
@ -13,6 +14,7 @@ start(Conn, FileName) ->
|
|||||||
load_position(Name) ->
|
load_position(Name) ->
|
||||||
StatusFile = Name ++ ".sta",
|
StatusFile = Name ++ ".sta",
|
||||||
Pos = case file:consult(StatusFile) of
|
Pos = case file:consult(StatusFile) of
|
||||||
|
{ok, []} -> 0;
|
||||||
{ok, [Status]} ->
|
{ok, [Status]} ->
|
||||||
proplists:get_value(position, Status);
|
proplists:get_value(position, Status);
|
||||||
{error, _} ->
|
{error, _} ->
|
||||||
@ -29,22 +31,32 @@ worker_run(Parent, Conn, FileName) ->
|
|||||||
Pos = load_position(FileName),
|
Pos = load_position(FileName),
|
||||||
io:format("start to process ~s from ~p~n", [FileName, Pos]),
|
io:format("start to process ~s from ~p~n", [FileName, Pos]),
|
||||||
{ok, FP} = file:open(FileName, [read]),
|
{ok, FP} = file:open(FileName, [read]),
|
||||||
|
{ok, MaxPos} = file:position(FP, eof),
|
||||||
file:position(FP, Pos),
|
file:position(FP, Pos),
|
||||||
Sum = process_hash(Conn, FileName, FP),
|
Step = MaxPos div 10, % every 10% to display progress
|
||||||
|
CheckStep = if Step < ?PRINT_STEP -> ?PRINT_STEP; true -> Step end,
|
||||||
|
Sum = process_hash(Conn, FileName, FP, Pos, MaxPos, CheckStep),
|
||||||
Parent ! {worker_done, self(), FileName},
|
Parent ! {worker_done, self(), FileName},
|
||||||
file:close(FP),
|
file:close(FP),
|
||||||
io:format("Index file ~s done, ~p hashes~n", [FileName, Sum]).
|
io:format("Index file ~s done, ~p hashes~n", [FileName, Sum]).
|
||||||
|
|
||||||
process_hash(Conn, FileName, FP) ->
|
process_hash(Conn, FileName, FP, PrintPos, MaxPos, CheckStep) ->
|
||||||
case io:get_line(FP, "") of
|
case io:get_line(FP, "") of
|
||||||
eof -> 0;
|
eof -> 0;
|
||||||
Line ->
|
Line ->
|
||||||
save_hash(Conn, strip_lf(Line)),
|
save_hash(Conn, strip_lf(Line)),
|
||||||
{ok, Pos} = file:position(FP, cur),
|
{ok, Pos} = file:position(FP, cur),
|
||||||
save_position(FileName, Pos),
|
NewPrintPos = check_progress(FileName, Pos, PrintPos, MaxPos, CheckStep),
|
||||||
1 + process_hash(Conn, FileName, FP)
|
1 + process_hash(Conn, FileName, FP, NewPrintPos, MaxPos, CheckStep)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
check_progress(FileName, Pos, PrintPos, MaxPos, CheckStep) when Pos >= PrintPos ->
|
||||||
|
save_position(FileName, Pos),
|
||||||
|
io:format("~s -> ~b%~n", [FileName, 100 * Pos div MaxPos]),
|
||||||
|
PrintPos + CheckStep;
|
||||||
|
check_progress(_, _, PrintPos, _, _) ->
|
||||||
|
PrintPos.
|
||||||
|
|
||||||
strip_lf(S) ->
|
strip_lf(S) ->
|
||||||
lists:sublist(S, length(S) - 1).
|
lists:sublist(S, length(S) - 1).
|
||||||
|
|
||||||
|
34
tools/download_sync.bat
Normal file
34
tools/download_sync.bat
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
wget http://torrage.com/sync/201201.txt
|
||||||
|
wget http://torrage.com/sync/201202.txt
|
||||||
|
wget http://torrage.com/sync/201203.txt
|
||||||
|
wget http://torrage.com/sync/201204.txt
|
||||||
|
wget http://torrage.com/sync/201205.txt
|
||||||
|
wget http://torrage.com/sync/201206.txt
|
||||||
|
wget http://torrage.com/sync/201207.txt
|
||||||
|
wget http://torrage.com/sync/201208.txt
|
||||||
|
wget http://torrage.com/sync/201209.txt
|
||||||
|
wget http://torrage.com/sync/201210.txt
|
||||||
|
wget http://torrage.com/sync/201211.txt
|
||||||
|
wget http://torrage.com/sync/201212.txt
|
||||||
|
wget http://torrage.com/sync/201301.txt
|
||||||
|
wget http://torrage.com/sync/201302.txt
|
||||||
|
wget http://torrage.com/sync/201303.txt
|
||||||
|
wget http://torrage.com/sync/201304.txt
|
||||||
|
wget http://torrage.com/sync/201305.txt
|
||||||
|
wget http://torrage.com/sync/201306.txt
|
||||||
|
wget http://torrage.com/sync/20130701.txt
|
||||||
|
wget http://torrage.com/sync/20130702.txt
|
||||||
|
wget http://torrage.com/sync/20130703.txt
|
||||||
|
wget http://torrage.com/sync/20130704.txt
|
||||||
|
wget http://torrage.com/sync/20130705.txt
|
||||||
|
wget http://torrage.com/sync/20130706.txt
|
||||||
|
wget http://torrage.com/sync/20130707.txt
|
||||||
|
wget http://torrage.com/sync/20130708.txt
|
||||||
|
wget http://torrage.com/sync/20130709.txt
|
||||||
|
wget http://torrage.com/sync/20130710.txt
|
||||||
|
wget http://torrage.com/sync/20130711.txt
|
||||||
|
wget http://torrage.com/sync/20130712.txt
|
||||||
|
wget http://torrage.com/sync/20130713.txt
|
||||||
|
wget http://torrage.com/sync/20130714.txt
|
||||||
|
wget http://torrage.com/sync/20130715.txt
|
||||||
|
|
Loading…
Reference in New Issue
Block a user