mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-02-22 13:19:05 +00:00
add rmmseg, with a pre-compiled win32 version
This commit is contained in:
parent
77893f0759
commit
753486c16a
12638
priv/chars.dic
Normal file
12638
priv/chars.dic
Normal file
File diff suppressed because it is too large
Load Diff
120308
priv/words.dic
Normal file
120308
priv/words.dic
Normal file
File diff suppressed because it is too large
Load Diff
@ -18,6 +18,7 @@
|
||||
search_recently/2,
|
||||
search_newest_top/3,
|
||||
search/2]).
|
||||
-export([decode_torrent_item/1]).
|
||||
-compile(export_all).
|
||||
-define(DBNAME, torrents).
|
||||
-define(COLLNAME, hashes).
|
||||
@ -183,15 +184,7 @@ create_torrent_desc(Conn, Hash, Name, Length, Announce, Files) ->
|
||||
files, encode_file_list(Files)}.
|
||||
-else.
|
||||
create_torrent_desc(_Conn, Hash, Name, Length, Announce, Files) ->
|
||||
NameArray = case string_split:split(Name) of
|
||||
{error, L, D} ->
|
||||
?E(?FMT("string split failed(error): ~p ~p", [L, D])),
|
||||
[Name];
|
||||
{incomplete, L, D} ->
|
||||
?E(?FMT("string split failed(incomplte): ~p ~p", [L, D])),
|
||||
[Name];
|
||||
{ok, R} -> R
|
||||
end,
|
||||
NameArray = seg_text(Name, Files),
|
||||
{'_id', list_to_binary(Hash),
|
||||
name, list_to_binary(Name),
|
||||
name_array, NameArray,
|
||||
@ -199,6 +192,19 @@ create_torrent_desc(_Conn, Hash, Name, Length, Announce, Files) ->
|
||||
created_at, time_util:now_seconds(),
|
||||
announce, Announce,
|
||||
files, encode_file_list(Files)}.
|
||||
|
||||
%% Build the searchable text for a torrent from its name and its file list.
%%
%% Name  - the torrent name as a string (list of characters/bytes).
%% Files - list of 2-tuples whose first element is a string (presumably the
%%         file name/path; the second element is ignored here).
%%
%% The name and every file string are joined with single spaces and the
%% result is handed to seg_text/1. Crashes with function_clause if an element
%% of Files is not a 2-tuple.
seg_text(Name, Files) ->
    %% Prefix each file string with a space and concatenate everything in one
    %% pass. Appending to a growing accumulator with ++ inside a fold would
    %% recopy the accumulator for every file (quadratic in the total length).
    Suffixes = lists:map(fun({S, _}) -> [$\s | S] end, Files),
    FullName = lists:append([Name | Suffixes]),
    seg_text(FullName).
|
||||
|
||||
%% Turn the joined text into the binary stored for searching.
%%
%% Reads the use_rmmseg setting through config:get/2 (the second argument,
%% false, appears to be the fallback value - confirm against the config
%% module). When the setting is true the binary is passed through
%% rmmseg:seg_space/1; when it is false the text is converted to a binary
%% unchanged. Any other setting value raises case_clause.
seg_text(FullName) ->
    UseSegmenter = config:get(use_rmmseg, false),
    case UseSegmenter of
        true ->
            Bin = list_to_binary(FullName),
            rmmseg:seg_space(Bin);
        false ->
            list_to_binary(FullName)
    end.
|
||||
-endif.
|
||||
|
||||
% {file1, {name, xx, length, xx}, file2, {name, xx, length, xx}}
|
||||
|
@ -34,6 +34,7 @@ start_standalone(IP, Port, Size) ->
|
||||
start_dep_apps(),
|
||||
tor_download:start_global(),
|
||||
config:start_link("hash_reader.config", fun() -> config_default() end),
|
||||
init_rmmseg(config:get(use_rmmseg, false)),
|
||||
% NOTE:
|
||||
Stats = {hash_reader_stats, {hash_reader_stats, start_link, [Size]}, permanent, 2000, worker, [hash_reader_stats]},
|
||||
DownloadStats = {tor_download_stats, {tor_download_stats, start_link, []}, permanent, 2000, worker, [tor_download_stats]},
|
||||
@ -41,6 +42,14 @@ start_standalone(IP, Port, Size) ->
|
||||
DBDateRange = {db_daterange, {db_daterange, start_link, [?DBPOOLNAME]}, permanent, 1000, worker, [db_daterange]},
|
||||
start_link(IP, Port, Size, [Log, DBDateRange, DownloadStats, Stats]).
|
||||
|
||||
%% Report whether rmmseg is enabled and, if it is, initialise it.
%%
%% init_rmmseg(false) only prints a notice and returns ok.
%% init_rmmseg(true) prints a notice, calls rmmseg:init/0 and returns the
%% result of rmmseg:load_dicts/0.
init_rmmseg(false) ->
    io:format("rmmseg is disabled~n"),
    ok;
init_rmmseg(true) ->
    io:format("rmmseg is enabled~n"),
    rmmseg:init(),
    rmmseg:load_dicts().
|
||||
|
||||
%% Convenience form of start_link/4 that passes an empty list as the fourth
%% argument (start_standalone/3 passes a list of child specs there, so this
%% presumably means "no extra children" - confirm against start_link/4).
start_link(IP, Port, Size) ->
    Extra = [],
    start_link(IP, Port, Size, Extra).
|
||||
|
||||
@ -72,4 +81,5 @@ config_default() ->
|
||||
{save_to_db, false},
|
||||
{save_to_file, true},
|
||||
{load_from_db, false},
|
||||
{use_rmmseg, false},
|
||||
{torrent_path, "torrents/"}].
|
||||
|
2
src/rmmseg/README.md
Normal file
2
src/rmmseg/README.md
Normal file
@ -0,0 +1,2 @@
|
||||
To use rmmseg with dhtcrawler2 on Windows with a WIN32 Erlang, you can use the pre-compiled rmmseg_win32.dll directly: copy it to the priv directory.
|
||||
|
46
src/rmmseg/rmmseg.erl
Normal file
46
src/rmmseg/rmmseg.erl
Normal file
@ -0,0 +1,46 @@
|
||||
%%
|
||||
%% rmmseg.erl
|
||||
%% Kevin Lynx
|
||||
%%
|
||||
-module(rmmseg).
|
||||
-export([init/0,
|
||||
load_dicts/0,
|
||||
seg_space/1,
|
||||
load_dicts/2,
|
||||
seg/1]).
|
||||
-onload(init/0).
|
||||
-compile(export_all).
|
||||
|
||||
%% Load the "rmmseg" NIF library from the priv directory located by
%% in_priv_path/1, passing 0 as the load info.
%% Returns ok; any other result from erlang:load_nif/2 raises badmatch.
init() ->
    ok = erlang:load_nif(in_priv_path("rmmseg"), 0).
|
||||
|
||||
%% NIF stub: load the character and word dictionaries from the given files.
%%
%% This Erlang body only runs while the native library has not been loaded
%% (see init/0). It raises error:not_loaded instead of silently returning an
%% atom, so a missing NIF is reported at the call site; erlang:nif_error/1
%% also keeps Dialyzer from inferring a bogus return type for the stub.
load_dicts(_CharFile, _WordFile) ->
    erlang:nif_error(not_loaded).
|
||||
|
||||
%% NIF stub: segment a binary string (seg_space/1 treats the result as a
%% list of binaries).
%%
%% This Erlang body only runs while the native library has not been loaded
%% (see init/0). It raises error:not_loaded rather than returning the atom
%% not_loaded, which seg_space/1 would otherwise feed to lists:foldl/3 and
%% fail on with an unrelated function_clause.
seg(_BStr) ->
    erlang:nif_error(not_loaded).
|
||||
|
||||
%% Load the dictionaries shipped in the priv directory: "chars.dic" as the
%% character file and "words.dic" as the word file. Returns whatever
%% load_dicts/2 returns.
load_dicts() ->
    load_dicts(in_priv_path("chars.dic"), in_priv_path("words.dic")).
|
||||
|
||||
%% Segment a binary with rmmseg:seg/1 and join the resulting segments into a
%% single binary, separated by one space each. An empty segment list gives
%% <<>>. While the joined result is still empty the next segment replaces it,
%% so no leading space is produced.
seg_space(BStr) when is_binary(BStr) ->
    join_segments(rmmseg:seg(BStr), <<>>).

%% Walk the segment list, extending the joined binary one segment at a time.
join_segments([], Joined) ->
    Joined;
join_segments([Seg | Rest], <<>>) ->
    join_segments(Rest, Seg);
join_segments([Seg | Rest], Joined) ->
    join_segments(Rest, <<Joined/binary, " ", Seg/binary>>).
|
||||
|
||||
%% Build the path of Name inside the priv directory, taken to be "../priv"
%% relative to the directory that code:which/1 reports for this module.
in_priv_path(Name) ->
    BeamDir = filename:dirname(code:which(?MODULE)),
    filename:join([BeamDir, "..", "priv", Name]).
|
||||
|
||||
%%
|
||||
%% Returns the atom not_loaded.
%% NOTE(review): looks like a placeholder for a native function - confirm.
sample() -> not_loaded.
|
||||
|
BIN
src/rmmseg/rmmseg_win32.dll
Normal file
BIN
src/rmmseg/rmmseg_win32.dll
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user