add rmmseg, with a pre-compiled win32 version

src
Kevin Lynx 11 years ago
parent 77893f0759
commit 753486c16a

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -18,6 +18,7 @@
search_recently/2,
search_newest_top/3,
search/2]).
-export([decode_torrent_item/1]).
-compile(export_all).
-define(DBNAME, torrents).
-define(COLLNAME, hashes).
@ -183,15 +184,7 @@ create_torrent_desc(Conn, Hash, Name, Length, Announce, Files) ->
files, encode_file_list(Files)}.
-else.
create_torrent_desc(_Conn, Hash, Name, Length, Announce, Files) ->
NameArray = case string_split:split(Name) of
{error, L, D} ->
?E(?FMT("string split failed(error): ~p ~p", [L, D])),
[Name];
{incomplete, L, D} ->
?E(?FMT("string split failed(incomplte): ~p ~p", [L, D])),
[Name];
{ok, R} -> R
end,
NameArray = seg_text(Name, Files),
{'_id', list_to_binary(Hash),
name, list_to_binary(Name),
name_array, NameArray,
@ -199,6 +192,19 @@ create_torrent_desc(_Conn, Hash, Name, Length, Announce, Files) ->
created_at, time_util:now_seconds(),
announce, Announce,
files, encode_file_list(Files)}.
%% Builds the text to index for a torrent: the torrent name followed by
%% every file name, separated by single spaces, then hands the result to
%% seg_text/1.
%%
%% Name  - torrent name as a string (list).
%% Files - list of 2-tuples whose first element is the file name; the
%%         second element is ignored here. An element of any other shape
%%         raises function_clause.
%%
%% The pieces are joined in one pass with string:join/2 instead of
%% repeatedly appending to a growing accumulator, which re-copied the
%% whole text once per file.
seg_text(Name, Files) ->
    FileNames = lists:map(fun({FileName, _}) -> FileName end, Files),
    seg_text(string:join([Name | FileNames], " ")).
%% Converts FullName to a binary and, when the use_rmmseg config flag is
%% true, passes that binary through rmmseg:seg_space/1. With the flag
%% false (also the default used when it is not configured) the binary is
%% returned unchanged.
seg_text(FullName) ->
    UseRmmseg = config:get(use_rmmseg, false),
    case UseRmmseg of
        true ->
            rmmseg:seg_space(list_to_binary(FullName));
        false ->
            list_to_binary(FullName)
    end.
-endif.
% {file1, {name, xx, length, xx}, file2, {name, xx, length, xx}}

@ -34,6 +34,7 @@ start_standalone(IP, Port, Size) ->
start_dep_apps(),
tor_download:start_global(),
config:start_link("hash_reader.config", fun() -> config_default() end),
init_rmmseg(config:get(use_rmmseg, false)),
% NOTE:
Stats = {hash_reader_stats, {hash_reader_stats, start_link, [Size]}, permanent, 2000, worker, [hash_reader_stats]},
DownloadStats = {tor_download_stats, {tor_download_stats, start_link, []}, permanent, 2000, worker, [tor_download_stats]},
@ -41,6 +42,14 @@ start_standalone(IP, Port, Size) ->
DBDateRange = {db_daterange, {db_daterange, start_link, [?DBPOOLNAME]}, permanent, 1000, worker, [db_daterange]},
start_link(IP, Port, Size, [Log, DBDateRange, DownloadStats, Stats]).
%% Reports on stdout whether rmmseg is enabled and, when it is, initialises
%% it: rmmseg:init/0 followed by rmmseg:load_dicts/0.
%%
%% Returns ok when disabled, otherwise whatever rmmseg:load_dicts/0 returns.
init_rmmseg(false) ->
    io:format("rmmseg is disabled~n"),
    ok;
init_rmmseg(true) ->
    io:format("rmmseg is enabled~n"),
    rmmseg:init(),
    rmmseg:load_dicts().
%% Convenience arity: delegates to start_link/4 with an empty list as the
%% fourth argument.
start_link(IP, Port, Size) ->
start_link(IP, Port, Size, []).
@ -72,4 +81,5 @@ config_default() ->
{save_to_db, false},
{save_to_file, true},
{load_from_db, false},
{use_rmmseg, false},
{torrent_path, "torrents/"}].

@ -0,0 +1,2 @@
If you want to use rmmseg in dhtcrawler2 on Windows with a WIN32 Erlang, you can use the pre-compiled rmmseg_win32.dll directly: copy it into the priv directory.

@ -0,0 +1,46 @@
%%
%% rmmseg.erl
%% Kevin Lynx
%%
-module(rmmseg).
-export([init/0,
load_dicts/0,
seg_space/1,
load_dicts/2,
seg/1]).
-onload(init/0).
-compile(export_all).
%% Loads the "rmmseg" NIF library from this application's priv directory.
%% Returns ok; any other result from erlang:load_nif/2 fails the match and
%% crashes the caller with badmatch.
%%
%% NOTE(review): the module attribute above is spelled `-onload`, not
%% `-on_load`, so it looks like this function only runs when it is called
%% explicitly - confirm before changing either side.
init() ->
    ok = erlang:load_nif(in_priv_path("rmmseg"), 0).
%% Erlang-side fallback body: ignores both arguments and returns the atom
%% not_loaded. Presumably replaced by the native implementation once
%% init/0 has loaded the NIF library - TODO confirm against the C source.
load_dicts(_CharFile, _WordFile) ->
not_loaded.
%% Erlang-side fallback body: ignores its argument and returns the atom
%% not_loaded. Presumably replaced by the native implementation once
%% init/0 has loaded the NIF library; seg_space/1 folds over the result as
%% a list of binaries.
seg(_BStr) ->
not_loaded.
%% Loads the default dictionaries: calls load_dicts/2 with the priv
%% directory paths of "chars.dic" and "words.dic", returning its result.
load_dicts() ->
    load_dicts(in_priv_path("chars.dic"), in_priv_path("words.dic")).
%% Segments the binary BStr with seg/1 and glues the resulting segments
%% back together into one binary, separated by single spaces.
%%
%% While the joined result is still empty, the next segment replaces it
%% instead of being appended, so no leading separator is produced.
seg_space(BStr) when is_binary(BStr) ->
    Join = fun(Word, <<>>) ->
                   Word;
              (Word, Joined) ->
                   <<Joined/binary, " ", Word/binary>>
           end,
    lists:foldl(Join, <<>>, rmmseg:seg(BStr)).
%% Returns the path of Name inside the "priv" directory that sits next to
%% the directory holding this module's object code, i.e.
%% <dir of module>/../priv/Name.
in_priv_path(Name) ->
    BeamDir = filename:dirname(code:which(?MODULE)),
    filename:join([BeamDir, "..", "priv", Name]).
%%
%% Returns the atom not_loaded. Not listed in -export, but reachable from
%% outside because of -compile(export_all).
%% NOTE(review): looks like another NIF placeholder - confirm against the
%% native library.
sample() ->
not_loaded.

Binary file not shown.
Loading…
Cancel
Save