(* Функция нахождения наибольшего самоповторения строки *) let self_repeat s = let len = String.length s in let rec sr i = if (String.sub s 0 i) = (String.sub s (len - i) i) then i else sr (pred i) in if len = 0 then 0 else sr (pred (String.length s)) ;; (* Поиск подстроки алгоритмом Кнута - Морриса - Пратта *) let kmp haystack needle = let build_backw s = Array.init (String.length s) (fun i -> self_repeat (String.sub s 0 i)) in let apply_backw backw s sub = let needle_len = String.length needle and haystack_len = String.length haystack in let rec apply i n = if n = needle_len then (i - needle_len) else if i = haystack_len then -1 else if s.[i] <> sub.[n] then if n = 0 then apply (succ i) 0 else apply i backw.(n) else apply (succ i) (succ n) in apply 0 0 in apply_backw (build_backw needle) haystack needle;; (* Поиск подстроки построением автомата *) let build_fsm needle = let needle_len = String.length needle in Array.init 256 (fun ch -> Array.init needle_len (fun len -> let c = char_of_int ch in if needle.[len] = c then succ len else self_repeat ((String.sub needle 0 len) ^ (String.make 1 c))));; let automate haystack needle = let needle_len = String.length needle and haystack_len = String.length haystack and auto = build_fsm needle in let rec regular i state = if state = needle_len then (i - needle_len) else if i = haystack_len then -1 else regular (succ i) (auto.(int_of_char haystack.[i]).(state)) in regular 0 0;; let slow_search haystack needle = let needle_len = String.length needle in let search_end = String.length haystack - needle_len in let rec search i = if i > search_end then -1 else if (String.sub haystack i needle_len) = needle then i else search (succ i) in search 0;; let hash s = let sum = ref 0 in for i = 0 to pred (String.length s) do sum := !sum + (int_of_char s.[i]); done; !sum;; let rabin_karp haystack needle = let haystack_len = String.length haystack and needle_len = String.length needle in let search_end = haystack_len - needle_len in if search_end < 0 then -1 else let needle_hash = hash needle in let rec search i h = if (i > search_end) then -1 else if (h = needle_hash) && ((String.sub haystack i needle_len) = needle) then i else if (i = search_end) then -1 else search (succ i) (h - (int_of_char haystack.[i]) + (int_of_char haystack.[i + needle_len])) in search 0 (hash (String.sub haystack 0 needle_len));; let random_string n = let s = String.create n in for i = 0 to n - 1 do s.[i] <- char_of_int (Random.int 2 + 48); done; s;; (* Таймер - более удобный способ замерять прошедшее время *) let timer = ref 0.;; let init_timer () = timer := Sys.time ();; let read_timer () = let result = Sys.time () -. !timer in timer := Sys.time (); result;; Random.self_init ();; (*for i = 1 to 1000 do Printf.printf "%d: " i; flush stdout; let haystack = random_string 30 (* миллион тормозит, хотя работает *) and needle = random_string 5 in Printf.printf "%s and %s: " haystack needle; flush stdout; let knuth = (kmp haystack needle) and rk = (rabin_karp haystack needle) in if (knuth = rk) then print_endline "success" else failwith (Printf.sprintf "Epic fail: %d expected, %d found" knuth rk); done;;*) let print_flush s = print_endline s; flush stdout;; let test () = let haystack = Array.init 100 (fun _ -> random_string 1000) and needle = Array.init 100 (fun _ -> random_string 10) in let test_func func = init_timer (); for i = 0 to 99 do for j = 1 to 100 do ignore (func haystack.(i) needle.(i)); done; done; read_timer () in print_flush "Results:"; print_flush (string_of_float (test_func kmp)); print_flush (string_of_float (test_func slow_search)); print_flush (string_of_float (test_func rabin_karp));; test ();;