# Builds the "most frequent k characters" table for `string`.
#
# Returns an array of k hashes, each holding the keys "c" (a character) and
# "f" (the number of times that character occurs in `string`). Entries run
# from the highest count downwards; characters sharing a count keep the order
# of their first appearance in `string`. When `string` has fewer than k
# distinct characters, the tail is filled with placeholders (c: "NULL", f: 0).
func _MostFreqKHashing(string, k) {
    var letters = string.chars
    var counts = letters.freq

    # Distinct characters ordered by descending count. Only the count found at
    # each rank is consulted below, so the order among ties here is irrelevant.
    var ranked = counts.keys.sort_by {|ch| -counts{ch} }

    var used = Hash()
    var table = []

    for rank in ^k {
        # Walk the string in its original order and pick the first character
        # not chosen yet whose count equals the count at this rank.
        for ch in letters {
            if (used{ch}) {
                next
            }
            if (counts{ch} == counts{ranked[rank]}) {
                used{ch} = true
                table << Hash(c => ch, f => counts{ch})
                break
            }
        }
    }

    # Pad with placeholder entries, one for every rank that stayed empty.
    var missing = k-used.len
    table << (missing.of { Hash(c => :NULL, f => 0) }...)

    table
}
# Most-frequent-k distance between the strings `a` and `b`.
#
# Both strings are reduced to their k-entry frequency tables. Every character
# present in both tables adds to a similarity score: its count once when the
# two counts are equal, otherwise the sum of both counts. The result is `d`
# minus that score.
func MostFreqKSDF(a, b, k, d) {
    var table_a = _MostFreqKHashing(a, k)
    var table_b = _MostFreqKHashing(b, k)

    var similarity = 0

    for s in table_a {
        # Placeholder padding never counts as a shared character.
        if (s{:c} == :NULL) {
            next
        }
        for t in table_b {
            if (s{:c} == t{:c}) {
                similarity += (s{:f} + (s{:f} == t{:f} ? 0 : t{:f}))
            }
        }
    }

    d - similarity
}
# Renders the k-entry frequency table of `string` as one compact string:
# each entry becomes its character immediately followed by its count, and the
# pieces are concatenated in table order (e.g. "r2e2" for "research", k = 2).
func MostFreqKHashing(string, k) {
    _MostFreqKHashing(string, k).map { |entry|
        "%s%d" % (entry{:c}, entry{:f})
    }.join
}
# Demo driver: prints the hashes and the distance for two long sample
# strings, then for a list of short word pairs.

# Two long sample strings.
var str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
var str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"
say "str1 = #{str1.dump}"
say "str2 = #{str2.dump}"
say ''
# Hash of each sample with k = 2, then their distance with k = 2 and d = 100.
say("MostFreqKHashing(str1, 2) = ", MostFreqKHashing(str1, 2))
say("MostFreqKHashing(str2, 2) = ", MostFreqKHashing(str2, 2))
say("MostFreqKSDF(str1, str2, 2, 100) = ", MostFreqKSDF(str1, str2, 2, 100))
say ''
# Short word pairs; each inner array is one (s, t) pair to compare.
var arr = [
%w(night nacht),
%w(my a),
%w(research research),
%w(aaaaabbbb ababababa),
%w(significant capabilities),
]
# k: number of table entries kept per string.
# limit: passed as `d`, the value the similarity score is subtracted from.
var k = 2
var limit = 10
# One line per pair: both inputs, both hashes, and the resulting distance.
for s,t in arr {
"mfkh(%s, %s, #{k}) = [%s, %s] (SDF: %d)\n".printf(
s.dump, t.dump,
MostFreqKHashing(s, k).dump,
MostFreqKHashing(t, k).dump,
MostFreqKSDF(s, t, k, limit),
)
}
Output:
str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"
MostFreqKHashing(str1, 2) = L9T8
MostFreqKHashing(str2, 2) = F9L8
MostFreqKSDF(str1, str2, 2, 100) = 83
mfkh("night", "nacht", 2) = ["n1i1", "n1a1"] (SDF: 9)
mfkh("my", "a", 2) = ["m1y1", "a1NULL0"] (SDF: 10)
mfkh("research", "research", 2) = ["r2e2", "r2e2"] (SDF: 6)
mfkh("aaaaabbbb", "ababababa", 2) = ["a5b4", "a5b4"] (SDF: 1)
mfkh("significant", "capabilities", 2) = ["i3n2", "i3a2"] (SDF: 7)