Commit 6b9bc4a5 authored by Coraline Ehmke

Hash clump detects cohorts!

parent ee3ac2a1
module Snuffle
module Detectors
module HashClump
class HashClump
attr_accessor :hashes
def initialize(hashes={})
# Stores the collection of hashes to analyse.
#
# hashes - the collection to store; defaults to an empty Array.
def initialize(hashes=[])
# Assigned through the attr_accessor writer rather than the ivar directly.
self.hashes = hashes
end
def cohorts(type=:values)
overlapping(hashes)[type].keys
# Groups values that recur together across nearby hashes.
#
# For every cluster, the sorted values of its hash are combined with the
# sorted values of each neighbor whose distance is below 0.25; the combined
# list is flattened and de-duplicated.
#
# Returns an Array of Arrays. Duplicate groups are removed and groups with
# fewer than two values are dropped.
def cohorts
  groups = clusters.map do |cluster|
    close_neighbors = cluster[:neighbors].select { |neighbor| neighbor[:distance] < 0.25 }
    own_values = cluster[:hash].values.sort
    neighbor_values = close_neighbors.map { |neighbor| neighbor[:hash].values.sort }
    # Deep flatten on purpose: nested values are flattened as well.
    ([own_values] + neighbor_values).flatten.uniq
  end
  groups.uniq.select { |group| group.size > 1 }
end
def overlapping
{
keys: Snuffle::Util::Histogram.from(self.hashes.map(&:keys)).select{|k,v| v > 1},
values: Snuffle::Util::Histogram.from(self.hashes.map(&:values)).select{|k,v| v > 1}
}
private
# Pairs every hash with each of the other hashes and the distance to it.
#
# Returns an Array with one entry per element of +hashes+, shaped like
#   { hash: <hash>, neighbors: [{ hash: <other>, distance: <distance> }, ...] }
# Elements equal to the current hash are left out of its neighbors.
def clusters
  hashes.map do |current|
    others = hashes - [current]
    neighbors = others.map do |other|
      { hash: other, distance: distance(current.value_matrix, other.value_matrix) }
    end
    { hash: current, neighbors: neighbors }
  end
end
# Scores how strongly two matrices (Arrays of hash codes) overlap.
#
# The score is the squared number of distinct entries present in both
# matrices, counted once for every entry of +primary_matrix+.
#
# primary_matrix - Array for the hash being examined.
# token_matrix   - Array for the hash it is compared against.
#
# Returns an Integer. An empty +primary_matrix+ yields 0 (previously nil,
# which made Math.sqrt raise in #distance).
def calculate_sum(primary_matrix, token_matrix)
  # The intersection is the same for every entry, so compute it once.
  shared_squared = (primary_matrix & token_matrix).count ** 2
  primary_matrix.size * shared_squared
end
# Converts the overlap score of two matrices into a distance.
#
# primary_matrix - Array for the hash being examined.
# token_matrix   - Array for the hash it is compared against.
#
# Returns a Float computed as 1 / (1 + sqrt(score)): 1.0 when the score is 0,
# shrinking towards 0 as the score grows.
def distance(primary_matrix, token_matrix)
  overlap_score = calculate_sum(primary_matrix, token_matrix)
  1.0 / (1 + Math.sqrt(overlap_score))
end
end
......
......@@ -26,6 +26,18 @@ module Snuffle
node.children.map{ |child| child.children.last.name }
end
# Maps each entry of +keys+ to the result of its #hash method
# (for core Ruby objects, an Integer hash code).
# Equal keys yield equal codes, so the arrays can be intersected cheaply.
def key_matrix
keys.map(&:hash)
end
# Maps each entry of +values+ to the result of its #hash method
# (for core Ruby objects, an Integer hash code).
# HashClump#clusters feeds these arrays into its distance calculation.
def value_matrix
values.map(&:hash)
end
# Uses +pairs+ as this object's inspect output.
# NOTE(review): +pairs+ is defined outside this view; presumably the node's
# key/value pairs. Confirm it returns something printable.
def inspect
pairs
end
end
end
......
......@@ -13,13 +13,21 @@ class Customer
end
# Looks up the neighborhood, passing state, city and postal_code as a hash.
# NOTE(review): this looks like fixture input for hash-clump detection, so the
# key set and key order are presumably deliberate. Confirm before editing.
def neighborhood
fake_neighborhood_api_call(city: self.city, state: self.state, postal_code: self.postal_code)
fake_neighborhood_api_call(state: self.state, city: self.city, postal_code: self.postal_code)
end
# Calls the fake API with only city and state (a subset of the keys used by
# #neighborhood).
def something_else
fake_neighborhood_api_call(city: self.city, state: self.state)
end
# Calls the fake API with city only.
def and_something_else_again
fake_neighborhood_api_call(city: self.city)
end
# Calls the fake API with state only.
def and_something_else_again_and_again
fake_neighborhood_api_call(state: self.state)
end
# Stand-in for a neighborhood lookup: ignores +args+ and always returns the
# same canned String.
#
# args - options Hash; accepted but never read.
def fake_neighborhood_api_call(args={})
"Probably River North"
end
......
......@@ -9,6 +9,14 @@ def file
File.open("spec/fixtures/program_2.rb", "r").read
end
def d
@d ||= Snuffle::FileParser.new(file)
# Memoized Snuffle::FileParser built from whatever +file+ returns.
def file_parser
@file_parser ||= Snuffle::FileParser.new(file)
end
# The hashes reported by the parser (delegates to file_parser.hashes).
def hashes
file_parser.hashes
end
# Memoized HashClump detector built from the parsed +hashes+.
def clump
@clump ||= Snuffle::Detectors::HashClump.new(hashes)
end
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment