From b614689b3c293be1cbd566f28996e6c3a41f70a3 Mon Sep 17 00:00:00 2001 From: gebele Date: Wed, 9 Aug 2017 12:31:27 +0000 Subject: deleted obsolete files --- nanoparticles.rb | 182 ------------------------------------------------------- 1 file changed, 182 deletions(-) delete mode 100644 nanoparticles.rb (limited to 'nanoparticles.rb') diff --git a/nanoparticles.rb b/nanoparticles.rb deleted file mode 100644 index d3399e9..0000000 --- a/nanoparticles.rb +++ /dev/null @@ -1,182 +0,0 @@ -require 'rserve' -require 'json' -require 'yaml' -require 'csv' - -ENDPOINT = "Cell.association (Net cell association [mL/ug(Mg)])" - -def query_features - relevant_features = JSON.parse(File.read("./relevant-features.json")) -end - -def predict params - # calculate difference parameters - diff_features = JSON.parse(File.read("./diff-features.json")) - diff_features.each do |feature,originals| - params[feature] = params[originals[1]]-params[originals[0]] # causes rounding errors! - end - neighbors = [] - sim_sum = 0 - weighted_sum = 0 - match = nil - relevant_features = JSON.parse(File.read("./relevant-features.json")) - weights = relevant_features.values.collect{|v| v["r"]} - JSON.parse(File.read("./data.json")).each do |id,categories| - query_values = [] - neighbor_values = [] - relevant_features.keys.each do |f| - query_values << params[f] - neighbor_values << categories["physchem"][f] - end - sim = weighted_cosine_similarity(query_values,neighbor_values,weights) - if sim > 0.9999 # no exact match because of rounding errors - match = {id => categories} - elsif sim > 0.95 - neighbor = categories - neighbor["similarity"] = sim - neighbor["sim"] = cosine_similarity(query_values,neighbor_values) - neighbor["id"] = id - sim_sum += sim - weighted_sum += sim*Math.log10(categories["tox"][ENDPOINT]) - #weighted_sum += sim*categories["tox"][ENDPOINT] - neighbors << neighbor - end - end - neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} - sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum) - { - :query => params, - :match => match, - :prediction => {ENDPOINT => prediction}, - :neighbors => neighbors - } -end - -class Object - def numeric? - true if Float(self) rescue false - end -end - -def euclidean_distance(a, b) - sq = a.zip(b).map{|a,b| (a - b) ** 2} - Math.sqrt(sq.inject(0) {|s,c| s + c}) -end - -def dot_product(a, b) - products = a.zip(b).map{|a, b| a * b} - products.inject(0) {|s,p| s + p} -end - -def magnitude(point) - squares = point.map{|x| x ** 2} - Math.sqrt(squares.inject(0) {|s, c| s + c}) -end - -# http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity -def cosine_similarity(a, b) - dot_product(a, b) / (magnitude(a) * magnitude(b)) -end - -def weighted_cosine_similarity(a, b, w) - dot_product = 0 - magnitude_a = 0 - magnitude_b = 0 - (0..a.size-1).each do |i| - dot_product += w[i].abs*a[i]*b[i] - magnitude_a += w[i].abs*a[i]**2 - magnitude_b += w[i].abs*b[i]**2 - end - dot_product/Math.sqrt(magnitude_a*magnitude_b) - -end - -#@endpoint = @data.collect{|r| r[5]} - -def neighbors query -end - -def csv2json - csv = CSV.read("data/MergedSheets_edit.csv") - csv.collect!{|row| row[0..36].collect{|c| c.numeric? ? c.to_f : c } }.compact - feature_names = [ - "ID", - csv[0][1], - csv[0][2], - csv[0][3], - csv[6][4], - "#{csv[0][5]} (#{csv[6][5]} [#{csv[11][5]}])", # endpoint - "#{csv[0][6]} (#{csv[6][6]})", # endpoint - "#{csv[6][7]} [#{csv[11][7]}]", - "#{csv[6][8]} [#{csv[11][8]}]", - "#{csv[6][9]} [#{csv[11][9]}]", - ] - (10..10+5*3).step(3) do |i| - feature_names += [ - "#{csv[6][i]} [#{csv[11][i]}]", - "#{csv[6][i+1]} #{csv[8][i+1]} [#{csv[11][i+1]}]", - "#{csv[6][i+2]} #{csv[8][i+2]}", - ] - end - feature_names += [ - "#{csv[6][28]}", - "#{csv[6][29]} #{csv[8][29]}", - "#{csv[6][30]} #{csv[8][30]}", - ] - (31..34).each do |i| - feature_names << "#{csv[6][i]} #{csv[8][i]} [#{csv[11][i]}]" - end - (35..36).each do |i| - feature_names << "#{csv[6][i]} #{csv[8][i]} #{csv[10][i]} [#{csv[11][i]}]" - end - data = {} - csv.drop(12).each do |row| - id = row.first - if id.match /^G/ # skip Ag, too many missing values - data[id] = {} - row.each_with_index do |col,i| - if i == 0 - data[id][:composition] = {} - elsif i < 5 - data[id][:composition][feature_names[i]] = col - elsif i == 5 - data[id][:tox] ||= {} - data[id][:tox][feature_names[i]] = col - elsif i > 6 - data[id][:physchem] ||= {} - data[id][:physchem][feature_names[i]] = col - end - end - end - end - File.open("data.json","w+"){|f| f.puts data.to_json} - data -end - -#puts data.to_yaml -=begin -R.assign "endpoint", endpoint -(0..data[0].size).each do |c| - if data.collect{|r| r[c]}.uniq.size > 1 - begin - R.assign "feature", data.collect{|r| r[c]} - R.eval "r <- cor(-log(endpoint),-log(feature),use='complete')" - r = R.eval("r").to_ruby - p "#{c}: #{r}" if r > 0.3 or r < -0.3 - rescue - end - end -end - - -csv[0..13].each do |row| - row.each_with_index do |col,i| - features[i] = features[i].to_s+", "+col.to_s - end -end - -puts features.select{|f| f.match(/Mean/)}.to_yaml - - #n+=1 - #p n,row.first unless row.first.match /^[G|S]/ -=end -- cgit v1.2.3