summary | refs | log | tree | commit | diff
path: root/nanoparticles.rb
diff options
context:
space:
mode:
Diffstat (limited to 'nanoparticles.rb')
-rw-r--r-- nanoparticles.rb 182
1 files changed, 0 insertions, 182 deletions
diff --git a/nanoparticles.rb b/nanoparticles.rb
deleted file mode 100644
index d3399e9..0000000
--- a/nanoparticles.rb
+++ /dev/null
@@ -1,182 +0,0 @@
-require 'rserve'
-require 'json'
-require 'yaml'
-require 'csv'
-
# Name of the toxicity endpoint this script predicts. It is used as the key
# into each particle's "tox" hash and as the key of the returned prediction.
# Frozen so the shared constant cannot be mutated by accident.
ENDPOINT = "Cell.association (Net cell association [mL/ug(Mg)])".freeze
-
# Reads ./relevant-features.json (resolved against the current working
# directory) and returns the parsed JSON document.
#
# Raises whatever File.read / JSON.parse raise when the file is missing or
# is not valid JSON.
def query_features
  JSON.parse(File.read("./relevant-features.json"))
end
-
# Predicts ENDPOINT for a query particle from the most similar entries of
# ./data.json.
#
# Steps:
# 1. For every entry of ./diff-features.json (feature => [first, second]),
#    stores params[second] - params[first] under params[feature].
#    NOTE: this mutates the params hash passed in by the caller.
# 2. Builds the query vector from the keys of ./relevant-features.json and
#    weights every feature by its "r" value.
# 3. Compares the query with each entry's "physchem" values using
#    weighted_cosine_similarity:
#      * similarity > 0.9999 -> recorded as the match (the last one wins)
#      * similarity > 0.95   -> recorded as a neighbor
# 4. The prediction is the similarity-weighted mean of log10(endpoint) over
#    the neighbors, transformed back with 10**x; nil when there are none.
#
# params - Hash of feature name => numeric value for the query particle.
#
# Returns a Hash with the keys :query (params including the difference
# features), :match ({id => entry} or nil), :prediction ({ENDPOINT => value})
# and :neighbors (entries sorted by descending "similarity", each extended
# with "similarity", "sim" and "id").
def predict(params)
  # calculate difference parameters
  diff_features = JSON.parse(File.read("./diff-features.json"))
  diff_features.each do |feature, originals|
    params[feature] = params[originals[1]] - params[originals[0]] # causes rounding errors!
  end

  relevant_features = JSON.parse(File.read("./relevant-features.json"))
  feature_names = relevant_features.keys
  weights = relevant_features.values.map { |v| v["r"] }
  # The query vector does not depend on the data row, so build it only once.
  query_values = feature_names.map { |f| params[f] }

  neighbors = []
  sim_sum = 0
  weighted_sum = 0
  match = nil
  JSON.parse(File.read("./data.json")).each do |id, categories|
    neighbor_values = feature_names.map { |f| categories["physchem"][f] }
    sim = weighted_cosine_similarity(query_values, neighbor_values, weights)
    if sim > 0.9999 # no exact match because of rounding errors
      match = { id => categories }
    elsif sim > 0.95
      neighbor = categories
      neighbor["similarity"] = sim
      neighbor["sim"] = cosine_similarity(query_values, neighbor_values)
      neighbor["id"] = id
      sim_sum += sim
      # Averaging happens in log10 space.
      weighted_sum += sim * Math.log10(categories["tox"][ENDPOINT])
      neighbors << neighbor
    end
  end

  neighbors.sort! { |x, y| y["similarity"] <=> x["similarity"] }
  prediction = sim_sum.zero? ? nil : 10**(weighted_sum / sim_sum)
  {
    :query => params,
    :match => match,
    :prediction => { ENDPOINT => prediction },
    :neighbors => neighbors
  }
end
-
class Object
  # Returns true when Kernel#Float accepts the receiver (e.g. 3, "1.5",
  # "1e3") and false otherwise (e.g. nil, "", "abc").
  #
  # Only the conversion errors raised by Kernel#Float are rescued, so an
  # unrelated failure is no longer silently turned into false.
  def numeric?
    Float(self)
    true
  rescue ArgumentError, TypeError, RangeError
    false
  end
end
-
# Euclidean distance between two equally long numeric vectors.
#
# a, b - Arrays of numbers; elements are paired by position.
#
# Returns a Float (0.0 for two empty arrays).
def euclidean_distance(a, b)
  # Block parameters get their own names instead of shadowing a and b.
  squared_diffs = a.zip(b).map { |x, y| (x - y)**2 }
  Math.sqrt(squared_diffs.inject(0) { |sum, d| sum + d })
end
-
# Dot product of two equally long numeric vectors.
#
# a, b - Arrays of numbers; elements are paired by position.
#
# Returns the sum of the pairwise products (0 for empty arrays).
def dot_product(a, b)
  # Fold directly over the pairs; no shadowing of a and b, no temporary array.
  a.zip(b).inject(0) { |sum, (x, y)| sum + x * y }
end
-
# Length (Euclidean norm) of a numeric vector.
#
# point - Array of numbers.
#
# Returns a Float (0.0 for an empty array).
def magnitude(point)
  sum_of_squares = point.inject(0) { |total, coordinate| total + coordinate**2 }
  Math.sqrt(sum_of_squares)
end
-
# Cosine similarity of two numeric vectors: their dot product divided by the
# product of their magnitudes. A zero-length vector makes the denominator 0.0,
# so the result is NaN in that case.
# http://stackoverflow.com/questions/1838806/euclidean-distance-vs-pearson-correlation-vs-cosine-similarity
def cosine_similarity(a, b)
  numerator = dot_product(a, b)
  denominator = magnitude(a) * magnitude(b)
  numerator / denominator
end
-
# Cosine similarity in which every dimension is scaled by the absolute value
# of its weight, so the sign of a weight has no influence.
#
# a, b - Arrays of numbers; elements are paired by position.
# w    - Array of numeric weights, one per element of a.
#
# Returns a Float; NaN when either weighted norm is zero (including when a is
# empty).
def weighted_cosine_similarity(a, b, w)
  weighted_dot = 0
  weighted_norm_a = 0
  weighted_norm_b = 0
  a.zip(b, w).each do |x, y, weight|
    factor = weight.abs
    weighted_dot += factor * x * y
    weighted_norm_a += factor * x**2
    weighted_norm_b += factor * y**2
  end
  weighted_dot / Math.sqrt(weighted_norm_a * weighted_norm_b)
end
-
-#@endpoint = @data.collect{|r| r[5]}
-
# Unimplemented placeholder: ignores query and always returns nil.
def neighbors(query)
  nil
end
-
# Converts data/MergedSheets_edit.csv into a nested Hash, writes it as JSON
# to data.json (both paths relative to the current working directory) and
# returns the Hash.
#
# Only the first 37 columns are used. Cells accepted by Object#numeric? are
# converted to Float. Feature names are assembled from header rows 0, 6, 8, 10
# and 11; data rows start at row 12. Only rows whose ID (column 0) starts with
# "G" are exported:
#   columns 1-4  -> :composition
#   column  5    -> :tox
#   columns 7-36 -> :physchem
# Column 6 is not exported.
#
# Returns a Hash of id => { :composition => {...}, :tox => {...}, :physchem => {...} }.
def csv2json
  csv = CSV.read("data/MergedSheets_edit.csv")
  csv.collect! { |row| row[0..36].collect { |c| c.numeric? ? c.to_f : c } }

  feature_names = [
    "ID",
    csv[0][1],
    csv[0][2],
    csv[0][3],
    csv[6][4],
    "#{csv[0][5]} (#{csv[6][5]} [#{csv[11][5]}])", # endpoint
    "#{csv[0][6]} (#{csv[6][6]})", # endpoint
    "#{csv[6][7]} [#{csv[11][7]}]",
    "#{csv[6][8]} [#{csv[11][8]}]",
    "#{csv[6][9]} [#{csv[11][9]}]",
  ]
  # Columns 10-27 come in six groups of three.
  (10..10 + 5 * 3).step(3) do |i|
    feature_names += [
      "#{csv[6][i]} [#{csv[11][i]}]",
      "#{csv[6][i + 1]} #{csv[8][i + 1]} [#{csv[11][i + 1]}]",
      "#{csv[6][i + 2]} #{csv[8][i + 2]}",
    ]
  end
  feature_names += [
    "#{csv[6][28]}",
    "#{csv[6][29]} #{csv[8][29]}",
    "#{csv[6][30]} #{csv[8][30]}",
  ]
  (31..34).each do |i|
    feature_names << "#{csv[6][i]} #{csv[8][i]} [#{csv[11][i]}]"
  end
  (35..36).each do |i|
    feature_names << "#{csv[6][i]} #{csv[8][i]} #{csv[10][i]} [#{csv[11][i]}]"
  end

  data = {}
  csv.drop(12).each do |row|
    id = row.first
    # skip Ag, too many missing values
    # to_s keeps blank rows (nil ID) and IDs that were converted to Float from
    # raising NoMethodError; such rows are simply skipped.
    next unless id.to_s.match(/^G/)

    entry = { :composition => {} }
    row.each_with_index do |col, i|
      name = feature_names[i]
      if i.zero?
        next # the ID itself is the key, not a feature
      elsif i < 5
        entry[:composition][name] = col
      elsif i == 5
        (entry[:tox] ||= {})[name] = col
      elsif i > 6
        (entry[:physchem] ||= {})[name] = col
      end
    end
    data[id] = entry
  end

  File.open("data.json", "w+") { |f| f.puts data.to_json }
  data
end
-
-#puts data.to_yaml
-=begin
-R.assign "endpoint", endpoint
-(0..data[0].size).each do |c|
- if data.collect{|r| r[c]}.uniq.size > 1
- begin
- R.assign "feature", data.collect{|r| r[c]}
- R.eval "r <- cor(-log(endpoint),-log(feature),use='complete')"
- r = R.eval("r").to_ruby
- p "#{c}: #{r}" if r > 0.3 or r < -0.3
- rescue
- end
- end
-end
-
-
-csv[0..13].each do |row|
- row.each_with_index do |col,i|
- features[i] = features[i].to_s+", "+col.to_s
- end
-end
-
-puts features.select{|f| f.match(/Mean/)}.to_yaml
-
- #n+=1
- #p n,row.first unless row.first.match /^[G|S]/
-=end