From 7e824ae1a52ee27bda90dd08783aef0ab3a539a9 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 29 Jan 2016 12:23:14 +0100 Subject: internal calculation of difference features --- application.rb | 4 ++-- diff-features.json | 18 ++++++++++++++++++ nanoparticles.rb | 40 ++++++++++++++++++++++++++-------------- query-features.json | 15 +++++++++++++++ test/predict.rb | 14 ++++++++------ 5 files changed, 69 insertions(+), 22 deletions(-) create mode 100644 diff-features.json create mode 100644 query-features.json diff --git a/application.rb b/application.rb index 7ff09be..dee2ba9 100644 --- a/application.rb +++ b/application.rb @@ -5,8 +5,8 @@ also_reload './nanoparticles.rb' get '/?' do data = JSON.parse(File.read("./data.json")) - relevant_features = JSON.parse(File.read("./relevant-features.json")) - @example = data[data.keys.sample]["physchem"].select{|f,v| relevant_features.keys.include? f} + query_features = JSON.parse(File.read("./query-features.json")) + @example = data[data.keys.sample]["physchem"].select{|f,v| query_features.include? f} # create a data entry form with @example as default values end diff --git a/diff-features.json b/diff-features.json new file mode 100644 index 0000000..eced440 --- /dev/null +++ b/diff-features.json @@ -0,0 +1,18 @@ +{ + "ZETA POTENTIAL Change": [ + "ZETA POTENTIAL [mV]", + "ZETA POTENTIAL Human serum (Sigma #H4522) [mV]" + ], + "Localized Surface Plasmon Resonance (LSPR) index Change": [ + "Localized Surface Plasmon Resonance (LSPR) index", + "Localized Surface Plasmon Resonance (LSPR) index Human serum (Sigma #H4522)" + ], + "Polydispersity index Change": [ + "Polydispersity index [nm]", + "Polydispersity index Human serum (Sigma #H4522) [nm]" + ], + "Volume Mean Hydrodynamic Diameter Change": [ + "Volume Mean Hydrodynamic Diameter [nm]", + "Volume Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]" + ] +} diff --git a/nanoparticles.rb b/nanoparticles.rb index e34e509..d3399e9 100644 --- a/nanoparticles.rb +++ b/nanoparticles.rb @@ -5,7 +5,16 @@ require 'csv' ENDPOINT = "Cell.association (Net cell association [mL/ug(Mg)])" +def query_features + relevant_features = JSON.parse(File.read("./relevant-features.json")) +end + def predict params + # calculate difference parameters + diff_features = JSON.parse(File.read("./diff-features.json")) + diff_features.each do |feature,originals| + params[feature] = params[originals[1]]-params[originals[0]] # causes rounding errors! + end neighbors = [] sim_sum = 0 weighted_sum = 0 @@ -13,21 +22,24 @@ def predict params relevant_features = JSON.parse(File.read("./relevant-features.json")) weights = relevant_features.values.collect{|v| v["r"]} JSON.parse(File.read("./data.json")).each do |id,categories| - neighbor_values = categories["physchem"].select{|f,v| params.keys.include? f}.values - if params.values == neighbor_values + query_values = [] + neighbor_values = [] + relevant_features.keys.each do |f| + query_values << params[f] + neighbor_values << categories["physchem"][f] + end + sim = weighted_cosine_similarity(query_values,neighbor_values,weights) + if sim > 0.9999 # no exact match because of rounding errors match = {id => categories} - else - sim = weighted_cosine_similarity(params.values,neighbor_values,weights) - if sim > 0.95 - neighbor = categories - neighbor["similarity"] = sim - neighbor["sim"] = cosine_similarity(params.values,neighbor_values) - neighbor["id"] = id - sim_sum += sim - weighted_sum += sim*Math.log10(categories["tox"][ENDPOINT]) - #weighted_sum += sim*categories["tox"][ENDPOINT] - neighbors << neighbor - end + elsif sim > 0.95 + neighbor = categories + neighbor["similarity"] = sim + neighbor["sim"] = cosine_similarity(query_values,neighbor_values) + neighbor["id"] = id + sim_sum += sim + weighted_sum += sim*Math.log10(categories["tox"][ENDPOINT]) + #weighted_sum += sim*categories["tox"][ENDPOINT] + neighbors << neighbor end end neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]} diff --git a/query-features.json b/query-features.json new file mode 100644 index 0000000..81c5e1d --- /dev/null +++ b/query-features.json @@ -0,0 +1,15 @@ +[ + "Localized Surface Plasmon Resonance (LSPR) index", + "Localized Surface Plasmon Resonance (LSPR) index Human serum (Sigma #H4522)", + "ZETA POTENTIAL [mV]", + "ZETA POTENTIAL Human serum (Sigma #H4522) [mV]", + "Intensity Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]", + "Volume Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]", + "Z-Average Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]", + "Total surface area (SAtot) Human serum (Sigma #H4522) [cm^2]", + "Polydispersity index [nm]", + "Polydispersity index Human serum (Sigma #H4522) [nm]", + "Volume Mean Hydrodynamic Diameter [nm]", + "Volume Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]", + "Number Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]" +] diff --git a/test/predict.rb b/test/predict.rb index 9aed5e2..c72de02 100644 --- a/test/predict.rb +++ b/test/predict.rb @@ -1,9 +1,11 @@ require_relative "../nanoparticles.rb" data = JSON.parse(File.read("./data.json")) -relevant_features = JSON.parse(File.read("./relevant-features.json")) -example = data[data.keys.sample]["physchem"].select{|f,v| relevant_features.keys.include? f} -#data.collect -puts predict(example)[:match].collect{|id,v| v["tox"]}.first -puts predict(example)[:prediction] -#puts predict(example)[:neighbors].size +query_features = JSON.parse(File.read("./query-features.json")) +key = data.keys.sample +p key +example = data[key]["physchem"].select{|f,v| query_features.include? f} +prediction = predict(example) +puts prediction[:match].collect{|id,v| v["tox"]}.first +puts prediction[:prediction] +puts prediction[:neighbors].size -- cgit v1.2.3