summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- application.rb 4
-rw-r--r-- diff-features.json 18
-rw-r--r-- nanoparticles.rb 40
-rw-r--r-- query-features.json 15
-rw-r--r-- test/predict.rb 14
5 files changed, 69 insertions, 22 deletions
diff --git a/application.rb b/application.rb
index 7ff09be..dee2ba9 100644
--- a/application.rb
+++ b/application.rb
@@ -5,8 +5,8 @@ also_reload './nanoparticles.rb'
get '/?' do
data = JSON.parse(File.read("./data.json"))
- relevant_features = JSON.parse(File.read("./relevant-features.json"))
- @example = data[data.keys.sample]["physchem"].select{|f,v| relevant_features.keys.include? f}
+ query_features = JSON.parse(File.read("./query-features.json"))
+ @example = data[data.keys.sample]["physchem"].select{|f,v| query_features.include? f}
# create a data entry form with @example as default values
end
diff --git a/diff-features.json b/diff-features.json
new file mode 100644
index 0000000..eced440
--- /dev/null
+++ b/diff-features.json
@@ -0,0 +1,18 @@
+{
+ "ZETA POTENTIAL Change": [
+ "ZETA POTENTIAL [mV]",
+ "ZETA POTENTIAL Human serum (Sigma #H4522) [mV]"
+ ],
+ "Localized Surface Plasmon Resonance (LSPR) index Change": [
+ "Localized Surface Plasmon Resonance (LSPR) index",
+ "Localized Surface Plasmon Resonance (LSPR) index Human serum (Sigma #H4522)"
+ ],
+ "Polydispersity index Change": [
+ "Polydispersity index [nm]",
+ "Polydispersity index Human serum (Sigma #H4522) [nm]"
+ ],
+ "Volume Mean Hydrodynamic Diameter Change": [
+ "Volume Mean Hydrodynamic Diameter [nm]",
+ "Volume Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]"
+ ]
+}
diff --git a/nanoparticles.rb b/nanoparticles.rb
index e34e509..d3399e9 100644
--- a/nanoparticles.rb
+++ b/nanoparticles.rb
@@ -5,7 +5,16 @@ require 'csv'
ENDPOINT = "Cell.association (Net cell association [mL/ug(Mg)])"
+def query_features
+ relevant_features = JSON.parse(File.read("./relevant-features.json"))
+end
+
def predict params
+ # calculate difference parameters
+ diff_features = JSON.parse(File.read("./diff-features.json"))
+ diff_features.each do |feature,originals|
+ params[feature] = params[originals[1]]-params[originals[0]] # causes rounding errors!
+ end
neighbors = []
sim_sum = 0
weighted_sum = 0
@@ -13,21 +22,24 @@ def predict params
relevant_features = JSON.parse(File.read("./relevant-features.json"))
weights = relevant_features.values.collect{|v| v["r"]}
JSON.parse(File.read("./data.json")).each do |id,categories|
- neighbor_values = categories["physchem"].select{|f,v| params.keys.include? f}.values
- if params.values == neighbor_values
+ query_values = []
+ neighbor_values = []
+ relevant_features.keys.each do |f|
+ query_values << params[f]
+ neighbor_values << categories["physchem"][f]
+ end
+ sim = weighted_cosine_similarity(query_values,neighbor_values,weights)
+ if sim > 0.9999 # no exact match because of rounding errors
match = {id => categories}
- else
- sim = weighted_cosine_similarity(params.values,neighbor_values,weights)
- if sim > 0.95
- neighbor = categories
- neighbor["similarity"] = sim
- neighbor["sim"] = cosine_similarity(params.values,neighbor_values)
- neighbor["id"] = id
- sim_sum += sim
- weighted_sum += sim*Math.log10(categories["tox"][ENDPOINT])
- #weighted_sum += sim*categories["tox"][ENDPOINT]
- neighbors << neighbor
- end
+ elsif sim > 0.95
+ neighbor = categories
+ neighbor["similarity"] = sim
+ neighbor["sim"] = cosine_similarity(query_values,neighbor_values)
+ neighbor["id"] = id
+ sim_sum += sim
+ weighted_sum += sim*Math.log10(categories["tox"][ENDPOINT])
+ #weighted_sum += sim*categories["tox"][ENDPOINT]
+ neighbors << neighbor
end
end
neighbors.sort!{|a,b| b["similarity"] <=> a["similarity"]}
diff --git a/query-features.json b/query-features.json
new file mode 100644
index 0000000..81c5e1d
--- /dev/null
+++ b/query-features.json
@@ -0,0 +1,15 @@
+[
+ "Localized Surface Plasmon Resonance (LSPR) index",
+ "Localized Surface Plasmon Resonance (LSPR) index Human serum (Sigma #H4522)",
+ "ZETA POTENTIAL [mV]",
+ "ZETA POTENTIAL Human serum (Sigma #H4522) [mV]",
+ "Intensity Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]",
+ "Volume Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]",
+ "Z-Average Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]",
+ "Total surface area (SAtot) Human serum (Sigma #H4522) [cm^2]",
+ "Polydispersity index [nm]",
+ "Polydispersity index Human serum (Sigma #H4522) [nm]",
+ "Volume Mean Hydrodynamic Diameter [nm]",
+ "Volume Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]",
+ "Number Mean Hydrodynamic Diameter Human serum (Sigma #H4522) [nm]"
+]
diff --git a/test/predict.rb b/test/predict.rb
index 9aed5e2..c72de02 100644
--- a/test/predict.rb
+++ b/test/predict.rb
@@ -1,9 +1,11 @@
require_relative "../nanoparticles.rb"
data = JSON.parse(File.read("./data.json"))
-relevant_features = JSON.parse(File.read("./relevant-features.json"))
-example = data[data.keys.sample]["physchem"].select{|f,v| relevant_features.keys.include? f}
-#data.collect
-puts predict(example)[:match].collect{|id,v| v["tox"]}.first
-puts predict(example)[:prediction]
-#puts predict(example)[:neighbors].size
+query_features = JSON.parse(File.read("./query-features.json"))
+key = data.keys.sample
+p key
+example = data[key]["physchem"].select{|f,v| query_features.include? f}
+prediction = predict(example)
+puts prediction[:match].collect{|id,v| v["tox"]}.first
+puts prediction[:prediction]
+puts prediction[:neighbors].size