summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/lazar/rinstall.R1
-rw-r--r--lib/crossvalidation.rb33
-rw-r--r--lib/import.rb10
-rw-r--r--lib/model.rb10
-rw-r--r--lib/nanoparticle.rb6
-rw-r--r--lib/validation-statistics.rb14
6 files changed, 22 insertions, 52 deletions
diff --git a/ext/lazar/rinstall.R b/ext/lazar/rinstall.R
index 7023f60..98e612d 100644
--- a/ext/lazar/rinstall.R
+++ b/ext/lazar/rinstall.R
@@ -1,6 +1,7 @@
libdir = commandArgs(trailingOnly=TRUE)[1]
repo = "https://stat.ethz.ch/CRAN/"
#install.packages("Rserve",lib=libdir,repos=repo,dependencies=TRUE)
+install.packages("stringi",lib=libdir,repos=repo,dependencies=TRUE);
install.packages("iterators",lib=libdir,repos=repo,dependencies=TRUE);
install.packages("foreach",lib=libdir,repos=repo,dependencies=TRUE);
install.packages("gridExtra",lib=libdir,repos=repo,dependencies=TRUE);
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 5a05955..bcb3ccf 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -96,39 +96,6 @@ module OpenTox
crossvalidation_ids.collect{|id| CrossValidation.find(id)}
end
- def correlation_plot format: "png"
- #unless correlation_plot_id
- feature = Feature.find(crossvalidations.first.model.prediction_feature)
- title = feature.name
- title += "[#{feature.unit}]" if feature.unit and !feature.unit.blank?
- tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
- images = []
- crossvalidations.each_with_index do |cv,i|
- x = []
- y = []
- cv.predictions.each do |sid,p|
- x << p["measurements"].median
- y << p["value"]
- end
- R.assign "measurement", x
- R.assign "prediction", y
- R.eval "all = c(measurement,prediction)"
- R.eval "range = c(min(all), max(all))"
- R.eval "image#{i} = qplot(prediction,measurement,main='#{title} #{i}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
- R.eval "image#{i} = image#{i} + geom_abline(intercept=0, slope=1)"
- images << "image#{i}"
-
- R.eval "ggsave(file='/home/ist/lazar/test/tmp#{i}.pdf', plot=image#{i})"
- end
- R.eval "pdf('#{tmpfile}')"
- R.eval "grid.arrange(#{images.join ","},ncol=#{images.size})"
- R.eval "dev.off()"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}")
- correlation_plot_id = $gridfs.insert_one(file)
- update(:correlation_plot_id => correlation_plot_id)
- #end
- $gridfs.find_one(_id: correlation_plot_id).data
- end
end
end
diff --git a/lib/import.rb b/lib/import.rb
index aa2ee75..7a68335 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -6,12 +6,12 @@ module OpenTox
include OpenTox
# time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
- def self.import dir="."
+ def self.import
datasets = {}
bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
bundles.each do |bundle|
- datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
- $logger.debug bundle["title"]
+ datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"].strip)
+ $logger.debug bundle["title"].strip
nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
nanoparticles.each_with_index do |np,n|
core_id = nil
@@ -29,7 +29,7 @@ module OpenTox
compound.name = names.first
compound.names = names.compact
else
- compound = Compound.find_or_create_by(:name => names.first,:names => names)
+ compound = Compound.find_or_create_by(:name => names.first,:names => names.compact)
end
compound.save
if c["relation"] == "HAS_CORE"
@@ -94,6 +94,7 @@ module OpenTox
nanoparticle.save
print "#{n}, "
end
+ puts
end
datasets.each { |u,d| d.save }
end
@@ -119,4 +120,3 @@ module OpenTox
end
end
-
diff --git a/lib/model.rb b/lib/model.rb
index 38c1915..9c4a93f 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -364,8 +364,7 @@ module OpenTox
end
def repeated_crossvalidation
- # full class name required
- OpenTox::Validation::RepeatedCrossValidation.find repeated_crossvalidation_id
+ OpenTox::Validation::RepeatedCrossValidation.find repeated_crossvalidation_id # full class name required
end
def crossvalidations
@@ -387,8 +386,7 @@ module OpenTox
training_dataset = Dataset.from_csv_file file
model = Lazar.create training_dataset: training_dataset
model_validation[:model_id] = model.id
- # full class name required
- model_validation[:repeated_crossvalidation_id] = OpenTox::Validation::RepeatedCrossValidation.create(model).id
+ model_validation[:repeated_crossvalidation_id] = OpenTox::Validation::RepeatedCrossValidation.create(model).id # full class name required
model_validation.save
model_validation
end
@@ -397,7 +395,7 @@ module OpenTox
# find/import training_dataset
training_dataset ||= Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
- unless training_dataset # try to import from json dump
+ unless training_dataset # try to import
Import::Enanomapper.import
training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset
@@ -412,7 +410,7 @@ module OpenTox
)
model = LazarRegression.create prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms
model_validation[:model_id] = model.id
- repeated_cv = Validation::RepeatedCrossValidation.create model
+ repeated_cv = OpenTox::Validation::RepeatedCrossValidation.create model, 10, 5
model_validation[:repeated_crossvalidation_id] = repeated_cv.id
model_validation.save
model_validation
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 02d9a89..06db4d2 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -40,6 +40,12 @@ module OpenTox
properties[feature.id.to_s] << value
properties[feature.id.to_s].uniq!
when "TOX"
+ if feature.name.match("Cell Viability Assay") and !feature.name.match("SLOPE") # -log10 transformation
+ value = -Math.log10(value)
+ feature.unit = "-log10(#{feature.unit})" unless feature.unit.match "log10"
+ feature.warnings += ["-log10 transformed values"] unless feature.warnings.include? "-log10 transformed values"
+ feature.save
+ end
dataset.add self, feature, value
else
warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted."
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index b6f8a60..2202b79 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -6,8 +6,6 @@ module OpenTox
self.accept_values = model.prediction_feature.accept_values
self.confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
- true_rate = {}
- predictivity = {}
nr_instances = 0
predictions.each do |cid,pred|
# TODO
@@ -38,11 +36,11 @@ module OpenTox
end
end
end
- true_rate = {}
- predictivity = {}
+ self.true_rate = {}
+ self.predictivity = {}
accept_values.each_with_index do |v,i|
- true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
- predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
+ self.true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
+ self.predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
end
confidence_sum = 0
weighted_confusion_matrix.each do |r|
@@ -60,8 +58,8 @@ module OpenTox
:weighted_confusion_matrix => weighted_confusion_matrix,
:accuracy => accuracy,
:weighted_accuracy => weighted_accuracy,
- :true_rate => true_rate,
- :predictivity => predictivity,
+ :true_rate => self.true_rate,
+ :predictivity => self.predictivity,
}
end