From bd0d062a05b97a6d875b6f94bdb77da4de9b512a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 8 Dec 2016 13:04:38 +0100 Subject: OpenTox::Validation::RepeatedCrossValidation fixed, stringi R library added --- ext/lazar/rinstall.R | 1 + lib/model.rb | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ext/lazar/rinstall.R b/ext/lazar/rinstall.R index 7023f60..98e612d 100644 --- a/ext/lazar/rinstall.R +++ b/ext/lazar/rinstall.R @@ -1,6 +1,7 @@ libdir = commandArgs(trailingOnly=TRUE)[1] repo = "https://stat.ethz.ch/CRAN/" #install.packages("Rserve",lib=libdir,repos=repo,dependencies=TRUE) +install.packages("stringi",lib=libdir,repos=repo,dependencies=TRUE); install.packages("iterators",lib=libdir,repos=repo,dependencies=TRUE); install.packages("foreach",lib=libdir,repos=repo,dependencies=TRUE); install.packages("gridExtra",lib=libdir,repos=repo,dependencies=TRUE); diff --git a/lib/model.rb b/lib/model.rb index e8b30ca..76ca0c9 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -364,7 +364,7 @@ module OpenTox end def repeated_crossvalidation - Validation::RepeatedCrossValidation.find repeated_crossvalidation_id + OpenTox::Validation::RepeatedCrossValidation.find repeated_crossvalidation_id end def crossvalidations @@ -386,7 +386,7 @@ module OpenTox training_dataset = Dataset.from_csv_file file model = Lazar.create training_dataset: training_dataset model_validation[:model_id] = model.id - model_validation[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id + model_validation[:repeated_crossvalidation_id] = OpenTox::Validation::RepeatedCrossValidation.create(model).id model_validation.save model_validation end @@ -410,7 +410,7 @@ module OpenTox ) model = LazarRegression.create prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms model_validation[:model_id] = model.id - repeated_cv = Validation::RepeatedCrossValidation.create model + repeated_cv = OpenTox::Validation::RepeatedCrossValidation.create model model_validation[:repeated_crossvalidation_id] = repeated_cv.id model_validation.save model_validation -- cgit v1.2.3 From 8e4cd2f6654de5b88cdad91eb0c5050be13222f1 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 12 Dec 2016 12:46:24 +0100 Subject: modena import workarounds --- lib/import.rb | 28 +++++++++++++++++++++++----- lib/nanoparticle.rb | 6 ++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lib/import.rb b/lib/import.rb index aa2ee75..38970e5 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -6,12 +6,12 @@ module OpenTox include OpenTox # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) - def self.import dir="." + def self.import datasets = {} bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] bundles.each do |bundle| - datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"]) - $logger.debug bundle["title"] + datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"].strip) + $logger.debug bundle["title"].strip nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"] nanoparticles.each_with_index do |np,n| core_id = nil @@ -29,7 +29,7 @@ module OpenTox compound.name = names.first compound.names = names.compact else - compound = Compound.find_or_create_by(:name => names.first,:names => names) + compound = Compound.find_or_create_by(:name => names.first,:names => names.compact) end compound.save if c["relation"] == "HAS_CORE" @@ -78,6 +78,24 @@ module OpenTox category = "P-CHEM" warnings = ["Category changed from TOX to P-CHEM"] end + if bundle["title"].match /MODENA/ # fix MODENA endpoint names + case study["protocol"]["category"]["term"] + when /BAO_0003009/ + warnings = ["Original name was '#{name}'"] + name = "Cell Viability Assay " + name + unless name.match(/SLOPE/) + end + when /BAO_0010001/ + warnings = ["Original name was '#{name}'"] + name = "ATP Assay " + name + when /NPO_1709/ + warnings = ["Original name was '#{name}'"] + name = "LDH Release Assay " + name + when /NPO_1911/ + warnings = ["Original name was '#{name}'"] + name = "MTT Assay " + name + end + end feature = klass.find_or_create_by( :name => name, :unit => unit, @@ -94,6 +112,7 @@ module OpenTox nanoparticle.save print "#{n}, " end + puts end datasets.each { |u,d| d.save } end @@ -119,4 +138,3 @@ module OpenTox end end - diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb index 02d9a89..06db4d2 100644 --- a/lib/nanoparticle.rb +++ b/lib/nanoparticle.rb @@ -40,6 +40,12 @@ module OpenTox properties[feature.id.to_s] << value properties[feature.id.to_s].uniq! when "TOX" + if feature.name.match("Cell Viability Assay") and !feature.name.match("SLOPE") # -log10 transformation + value = -Math.log10(value) + feature.unit = "-log10(#{feature.unit})" unless feature.unit.match "log10" + feature.warnings += ["-log10 transformed values"] unless feature.warnings.include? "-log10 transformed values" + feature.save + end dataset.add self, feature, value else warn "Unknown feature type '#{feature.category}'. Value '#{value}' not inserted." -- cgit v1.2.3 From 0813b486bc6b107a6970bae7cfb4ca262d7c5697 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 12 Dec 2016 13:02:29 +0100 Subject: 5 repeated cvs for enm --- lib/crossvalidation.rb | 33 --------------------------------- lib/model.rb | 4 ++-- 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 5a05955..bcb3ccf 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -96,39 +96,6 @@ module OpenTox crossvalidation_ids.collect{|id| CrossValidation.find(id)} end - def correlation_plot format: "png" - #unless correlation_plot_id - feature = Feature.find(crossvalidations.first.model.prediction_feature) - title = feature.name - title += "[#{feature.unit}]" if feature.unit and !feature.unit.blank? - tmpfile = "/tmp/#{id.to_s}_correlation.#{format}" - images = [] - crossvalidations.each_with_index do |cv,i| - x = [] - y = [] - cv.predictions.each do |sid,p| - x << p["measurements"].median - y << p["value"] - end - R.assign "measurement", x - R.assign "prediction", y - R.eval "all = c(measurement,prediction)" - R.eval "range = c(min(all), max(all))" - R.eval "image#{i} = qplot(prediction,measurement,main='#{title} #{i}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)" - R.eval "image#{i} = image#{i} + geom_abline(intercept=0, slope=1)" - images << "image#{i}" - - R.eval "ggsave(file='/home/ist/lazar/test/tmp#{i}.pdf', plot=image#{i})" - end - R.eval "pdf('#{tmpfile}')" - R.eval "grid.arrange(#{images.join ","},ncol=#{images.size})" - R.eval "dev.off()" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}") - correlation_plot_id = $gridfs.insert_one(file) - update(:correlation_plot_id => correlation_plot_id) - #end - $gridfs.find_one(_id: correlation_plot_id).data - end end end diff --git a/lib/model.rb b/lib/model.rb index 5a1c191..9c4a93f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -395,7 +395,7 @@ module OpenTox # find/import training_dataset training_dataset ||= Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - unless training_dataset # try to import from json dump + unless training_dataset # try to import Import::Enanomapper.import training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset @@ -410,7 +410,7 @@ module OpenTox ) model = LazarRegression.create prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms model_validation[:model_id] = model.id - repeated_cv = OpenTox::Validation::RepeatedCrossValidation.create model + repeated_cv = OpenTox::Validation::RepeatedCrossValidation.create model, 10, 5 model_validation[:repeated_crossvalidation_id] = repeated_cv.id model_validation.save model_validation -- cgit v1.2.3 From 68a87da10774a1af117d075b26302b0f8b85bf19 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 18 Dec 2016 12:43:41 +0100 Subject: modena import modifications removed --- lib/import.rb | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/lib/import.rb b/lib/import.rb index 38970e5..7a68335 100644 --- a/lib/import.rb +++ b/lib/import.rb @@ -78,24 +78,6 @@ module OpenTox category = "P-CHEM" warnings = ["Category changed from TOX to P-CHEM"] end - if bundle["title"].match /MODENA/ # fix MODENA endpoint names - case study["protocol"]["category"]["term"] - when /BAO_0003009/ - warnings = ["Original name was '#{name}'"] - name = "Cell Viability Assay " + name - unless name.match(/SLOPE/) - end - when /BAO_0010001/ - warnings = ["Original name was '#{name}'"] - name = "ATP Assay " + name - when /NPO_1709/ - warnings = ["Original name was '#{name}'"] - name = "LDH Release Assay " + name - when /NPO_1911/ - warnings = ["Original name was '#{name}'"] - name = "MTT Assay " + name - end - end feature = klass.find_or_create_by( :name => name, :unit => unit, -- cgit v1.2.3 From a5abdd27b8c9b3f1cf65a567bfd9eb17ecc91a72 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 20 Dec 2016 16:28:36 +0100 Subject: true_rate, predictivity stored in crossvalidation --- lib/validation-statistics.rb | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index b6f8a60..2202b79 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -6,8 +6,6 @@ module OpenTox self.accept_values = model.prediction_feature.accept_values self.confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} - true_rate = {} - predictivity = {} nr_instances = 0 predictions.each do |cid,pred| # TODO @@ -38,11 +36,11 @@ module OpenTox end end end - true_rate = {} - predictivity = {} + self.true_rate = {} + self.predictivity = {} accept_values.each_with_index do |v,i| - true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f - predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f + self.true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f + self.predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f end confidence_sum = 0 weighted_confusion_matrix.each do |r| @@ -60,8 +58,8 @@ module OpenTox :weighted_confusion_matrix => weighted_confusion_matrix, :accuracy => accuracy, :weighted_accuracy => weighted_accuracy, - :true_rate => true_rate, - :predictivity => predictivity, + :true_rate => self.true_rate, + :predictivity => self.predictivity, } end -- cgit v1.2.3