From 96a476a2331daa4d1d6b5ac444bbdbd2ac221a5f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 10 Sep 2015 12:54:18 +0200 Subject: tests fixed (crossvalidations may fail due to memory constraints) --- lib/compound.rb | 12 +++--------- lib/dataset.rb | 32 +------------------------------- lib/experiment.rb | 4 ++-- lib/model.rb | 7 +++++-- lib/overwrite.rb | 4 ++++ 5 files changed, 15 insertions(+), 44 deletions(-) (limited to 'lib') diff --git a/lib/compound.rb b/lib/compound.rb index 8f393f5..6adf3c0 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -218,11 +218,6 @@ module OpenTox obconversion.write_string(obmol).gsub(/\s/,'').chomp when /sdf/ p "SDF conversion" - # has no effect - #obconversion.add_option("gen3D", OpenBabel::OBConversion::GENOPTIONS) - # segfaults with openbabel git master - #OpenBabel::OBOp.find_type("Gen3D").do(obmol) - # TODO: find disconnected structures # strip_salts # separate @@ -234,14 +229,13 @@ p "SDF conversion" print sdf if sdf.match(/.nan/) -# TODO: fix or eliminate 2d generation $logger.warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure" obconversion.set_options("gen2D", OpenBabel::OBConversion::GENOPTIONS) - #OpenBabel::OBOp.find_type("Gen2D").do(obmol) sdf = obconversion.write_string(obmol) if sdf.match(/.nan/) - $logger.warn "2D generation failed for compound #{identifier}" - sdf = nil + $logger.warn "2D generation failed for compound #{identifier}, rendering without coordinates." + obconversion.remove_option("gen2D", OpenBabel::OBConversion::GENOPTIONS) + sdf = obconversion.write_string(obmol) end end sdf diff --git a/lib/dataset.rb b/lib/dataset.rb index 28d2120..851fabd 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -151,7 +151,7 @@ module OpenTox name = File.basename(file,".*") dataset = self.find_by(:source => source, :name => name) if dataset - $logger.debug "#{file} already in database." + $logger.debug "Skipping #{file}, it is already in the database (id: #{dataset.id})." else $logger.debug "Parsing #{file}." table = CSV.read file, :skip_blanks => true @@ -270,36 +270,6 @@ module OpenTox end -=begin - # TODO remove - - # Create a dataset with compounds and features - def self.create compounds, features, warnings=[], source=nil - dataset = Dataset.new(:warnings => warnings) - dataset.compounds = compounds - dataset.features = features - dataset - end - # merge dataset (i.e. append features) - def +(dataset) - bad_request_error "Dataset merge failed because the argument is not a OpenTox::Dataset but a #{dataset.class}" unless dataset.is_a? Dataset - bad_request_error "Dataset merge failed because compounds are unequal in datasets #{self.id} and #{dataset.id}" unless compound_ids == dataset.compound_ids - self.feature_ids ||= [] - self.feature_ids = self.feature_ids + dataset.feature_ids - @data_entries ||= Array.new(compound_ids.size){[]} - @data_entries.each_with_index do |row,i| - @data_entries[i] = row + dataset.fingerprint(compounds[i]) - end - self - - end - - def fingerprint(compound) - i = compound_ids.index(compound.id) - i.nil? ? nil : data_entries[i] - end -=end - # Fill unset data entries # @param any value def fill_nil_with n diff --git a/lib/experiment.rb b/lib/experiment.rb index 191e76e..2f51756 100644 --- a/lib/experiment.rb +++ b/lib/experiment.rb @@ -34,7 +34,7 @@ module OpenTox if cv $logger.debug "Creating #{cv} for #{model_algorithm}, dataset #{dataset.name}, with prediction_algorithm #{prediction_algorithm}, neighbor_algorithm #{neighbor_algorithm}, neighbor_algorithm_parameters #{neighbor_algorithm_parameter}." crossvalidation = cv.create model - crossvalidation_ids << crossvalidation.id + self.crossvalidation_ids << crossvalidation.id else $logger.warn "#{dataset.features.first} is neither nominal nor numeric." end @@ -55,7 +55,7 @@ module OpenTox def report # TODO create ggplot2 report - crossvalidation_ids.each do |id| + self.crossvalidation_ids.each do |id| cv = CrossValidation.find(id) file = "/tmp/#{id}.svg" File.open(file,"w+"){|f| f.puts cv.correlation_plot} diff --git a/lib/model.rb b/lib/model.rb index 36011a0..547144f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -138,16 +138,19 @@ module OpenTox end class LazarFminerClassification < LazarClassification - def self.create training_dataset + field :feature_calculation_parameters, type: Hash + + def self.create training_dataset, fminer_params={} model = super(training_dataset) model.update "_type" => self.to_s # adjust class model = self.find model.id # adjust class model.neighbor_algorithm = "OpenTox::Algorithm::Neighbor.fminer_similarity" model.neighbor_algorithm_parameters = { :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.smarts_match", - :feature_dataset_id => Algorithm::Fminer.bbrc(training_dataset).id, + :feature_dataset_id => Algorithm::Fminer.bbrc(training_dataset,fminer_params).id, :min_sim => 0.3 } + model.feature_calculation_parameters = fminer_params model.save model end diff --git a/lib/overwrite.rb b/lib/overwrite.rb index cb47527..08baa39 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -122,4 +122,8 @@ module URI false end + def self.task? uri + uri =~ /task/ and URI.valid? uri + end + end -- cgit v1.2.3