 lib/algorithm.rb |   2
 lib/compound.rb  |   8
 lib/model.rb     |   3
 lib/parser.rb    |  32
 lib/utils.rb     | 504
 5 files changed, 347 insertions(+), 202 deletions(-)
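
The commit threads a new comma-separated "lib" selector (openbabel, joelib, cdk) from the lazar model through Algorithm.lookup into Compound#lookup, which now delegates missing descriptor values to the algorithm service's /pc/AllDescriptors resource. A minimal sketch of the updated lookup call follows; the compound, the feature dataset URI, the feature names and the "Descriptor" module name are illustrative assumptions, not part of this commit:

  require 'opentox-ruby'

  # Hypothetical inputs; substitute your own services and datasets.
  compound            = OpenTox::Compound.from_smiles("c1ccccc1")
  feature_dataset_uri = "http://localhost/dataset/1"
  features            = ["XLogP", "TopoPSA"]              # assumed descriptor feature names

  # New in this commit: :lib selects descriptor libraries, analogous to :pc_type.
  # The module holding self.lookup is outside the hunk below; "Descriptor" is a stand-in name.
  values = OpenTox::Algorithm::Descriptor.lookup(
    :compound            => compound,
    :features            => features,
    :feature_dataset_uri => feature_dataset_uri,
    :pc_type             => "constitutional,topological",
    :lib                 => "cdk,openbabel"
  )
  # => Hash with feature name as key and descriptor value as value (per the doc comment)
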
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index ebd2019..d38b79a 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -546,7 +546,7 @@ module OpenTox
     # @param [Hash] required keys: compound, features, feature_dataset_uri, pc_type
     # @return [Hash] Hash with matching Smarts and number of hits
     def self.lookup(params)
-      params[:compound].lookup(params[:features], params[:feature_dataset_uri],params[:pc_type])
+      params[:compound].lookup(params[:features], params[:feature_dataset_uri],params[:pc_type], params[:lib])
     end
   end
diff --git a/lib/compound.rb b/lib/compound.rb
index 6d3cb68..770f1b5 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -240,8 +240,9 @@ module OpenTox
     # @param [Array] Array of feature names
     # @param [String] Feature dataset uri
     # @param [String] Comma separated pc types
+    # @param [String] Comma separated lib
     # @return [Hash] Hash with feature name as key and value as value
-    def lookup(feature_array,feature_dataset_uri,pc_type,subjectid=nil)
+    def lookup(feature_array,feature_dataset_uri,pc_type,lib,subjectid=nil)
       ds = OpenTox::Dataset.find(feature_dataset_uri,subjectid)
       #entry = ds.data_entries[self.uri]
       entry = nil
@@ -252,10 +253,9 @@ module OpenTox
         end
       }
       LOGGER.debug "#{entry.size} entries in feature ds for query." unless entry.nil?
-
       if entry.nil?
-        temp_ds = OpenTox::Dataset.create; temp_ds.add_compound(self.uri)
-        uri = RestClientWrapper.post(temp_ds.save + "/pcdesc", {:pc_type => pc_type})
+        temp_ds = OpenTox::Dataset.create; temp_ds.add_compound(self.uri); temp_uri = temp_ds.save
+        uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"], "/pc/AllDescriptors"), {:dataset_uri => temp_uri, :pc_type => pc_type, :lib => lib})
         ds = OpenTox::Dataset.find(uri)
         entry = ds.data_entries[self.uri]
         ds.delete
diff --git a/lib/model.rb b/lib/model.rb
index b3de1a3..057c537 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -261,7 +261,8 @@ module OpenTox
            :compound => @compound,
            :features => @features,
            :feature_dataset_uri => @metadata[OT.featureDataset],
-           :pc_type => self.parameter(\"pc_type\")
+           :pc_type => self.parameter(\"pc_type\"),
+           :lib => self.parameter(\"lib\")
          })")

         # Adding fingerprint of query compound with features and values(p_value*nr_hits)
diff --git a/lib/parser.rb b/lib/parser.rb
index e871323..7641f14 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -395,19 +395,26 @@ module OpenTox

     def warnings
-      info = ''
+      info = '<br>'
       @feature_types.each do |feature,types|
+        @dataset.add_feature_metadata(feature,{RDF.type => []})
         if types.uniq.size == 0
-          type = "helper#MissingFeature" # TODO: Fit to OT ontology!
-        elsif types.uniq.size > 1
-          type = OT.NumericFeature
+          @dataset.add_feature_metadata(
+            feature, {RDF.type => ( @dataset.features[feature][RDF.type] << "helper#MissingFeature" ) } # TODO: Fit to OT ontology!
+          )
+          info += "'#{@dataset.feature_name(feature)}' detected as 'MissingFeature'<br>"
         else
-          type = types.first
+          info += "'#{@dataset.feature_name(feature)}' detected as "
+          types.uniq.each { |t|
+            @dataset.add_feature_metadata(
+              feature, {RDF.type => @dataset.features[feature][RDF.type] << t}
+            )
+            info += "'#{t.split('#').last}', "
+          }
+          info.chop!.chop!
+          info += "<br>"
         end
-        @dataset.add_feature_metadata(feature,{RDF.type => [type]})
-        info += "'#{@dataset.feature_name(feature)}' detected as '#{type.split('#').last}'<br>" if type
       end
-
       @dataset.metadata[OT.Info] = info

       warnings = ''
@@ -469,24 +476,27 @@ module OpenTox
         unless @duplicate_feature_indices.include? i
          value = row[i]
-         #LOGGER.warn "Missing values for #{id}" if value.size == 0 # String is empty
          feature = @features[feature_idx]
          type = feature_type(value) # May be NIL
-         type = OT.NominalFeature unless (type.nil? || regression_features[i])
          @feature_types[feature] << type if type
+         # Add nominal type if #distinct values le @max_class_values
+         if type == OT.NumericFeature
+           @feature_types[feature] << OT.NominalFeature unless regression_features[i]
+         end

          val = nil
          case type
          when OT.NumericFeature
            val = value.to_f
+           val = nil if val.infinite?
          when OT.NominalFeature
            val = value.to_s
          end

          feature_idx += 1

-         if val != nil
+         if val != nil
            @dataset.add(compound.uri, feature, val)
            if type != OT.NumericFeature
              @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
diff --git a/lib/utils.rb b/lib/utils.rb
index 40988db..6689e94 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -6,142 +6,273 @@ module OpenTox
   module Algorithm

+    @ambit_descriptor_algorithm_uri = "http://apps.ideaconsult.net:8080/ambit2/algorithm/org.openscience.cdk.qsar.descriptors.molecular."
+    @ambit_ds_service_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/"
+    @ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632"
+    @keysfile = File.join(ENV['HOME'], ".opentox", "config", "pc_descriptors.yaml")
+
     include OpenTox

     # Calculate physico-chemical descriptors.
-    # @param[Hash] Required keys: :dataset_uri, :pc_type, :rjb
+    # @param[Hash] required: :dataset_uri, :pc_type, :rjb, :task, :add_uri, optional: :descriptor, :lib
     # @return[String] dataset uri
     def self.pc_descriptors(params)

-      begin
-        ds = OpenTox::Dataset.find(params[:dataset_uri])
-        compounds = ds.compounds.collect
+      ds = OpenTox::Dataset.find(params[:dataset_uri])
+      compounds = ds.compounds.collect
+      task_weights = {"joelib"=> 20, "openbabel"=> 1, "cdk"=> 50 }
+      task_weights.keys.each { |step| task_weights.delete(step) if (params[:lib] && (!params[:lib].split(",").include?(step)))}
+      task_weights["load"] = 10
+      task_sum = Float task_weights.values.sum
+      task_weights.keys.each { |step| task_weights[step] /= task_sum }
+      task_weights.keys.each { |step| task_weights[step] = (task_weights[step]*100).floor }
+
+      jl_master=nil
+      cdk_master=nil
+      ob_master=nil

-        jl_master=nil
-        ambit_master=nil
-        # joelib via rjb
-        types = params[:pc_type].split(",")
+      # # # openbabel (via ruby bindings)
+      if !params[:lib] || params[:lib].split(",").include?("openbabel")
+        ob_master, ob_ids = get_ob_descriptors( { :compounds => compounds, :pc_type => params[:pc_type], :descriptor => params[:descriptor] } )
+        params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights["openbabel"]) if params[:task]
+      end

-        step= (1.0/types.size * 100).floor
-        if types.size && types.include?("joelib")
-          jl_master = get_jl_descriptors( { :compounds => compounds, :rjb => params[:rjb] } )
-          types.delete("joelib")
-        end
-        params[:task].progress(step) if params[:task]
+      # # # joelib (via rjb)
+      if !params[:lib] || params[:lib].split(",").include?("joelib")
+        jl_master, jl_ids = get_jl_descriptors( { :compounds => compounds, :rjb => params[:rjb], :pc_type => params[:pc_type], :descriptor => params[:descriptor] } )
+        params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights["joelib"]) if params[:task]
+      end

-        # ambit via REST
-        if types.size > 0
-          ambit_result_uri, smiles_to_inchi = get_ambit_descriptors( { :compounds => compounds, :pc_type => types.join(','), :task => params[:task], :step => step } )
-          LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'"
-          ambit_master = load_ds_csv(ambit_result_uri, smiles_to_inchi)
-        end
+      # # # cdk (via REST)
+      if !params[:lib] || params[:lib].split(",").include?("cdk")
+        ambit_result_uri, smiles_to_inchi, cdk_ids = get_cdk_descriptors( { :compounds => compounds, :pc_type => params[:pc_type], :task => params[:task], :step => task_weights["cdk"], :descriptor => params[:descriptor] } )
+        #LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'"
+        cdk_master, cdk_ids, ambit_ids = load_ds_csv(ambit_result_uri, smiles_to_inchi, cdk_ids )
+        params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights["load"]) if params[:task]
+      end

-        # Fuse CSVs
-        if jl_master && ambit_master
-          nr_cols = (jl_master[0].size)-1
-          LOGGER.debug "Merging #{nr_cols} new columns"
-          ambit_master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
-          jl_master.each do |row|
-            temp = ambit_master.assoc(row[0]) # Finds the appropriate line in master
-            ((-1*nr_cols)..-1).collect.each { |idx|
-              temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
-            }
-          end
-          master = ambit_master
-        else
-          master = jl_master if jl_master
-          master = ambit_master if ambit_master
+      # # # fuse CSVs ("master" structures)
+      if jl_master && cdk_master
+        nr_cols = (jl_master[0].size)-1
+        LOGGER.debug "Merging #{nr_cols} new columns"
+        cdk_master.each {|row| nr_cols.times { row.push(nil) } }
+        jl_master.each do |row|
+          temp = cdk_master.assoc(row[0]) # Finds the appropriate line in master
+          ((-1*nr_cols)..-1).collect.each { |idx|
+            temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+          }
         end
+        master = cdk_master
+      else # either jl_master or cdk_master nil
+        master = jl_master || cdk_master
+      end
+
+      if ob_master && master
+        nr_cols = (ob_master[0].size)-1
+        LOGGER.debug "Merging #{nr_cols} new columns"
+        master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
+        ob_master.each do |row|
+          temp = master.assoc(row[0]) # Finds the appropriate line in master
+          ((-1*nr_cols)..-1).collect.each { |idx|
+            temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+          }
+        end
+      else # either ob_master or master nil
+        master = ob_master || master
+      end
+
+      if master
+
+        LOGGER.debug master.collect { |row| row.join(",") }.join("\n")
+
+        ds = OpenTox::Dataset.find (
+          OpenTox::RestClientWrapper.post(
+            File.join(CONFIG[:services]["opentox-dataset"]), master.collect { |row| row.join(",") }.join("\n"), {:content_type => "text/csv"}
+          )
+        )
+
+        # # # add feature metadata
+        pc_descriptors = YAML::load_file(@keysfile)
+        ambit_ids && ambit_ids.each_with_index { |id,idx|
+          raise "Feature not found" if ! ds.features[File.join(ds.uri, "feature", id.to_s)]
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.description => "#{pc_descriptors[cdk_ids[idx]][:name]} [#{pc_descriptors[cdk_ids[idx]][:pc_type]}, #{pc_descriptors[cdk_ids[idx]][:lib]}]"})
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.creator => @ambit_descriptor_algorithm_uri + cdk_ids[idx]})
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{OT.hasSource => params[:dataset_uri]})
+        }
+        ob_ids && ob_ids.each { |id|
+          raise "Feature not found" if ! ds.features[File.join(ds.uri, "feature", id.to_s)]
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.description => "#{pc_descriptors[id][:name]} [#{pc_descriptors[id][:pc_type]}, #{pc_descriptors[id][:lib]}]"})
+          creator_uri = ds.uri.gsub(/\/dataset\/.*/, "/algorithm/pc")
+          creator_uri += "/#{id}" if params[:add_uri]
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.creator => creator_uri})
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{OT.hasSource => params[:dataset_uri]})
+        }
+        jl_ids && jl_ids.each { |id|
+          raise "Feature not found" if ! ds.features[File.join(ds.uri, "feature", id.to_s)]
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.description => "#{pc_descriptors[id][:name]} [#{pc_descriptors[id][:pc_type]}, #{pc_descriptors[id][:lib]}]"})
+          creator_uri = ds.uri.gsub(/\/dataset\/.*/, "/algorithm/pc")
+          creator_uri += "/#{id}" if params[:add_uri]
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{DC.creator => creator_uri})
+          ds.add_feature_metadata(File.join(ds.uri, "feature", id.to_s),{OT.hasSource => params[:dataset_uri]})
+        }

-        parser = OpenTox::Parser::Spreadsheets.new
-        ds = OpenTox::Dataset.new
-        ds.save
-        parser.dataset = ds
-        ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true)
         ds.save
+      else
+        raise "No descriptors matching your criteria found."
+      end
+
+    end
+
+
+    # Calculate OpenBabel physico-chemical descriptors.
+    # @param[Hash] required: :compounds, :pc_type, :task, optional: :descriptor
+    # @return[Array] CSV, array of field ids, array of field descriptions
+    def self.get_ob_descriptors(params)
+
+      master = nil
+
+      begin
+        csvfile = Tempfile.open(['ob_descriptors-','.csv'])
+
+        pc_descriptors = YAML::load_file(@keysfile)
+        ids = pc_descriptors.collect{ |id, info|
+          id if info[:lib] == "openbabel" && params[:pc_type].split(",").include?(info[:pc_type]) && (!params[:descriptor] || id == params[:descriptor])
+        }.compact
+
+        if ids.length > 0
+          csvfile.puts((["SMILES"] + ids).join(","))
+
+          # remember inchis
+          inchis = params[:compounds].collect { |c_uri|
+            URI.encode_www_form_component(OpenTox::Compound.new(c_uri).to_inchi)
+          }
+
+          # Process compounds
+          obmol = OpenBabel::OBMol.new
+          obconversion = OpenBabel::OBConversion.new
+          obconversion.set_in_and_out_formats 'inchi', 'can'
+
+          inchis.each_with_index { |inchi, c_idx|
+            row = [inchis[c_idx]]
+            obconversion.read_string(obmol, URI.decode_www_form_component(inchi))
+            ids.each { |name|
+              if obmol.respond_to?(name.underscore)
+                val = eval("obmol.#{name.underscore}") if obmol.respond_to?(name.underscore)
+              else
+                if name != "nF" && name != "spinMult" && name != "nHal" && name != "logP"
+                  val = OpenBabel::OBDescriptor.find_type(name.underscore).predict(obmol)
+                elsif name == "nF"
+                  val = OpenBabel::OBDescriptor.find_type("nf").predict(obmol)
+                elsif name == "spinMult" || name == "nHal" || name == "logP"
+                  val = OpenBabel::OBDescriptor.find_type(name).predict(obmol)
+                end
+              end
+              if OpenTox::Algorithm.numeric?(val)
+                val = Float(val)
+                val = nil if val.nan?
+                val = nil if (val && val.infinite?)
+              end
+              row << val
+            }
+            LOGGER.debug "Compound #{c_idx+1} (#{inchis.size}), #{row.size} entries"
+            csvfile.puts(row.join(","))
+            csvfile.flush
+          }
+          master = CSV::parse(File.open(csvfile.path, "rb").read)
+        end
       rescue Exception => e
         LOGGER.debug "#{e.class}: #{e.message}"
         LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+      ensure
+        csvfile.close!
      end
+      [ master, ids ]
+    end

-
-    # Calculates PC descriptors via JOELib2.
-    # @param[Hash] Required keys: :compounds, :rjb
-    # @return[String] dataset uri
+
+
+    # Calculate Joelib2 physico-chemical descriptors.
+    # @param[Hash] required: :compounds, :pc_type, :task, optional: :descriptor
+    # @return[Array] CSV, array of field ids, array of field descriptions
     def self.get_jl_descriptors(params)

-      s = params[:rjb]
       master = nil
-      raise "No Java environment" unless s
+      s = params[:rjb]; raise "No Java environment" unless s

       # Load keys, enter CSV headers
       begin
-        keysfile = File.join(ENV['HOME'], ".opentox", "config", "jl_keys.yaml")
-        csvfile = Tempfile.open(['jl_descriptors-csv-','.sdf'])
-        jl_keys = YAML::load_file(keysfile)
-        jl_colnames = jl_keys.collect{ |k|
-          k.split(".").last
-        }
-        csvfile.puts((["SMILES"] + jl_colnames).join(","))
+        csvfile = Tempfile.open(['jl_descriptors-','.csv'])

-        # remember inchis
-        inchis = params[:compounds].collect { |c_uri|
-          cmpd = OpenTox::Compound.new(c_uri)
-          URI.encode_www_form_component(cmpd.to_inchi)
-        }
+        pc_descriptors = YAML::load_file(@keysfile)
+        ids = pc_descriptors.collect{ |id, info|
+          id if info[:lib] == "joelib" && params[:pc_type].split(",").include?(info[:pc_type]) && (!params[:descriptor] || id == params[:descriptor])
+        }.compact

-        # Process compounds
-        params[:compounds].each_with_index { |c_uri, c_idx|
-          cmpd = OpenTox::Compound.new(c_uri)
-          inchi = cmpd.to_inchi
-          sdf_data = cmpd.to_sdf
-          infile = Tempfile.open(['jl_descriptors-in-','.sdf'])
-          outfile_path = infile.path.gsub(/jl_descriptors-in/,"jl_descriptors-out")
+        if ids.length > 0
+          csvfile.puts((["SMILES"] + ids).join(","))

-          begin
-            infile.puts sdf_data
-            infile.flush
-            s.new(infile.path, outfile_path)
-
-            row = [inchis[c_idx]]
-            jl_keys.each_with_index do |k,i| # Fill row
-              re = Regexp.new(k)
-              open(outfile_path) do |f|
-                f.each do |line|
-                  if @prev =~ re
-                    entry = line.chomp
-                    val = nil
-                    if OpenTox::Algorithm.numeric?(entry)
-                      val = Float(entry)
-                      val = nil if val.nan?
-                      val = nil if val.infinite?
+          # remember inchis
+          inchis = params[:compounds].collect { |c_uri|
+            cmpd = OpenTox::Compound.new(c_uri)
+            URI.encode_www_form_component(cmpd.to_inchi)
+          }
+
+          # Process compounds
+          params[:compounds].each_with_index { |c_uri, c_idx|
+            cmpd = OpenTox::Compound.new(c_uri)
+            inchi = cmpd.to_inchi
+            sdf_data = cmpd.to_sdf
+
+            infile = Tempfile.open(['jl_descriptors-in-','.sdf'])
+            outfile_path = infile.path.gsub(/jl_descriptors-in/,"jl_descriptors-out")
+
+            begin
+              infile.puts sdf_data
+              infile.flush
+              s.new(infile.path, outfile_path) # runs joelib
+
+              row = [inchis[c_idx]]
+              ids.each_with_index do |k,i| # Fill row
+                re = Regexp.new(k)
+                open(outfile_path) do |f|
+                  f.each do |line|
+                    if @prev == k
+                      entry = line.chomp
+                      val = nil
+                      if OpenTox::Algorithm.numeric?(entry)
+                        val = Float(entry)
+                        val = nil if val.nan?
+                        val = nil if (val && val.infinite?)
+                      end
+                      row << val
+                      break
                     end
-                    row << val
+                    @prev = line.gsub(/^.*types./,"").gsub(/count./,"").gsub(/>/,"").chomp if line =~ re
                   end
-                  @prev = line
                 end
               end
+              LOGGER.debug "Compound #{c_idx+1} (#{inchis.size}), #{row.size} entries"
+              csvfile.puts(row.join(","))
+              csvfile.flush
+
+            rescue Exception => e
+              LOGGER.debug "#{e.class}: #{e.message}"
+              LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+            ensure
+              File.delete(infile.path.gsub(/\.sdf/,".numeric.sdf"))
+              File.delete(outfile_path)
+              infile.close!
            end
-            csvfile.puts(row.join(","))
-            csvfile.flush
-
-          rescue Exception => e
-            LOGGER.debug "#{e.class}: #{e.message}"
-            LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
-          ensure
-            File.delete(infile.path.gsub(/\.sdf/,".numeric.sdf"))
-            File.delete(outfile_path)
-            infile.close!
-          end
-        }
-        master = CSV::parse(File.open(csvfile.path, "rb").read)
+          }
+          master = CSV::parse(File.open(csvfile.path, "rb").read)
+        end

       rescue Exception => e
         LOGGER.debug "#{e.class}: #{e.message}"
@@ -150,133 +281,136 @@ module OpenTox
         [ csvfile].each { |f| f.close! }
       end
-      master
+      [ master, ids ]
+
     end

+
+    # Calculate CDK physico-chemical descriptors via Ambit -- DO NOT OVERLOAD Ambit.
+    # @param[Hash] required: :compounds, :pc_type, :task, :step optional: :descriptor
+    # @return[Array] array of Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features, hash smiles to inchi, array of field descriptions
+    def self.get_cdk_descriptors(params)
+
+      ambit_result_uri = [] # 1st pos: base uri, then features
+      smiles_to_inchi = {}
+      task_weights = {"electronic"=> 4, "topological"=> 19, "constitutional"=> 12, "geometrical"=> 3, "hybrid"=> 2, "cpsa"=> 1 }
+      task_weights.keys.each { |pc_type| task_weights.delete(pc_type) if (params[:pc_type] && (!params[:pc_type].split(",").include?(pc_type)))}
+      task_sum = Float task_weights.values.sum
+      task_weights.keys.each { |pc_type| task_weights[pc_type] /= task_sum }
+      task_weights.keys.each { |pc_type| task_weights[pc_type] *= params[:step] }
+
-    # Calcul:compoundsates PC descriptors via Ambit -- DO NOT OVERLOAD Ambit.
-    # @param[Hash] Required keys: :compounds, :pc_type
-    # @return[Array] Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features
-    def self.get_ambit_descriptors(params)
+      # extract wanted descriptors from config file and parameters
+      pc_descriptors = YAML::load_file(@keysfile)

-      begin
+      ids = pc_descriptors.collect { |id, info|
+        "#{info[:pc_type]}:::#{id}" if info[:lib] == "cdk" && params[:pc_type].split(",").include?(info[:pc_type]) && (!params[:descriptor] || id == params[:descriptor])
+      }.compact

-        ambit_ds_service_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/"
-        ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632"
-        descs = YAML::load_file( File.join(ENV['HOME'], ".opentox", "config", "ambit_descriptors.yaml") )
-        descs_uris = []
-        types = params[:pc_type].split(",")
-        descs.each { |uri, cat_name|
-          if types.include? cat_name[:category]
-            descs_uris << "#{cat_name[:category]}:::#{uri}"
-          end
-        }
-        if descs_uris.size == 0
-          raise "Error! Empty set of descriptors. Did you supply one of [geometrical, topological, electronic, constitutional, hybrid, cpsa] ?"
-        end
-        descs_uris.sort!
-        descs_uris.collect! { |uri| uri.split(":::").last }
-        #LOGGER.debug "Ambit descriptor URIs: #{descs_uris.join(", ")}"
+      if ids.size > 0
+        ids.sort!
+        ids.collect! { |id| id.split(":::").last }
+        # create dataset at Ambit
         begin
-          # Create SMI
-          smiles_array = []; smiles_to_inchi = {}
           params[:compounds].each do |n|
             cmpd = OpenTox::Compound.new(n)
             smiles_string = cmpd.to_smiles
             smiles_to_inchi[smiles_string] = URI.encode_www_form_component(cmpd.to_inchi)
-            smiles_array << smiles_string
           end
-          smi_file = Tempfile.open(['pc_ambit', '.csv'])
-          pc_descriptors = nil
-
-          # Create Ambit dataset
-          smi_file.puts( "SMILES\n" )
-          smi_file.puts( smiles_array.join("\n") )
-          smi_file.flush
-          ambit_ds_uri = OpenTox::RestClientWrapper.post(ambit_ds_service_uri, {:file => File.new(smi_file.path)}, {:content_type => "multipart/form-data", :accept => "text/uri-list"} )
+          smi_file = Tempfile.open(['pc_ambit', '.csv']) ; smi_file.puts( "SMILES\n" + smiles_to_inchi.keys.join("\n") ) ; smi_file.flush
+          ambit_ds_uri = OpenTox::RestClientWrapper.post(@ambit_ds_service_uri, {:file => File.new(smi_file.path)}, {:content_type => "multipart/form-data", :accept => "text/uri-list"} )
+          ambit_result_uri = [ ambit_ds_uri + "?" ] # 1st pos: base uri, then features
         rescue Exception => e
           LOGGER.debug "#{e.class}: #{e.message}"
           LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
         ensure
           smi_file.close! if smi_file
         end
-        ambit_smiles_uri = OpenTox::RestClientWrapper.get(ambit_ds_uri + "/features", {:accept=> "text/uri-list"} ).chomp
-
-        # -C-a-l-c-u-l-a-t-e- -3-D- -f-o-r- -C-P-S-A-
-        # Always calculate 3D! See http://goo.gl/Tk81j
-        #if types.include? "cpsa"
-        ambit_ds_mopac_uri = OpenTox::RestClientWrapper.post(ambit_mopac_model_uri, {:dataset_uri => ambit_ds_uri}, {:accept => "text/uri-list"} )
-        LOGGER.debug "MOPAC dataset: #{ambit_ds_mopac_uri }"
-        #end
-
-        # Get Ambit results
-        ambit_result_uri = [] # 1st pos: base uri, then features
-        ambit_result_uri << ambit_ds_uri + "?"
+        # get SMILES feature URI
+        ambit_smiles_uri = OpenTox::RestClientWrapper.get(
+          ambit_ds_uri + "/features",
+          {:accept=> "text/uri-list"}
+        ).chomp
         ambit_result_uri << ("feature_uris[]=" + URI.encode_www_form_component(ambit_smiles_uri) + "&")
+        # always calculate 3D (http://goo.gl/Tk81j), then get results
+        OpenTox::RestClientWrapper.post(
+          @ambit_mopac_model_uri,
+          {:dataset_uri => ambit_ds_uri},
+          {:accept => "text/uri-list"}
+        )
         current_cat = ""
-        descs_uris.each_with_index do |uri, i|
-          old_cat = current_cat; current_cat = descs[uri][:category]
-          params[:task].progress(params[:task].metadata[OT.percentageCompleted] + params[:step]) if params[:task] && params[:step] && old_cat != current_cat && old_cat != ""
-          algorithm = Algorithm::Generic.new(uri)
+        ids.each_with_index do |id, i|
+          old_cat = current_cat; current_cat = pc_descriptors[id][:pc_type]
+          params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights[old_cat]) if params[:task] && old_cat != current_cat && old_cat != ""
+          algorithm = Algorithm::Generic.new(@ambit_descriptor_algorithm_uri+id)
           result_uri = algorithm.run({:dataset_uri => ambit_ds_uri})
           ambit_result_uri << result_uri.split("?")[1] + "&"
-          LOGGER.debug "Ambit (#{descs_uris.size}): #{i+1}"
+          LOGGER.debug "Ambit (#{ids.size}): #{i+1}"
         end
+        params[:task].progress(params[:task].metadata[OT.percentageCompleted] + task_weights[current_cat]) if params[:task]
         #LOGGER.debug "Ambit result: #{ambit_result_uri.join('')}"
-        [ ambit_result_uri, smiles_to_inchi ]
-
-      rescue Exception => e
-        LOGGER.debug "#{e.class}: #{e.message}"
-        LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
       end
+
+      [ ambit_result_uri, smiles_to_inchi, ids ]
+
     end

     # Load dataset via CSV
     # @param[Array] Ambit result uri, piecewise (1st: base, 2nd: SMILES, 3rd+: features
-    # @return[String] dataset uri
-    def self.load_ds_csv(ambit_result_uri, smiles_to_inchi)
+    # @param[Hash] keys: SMILES, values: InChIs
+    # @param[Array] field descriptions, one for each feature
+    # @return[Array] CSV, array of field ids, array of field descriptions
+    def self.load_ds_csv(ambit_result_uri, smiles_to_inchi, single_ids)
       master=nil
-      (1...ambit_result_uri.size).collect { |idx|
-        curr_uri = ambit_result_uri[0] + ambit_result_uri[idx]
-        LOGGER.debug "Requesting #{curr_uri}"
-        csv_data = CSV.parse( OpenTox::RestClientWrapper.get(curr_uri, {:accept => "text/csv"}) )
-        if csv_data[0] && csv_data[0].size>1
-          if master.nil? # This is the smiles entry
-            (1...csv_data.size).each{ |idx| csv_data[idx][1] = smiles_to_inchi[csv_data[idx][1]] }
-            master = csv_data
-            next
-          else
-            index_uri = csv_data[0].index("SMILES")
-            csv_data.map {|i| i.delete_at(index_uri)} if index_uri #Removes additional SMILES information
-
-            nr_cols = (csv_data[0].size)-1
-            LOGGER.debug "Merging #{nr_cols} new columns"
-            master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
-            csv_data.each do |row|
-              temp = master.assoc(row[0]) # Finds the appropriate line in master
-              ((-1*nr_cols)..-1).collect.each { |idx|
-                temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
-              }
+      ids=[]
+      ambit_ids=[]
+
+      if ambit_result_uri.size > 0
+        (1...ambit_result_uri.size).collect { |idx|
+          curr_uri = ambit_result_uri[0] + ambit_result_uri[idx]
+          #LOGGER.debug "Requesting #{curr_uri}"
+          csv_data = CSV.parse( OpenTox::RestClientWrapper.get(curr_uri, {:accept => "text/csv"}) )
+          if csv_data[0] && csv_data[0].size>1
+            if master.nil? # This is the smiles entry
+              (1...csv_data.size).each{ |idx| csv_data[idx][1] = smiles_to_inchi[csv_data[idx][1]] }
+              master = csv_data
+              next
+            else
+              index_uri = csv_data[0].index("SMILES")
+              csv_data.map {|i| i.delete_at(index_uri)} if index_uri #Removes additional SMILES information
+
+              nr_cols = (csv_data[0].size)-1
+              LOGGER.debug "Merging #{nr_cols} new columns"
+              ids += Array.new(nr_cols, single_ids[idx-2])
+              master.each {|row| nr_cols.times { row.push(nil) } } # Adds empty columns to all rows
+              csv_data.each do |row|
+                temp = master.assoc(row[0]) # Finds the appropriate line in master
+                ((-1*nr_cols)..-1).collect.each { |idx|
+                  temp[idx] = row[nr_cols+idx+1] if temp # Updates columns if line is found
+                }
+              end
             end
           end
-        end
-      }
        }

-      index_uri = master[0].index("Compound")
-      master.map {|i| i.delete_at(index_uri)}
-      master[0].each {|cell| cell.chomp!(" ")}
-      master[0][0] = "Compound" #"SMILES"
-      index_smi = master[0].index("SMILES")
-      master.map {|i| i.delete_at(index_smi)} if index_smi
-      master[0][0] = "SMILES"
+        index_uri = master[0].index("Compound")
+        master.map {|i| i.delete_at(index_uri)}
+        master[0].each {|cell| cell.chomp!(" ")}
+        master[0][0] = "Compound" #"SMILES"
+        index_smi = master[0].index("SMILES")
+        master.map {|i| i.delete_at(index_smi)} if index_smi
+        master[0][0] = "SMILES"
+        ambit_ids=master[0].collect
+        ambit_ids.shift
+      end

       #LOGGER.debug "-------- AM: Writing to dumpfile"
       #File.open("/tmp/test.csv", 'w') {|f| f.write( master.collect {|r| r.join(",")}.join("\n") ) }
-      master
+      [ master, ids, ambit_ids ]
+
     end
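
For reference, a hedged sketch of driving the reworked descriptor calculation in lib/utils.rb directly. Parameter names follow the updated doc comment of pc_descriptors; the dataset URI, the service configuration and the contents of ~/.opentox/config/pc_descriptors.yaml are assumptions about the deployment, not part of this commit:

  require 'opentox-ruby'

  params = {
    :dataset_uri => "http://localhost/dataset/42",   # hypothetical input dataset
    :pc_type     => "constitutional,topological",    # comma-separated descriptor groups
    :lib         => "cdk,openbabel",                 # comma-separated libraries (new); "joelib" additionally needs :rjb
    :rjb         => nil,                             # Rjb handle to the JOELib wrapper, only required for joelib
    :task        => nil,                             # optional OpenTox::Task used for progress reporting
    :add_uri     => true                             # append the descriptor id to the DC.creator feature URI
  }

  # Per the doc comment, returns the URI of a dataset holding one feature per calculated descriptor.
  result_uri = OpenTox::Algorithm.pc_descriptors(params)
  puts result_uri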