diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-02-20 16:34:39 +0100 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-02-20 16:34:39 +0100 |
commit | adb09e03aafa521621b4bcb6e1d20855873b8840 (patch) | |
tree | 11c0c287cd16bd2ab52cabf24fa6e8bb3fcd8db9 | |
parent | b98e761f7904ce26f44a66ef9a6c9fc29bd0c173 (diff) |
1) Support for progress (task), 2) force numeric feature type
1) utils.rb
2) parser.rb
-rw-r--r-- | lib/parser.rb | 10 | ||||
-rw-r--r-- | lib/utils.rb | 17 |
2 files changed, 18 insertions, 9 deletions
diff --git a/lib/parser.rb b/lib/parser.rb index 18c0ba7..4b56786 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -349,8 +349,10 @@ module OpenTox # Load CSV string (format specification: http://toxcreate.org/help) # @param [String] csv CSV representation of the dataset + # @param [Boolean] drop_missing Whether completely missing rows should be dropped + # @param [Boolean] all_numeric Whether all features should be treated as numeric # @return [OpenTox::Dataset] Dataset object with CSV data - def load_csv(csv, drop_missing=false) + def load_csv(csv, drop_missing=false, all_numeric=false) row = 0 input = csv.split("\n") headers = split_row(input.shift) @@ -362,7 +364,7 @@ module OpenTox row = split_row(row) value_maps = detect_new_values(row, value_maps) value_maps.each_with_index { |vm,j| - if vm.size > @max_class_values # max @max_class_values classes. + if (vm.size > @max_class_values) || all_numeric # max @max_class_values classes. regression_features[j]=true else regression_features[j]=false @@ -371,7 +373,7 @@ module OpenTox } input.each_with_index { |row, i| - drop=false + drop = false row = split_row(row) raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size if row.include?("") @@ -486,7 +488,7 @@ module OpenTox feature_idx += 1 - if val != nil + if val != nil && !val.infinite? && !val.nan? 
@dataset.add(compound.uri, feature, val) if type != OT.NumericFeature @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue] diff --git a/lib/utils.rb b/lib/utils.rb index 705dd5d..709200b 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -22,14 +22,18 @@ module OpenTox # joelib via rjb types = params[:pc_type].split(",") + + step= (1.0/types.size * 100).floor if types.size && types.include?("joelib") jl_master = get_jl_descriptors ( { :compounds => compounds, :rjb => params[:rjb] } ) types.delete("joelib") end + params[:task].progress(step) if params[:task] + # ambit via REST if types.size > 0 - ambit_result_uri, smiles_to_inchi = get_ambit_descriptors( { :compounds => compounds, :pc_type => types.join(',') } ) + ambit_result_uri, smiles_to_inchi = get_ambit_descriptors( { :compounds => compounds, :pc_type => types.join(','), :task => params[:task], :step => step } ) LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'" ambit_master = load_ds_csv(ambit_result_uri, smiles_to_inchi) end @@ -56,7 +60,7 @@ module OpenTox ds = OpenTox::Dataset.new ds.save parser.dataset = ds - ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n")) + ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true) ds.save rescue Exception => e @@ -73,7 +77,6 @@ module OpenTox def self.get_jl_descriptors(params) s = params[:rjb] - LOGGER.debug("------ AM #{s}") master = nil raise "No Java environment" unless s @@ -162,13 +165,14 @@ module OpenTox ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632" descs = YAML::load_file( File.join(ENV['HOME'], ".opentox", "config", "ambit_descriptors.yaml") ) descs_uris = [] - params[:pc_type] = "electronic,cpsa" if params[:pc_type].nil? # rescue missing pc_type types = params[:pc_type].split(",") descs.each { |uri, cat_name| if types.include? 
cat_name[:category] - descs_uris << uri + descs_uris << "#{cat_name[:category]}:::#{uri}" end } + descs_uris.sort! + descs_uris.collect! { |uri| uri.split(":::").last } if descs_uris.size == 0 raise "Error! Empty set of descriptors. Did you supply one of [geometrical, topological, electronic, constitutional, hybrid, cpsa] ?" end @@ -209,7 +213,10 @@ module OpenTox ambit_result_uri = [] # 1st pos: base uri, then features ambit_result_uri << ambit_ds_uri + "?" ambit_result_uri << ("feature_uris[]=" + URI.encode_www_form_component(ambit_smiles_uri) + "&") + current_cat = "" descs_uris.each_with_index do |uri, i| + old_cat = current_cat; current_cat = descs[uri][:category] + params[:task].progress(params[:task].metadata[OT.percentageCompleted] + params[:step]) if params[:task] && params[:step] && old_cat != current_cat && old_cat != "" algorithm = Algorithm::Generic.new(uri) result_uri = algorithm.run({:dataset_uri => ambit_ds_uri}) ambit_result_uri << result_uri.split("?")[1] + "&" |