summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2012-02-20 16:34:39 +0100
committer Andreas Maunz <andreas@maunz.de> 2012-02-20 16:34:39 +0100
commit adb09e03aafa521621b4bcb6e1d20855873b8840 (patch)
tree 11c0c287cd16bd2ab52cabf24fa6e8bb3fcd8db9
parent b98e761f7904ce26f44a66ef9a6c9fc29bd0c173 (diff)
1) Support for progress (task), 2) force numeric feature type
1) utils.rb 2) parser.rb
-rw-r--r-- lib/parser.rb 10
-rw-r--r-- lib/utils.rb 17
2 files changed, 18 insertions, 9 deletions
diff --git a/lib/parser.rb b/lib/parser.rb
index 18c0ba7..4b56786 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -349,8 +349,10 @@ module OpenTox
# Load CSV string (format specification: http://toxcreate.org/help)
# @param [String] csv CSV representation of the dataset
+ # @param [Boolean] drop_missing Whether completely missing rows should be dropped
+ # @param [Boolean] all_numeric Whether all features should be treated as numeric
# @return [OpenTox::Dataset] Dataset object with CSV data
- def load_csv(csv, drop_missing=false)
+ def load_csv(csv, drop_missing=false, all_numeric=false)
row = 0
input = csv.split("\n")
headers = split_row(input.shift)
@@ -362,7 +364,7 @@ module OpenTox
row = split_row(row)
value_maps = detect_new_values(row, value_maps)
value_maps.each_with_index { |vm,j|
- if vm.size > @max_class_values # max @max_class_values classes.
+ if (vm.size > @max_class_values) || all_numeric # max @max_class_values classes.
regression_features[j]=true
else
regression_features[j]=false
@@ -371,7 +373,7 @@ module OpenTox
}
input.each_with_index { |row, i|
- drop=false
+ drop = false
row = split_row(row)
raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
if row.include?("")
@@ -486,7 +488,7 @@ module OpenTox
feature_idx += 1
- if val != nil
+ if val != nil && !val.infinite? && !val.nan?
@dataset.add(compound.uri, feature, val)
if type != OT.NumericFeature
@dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
diff --git a/lib/utils.rb b/lib/utils.rb
index 705dd5d..709200b 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -22,14 +22,18 @@ module OpenTox
# joelib via rjb
types = params[:pc_type].split(",")
+
+ step= (1.0/types.size * 100).floor
if types.size && types.include?("joelib")
jl_master = get_jl_descriptors ( { :compounds => compounds, :rjb => params[:rjb] } )
types.delete("joelib")
end
+ params[:task].progress(step) if params[:task]
+
# ambit via REST
if types.size > 0
- ambit_result_uri, smiles_to_inchi = get_ambit_descriptors( { :compounds => compounds, :pc_type => types.join(',') } )
+ ambit_result_uri, smiles_to_inchi = get_ambit_descriptors( { :compounds => compounds, :pc_type => types.join(','), :task => params[:task], :step => step } )
LOGGER.debug "Ambit result uri for #{params.inspect}: '#{ambit_result_uri.to_yaml}'"
ambit_master = load_ds_csv(ambit_result_uri, smiles_to_inchi)
end
@@ -56,7 +60,7 @@ module OpenTox
ds = OpenTox::Dataset.new
ds.save
parser.dataset = ds
- ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"))
+ ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true)
ds.save
rescue Exception => e
@@ -73,7 +77,6 @@ module OpenTox
def self.get_jl_descriptors(params)
s = params[:rjb]
- LOGGER.debug("------ AM #{s}")
master = nil
raise "No Java environment" unless s
@@ -162,13 +165,14 @@ module OpenTox
ambit_mopac_model_uri = "http://apps.ideaconsult.net:8080/ambit2/model/69632"
descs = YAML::load_file( File.join(ENV['HOME'], ".opentox", "config", "ambit_descriptors.yaml") )
descs_uris = []
- params[:pc_type] = "electronic,cpsa" if params[:pc_type].nil? # rescue missing pc_type
types = params[:pc_type].split(",")
descs.each { |uri, cat_name|
if types.include? cat_name[:category]
- descs_uris << uri
+ descs_uris << "#{cat_name[:category]}:::#{uri}"
end
}
+ descs_uris.sort!
+ descs_uris.collect! { |uri| uri.split(":::").last }
if descs_uris.size == 0
raise "Error! Empty set of descriptors. Did you supply one of [geometrical, topological, electronic, constitutional, hybrid, cpsa] ?"
end
@@ -209,7 +213,10 @@ module OpenTox
ambit_result_uri = [] # 1st pos: base uri, then features
ambit_result_uri << ambit_ds_uri + "?"
ambit_result_uri << ("feature_uris[]=" + URI.encode_www_form_component(ambit_smiles_uri) + "&")
+ current_cat = ""
descs_uris.each_with_index do |uri, i|
+ old_cat = current_cat; current_cat = descs[uri][:category]
+ params[:task].progress(params[:task].metadata[OT.percentageCompleted] + params[:step]) if params[:task] && params[:step] && old_cat != current_cat && old_cat != ""
algorithm = Algorithm::Generic.new(uri)
result_uri = algorithm.run({:dataset_uri => ambit_ds_uri})
ambit_result_uri << result_uri.split("?")[1] + "&"