From 18454895a9e426e3a6499ff268044687a996993e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 12 Jul 2013 18:12:18 +0200 Subject: value type identification/conversion fixed --- lib/dataset.rb | 35 ++++++++++++++--------------------- lib/opentox.rb | 3 --- lib/validation.rb | 4 ++-- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index c2ff833..cb64406 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -64,24 +64,13 @@ module OpenTox RestClientWrapper.get(service_uri,{:query => sparql},{:accept => "text/uri-list", :subjectid => @subjectid}).split("\n").each do |row| r,c,v = row.split("\t") @data_entries[r.to_i] ||= [] - #v = v.to_f if v.numeric? - #v = nil if v.is_a? String and v.empty? - @data_entries[r.to_i][c.to_i] = v - end - # TODO: fallbacks for external and unordered datasets - features.each_with_index do |feature,i| - if feature[RDF.type].include? RDF::OT.NumericFeature - if feature[RDF.type].include? RDF::OT.NominalFeature - if feature[RDF.type].include? RDF::OT.StringFeature - @data_entries.each { |row| row[i] = row[i].to_s if row[i] } - else - @data_entries.each { |row| row[i] = row[i] if row[i] } - end - else - @data_entries.each { |row| row[i] = row[i].to_f if row[i] } + # adjust value class depending on feature type, StringFeature takes precedence over NumericFeature + if features[c.to_i][RDF.type].include? RDF::OT.NumericFeature and ! features[c.to_i][RDF.type].include? RDF::OT.StringFeature + v = v.to_f if v end + @data_entries[r.to_i][c.to_i] = v if v end - end + # TODO: fallbacks for external and unordered datasets end @data_entries end @@ -131,7 +120,8 @@ module OpenTox if prediction_feature and confidence_feature compounds.each do |compound| value = values(compound,prediction_feature).first - confidence = values(compound,confidence_feature).first + value = value.to_f if prediction_feature[RDF.type].include? RDF::OT.NumericFeature and ! prediction_feature[RDF.type].include? RDF::OT.StringFeature + confidence = values(compound,confidence_feature).first.to_f predictions << {:compound => compound, :value => value, :confidence => confidence} if value and confidence end end @@ -147,6 +137,8 @@ module OpenTox def upload filename, wait=true uri = RestClientWrapper.put(@uri, {:file => File.new(filename)}, {:subjectid => @subjectid}) wait_for_task uri if URI.task?(uri) and wait + compounds true + features true metadata true @uri end @@ -160,13 +152,14 @@ module OpenTox row = @compounds.collect{|c| c.uri}.index(compound.uri) @features << feature unless @features.collect{|f| f.uri}.include?(feature.uri) col = @features.collect{|f| f.uri}.index(feature.uri) - @data_entries[row] ||= [] - if @data_entries[row][col] # duplicated values - #row = @compounds.size + if @data_entries[row] and @data_entries[row][col] # duplicated values @compounds << compound row = @compounds.collect{|c| c.uri}.rindex(compound.uri) end - @data_entries[row][col] = value + if value + @data_entries[row] ||= [] + @data_entries[row][col] = value + end end # TODO: remove? might be dangerous if feature ordering is incorrect diff --git a/lib/opentox.rb b/lib/opentox.rb index 93e2c52..73a33d3 100644 --- a/lib/opentox.rb +++ b/lib/opentox.rb @@ -156,10 +156,7 @@ module OpenTox # rdf serialization methods for all formats e.g. to_rdfxml send :define_method, "to_#{format}".to_sym do - puts format - puts self.inspect create_rdf - puts @rdf.to_s RDF::Writer.for(format).buffer(:encoding => Encoding::ASCII) do |writer| writer << @rdf end diff --git a/lib/validation.rb b/lib/validation.rb index 3b1cf13..565a366 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -53,7 +53,7 @@ module OpenTox # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [OpenTox::Validation] def self.create_training_test_split( params, subjectid=nil, waiting_task=nil ) - params[:subjectid] = subjectid if subjectid + #params[:subjectid] = subjectid if subjectid uri = OpenTox::RestClientWrapper.post( File.join($validation[:uri],"training_test_split"), params,{:content_type => "text/uri-list", :subjectid => subjectid},waiting_task ) Validation.new(wait_for_task(uri)) @@ -185,7 +185,7 @@ module OpenTox # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly # @return [OpenTox::Crossvalidation] def self.create( params, subjectid=nil, waiting_task=nil ) - params[:subjectid] = subjectid if subjectid + #params[:subjectid] = subjectid if subjectid uri = OpenTox::RestClientWrapper.post( File.join($validation[:uri],"crossvalidation"), params,{:content_type => "text/uri-list", :subjectid => subjectid},waiting_task ) uri = wait_for_task(uri) -- cgit v1.2.3