From 35423518f2c620926cb7f8818fa3bea86190dea1 Mon Sep 17 00:00:00 2001 From: gebele Date: Thu, 28 Jun 2018 11:59:22 +0000 Subject: allow ID column for uploads --- application.rb | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- batch.rb | 10 ++++++++- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/application.rb b/application.rb index 0b3ebe3..182df89 100644 --- a/application.rb +++ b/application.rb @@ -172,6 +172,7 @@ get '/predict/csv/:task/:model/:filename/?' do task = Task.find params[:task].to_s m = Model::Validation.find params[:model].to_s unless params[:model] == "Cramer" dataset = Batch.find_by(:name => filename) + @ids = dataset.ids warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n") unless warnings.nil? keys_array = [] @@ -196,6 +197,11 @@ get '/predict/csv/:task/:model/:filename/?' do header = lines.shift out = "" lines.each_with_index do |line,idx| + if !@ids.blank? + arr = line.spli(",") + arr.insert(1, @ids[idx]) + line = arr.join(",") + end if @dups[idx+1] out << "#{line.tr("\n","")},#{@dups[idx+1]}" else @@ -215,15 +221,31 @@ get '/predict/csv/:task/:model/:filename/?' do # add duplicate warning at the end of a line if ID matches if @dups && @dups[idx+1] if prediction_id.is_a? BSON::ObjectId - lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + end else - lines << "#{idx+1},#{identifier},#{p},#{@dups[idx+1]}" + if @ids.blank? + lines << "#{idx+1},#{identifier},#{p},#{@dups[idx+1]}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{p},#{@dups[idx+1]}" + end end else if prediction_id.is_a? BSON::ObjectId - lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}" + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}" + end else - lines << "#{idx+1},#{identifier},#{p}\n" + if @ids.blank? + lines << "#{idx+1},#{identifier},#{p}\n" + else + lines << "#{idx+1},#{@ids[idx]}#{identifier},#{p}\n" + end end end end @@ -241,6 +263,7 @@ post '/predict/?' do @dataset = Batch.find params[:existing].keys[0] @compounds = @dataset.compounds @identifiers = @dataset.identifiers + @ids = @dataset.ids @filename = @dataset.name end if !params[:fileselect].blank? @@ -254,6 +277,7 @@ post '/predict/?' do $logger.debug "Take file from database." @compounds = @dataset.compounds @identifiers = @dataset.identifiers + @ids = @dataset.ids else File.open('tmp/' + params[:fileselect][:filename], "w") do |f| f.write(params[:fileselect][:tempfile].read) @@ -264,6 +288,7 @@ post '/predict/?' do @dataset = input @compounds = @dataset.compounds @identifiers = @dataset.identifiers + @ids = @dataset.ids else File.delete File.join("tmp", params[:fileselect][:filename]) bad_request_error "Could not serialize file '#{@filename}'." @@ -296,17 +321,30 @@ post '/predict/?' do if type == "Regression" unit = (type == "Regression") ? "(#{m.unit})" : "" converted_unit = (type == "Regression") ? "#{m.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : "" - header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ + if @ids.blank? + header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ + "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\ + "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\ + "inApplicabilityDomain,Note\n" + else + header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements #{unit},Prediction #{unit},Prediction #{converted_unit},"\ "Prediction Interval Low #{unit},Prediction Interval High #{unit},"\ "Prediction Interval Low #{converted_unit},Prediction Interval High #{converted_unit},"\ "inApplicabilityDomain,Note\n" + end end # add header for classification if type == "Classification" av = m.prediction_feature.accept_values - header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Consensus Prediction,Consensus Confidence,"\ + if @ids.blank? + header = "ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Consensus Prediction,Consensus Confidence,"\ + "Structural alerts for mutagenicity,Lazar Prediction,"\ + "Lazar predProbability #{av[0]},Lazar predProbability #{av[1]},inApplicabilityDomain,Note\n" + else + header = "ID,Original ID,Input,Endpoint,Unique SMILES,inTrainingSet,Measurements,Consensus Prediction,Consensus Confidence,"\ "Structural alerts for mutagenicity,Lazar Prediction,"\ "Lazar predProbability #{av[0]},Lazar predProbability #{av[1]},inApplicabilityDomain,Note\n" + end end # predict compounds p = 100.0/@compounds.size @@ -395,12 +433,22 @@ post '/predict/?' do output["cramer_rules"] = prediction.collect{|array| array.collect{|hash| hash["Cramer rules"]}}.flatten.compact output["cramer_rules_extensions"] = prediction.collect{|array| array.collect{|hash| hash["Cramer rules, with extensions"]}}.flatten.compact # header - csv = "ID,Input,Endpoint,Unique SMILES,Cramer rules,Cramer rules with extensions\n" + if @ids.blank? + csv = "ID,Input,Endpoint,Unique SMILES,Cramer rules,Cramer rules with extensions\n" + else + csv = "ID,Original ID,Input,Endpoint,Unique SMILES,Cramer rules,Cramer rules with extensions\n" + end # content compounds.each_with_index do |smiles, idx| - csv << "#{idx+1},#{@identifiers[idx]},#{output["model_name"]},#{smiles},"\ + if @ids.blank? + csv << "#{idx+1},#{@identifiers[idx]},#{output["model_name"]},#{smiles},"\ + "#{output["cramer_rules"][idx] != "nil" ? output["cramer_rules"][idx] : "none" },"\ + "#{output["cramer_rules_extensions"][idx] != "nil" ? output["cramer_rules_extensions"][idx] : "none"}\n" + else + csv << "#{idx+1},#{@ids[idx]},#{@identifiers[idx]},#{output["model_name"]},#{smiles},"\ "#{output["cramer_rules"][idx] != "nil" ? output["cramer_rules"][idx] : "none" },"\ "#{output["cramer_rules_extensions"][idx] != "nil" ? output["cramer_rules_extensions"][idx] : "none"}\n" + end end predictions = {} predictions["Cramer rules"] = output["cramer_rules"].collect{|rule| rule != "nil" ? rule : "none"} diff --git a/batch.rb b/batch.rb index 0002279..2f99000 100644 --- a/batch.rb +++ b/batch.rb @@ -12,6 +12,7 @@ module OpenTox field :name, type: String field :source, type: String field :identifiers, type: Array + field :ids, type: Array field :compounds, type: Array field :warnings, type: Array, default: [] @@ -24,7 +25,14 @@ module OpenTox else $logger.debug "Parsing #{file}." table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8' - batch = self.new(:source => source, :name => name, :identifiers => [], :compounds => []) + batch = self.new(:source => source, :name => name, :identifiers => [], :ids => [], :compounds => []) + + # original IDs + if table[0][0] =~ /ID/i + ids = table.collect{|row| row.shift} + ids.shift + batch.ids = ids + end # features feature_names = table.shift.collect{|f| f.strip} -- cgit v1.2.3