summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgebele <gebele@in-silico.ch>2018-04-23 10:58:39 +0000
committergebele <gebele@in-silico.ch>2018-04-23 10:58:39 +0000
commitde849d5b6cc553cc6969dd6b9b486b808ac89f3d (patch)
tree85010b2683179df101cd643318978924fb41a937
parent19eb655f4af1a4631692989a30a59b7b78e6669b (diff)
batch download with extra duplicate on ID message column
-rw-r--r--application.rb65
-rw-r--r--batch.rb14
2 files changed, 58 insertions, 21 deletions
diff --git a/application.rb b/application.rb
index b567147..38ac934 100644
--- a/application.rb
+++ b/application.rb
@@ -159,27 +159,67 @@ get '/predict/csv/:task/:model/:filename/?' do
response['Content-Type'] = "text/csv"
task = Task.find params[:task].to_s
m = Model::Validation.find params[:model].to_s unless params[:model] == "Cramer"
+ dataset = Batch.find_by(:name => params[:filename])
+ warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
+ unless warnings.nil?
+ keys_array = []
+ warnings.split("\n").each do |warning|
+ text = warning.split("ID").first
+ numbers = warning.split("ID").last.split("and")
+ keys_array << numbers.collect{|n| n.strip.to_i}
+ end
+ @dups = {}
+ keys_array.each do |keys|
+ keys.each do |key|
+ @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n"
+ end
+ end
+ end
endpoint = (params[:model] == "Cramer") ? "Oral_toxicity_(Cramer_rules)" : (m.endpoint =~ /Mutagenicity/i ? "Consensus_mutagenicity" : "#{m.endpoint}_(#{m.species})")
tempfile = Tempfile.new
if params[:model] == "Cramer"
- tempfile.write(task.csv)
+ # add duplicate warning at the end of a line if ID matches
+ if @dups
+ lines = task.csv.split("\n")
+ header = lines.shift
+ out = ""
+ lines.each_with_index do |line,idx|
+ if @dups[idx+1]
+ out << "#{line.tr("\n","")},#{@dups[idx+1]}"
+ else
+ out << line+"\n"
+ end
+ end
+ tempfile.write(header+"\n"+out)
+ else
+ tempfile.write(task.csv)
+ end
else
header = task.csv
lines = []
task.predictions[params[:model]].each_with_index do |hash,idx|
identifier = hash.keys[0]
prediction_id = hash.values[0]
- if prediction_id.is_a? BSON::ObjectId
- lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
+ # add duplicate warning at the end of a line if ID matches
+ if @dups && @dups[idx+1]
+ if prediction_id.is_a? BSON::ObjectId
+ lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+ else
+ lines << "#{idx+1},#{identifier},#{p},#{@dups[idx+1]}"
+ end
else
- lines << "#{idx+1},#{identifier},#{p}\n"
+ if prediction_id.is_a? BSON::ObjectId
+ lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
+ else
+ lines << "#{idx+1},#{identifier},#{p}\n"
+ end
end
end
csv = header + lines.join("")
tempfile.write(csv)
end
tempfile.rewind
- send_file tempfile, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{endpoint}_#{params[:filename]}", :type => "text/csv", :disposition => "attachment"
+ send_file tempfile, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{endpoint}_#{params[:filename]}.csv", :type => "text/csv", :disposition => "attachment"
end
post '/predict/?' do
@@ -223,7 +263,7 @@ post '/predict/?' do
end
if @compounds.size == 0
- message = @dataset[:warnings]
+ message = @dataset.warnings
@dataset.delete
bad_request_error message
end
@@ -354,13 +394,6 @@ post '/predict/?' do
predictions["Cramer rules"] = output["cramer_rules"].collect{|rule| rule != "nil" ? rule : "none"}
predictions["Cramer rules, with extensions"] = output["cramer_rules_extensions"].collect{|rule| rule != "nil" ? rule : "none"}
predictions["compounds"] = @compounds
-
- if @dataset.warnings
- @dataset.warnings.each do |warning|
- w = warning.split(".").first
- csv << w.sub(/,/," and")+"\n"
- end
- end
# write csv
t[:csv] = csv
# write predictions
@@ -370,12 +403,6 @@ post '/predict/?' do
# save task
# append predictions as last action otherwise they won't save
# mongoid works with shallow copy via #dup
- if @dataset.warnings
- @dataset.warnings.each do |warning|
- w = warning.split(".").first
- @predictions["#{model}"] << w.sub(/,/," and")
- end
- end
t[:predictions] = @predictions
t.save
end#models
diff --git a/batch.rb b/batch.rb
index 2dd9359..0002279 100644
--- a/batch.rb
+++ b/batch.rb
@@ -65,12 +65,22 @@ module OpenTox
compound = nil
end
if compound.nil? # compound parsers may return nil
- #warn "Cannot parse #{compound_format} compound '#{identifier}' at line #{i+2} of #{source}, all entries are ignored."
- batch.compounds << "Cannot parse #{compound_format} compound '#{identifier}' at line #{i+2} of #{source}."
+ batch.warnings << "Cannot parse #{compound_format} compound '#{identifier}' at line #{i+2} of #{source}."
next
end
batch.compounds << compound.id
end
+ batch.compounds.duplicates.each do |duplicate|
+ dup = Compound.find duplicate
+ positions = []
+ batch.compounds.each_with_index do |co,i|
+ c = Compound.find co
+ if !c.blank? and c.inchi and c.inchi == dup.inchi
+ positions << i+1
+ end
+ end
+ batch.warnings << "Duplicate compound at ID #{positions.join(' and ')}."
+ end
batch.save
end
batch