From 9750e0309500259e9a56e267ce87984fb5bb5e53 Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Mon, 26 Nov 2018 15:29:26 +0000
Subject: clean out; better response codes; prepare for batch

---
 lib/dataset.rb | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 108 insertions(+), 14 deletions(-)

(limited to 'lib/dataset.rb')

diff --git a/lib/dataset.rb b/lib/dataset.rb
index 7c74f39..749167b 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -17,28 +17,122 @@ end
 
 # Get a dataset
 get "/dataset/:id/?" do
-  dataset = Dataset.find :id => params[:id]
-  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
-  case @accept
-  when "application/json"
-    dataset.data_entries.each do |k, v|
-      dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+  if Task.where(id: params[:id]).exists?
+    task = Task.find params[:id]
+    halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100
+    $logger.debug task.inspect
+    response['Content-Type'] = "text/csv"
+    m = Model::Validation.find task.model_id
+    dataset = Batch.find task.dataset_id
+    @ids = dataset.ids
+    warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
+    unless warnings.nil?
+      @parse = []
+      warnings.split("\n").each do |warning|
+        if warning =~ /^Cannot/
+          smi = warning.split("SMILES compound").last.split("at").first
+          line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i
+          @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n"
+        end
+      end
+      keys_array = []
+      warnings.split("\n").each do |warning|
+        if warning =~ /^Duplicate/
+          text = warning.split("ID").first
+          numbers = warning.split("ID").last.split("and")
+          keys_array << numbers.collect{|n| n.strip.to_i}
+        end
+      end
+      @dups = {}
+      keys_array.each do |keys|
+        keys.each do |key|
+          @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n"
+        end
+      end
+    end
+    $logger.debug "dups: #{@dups}"
+    endpoint = "#{m.endpoint}_(#{m.species})"
+    tempfile = Tempfile.new
+    header = task.csv
+    lines = []
+    $logger.debug task.predictions
+    task.predictions[m.id.to_s].each_with_index do |hash,idx|
+      identifier = hash.keys[0]
+      prediction_id = hash.values[0]
+      # add duplicate warning at the end of a line if ID matches
+      if @dups[idx+1]
+        if prediction_id.is_a? BSON::ObjectId
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+          end
+        else
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},\n"
+          else
+            lines << "#{idx+1},#{@ids[idx]}#{identifier},\n"
+          end
+        end
+      else
+        if prediction_id.is_a? BSON::ObjectId
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}"
+          end
+        else
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},\n"
+          else
+            lines << "#{idx+1},#{@ids[idx]}#{identifier},\n"
+          end
+        end
+      end
     end
-    dataset[:URI] = uri("/dataset/#{dataset.id}")
-    dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
-    dataset[:features] = uri("/dataset/#{dataset.id}/features")
-    return dataset.to_json
-  when "text/csv", "application/csv"
-    return dataset.to_csv
+    (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join(""))
+    #tempfile.write(header+lines.join(""))
+    tempfile.rewind
+    ########################
+=begin
+    header = task.csv
+    lines = []
+    task.predictions.each_with_index do |result,idx|
+      identifier = result[0]
+      prediction_id = result[1]
+      prediction = Prediction.find prediction_id
+      lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}"
+    end
+    return header+lines.join("\n")
+=end
+    return tempfile.read
   else
-    bad_request_error "Mime type #{@accept} is not supported."
+    dataset = Dataset.find :id => params[:id]
+    halt 400, "Dataset with id: #{params[:id]} not found." unless dataset
+    case @accept
+    when "application/json"
+      dataset.data_entries.each do |k, v|
+        dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+      end
+      dataset[:URI] = uri("/dataset/#{dataset.id}")
+      dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
+      dataset[:features] = uri("/dataset/#{dataset.id}/features")
+      return dataset.to_json
+    when "text/csv", "application/csv"
+      return dataset.to_csv
+    else
+      bad_request_error "Mime type #{@accept} is not supported."
+    end
   end
 end
 
 # Get a dataset attribute. One of compounds, nanoparticles, substances, features 
 get "/dataset/:id/:attribute/?" do
+  if Task.where(id: params[:id]).exists?
+    halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json
+  end
   dataset = Dataset.find :id => params[:id]
-  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
+  halt 400,  "Dataset with id: #{params[:id]} not found." unless dataset
   attribs = ["compounds", "nanoparticles", "substances", "features"]
   return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute]
   out = dataset.send("#{params[:attribute]}")
-- 
cgit v1.2.3