From 3a11ba2918795821600b7113d0758415718d263a Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Mon, 11 Jun 2018 12:46:06 +0200
Subject: combine gui with rest

---
 lib/dataset.rb | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 lib/dataset.rb

(limited to 'lib/dataset.rb')

diff --git a/lib/dataset.rb b/lib/dataset.rb
new file mode 100644
index 0000000..7c74f39
--- /dev/null
+++ b/lib/dataset.rb
@@ -0,0 +1,46 @@
+# Get all datasets
+get "/dataset/?" do
+  datasets = Dataset.all
+  case @accept
+  when "text/uri-list"
+    uri_list = datasets.collect{|dataset| uri("/dataset/#{dataset.id}")}
+    return uri_list.join("\n") + "\n"
+  when "application/json"
+    datasets = JSON.parse datasets.to_json
+    list = []
+    datasets.each{|d| list << uri("/dataset/#{d["_id"]["$oid"]}")}
+    return list.to_json
+  else
+    bad_request_error "Mime type #{@accept} is not supported."
+  end
+end
+
+# Get a dataset
+get "/dataset/:id/?" do
+  dataset = Dataset.find :id => params[:id]
+  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
+  case @accept
+  when "application/json"
+    dataset.data_entries.each do |k, v|
+      dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+    end
+    dataset[:URI] = uri("/dataset/#{dataset.id}")
+    dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
+    dataset[:features] = uri("/dataset/#{dataset.id}/features")
+    return dataset.to_json
+  when "text/csv", "application/csv"
+    return dataset.to_csv
+  else
+    bad_request_error "Mime type #{@accept} is not supported."
+  end
+end
+
+# Get a dataset attribute. One of compounds, nanoparticles, substances, features 
+get "/dataset/:id/:attribute/?" do
+  dataset = Dataset.find :id => params[:id]
+  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
+  attribs = ["compounds", "nanoparticles", "substances", "features"]
+  return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute]
+  out = dataset.send("#{params[:attribute]}")
+  return out.to_json
+end
-- 
cgit v1.2.3


From 9750e0309500259e9a56e267ce87984fb5bb5e53 Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Mon, 26 Nov 2018 15:29:26 +0000
Subject: clean out; better response codes; prepare for batch

---
 lib/dataset.rb | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 108 insertions(+), 14 deletions(-)

(limited to 'lib/dataset.rb')

diff --git a/lib/dataset.rb b/lib/dataset.rb
index 7c74f39..749167b 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -17,28 +17,122 @@ end
 
 # Get a dataset
 get "/dataset/:id/?" do
-  dataset = Dataset.find :id => params[:id]
-  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
-  case @accept
-  when "application/json"
-    dataset.data_entries.each do |k, v|
-      dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+  if Task.where(id: params[:id]).exists?
+    task = Task.find params[:id]
+    halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100
+    $logger.debug task.inspect
+    response['Content-Type'] = "text/csv"
+    m = Model::Validation.find task.model_id
+    dataset = Batch.find task.dataset_id
+    @ids = dataset.ids
+    warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
+    unless warnings.nil?
+      @parse = []
+      warnings.split("\n").each do |warning|
+        if warning =~ /^Cannot/
+          smi = warning.split("SMILES compound").last.split("at").first
+          line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i
+          @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n"
+        end
+      end
+      keys_array = []
+      warnings.split("\n").each do |warning|
+        if warning =~ /^Duplicate/
+          text = warning.split("ID").first
+          numbers = warning.split("ID").last.split("and")
+          keys_array << numbers.collect{|n| n.strip.to_i}
+        end
+      end
+      @dups = {}
+      keys_array.each do |keys|
+        keys.each do |key|
+          @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n"
+        end
+      end
+    end
+    $logger.debug "dups: #{@dups}"
+    endpoint = "#{m.endpoint}_(#{m.species})"
+    tempfile = Tempfile.new
+    header = task.csv
+    lines = []
+    $logger.debug task.predictions
+    task.predictions[m.id.to_s].each_with_index do |hash,idx|
+      identifier = hash.keys[0]
+      prediction_id = hash.values[0]
+      # add duplicate warning at the end of a line if ID matches
+      if @dups[idx+1]
+        if prediction_id.is_a? BSON::ObjectId
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
+          end
+        else
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},\n"
+          else
+            lines << "#{idx+1},#{@ids[idx]}#{identifier},\n"
+          end
+        end
+      else
+        if prediction_id.is_a? BSON::ObjectId
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
+          else
+            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}"
+          end
+        else
+          if @ids.blank?
+            lines << "#{idx+1},#{identifier},\n"
+          else
+            lines << "#{idx+1},#{@ids[idx]}#{identifier},\n"
+          end
+        end
+      end
     end
-    dataset[:URI] = uri("/dataset/#{dataset.id}")
-    dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
-    dataset[:features] = uri("/dataset/#{dataset.id}/features")
-    return dataset.to_json
-  when "text/csv", "application/csv"
-    return dataset.to_csv
+    (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join(""))
+    #tempfile.write(header+lines.join(""))
+    tempfile.rewind
+    ########################
+=begin
+    header = task.csv
+    lines = []
+    task.predictions.each_with_index do |result,idx|
+      identifier = result[0]
+      prediction_id = result[1]
+      prediction = Prediction.find prediction_id
+      lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}"
+    end
+    return header+lines.join("\n")
+=end
+    return tempfile.read
   else
-    bad_request_error "Mime type #{@accept} is not supported."
+    dataset = Dataset.find :id => params[:id]
+    halt 400, "Dataset with id: #{params[:id]} not found." unless dataset
+    case @accept
+    when "application/json"
+      dataset.data_entries.each do |k, v|
+        dataset.data_entries[k][:URI] = uri("/substance/#{k}")
+      end
+      dataset[:URI] = uri("/dataset/#{dataset.id}")
+      dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
+      dataset[:features] = uri("/dataset/#{dataset.id}/features")
+      return dataset.to_json
+    when "text/csv", "application/csv"
+      return dataset.to_csv
+    else
+      bad_request_error "Mime type #{@accept} is not supported."
+    end
   end
 end
 
 # Get a dataset attribute. One of compounds, nanoparticles, substances, features 
 get "/dataset/:id/:attribute/?" do
+  if Task.where(id: params[:id]).exists?
+    halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json
+  end
   dataset = Dataset.find :id => params[:id]
-  not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
+  halt 400,  "Dataset with id: #{params[:id]} not found." unless dataset
   attribs = ["compounds", "nanoparticles", "substances", "features"]
   return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute]
   out = dataset.send("#{params[:attribute]}")
-- 
cgit v1.2.3


From 741701df8ff0861b3607a30e9aaf8b8a0c303cdf Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Thu, 13 Jun 2019 15:28:59 +0000
Subject: update with API

---
 lib/dataset.rb | 133 ++++++---------------------------------------------------
 1 file changed, 13 insertions(+), 120 deletions(-)

(limited to 'lib/dataset.rb')

diff --git a/lib/dataset.rb b/lib/dataset.rb
index 749167b..00685b8 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -1,136 +1,29 @@
 # Get all datasets
-get "/dataset/?" do
-  datasets = Dataset.all
+get "/api/dataset/?" do
+  datasets = Dataset.all #.limit(100)
   case @accept
-  when "text/uri-list"
-    uri_list = datasets.collect{|dataset| uri("/dataset/#{dataset.id}")}
-    return uri_list.join("\n") + "\n"
   when "application/json"
-    datasets = JSON.parse datasets.to_json
-    list = []
-    datasets.each{|d| list << uri("/dataset/#{d["_id"]["$oid"]}")}
-    return list.to_json
+    list = datasets.collect{|dataset| uri("/api/dataset/#{dataset.id}")}.to_json
+    return list
   else
-    bad_request_error "Mime type #{@accept} is not supported."
+    halt 400, "Mime type #{@accept} is not supported."
   end
 end
 
 # Get a dataset
-get "/dataset/:id/?" do
-  if Task.where(id: params[:id]).exists?
-    task = Task.find params[:id]
-    halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100
-    $logger.debug task.inspect
-    response['Content-Type'] = "text/csv"
-    m = Model::Validation.find task.model_id
-    dataset = Batch.find task.dataset_id
-    @ids = dataset.ids
-    warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
-    unless warnings.nil?
-      @parse = []
-      warnings.split("\n").each do |warning|
-        if warning =~ /^Cannot/
-          smi = warning.split("SMILES compound").last.split("at").first
-          line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i
-          @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n"
-        end
-      end
-      keys_array = []
-      warnings.split("\n").each do |warning|
-        if warning =~ /^Duplicate/
-          text = warning.split("ID").first
-          numbers = warning.split("ID").last.split("and")
-          keys_array << numbers.collect{|n| n.strip.to_i}
-        end
-      end
-      @dups = {}
-      keys_array.each do |keys|
-        keys.each do |key|
-          @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n"
-        end
-      end
-    end
-    $logger.debug "dups: #{@dups}"
-    endpoint = "#{m.endpoint}_(#{m.species})"
-    tempfile = Tempfile.new
-    header = task.csv
-    lines = []
-    $logger.debug task.predictions
-    task.predictions[m.id.to_s].each_with_index do |hash,idx|
-      identifier = hash.keys[0]
-      prediction_id = hash.values[0]
-      # add duplicate warning at the end of a line if ID matches
-      if @dups[idx+1]
-        if prediction_id.is_a? BSON::ObjectId
-          if @ids.blank?
-            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
-          else
-            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
-          end
-        else
-          if @ids.blank?
-            lines << "#{idx+1},#{identifier},\n"
-          else
-            lines << "#{idx+1},#{@ids[idx]}#{identifier},\n"
-          end
-        end
-      else
-        if prediction_id.is_a? BSON::ObjectId
-          if @ids.blank?
-            lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}"
-          else
-            lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}"
-          end
-        else
-          if @ids.blank?
-            lines << "#{idx+1},#{identifier},\n"
-          else
-            lines << "#{idx+1},#{@ids[idx]}#{identifier},\n"
-          end
-        end
-      end
-    end
-    (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join(""))
-    #tempfile.write(header+lines.join(""))
-    tempfile.rewind
-    ########################
-=begin
-    header = task.csv
-    lines = []
-    task.predictions.each_with_index do |result,idx|
-      identifier = result[0]
-      prediction_id = result[1]
-      prediction = Prediction.find prediction_id
-      lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}"
-    end
-    return header+lines.join("\n")
-=end
-    return tempfile.read
+get "/api/dataset/:id/?" do
+  dataset = Dataset.find :id => params[:id]
+  halt 400, "Dataset with id: #{params[:id]} not found." unless dataset
+  case @accept
+  when "text/csv", "application/csv"
+    return dataset.to_csv
   else
-    dataset = Dataset.find :id => params[:id]
-    halt 400, "Dataset with id: #{params[:id]} not found." unless dataset
-    case @accept
-    when "application/json"
-      dataset.data_entries.each do |k, v|
-        dataset.data_entries[k][:URI] = uri("/substance/#{k}")
-      end
-      dataset[:URI] = uri("/dataset/#{dataset.id}")
-      dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
-      dataset[:features] = uri("/dataset/#{dataset.id}/features")
-      return dataset.to_json
-    when "text/csv", "application/csv"
-      return dataset.to_csv
-    else
-      bad_request_error "Mime type #{@accept} is not supported."
-    end
+    bad_request_error "Mime type #{@accept} is not supported."
   end
 end
 
 # Get a dataset attribute. One of compounds, nanoparticles, substances, features 
-get "/dataset/:id/:attribute/?" do
-  if Task.where(id: params[:id]).exists?
-    halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json
-  end
+get "/api/dataset/:id/:attribute/?" do
   dataset = Dataset.find :id => params[:id]
   halt 400,  "Dataset with id: #{params[:id]} not found." unless dataset
   attribs = ["compounds", "nanoparticles", "substances", "features"]
-- 
cgit v1.2.3


From bba7061d7ff2420f4eb2b6f88362edb71bc6bf25 Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Tue, 18 Jun 2019 15:04:55 +0000
Subject: return training dataset from source

---
 lib/dataset.rb | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib/dataset.rb')

diff --git a/lib/dataset.rb b/lib/dataset.rb
index 00685b8..51407ca 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -16,7 +16,10 @@ get "/api/dataset/:id/?" do
   halt 400, "Dataset with id: #{params[:id]} not found." unless dataset
   case @accept
   when "text/csv", "application/csv"
-    return dataset.to_csv
+    # The CSV is served straight from the dataset's stored source file. Check it
+    # is still on disk first; otherwise File.read raises and the client gets a 500.
+    halt 404, "Source file for dataset with id: #{params[:id]} not found." unless File.file?(dataset.source.to_s)
+    return File.read dataset.source
   else
     bad_request_error "Mime type #{@accept} is not supported."
   end
-- 
cgit v1.2.3