From 3a11ba2918795821600b7113d0758415718d263a Mon Sep 17 00:00:00 2001 From: gebele Date: Mon, 11 Jun 2018 12:46:06 +0200 Subject: combine gui with rest --- lib/dataset.rb | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 lib/dataset.rb (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb new file mode 100644 index 0000000..7c74f39 --- /dev/null +++ b/lib/dataset.rb @@ -0,0 +1,46 @@ +# Get all datasets +get "/dataset/?" do + datasets = Dataset.all + case @accept + when "text/uri-list" + uri_list = datasets.collect{|dataset| uri("/dataset/#{dataset.id}")} + return uri_list.join("\n") + "\n" + when "application/json" + datasets = JSON.parse datasets.to_json + list = [] + datasets.each{|d| list << uri("/dataset/#{d["_id"]["$oid"]}")} + return list.to_json + else + bad_request_error "Mime type #{@accept} is not supported." + end +end + +# Get a dataset +get "/dataset/:id/?" do + dataset = Dataset.find :id => params[:id] + not_found_error "Dataset with id: #{params[:id]} not found." unless dataset + case @accept + when "application/json" + dataset.data_entries.each do |k, v| + dataset.data_entries[k][:URI] = uri("/substance/#{k}") + end + dataset[:URI] = uri("/dataset/#{dataset.id}") + dataset[:substances] = uri("/dataset/#{dataset.id}/substances") + dataset[:features] = uri("/dataset/#{dataset.id}/features") + return dataset.to_json + when "text/csv", "application/csv" + return dataset.to_csv + else + bad_request_error "Mime type #{@accept} is not supported." + end +end + +# Get a dataset attribute. One of compounds, nanoparticles, substances, features +get "/dataset/:id/:attribute/?" do + dataset = Dataset.find :id => params[:id] + not_found_error "Dataset with id: #{params[:id]} not found." unless dataset + attribs = ["compounds", "nanoparticles", "substances", "features"] + return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute] + out = dataset.send("#{params[:attribute]}") + return out.to_json +end -- cgit v1.2.3 From 9750e0309500259e9a56e267ce87984fb5bb5e53 Mon Sep 17 00:00:00 2001 From: gebele Date: Mon, 26 Nov 2018 15:29:26 +0000 Subject: clean out; better response codes; prepare for batch --- lib/dataset.rb | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 14 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 7c74f39..749167b 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -17,28 +17,122 @@ end # Get a dataset get "/dataset/:id/?" do - dataset = Dataset.find :id => params[:id] - not_found_error "Dataset with id: #{params[:id]} not found." unless dataset - case @accept - when "application/json" - dataset.data_entries.each do |k, v| - dataset.data_entries[k][:URI] = uri("/substance/#{k}") + if Task.where(id: params[:id]).exists? + task = Task.find params[:id] + halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100 + $logger.debug task.inspect + response['Content-Type'] = "text/csv" + m = Model::Validation.find task.model_id + dataset = Batch.find task.dataset_id + @ids = dataset.ids + warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n") + unless warnings.nil? + @parse = [] + warnings.split("\n").each do |warning| + if warning =~ /^Cannot/ + smi = warning.split("SMILES compound").last.split("at").first + line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i + @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n" + end + end + keys_array = [] + warnings.split("\n").each do |warning| + if warning =~ /^Duplicate/ + text = warning.split("ID").first + numbers = warning.split("ID").last.split("and") + keys_array << numbers.collect{|n| n.strip.to_i} + end + end + @dups = {} + keys_array.each do |keys| + keys.each do |key| + @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n" + end + end + end + $logger.debug "dups: #{@dups}" + endpoint = "#{m.endpoint}_(#{m.species})" + tempfile = Tempfile.new + header = task.csv + lines = [] + $logger.debug task.predictions + task.predictions[m.id.to_s].each_with_index do |hash,idx| + identifier = hash.keys[0] + prediction_id = hash.values[0] + # add duplicate warning at the end of a line if ID matches + if @dups[idx+1] + if prediction_id.is_a? BSON::ObjectId + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" + end + else + if @ids.blank? + lines << "#{idx+1},#{identifier},\n" + else + lines << "#{idx+1},#{@ids[idx]}#{identifier},\n" + end + end + else + if prediction_id.is_a? BSON::ObjectId + if @ids.blank? + lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}" + else + lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}" + end + else + if @ids.blank? + lines << "#{idx+1},#{identifier},\n" + else + lines << "#{idx+1},#{@ids[idx]}#{identifier},\n" + end + end + end end - dataset[:URI] = uri("/dataset/#{dataset.id}") - dataset[:substances] = uri("/dataset/#{dataset.id}/substances") - dataset[:features] = uri("/dataset/#{dataset.id}/features") - return dataset.to_json - when "text/csv", "application/csv" - return dataset.to_csv + (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join("")) + #tempfile.write(header+lines.join("")) + tempfile.rewind + ######################## +=begin + header = task.csv + lines = [] + task.predictions.each_with_index do |result,idx| + identifier = result[0] + prediction_id = result[1] + prediction = Prediction.find prediction_id + lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}" + end + return header+lines.join("\n") +=end + return tempfile.read else - bad_request_error "Mime type #{@accept} is not supported." + dataset = Dataset.find :id => params[:id] + halt 400, "Dataset with id: #{params[:id]} not found." unless dataset + case @accept + when "application/json" + dataset.data_entries.each do |k, v| + dataset.data_entries[k][:URI] = uri("/substance/#{k}") + end + dataset[:URI] = uri("/dataset/#{dataset.id}") + dataset[:substances] = uri("/dataset/#{dataset.id}/substances") + dataset[:features] = uri("/dataset/#{dataset.id}/features") + return dataset.to_json + when "text/csv", "application/csv" + return dataset.to_csv + else + bad_request_error "Mime type #{@accept} is not supported." + end end end # Get a dataset attribute. One of compounds, nanoparticles, substances, features get "/dataset/:id/:attribute/?" do + if Task.where(id: params[:id]).exists? + halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json + end dataset = Dataset.find :id => params[:id] - not_found_error "Dataset with id: #{params[:id]} not found." unless dataset + halt 400, "Dataset with id: #{params[:id]} not found." unless dataset attribs = ["compounds", "nanoparticles", "substances", "features"] return "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute] out = dataset.send("#{params[:attribute]}") -- cgit v1.2.3 From 741701df8ff0861b3607a30e9aaf8b8a0c303cdf Mon Sep 17 00:00:00 2001 From: gebele Date: Thu, 13 Jun 2019 15:28:59 +0000 Subject: update with API --- lib/dataset.rb | 133 ++++++--------------------------------------------------- 1 file changed, 13 insertions(+), 120 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 749167b..00685b8 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,136 +1,29 @@ # Get all datasets -get "/dataset/?" do - datasets = Dataset.all +get "/api/dataset/?" do + datasets = Dataset.all #.limit(100) case @accept - when "text/uri-list" - uri_list = datasets.collect{|dataset| uri("/dataset/#{dataset.id}")} - return uri_list.join("\n") + "\n" when "application/json" - datasets = JSON.parse datasets.to_json - list = [] - datasets.each{|d| list << uri("/dataset/#{d["_id"]["$oid"]}")} - return list.to_json + list = datasets.collect{|dataset| uri("/api/dataset/#{dataset.id}")}.to_json + return list else - bad_request_error "Mime type #{@accept} is not supported." + halt 400, "Mime type #{@accept} is not supported." end end # Get a dataset -get "/dataset/:id/?" do - if Task.where(id: params[:id]).exists? - task = Task.find params[:id] - halt 404, "Dataset with id: #{params[:id]} not found." unless task.percent == 100 - $logger.debug task.inspect - response['Content-Type'] = "text/csv" - m = Model::Validation.find task.model_id - dataset = Batch.find task.dataset_id - @ids = dataset.ids - warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n") - unless warnings.nil? - @parse = [] - warnings.split("\n").each do |warning| - if warning =~ /^Cannot/ - smi = warning.split("SMILES compound").last.split("at").first - line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i - @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n" - end - end - keys_array = [] - warnings.split("\n").each do |warning| - if warning =~ /^Duplicate/ - text = warning.split("ID").first - numbers = warning.split("ID").last.split("and") - keys_array << numbers.collect{|n| n.strip.to_i} - end - end - @dups = {} - keys_array.each do |keys| - keys.each do |key| - @dups[key] = "Duplicate compound at ID #{keys.join(" and ")}\n" - end - end - end - $logger.debug "dups: #{@dups}" - endpoint = "#{m.endpoint}_(#{m.species})" - tempfile = Tempfile.new - header = task.csv - lines = [] - $logger.debug task.predictions - task.predictions[m.id.to_s].each_with_index do |hash,idx| - identifier = hash.keys[0] - prediction_id = hash.values[0] - # add duplicate warning at the end of a line if ID matches - if @dups[idx+1] - if prediction_id.is_a? BSON::ObjectId - if @ids.blank? - lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" - else - lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}" - end - else - if @ids.blank? - lines << "#{idx+1},#{identifier},\n" - else - lines << "#{idx+1},#{@ids[idx]}#{identifier},\n" - end - end - else - if prediction_id.is_a? BSON::ObjectId - if @ids.blank? - lines << "#{idx+1},#{identifier},#{Prediction.find(prediction_id).csv}" - else - lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv}" - end - else - if @ids.blank? - lines << "#{idx+1},#{identifier},\n" - else - lines << "#{idx+1},#{@ids[idx]}#{identifier},\n" - end - end - end - end - (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join("")) - #tempfile.write(header+lines.join("")) - tempfile.rewind - ######################## -=begin - header = task.csv - lines = [] - task.predictions.each_with_index do |result,idx| - identifier = result[0] - prediction_id = result[1] - prediction = Prediction.find prediction_id - lines << "#{idx+1},#{identifier},#{prediction.csv.tr("\n","")}" - end - return header+lines.join("\n") -=end - return tempfile.read +get "/api/dataset/:id/?" do + dataset = Dataset.find :id => params[:id] + halt 400, "Dataset with id: #{params[:id]} not found." unless dataset + case @accept + when "text/csv", "application/csv" + return dataset.to_csv else - dataset = Dataset.find :id => params[:id] - halt 400, "Dataset with id: #{params[:id]} not found." unless dataset - case @accept - when "application/json" - dataset.data_entries.each do |k, v| - dataset.data_entries[k][:URI] = uri("/substance/#{k}") - end - dataset[:URI] = uri("/dataset/#{dataset.id}") - dataset[:substances] = uri("/dataset/#{dataset.id}/substances") - dataset[:features] = uri("/dataset/#{dataset.id}/features") - return dataset.to_json - when "text/csv", "application/csv" - return dataset.to_csv - else - bad_request_error "Mime type #{@accept} is not supported." - end + bad_request_error "Mime type #{@accept} is not supported." end end # Get a dataset attribute. One of compounds, nanoparticles, substances, features -get "/dataset/:id/:attribute/?" do - if Task.where(id: params[:id]).exists? - halt 400, "No attributes selection available for dataset with id: #{params[:id]}.".to_json - end +get "/api/dataset/:id/:attribute/?" do dataset = Dataset.find :id => params[:id] halt 400, "Dataset with id: #{params[:id]} not found." unless dataset attribs = ["compounds", "nanoparticles", "substances", "features"] -- cgit v1.2.3 From bba7061d7ff2420f4eb2b6f88362edb71bc6bf25 Mon Sep 17 00:00:00 2001 From: gebele Date: Tue, 18 Jun 2019 15:04:55 +0000 Subject: return training dataset from source --- lib/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 00685b8..51407ca 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -16,7 +16,7 @@ get "/api/dataset/:id/?" do halt 400, "Dataset with id: #{params[:id]} not found." unless dataset case @accept when "text/csv", "application/csv" - return dataset.to_csv + return File.read dataset.source else bad_request_error "Mime type #{@accept} is not supported." end -- cgit v1.2.3