summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author gebele <gebele@in-silico.ch> 2018-07-25 15:16:27 +0000
committer gebele <gebele@in-silico.ch> 2018-07-25 15:16:27 +0000
commit c26965b89c307f9370ca5a3cf75646aad76dba3b (patch)
tree 99f791ec1269beacedfad68068d4a0d341cf474d
parent c93afb84a46af1500679cc5b314b0aeffcd7e6e9 (diff)
handle original ids in batch object;
-rw-r--r-- application.rb 44
-rw-r--r-- batch.rb 18
-rw-r--r-- views/batch.haml 25
3 files changed, 52 insertions, 35 deletions
diff --git a/application.rb b/application.rb
index fbebc66..b0357e8 100644
--- a/application.rb
+++ b/application.rb
@@ -189,16 +189,27 @@ get '/predict/csv/:task/:model/:filename/?' do
response['Content-Type'] = "text/csv"
filename = params[:filename] =~ /\.csv$/ ? params[:filename].gsub(/\.csv$/,"") : params[:filename]
task = Task.find params[:task].to_s
- m = Model::Validation.find params[:model].to_s unless params[:model] == "Cramer"
+ m = Model::Validation.find params[:model].to_s unless params[:model] =~ /Cramer|Mazzatorta/
dataset = Batch.find_by(:name => filename)
+ $logger.debug dataset.inspect
@ids = dataset.ids
warnings = dataset.warnings.blank? ? nil : dataset.warnings.join("\n")
unless warnings.nil?
+ @parse = []
+ warnings.split("\n").each do |warning|
+ if warning =~ /^Cannot/
+ smi = warning.split("SMILES compound").last.split("at").first
+ line = warning.split("SMILES compound").last.split("at line").last.split("of").first.strip.to_i
+ @parse << "Cannot parse SMILES compound#{smi}at line #{line} of #{dataset.source.split("/").last}\n"
+ end
+ end
keys_array = []
warnings.split("\n").each do |warning|
- text = warning.split("ID").first
- numbers = warning.split("ID").last.split("and")
- keys_array << numbers.collect{|n| n.strip.to_i}
+ if warning =~ /^Duplicate/
+ text = warning.split("ID").first
+ numbers = warning.split("ID").last.split("and")
+ keys_array << numbers.collect{|n| n.strip.to_i}
+ end
end
@dups = {}
keys_array.each do |keys|
@@ -209,13 +220,15 @@ get '/predict/csv/:task/:model/:filename/?' do
end
if params[:model] == "Mazzatorta"
endpoint = "Lowest observed adverse effect level (LOAEL) (Rats) (Mazzatorta)"
+ elsif params[:model] == "Cramer"
+ endpoint = "Oral_toxicity_(Cramer_rules)"
else
- endpoint = (params[:model] == "Cramer") ? "Oral_toxicity_(Cramer_rules)" : (m.endpoint =~ /Mutagenicity/i ? "Consensus_mutagenicity" : "#{m.endpoint}_(#{m.species})")
+ endpoint = "#{m.endpoint}_(#{m.species})"
end
tempfile = Tempfile.new
- if params[:model] == "Cramer" || params[:model] == "Mazzatorta"
- # add duplicate warning at the end of a line if ID matches
- if @dups
+ if params[:model] =~ /Cramer|Mazzatorta/
+ # add duplicate and parse warnings
+ unless warnings.nil?
lines = task.csv.split("\n")
header = lines.shift
out = ""
@@ -226,7 +239,8 @@ get '/predict/csv/:task/:model/:filename/?' do
out << line+"\n"
end
end
- tempfile.write(header+"\n"+out)
+ (@parse && !@parse.blank?) ? tempfile.write(header+"\n"+out+"\n"+@parse.join("\n")) : tempfile.write(header+"\n"+out)
+ #tempfile.write(header+"\n"+out)
else
tempfile.write(task.csv)
end
@@ -244,12 +258,6 @@ get '/predict/csv/:task/:model/:filename/?' do
else
lines << "#{idx+1},#{@ids[idx]},#{identifier},#{Prediction.find(prediction_id).csv.tr("\n","")},#{@dups[idx+1]}"
end
- else
- if @ids.blank?
- lines << "#{idx+1},#{identifier},#{p},#{@dups[idx+1]}"
- else
- lines << "#{idx+1},#{@ids[idx]},#{identifier},#{p},#{@dups[idx+1]}"
- end
end
else
if prediction_id.is_a? BSON::ObjectId
@@ -267,8 +275,8 @@ get '/predict/csv/:task/:model/:filename/?' do
end
end
end
- csv = header + lines.join("")
- tempfile.write(csv)
+ (@parse && !@parse.blank?) ? tempfile.write(header+lines.join("")+"\n"+@parse.join("\n")) : tempfile.write(header+lines.join(""))
+ #tempfile.write(header+lines.join(""))
end
tempfile.rewind
send_file tempfile, :filename => "#{Time.now.strftime("%Y-%m-%d")}_lazar_batch_prediction_#{endpoint}_#{filename}.csv", :type => "text/csv", :disposition => "attachment"
@@ -278,6 +286,7 @@ post '/predict/?' do
# process batch prediction
if !params[:fileselect].blank? || !params[:existing].blank?
if !params[:existing].blank?
+ $logger.debug "Take file from database."
@dataset = Batch.find params[:existing].keys[0]
@compounds = @dataset.compounds
@identifiers = @dataset.identifiers
@@ -553,7 +562,6 @@ post '/predict/?' do
elsif model_id == "Mazzatorta"
prediction = LoaelMazzatorta.predict(@compound.smiles)
output = {}
- $logger.debug prediction
if prediction["value"]
output["mazzatorta"] = {:mmol_prediction => @compound.mg_to_mmol(prediction["value"].delog10p).signif(3),:prediction => prediction["value"].delog10p.signif(3)}
else
diff --git a/batch.rb b/batch.rb
index 2f99000..2e72396 100644
--- a/batch.rb
+++ b/batch.rb
@@ -29,9 +29,8 @@ module OpenTox
# original IDs
if table[0][0] =~ /ID/i
- ids = table.collect{|row| row.shift}
- ids.shift
- batch.ids = ids
+ @original_ids = table.collect{|row| row.shift}
+ @original_ids.shift
end
# features
@@ -60,8 +59,7 @@ module OpenTox
end
table.each_with_index do |vals,i|
- identifier = vals.shift.strip
- batch.identifiers << identifier
+ identifier = vals.shift.strip.gsub(/^'|'$/,"")
begin
case compound_format
when /SMILES/i
@@ -72,13 +70,17 @@ module OpenTox
rescue
compound = nil
end
- if compound.nil? # compound parsers may return nil
+ # collect only for present compounds
+ unless compound.nil?
+ batch.identifiers << identifier
+ batch.compounds << compound.id
+ batch.ids << @original_ids[i] if @original_ids
+ else
batch.warnings << "Cannot parse #{compound_format} compound '#{identifier}' at line #{i+2} of #{source}."
- next
end
- batch.compounds << compound.id
end
batch.compounds.duplicates.each do |duplicate|
+ $logger.debug "Duplicates found in #{name}."
dup = Compound.find duplicate
positions = []
batch.compounds.each_with_index do |co,i|
diff --git a/views/batch.haml b/views/batch.haml
index 76338bc..a7083cc 100644
--- a/views/batch.haml
+++ b/views/batch.haml
@@ -29,8 +29,10 @@
var aClient = new HttpClient();
aClient.get(uri, function(res) {
var response = JSON.parse(res);
- if (model_id =~ /Cramer|Mazzatorta/){
- $("img.circle").show();
+ if (model_id == "Cramer"){
+ $("img.circle_cramer").show();
+ } else if (model_id == "Mazzatorta"){
+ $("img.circle_mazza").show();
}else{
progress(response['percent'],id);
}
@@ -38,7 +40,12 @@
window.clearInterval(markers[id]);
$("a#downbutton_"+id).removeClass("disabled");
$("a#detailsbutton_"+id).removeClass("disabled");
- $("img.circle").hide();
+ if (model_id == "Cramer"){
+ $("img.circle_cramer").hide();
+ };
+ if (model_id == "Mazzatorta"){
+ $("img.circle_mazza").hide();
+ };
};
});
};
@@ -107,10 +114,9 @@
%div.col-md-6
- if model =~ /Mazzatorta/
%h5= "Lowest observed adverse effect level (LOAEL) (Mazzatorta) Rats"
- #pager{:id=>idx}
- else
%h5= (model == "Cramer") ? "Oral toxicity (Cramer rules)" : (m.endpoint =~ /Mutagenicity/i ? "Consensus mutagenicity" : "#{m.endpoint} (#{m.species})")
- #pager{:id=>idx}
+ #pager{:id=>idx}
%div.col-md-6.h5
%a.btn.btn-default.btn-xs.disabled{:id => "detailsbutton_#{idx}", :data=>{:toggle=>"collapse"}, :href=>"javascript:void(0)", :onclick=>"pagePredictions('#{task}','#{model}','#{idx}')", :style=>"font-size:small;"}
%span.glyphicon.glyphicon-menu-right
@@ -118,9 +124,11 @@
%a.btn.btn-default.btn-xs.disabled{:id => "downbutton_#{idx}", :href=>"#{to("/predict/csv/#{task}/#{model}/#{@filename}")}", :title=>"download", :style=>"font-size:small;"}
%span.glyphicon.glyphicon-download-alt
CSV
- - if model =~ /Cramer|Mazzatorta/
- %img.h2{:src=>"/images/wait30trans.gif", :id=>"circle", :class=>"circle", :alt=>"wait", :style=>"display:none;"}
- - else
+ - if model == "Cramer"
+ %img.h2{:src=>"/images/wait30trans.gif", :id=>"circle1", :class=>"circle_cramer", :alt=>"wait", :style=>"display:none;"}
+ - if model == "Mazzatorta"
+ %img.h2{:src=>"/images/wait30trans.gif", :id=>"circle2", :class=>"circle_mazza", :alt=>"wait", :style=>"display:none;"}
+ - if model !~ /Cramer|Mazzatorta/
%div{:id=>"progress_#{idx}", :style=>"width:100%;height:3px;position:relative;background-color:#ccc;"}
%div{:id=>"bar_#{idx}", :style=>"background-color: #4CAF50;width:10px;height:3px;position:absolute;"}
- # increase interval timer for large datasets
@@ -143,4 +151,3 @@
};
});
#data-container{:id=>idx,:style=>"width:100%;"}
- -#pager{:id=>idx}