summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--application.rb40
-rw-r--r--helper.rb20
-rw-r--r--model.rb38
-rw-r--r--parser.rb27
-rw-r--r--views/create.haml30
-rw-r--r--views/csv_format.haml23
-rw-r--r--views/excel_format.haml64
-rw-r--r--views/help.haml101
-rwxr-xr-xviews/layout.haml5
-rw-r--r--views/model.haml2
-rw-r--r--views/prediction.haml4
11 files changed, 195 insertions, 159 deletions
diff --git a/application.rb b/application.rb
index fecdb63..40aabef 100644
--- a/application.rb
+++ b/application.rb
@@ -21,9 +21,7 @@ end
get '/models/?' do
@models = ToxCreateModel.all(:order => [ :created_at.desc ])
- @models.each do |model|
- process_model(model)
- end
+ @models.each { |model| model.process }
haml :models
end
@@ -53,7 +51,7 @@ end
get '/model/:id/:view/?' do
response['Content-Type'] = 'text/plain'
model = ToxCreateModel.get(params[:id])
- process_model(model)
+ model.process
begin
case params[:view]
@@ -79,16 +77,8 @@ get '/create' do
haml :create
end
-get '/about' do
- haml :about
-end
-
-get '/csv_format' do
- haml :csv_format
-end
-
-get '/excel_format' do
- haml :excel_format
+get '/help' do
+ haml :help
end
get "/confidence" do
@@ -120,12 +110,12 @@ post '/upload' do # create a new model
redirect url_for('/create')
end
- #begin
+ begin
@model.task_uri = OpenTox::Algorithm::Lazar.create_model(:dataset_uri => parser.dataset_uri, :prediction_feature => feature_uri)
- #rescue
- # flash[:notice] = "Model creation failed. Please check if the input file is in a valid #{link_to "Excel", "/excel_format"} or #{link_to "CSV", "/csv_format"} format."
- # redirect url_for('/create')
- #end
+ rescue
+ flash[:notice] = "Model creation failed. Please check if the input file is in a valid #{link_to "Excel", "/excel_format"} or #{link_to "CSV", "/csv_format"} format."
+ redirect url_for('/create')
+ end
validation_task_uri = OpenTox::Validation.crossvalidation(
:algorithm_uri => OpenTox::Algorithm::Lazar.uri,
@@ -180,16 +170,19 @@ post '/predict/?' do # post chemical name to model
db_activities = []
LOGGER.debug "curl -X POST -d 'compound_uri=#{@compound.uri}' -H 'Accept:application/x-yaml' #{model.uri}"
prediction = YAML.load(`curl -X POST -d 'compound_uri=#{@compound.uri}' -H 'Accept:application/x-yaml' #{model.uri}`)
+ # TODO check if prediction failed - returns string
source = prediction.creator
if prediction.data[@compound.uri]
- if source.to_s.match(/model/)
+ if source.to_s.match(/model/) # real prediction
prediction = prediction.data[@compound.uri].first.values.first
- if prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")]
+ LOGGER.debug prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")]
+ LOGGER.debug prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]
+ if !prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")].nil?
@predictions << {:title => model.name, :prediction => prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")], :confidence => prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]}
- elsif prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")]
+ elsif !prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")].nil?
@predictions << {:title => model.name, :prediction => prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")], :confidence => prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]}
end
- else
+ else # database value
prediction = prediction.data[@compound.uri].first.values
@predictions << {:title => model.name, :measured_activities => prediction}
end
@@ -197,6 +190,7 @@ post '/predict/?' do # post chemical name to model
@predictions << {:title => model.name, :prediction => "not available (no similar compounds in the training dataset)"}
end
end
+ LOGGER.debug @predictions.inspect
haml :prediction
end
diff --git a/helper.rb b/helper.rb
index 90529f6..4a5f739 100644
--- a/helper.rb
+++ b/helper.rb
@@ -10,25 +10,5 @@ helpers do
end
act
end
-
- def process_model(model)
- if !model.uri and model.status == "Completed"
- model.uri = RestClient.get(File.join(model.task_uri, 'resultURI')).body
- model.save
- end
- if !model.validation_uri and model.validation_status == "Completed"
- begin
- model.validation_uri = RestClient.get(File.join(model.validation_task_uri, 'resultURI')).body
- LOGGER.debug "Validation URI: #{model.validation_uri}"
- model.validation_report_task_uri = RestClient.post(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => model.validation_uri).body
- LOGGER.debug "Validation Report Task URI: #{model.validation_report_task_uri}"
- model.save
- rescue
- end
- end
- if model.validation_report_task_uri and !model.validation_report_uri and model.validation_report_status == 'Completed'
- model.validation_report_uri = RestClient.get(File.join(model.validation_report_task_uri, 'resultURI')).body
- end
- end
end
diff --git a/model.rb b/model.rb
index 36670d9..e35507b 100644
--- a/model.rb
+++ b/model.rb
@@ -1,4 +1,5 @@
class ToxCreateModel
+
include DataMapper::Resource
property :id, Serial
property :name, String, :length => 255
@@ -56,6 +57,21 @@ class ToxCreateModel
end
end
+ def type
+ lazar = RestClient.get(@uri, :accept => "application/x-yaml").body
+ #LOGGER.debug lazar
+ lazar = YAML.load(lazar)
+ #LOGGER.debug lazar.inspect
+ case lazar.dependentVariables
+ when /classification/
+ return "classification"
+ when /regression/
+ return "regression"
+ else
+ return "unknown"
+ end
+ end
+
def validation
begin
uri = File.join(@validation_uri, 'statistics')
@@ -93,8 +109,26 @@ class ToxCreateModel
end
end
+ def process
+ if !@uri and status == "Completed"
+ @uri = RestClient.get(File.join(@task_uri, 'resultURI')).body
+ save
+ end
+ if !@validation_uri and validation_status == "Completed"
+ begin
+ @validation_uri = RestClient.get(File.join(@validation_task_uri, 'resultURI')).body
+ LOGGER.debug "Validation URI: #{@validation_uri}"
+ @validation_report_task_uri = RestClient.post(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => @validation_uri).body
+ LOGGER.debug "Validation Report Task URI: #{@validation_report_task_uri}"
+ save
+ rescue
+ end
+ end
+ if @validation_report_task_uri and !@validation_report_uri and validation_report_status == 'Completed'
+ @validation_report_uri = RestClient.get(File.join(@validation_report_task_uri, 'resultURI')).body
+ end
+ end
+
end
DataMapper.auto_upgrade!
-
-
diff --git a/parser.rb b/parser.rb
index 040714a..8754531 100644
--- a/parser.rb
+++ b/parser.rb
@@ -37,14 +37,18 @@ class Parser
@dataset.data[items[0]] = [] unless @dataset.data[items[0]]
case @type
when "classification"
- case items[1].to_i.to_s
- when '1'
+ case items[1].to_s
+ when TRUE_REGEXP
@dataset.data[items[0]] << {@feature_uri => true }
- when '0'
+ when FALSE_REGEXP
@dataset.data[items[0]] << {@feature_uri => false }
end
when "regression"
- @dataset.data[items[0]] << {@feature_uri => items[1]}
+ if items[1].to_f == 0
+ @activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored."
+ else
+ @dataset.data[items[0]] << {@feature_uri => items[1].to_f}
+ end
end
end
@dataset_uri = @dataset.save
@@ -78,7 +82,7 @@ class Parser
book.default_sheet = 0
1.upto(book.last_row) do |row|
input = validate( book.cell(row,1), book.cell(row,2), row ) # smiles, activity
- @data << input
+ @data << input if input
end
File.safe_unlink(@file[:tempfile])
rescue
@@ -93,18 +97,23 @@ class Parser
@smiles_errors << "Row #{row}: " + [smiles,act].join(", ")
return false
end
- if !numeric?(act)
+ unless numeric?(act) or classification?(act)
@activity_errors << "Row #{row}: " + [smiles,act].join(", ")
return false
end
@duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
@duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ")
- @type = "regression" unless act.to_f == 0.0 or act.to_f == 1.0
+ @type = "regression" unless classification?(act)
@nr_compounds += 1
- [ compound.uri, act.to_f ]
+ [ compound.uri, act , row ]
end
def numeric?(object)
- true if Float(object) rescue false
+ true if Float(object) rescue false
+ end
+
+ def classification?(object)
+ !object.to_s.strip.match(TRUE_REGEXP).nil? or !object.to_s.strip.match(FALSE_REGEXP).nil?
end
+
end
diff --git a/views/create.haml b/views/create.haml
index 0f50756..3cf05c1 100644
--- a/views/create.haml
+++ b/views/create.haml
@@ -2,28 +2,32 @@
%p
This service creates
- %a{:href => 'http://lazar.in-silico.de'} lazar
- %em classification
- models (i.e. models that discriminate between toxic/nontoxic compounds) from your uploaded datasets. Here are
- = link_to "instructions", '/excel_format'
- , for creating training datasets in Excel.
+ %ul
+ %li
+ %a{:href => 'http://lazar.in-silico.de'} lazar
+ %em classification
+ models (i.e. models that discriminate between toxic/nontoxic compounds) and
+ %li
+ %a{:href => 'http://lazar.in-silico.de'} lazar
+ %em regression
+ models (i.e. models that predict quantitative values, e.g. LC50's)
+ from your uploaded datasets. Further modelling algorithms will be added in future versions.
+
%p
- Facilities to create models for quantitative values (e.g. LC50s) and further modelling algorithms will be added in future versions.
+ Please read the
+ = link_to "instructions for creating training datasets", '/help'
+ before submitting.
%form{ :action => url_for('/upload'), :method => "post", :enctype => "multipart/form-data" }
%fieldset
- -#%legend
- Upload training data and create a
- %a{:href => 'http://lazar.in-silico.de'} lazar
- model
- %label{:for => 'endpoint'} 1. Enter a name for your endpoint:
+ %label{:for => 'endpoint'} 1. Enter endpoint name and unit (for regression):
%input{:type => 'text', :name => 'endpoint', :id => 'endpoint', :size => '50'}
%br
%label{:for => 'file'}
2. Upload training data in
- = link_to "Excel", '/excel_format'
+ = link_to "Excel", '/help'
or
- = link_to "CSV", '/csv_format'
+ = link_to "CSV", '/help'
format:
%input{:type => 'file', :name => 'file', :id => 'file', :size => '41'}
%input{ :type => "submit", :value => "Create model"}
diff --git a/views/csv_format.haml b/views/csv_format.haml
deleted file mode 100644
index 999bb68..0000000
--- a/views/csv_format.haml
+++ /dev/null
@@ -1,23 +0,0 @@
-= link_to "Back to model creation", '/create'
-%p
- The input file should contain two columns, separated by a comma. Enter in the first column the chemical structure in
- %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES
- format, in the second column the activity classification (1: active, 0: inactive), e.g.
-
-.code
- %code
- %br CC(=O)Nc1ccc(O)cc1, 1
- %br O=c1[nH]cnc2[nH]ncc12, 1
- %br CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O, 1
- %br CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12, 1
- %br CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3, 1
- %br CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13, 0
- %br CCN(CC)CC(=O)Nc1c(C)cccc1C, 0
- %br CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12, 0
- %br CN1CCCC1c2cccnc2, 0
-
-%p
- Here is an example for download:
- = link_to "hamster_carcinogenicity.csv", "/hamster_carcinogenicity.csv"
-
-%p You can create input files in Excel: Create a sheet with two columns and export them as CSV file with the "Save As" option from the menu, selecting the CSV (comma delimited) format.
diff --git a/views/excel_format.haml b/views/excel_format.haml
deleted file mode 100644
index 4cbbd08..0000000
--- a/views/excel_format.haml
+++ /dev/null
@@ -1,64 +0,0 @@
-= link_to "Back to model creation", '/create'
-%p
- The Excel input file should contain a single spreadsheet with two columns. Enter in the first column the chemical structure in
- %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES
- format, in the second column the activity classification (1: active, 0: inactive), e.g.
-
-- n = 0
-
-.code
- %table
- %tr
- %td
- %th A
- %th B
- %tr
- - n += 1
- %th= n
- %td CC(=O)Nc1ccc(O)cc1
- %td 1
- %tr
- - n += 1
- %th= n
- %td O=c1[nH]cnc2[nH]ncc12
- %td 1
- %tr
- - n += 1
- %th= n
- %td CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O
- %td 1
- %tr
- - n += 1
- %th= n
- %td CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12
- %td 1
- %tr
- - n += 1
- %th= n
- %td CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3
- %td 1
- %tr
- - n += 1
- %th= n
- %td CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13
- %td 0
- %tr
- - n += 1
- %th= n
- %td CCN(CC)CC(=O)Nc1c(C)cccc1C
- %td 0
- %tr
- - n += 1
- %th= n
- %td CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12
- %td 0
- %tr
- - n += 1
- %th= n
- %td CN1CCCC1c2cccnc2
- %td 0
-
-%p
- Here is an example file for download:
- = link_to "hamster_carcinogenicity.xls", "/hamster_carcinogenicity.xls"
-
diff --git a/views/help.haml b/views/help.haml
new file mode 100644
index 0000000..52339ce
--- /dev/null
+++ b/views/help.haml
@@ -0,0 +1,101 @@
+= link_to "Back to model creation", '/create'
+%p
+ Input files have two columns. Enter in the first column the chemical structure in
+ %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES
+ format, in the second column the toxic activity.
+%dl
+ %dt Classification datasets
+ %dd Please use 1/0, active/inactive or true/false to indicate active/inactive compounds.
+ %dt Regression datasets
+ %dd
+ Enter a quantitative value. For optimal performance you should
+ %ul
+ %li use molar units
+ %li enter non-logarithmic values (logarithms are taken internally)
+ %li avoid 0 activities (will be ignored)
+%p
+ Input files are accepted in
+ %a{:href => "http://en.wikipedia.org/wiki/Microsoft_Excel"} Excel
+ and
+ %a{:href => "http://en.wikipedia.org/wiki/Comma-separated_values"} CSV
+ formats.
+
+%h3 Excel example
+
+- n = 0
+
+.code
+ %table
+ %tr
+ %td
+ %th A
+ %th B
+ %tr
+ - n += 1
+ %th= n
+ %td CC(=O)Nc1ccc(O)cc1
+ %td 1
+ %tr
+ - n += 1
+ %th= n
+ %td O=c1[nH]cnc2[nH]ncc12
+ %td 1
+ %tr
+ - n += 1
+ %th= n
+ %td CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O
+ %td 1
+ %tr
+ - n += 1
+ %th= n
+ %td CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12
+ %td 1
+ %tr
+ - n += 1
+ %th= n
+ %td CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3
+ %td 1
+ %tr
+ - n += 1
+ %th= n
+ %td CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13
+ %td 0
+ %tr
+ - n += 1
+ %th= n
+ %td CCN(CC)CC(=O)Nc1c(C)cccc1C
+ %td 0
+ %tr
+ - n += 1
+ %th= n
+ %td CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12
+ %td 0
+ %tr
+ - n += 1
+ %th= n
+ %td CN1CCCC1c2cccnc2
+ %td 0
+
+%p
+ Excel example file for download:
+ = link_to "hamster_carcinogenicity.xls", "/hamster_carcinogenicity.xls"
+
+%h3 CSV example
+
+.code
+ %code
+ %br CC(=O)Nc1ccc(O)cc1, 1
+ %br O=c1[nH]cnc2[nH]ncc12, 1
+ %br CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O, 1
+ %br CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12, 1
+ %br CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3, 1
+ %br CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13, 0
+ %br CCN(CC)CC(=O)Nc1c(C)cccc1C, 0
+ %br CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12, 0
+ %br CN1CCCC1c2cccnc2, 0
+
+%p
+ CSV example for download:
+ = link_to "hamster_carcinogenicity.csv", "/hamster_carcinogenicity.csv"
+
+%p You can create CSV files in Excel: Create a sheet with two columns and export them as CSV file with the "Save As" option from the menu, selecting the CSV (comma delimited) format.
diff --git a/views/layout.haml b/views/layout.haml
index 012e296..7935b33 100755
--- a/views/layout.haml
+++ b/views/layout.haml
@@ -23,11 +23,10 @@
= link_to "Inspect", "/models"
%li{:class => ("selected" if /predict/ =~ request.path )}
= link_to "Predict", "/predict"
- %li{:class => ("selected" if /about/ =~ request.path )}
- = link_to "About", "/about"
+ %li{:class => ("selected" if /help/ =~ request.path )}
+ = link_to "Help", "/help"
.content
-
- if `hostname`.match(/ot-test|ot-dev/)
.notice
This service is for testing purposes only - once a week all models will be deleted. Please send bug reports and feature requests to our
diff --git a/views/model.haml b/views/model.haml
index fd1d114..e336de6 100644
--- a/views/model.haml
+++ b/views/model.haml
@@ -33,6 +33,8 @@
%dd
%a{:href => "http://www.in-silico.de/articles/modi020905.pdf"} #{File.basename model.algorithm}
-# %a{:href => model.algorithm} RDF/XML
+ %dt Type:
+ %dd= model.type
%dt Descriptors:
%dd
%a{:href => 'http://www.maunz.de/libfminer2-bbrc-doc/'} Fminer backbone refinement classes
diff --git a/views/prediction.haml b/views/prediction.haml
index 993c966..96385ef 100644
--- a/views/prediction.haml
+++ b/views/prediction.haml
@@ -41,12 +41,12 @@
= activity(p[:prediction])
- else
%br
- %em= p[:prediction]
+ %em= sprintf('%.03g', p[:prediction])
- if p[:confidence]
%br
(
%a{:href => "#", :id => "linkConfidence#{p.object_id}"} Confidence
- = ": #{sprintf('%.03f', p[:confidence].to_f.abs)}"
+ = ": #{sprintf('%.03g', p[:confidence].to_f.abs)}"
:javascript
$("a#linkConfidence#{p.object_id}").click(function () {
$("dl#confidence").toggle();