diff options
author | Christoph Helma <helma@in-silico.ch> | 2010-07-02 12:40:02 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2010-07-02 12:40:02 +0200 |
commit | 258e55e5cbd1355a30dad6bbe2a2638609db9a18 (patch) | |
tree | 9c8d60afd594ef7a13e4fd01593737826befcf85 | |
parent | 80cb52f477870bd78b0f92e69650872da56fb1a5 (diff) |
regression validation (partially) working
-rw-r--r-- | application.rb | 40 | ||||
-rw-r--r-- | helper.rb | 20 | ||||
-rw-r--r-- | model.rb | 38 | ||||
-rw-r--r-- | parser.rb | 27 | ||||
-rw-r--r-- | views/create.haml | 30 | ||||
-rw-r--r-- | views/csv_format.haml | 23 | ||||
-rw-r--r-- | views/excel_format.haml | 64 | ||||
-rw-r--r-- | views/help.haml | 101 | ||||
-rwxr-xr-x | views/layout.haml | 5 | ||||
-rw-r--r-- | views/model.haml | 2 | ||||
-rw-r--r-- | views/prediction.haml | 4 |
11 files changed, 195 insertions, 159 deletions
diff --git a/application.rb b/application.rb index fecdb63..40aabef 100644 --- a/application.rb +++ b/application.rb @@ -21,9 +21,7 @@ end get '/models/?' do @models = ToxCreateModel.all(:order => [ :created_at.desc ]) - @models.each do |model| - process_model(model) - end + @models.each { |model| model.process } haml :models end @@ -53,7 +51,7 @@ end get '/model/:id/:view/?' do response['Content-Type'] = 'text/plain' model = ToxCreateModel.get(params[:id]) - process_model(model) + model.process begin case params[:view] @@ -79,16 +77,8 @@ get '/create' do haml :create end -get '/about' do - haml :about -end - -get '/csv_format' do - haml :csv_format -end - -get '/excel_format' do - haml :excel_format +get '/help' do + haml :help end get "/confidence" do @@ -120,12 +110,12 @@ post '/upload' do # create a new model redirect url_for('/create') end - #begin + begin @model.task_uri = OpenTox::Algorithm::Lazar.create_model(:dataset_uri => parser.dataset_uri, :prediction_feature => feature_uri) - #rescue - # flash[:notice] = "Model creation failed. Please check if the input file is in a valid #{link_to "Excel", "/excel_format"} or #{link_to "CSV", "/csv_format"} format." - # redirect url_for('/create') - #end + rescue + flash[:notice] = "Model creation failed. Please check if the input file is in a valid #{link_to "Excel", "/excel_format"} or #{link_to "CSV", "/csv_format"} format." + redirect url_for('/create') + end validation_task_uri = OpenTox::Validation.crossvalidation( :algorithm_uri => OpenTox::Algorithm::Lazar.uri, @@ -180,16 +170,19 @@ post '/predict/?' do # post chemical name to model db_activities = [] LOGGER.debug "curl -X POST -d 'compound_uri=#{@compound.uri}' -H 'Accept:application/x-yaml' #{model.uri}" prediction = YAML.load(`curl -X POST -d 'compound_uri=#{@compound.uri}' -H 'Accept:application/x-yaml' #{model.uri}`) + # TODO check if prediction failed - returns string source = prediction.creator if prediction.data[@compound.uri] - if source.to_s.match(/model/) + if source.to_s.match(/model/) # real prediction prediction = prediction.data[@compound.uri].first.values.first - if prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")] + LOGGER.debug prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")] + LOGGER.debug prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")] + if !prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")].nil? @predictions << {:title => model.name, :prediction => prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")], :confidence => prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]} - elsif prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")] + elsif !prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")].nil? @predictions << {:title => model.name, :prediction => prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")], :confidence => prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]} end - else + else # database value prediction = prediction.data[@compound.uri].first.values @predictions << {:title => model.name, :measured_activities => prediction} end @@ -197,6 +190,7 @@ post '/predict/?' do # post chemical name to model @predictions << {:title => model.name, :prediction => "not available (no similar compounds in the training dataset)"} end end + LOGGER.debug @predictions.inspect haml :prediction end @@ -10,25 +10,5 @@ helpers do end act end - - def process_model(model) - if !model.uri and model.status == "Completed" - model.uri = RestClient.get(File.join(model.task_uri, 'resultURI')).body - model.save - end - if !model.validation_uri and model.validation_status == "Completed" - begin - model.validation_uri = RestClient.get(File.join(model.validation_task_uri, 'resultURI')).body - LOGGER.debug "Validation URI: #{model.validation_uri}" - model.validation_report_task_uri = RestClient.post(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => model.validation_uri).body - LOGGER.debug "Validation Report Task URI: #{model.validation_report_task_uri}" - model.save - rescue - end - end - if model.validation_report_task_uri and !model.validation_report_uri and model.validation_report_status == 'Completed' - model.validation_report_uri = RestClient.get(File.join(model.validation_report_task_uri, 'resultURI')).body - end - end end @@ -1,4 +1,5 @@ class ToxCreateModel + include DataMapper::Resource property :id, Serial property :name, String, :length => 255 @@ -56,6 +57,21 @@ class ToxCreateModel end end + def type + lazar = RestClient.get(@uri, :accept => "application/x-yaml").body + #LOGGER.debug lazar + lazar = YAML.load(lazar) + #LOGGER.debug lazar.inspect + case lazar.dependentVariables + when /classification/ + return "classification" + when /regression/ + return "regression" + else + return "unknown" + end + end + def validation begin uri = File.join(@validation_uri, 'statistics') @@ -93,8 +109,26 @@ class ToxCreateModel end end + def process + if !@uri and status == "Completed" + @uri = RestClient.get(File.join(@task_uri, 'resultURI')).body + save + end + if !@validation_uri and validation_status == "Completed" + begin + @validation_uri = RestClient.get(File.join(@validation_task_uri, 'resultURI')).body + LOGGER.debug "Validation URI: #{@validation_uri}" + @validation_report_task_uri = RestClient.post(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => @validation_uri).body + LOGGER.debug "Validation Report Task URI: #{@validation_report_task_uri}" + save + rescue + end + end + if @validation_report_task_uri and !@validation_report_uri and validation_report_status == 'Completed' + @validation_report_uri = RestClient.get(File.join(@validation_report_task_uri, 'resultURI')).body + end + end + end DataMapper.auto_upgrade! - - @@ -37,14 +37,18 @@ class Parser @dataset.data[items[0]] = [] unless @dataset.data[items[0]] case @type when "classification" - case items[1].to_i.to_s - when '1' + case items[1].to_s + when TRUE_REGEXP @dataset.data[items[0]] << {@feature_uri => true } - when '0' + when FALSE_REGEXP @dataset.data[items[0]] << {@feature_uri => false } end when "regression" - @dataset.data[items[0]] << {@feature_uri => items[1]} + if items[1].to_f == 0 + @activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored." + else + @dataset.data[items[0]] << {@feature_uri => items[1].to_f} + end end end @dataset_uri = @dataset.save @@ -78,7 +82,7 @@ class Parser book.default_sheet = 0 1.upto(book.last_row) do |row| input = validate( book.cell(row,1), book.cell(row,2), row ) # smiles, activity - @data << input + @data << input if input end File.safe_unlink(@file[:tempfile]) rescue @@ -93,18 +97,23 @@ class Parser @smiles_errors << "Row #{row}: " + [smiles,act].join(", ") return false end - if !numeric?(act) + unless numeric?(act) or classification?(act) @activity_errors << "Row #{row}: " + [smiles,act].join(", ") return false end @duplicates[compound.inchi] = [] unless @duplicates[compound.inchi] @duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ") - @type = "regression" unless act.to_f == 0.0 or act.to_f == 1.0 + @type = "regression" unless classification?(act) @nr_compounds += 1 - [ compound.uri, act.to_f ] + [ compound.uri, act , row ] end def numeric?(object) - true if Float(object) rescue false + true if Float(object) rescue false + end + + def classification?(object) + !object.to_s.strip.match(TRUE_REGEXP).nil? or !object.to_s.strip.match(FALSE_REGEXP).nil? end + end diff --git a/views/create.haml b/views/create.haml index 0f50756..3cf05c1 100644 --- a/views/create.haml +++ b/views/create.haml @@ -2,28 +2,32 @@ %p This service creates - %a{:href => 'http://lazar.in-silico.de'} lazar - %em classification - models (i.e. models that discriminate between toxic/nontoxic compounds) from your uploaded datasets. Here are - = link_to "instructions", '/excel_format' - , for creating training datasets in Excel. + %ul + %li + %a{:href => 'http://lazar.in-silico.de'} lazar + %em classification + models (i.e. models that discriminate between toxic/nontoxic compounds) and + %li + %a{:href => 'http://lazar.in-silico.de'} lazar + %em regression + models (i.e. models that predict quantitative values, e.g. LC50's) + from your uploaded datasets. Further modelling algorithms will be added in future versions. + %p - Facilities to create models for quantitative values (e.g. LC50s) and further modelling algorithms will be added in future versions. + Please read the + = link_to "instructions for creating training datasets", '/help' + before submitting. %form{ :action => url_for('/upload'), :method => "post", :enctype => "multipart/form-data" } %fieldset - -#%legend - Upload training data and create a - %a{:href => 'http://lazar.in-silico.de'} lazar - model - %label{:for => 'endpoint'} 1. Enter a name for your endpoint: + %label{:for => 'endpoint'} 1. Enter endpoint name and unit (for regression): %input{:type => 'text', :name => 'endpoint', :id => 'endpoint', :size => '50'} %br %label{:for => 'file'} 2. Upload training data in - = link_to "Excel", '/excel_format' + = link_to "Excel", '/help' or - = link_to "CSV", '/csv_format' + = link_to "CSV", '/help' format: %input{:type => 'file', :name => 'file', :id => 'file', :size => '41'} %input{ :type => "submit", :value => "Create model"} diff --git a/views/csv_format.haml b/views/csv_format.haml deleted file mode 100644 index 999bb68..0000000 --- a/views/csv_format.haml +++ /dev/null @@ -1,23 +0,0 @@ -= link_to "Back to model creation", '/create' -%p - The input file should contain two columns, separated by a comma. Enter in the first column the chemical structure in - %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES - format, in the second column the activity classification (1: active, 0: inactive), e.g. - -.code - %code - %br CC(=O)Nc1ccc(O)cc1, 1 - %br O=c1[nH]cnc2[nH]ncc12, 1 - %br CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O, 1 - %br CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12, 1 - %br CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3, 1 - %br CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13, 0 - %br CCN(CC)CC(=O)Nc1c(C)cccc1C, 0 - %br CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12, 0 - %br CN1CCCC1c2cccnc2, 0 - -%p - Here is an example for download: - = link_to "hamster_carcinogenicity.csv", "/hamster_carcinogenicity.csv" - -%p You can create input files in Excel: Create a sheet with two columns and export them as CSV file with the "Save As" option from the menu, selecting the CSV (comma delimited) format. diff --git a/views/excel_format.haml b/views/excel_format.haml deleted file mode 100644 index 4cbbd08..0000000 --- a/views/excel_format.haml +++ /dev/null @@ -1,64 +0,0 @@ -= link_to "Back to model creation", '/create' -%p - The Excel input file should contain a single spreadsheet with two columns. Enter in the first column the chemical structure in - %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES - format, in the second column the activity classification (1: active, 0: inactive), e.g. - -- n = 0 - -.code - %table - %tr - %td - %th A - %th B - %tr - - n += 1 - %th= n - %td CC(=O)Nc1ccc(O)cc1 - %td 1 - %tr - - n += 1 - %th= n - %td O=c1[nH]cnc2[nH]ncc12 - %td 1 - %tr - - n += 1 - %th= n - %td CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O - %td 1 - %tr - - n += 1 - %th= n - %td CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12 - %td 1 - %tr - - n += 1 - %th= n - %td CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3 - %td 1 - %tr - - n += 1 - %th= n - %td CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13 - %td 0 - %tr - - n += 1 - %th= n - %td CCN(CC)CC(=O)Nc1c(C)cccc1C - %td 0 - %tr - - n += 1 - %th= n - %td CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12 - %td 0 - %tr - - n += 1 - %th= n - %td CN1CCCC1c2cccnc2 - %td 0 - -%p - Here is an example file for download: - = link_to "hamster_carcinogenicity.xls", "/hamster_carcinogenicity.xls" - diff --git a/views/help.haml b/views/help.haml new file mode 100644 index 0000000..52339ce --- /dev/null +++ b/views/help.haml @@ -0,0 +1,101 @@ += link_to "Back to model creation", '/create' +%p + Input files have two columns. Enter in the first column the chemical structure in + %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES + format, in the second column the toxic activity. +%dl + %dt Classification datasets + %dd Please use 1/0, active/inactive or true/false to indicate active/inactive compounds. + %dt Regression datasets + %dd + Enter a quantitative value. For optimal performance you should + %ul + %li use molar units + %li enter non-logarithmic values (logarithms are taken internally) + %li avoid 0 activities (will be ignored) +%p + Input files are accepted in + %a{:href => "http://en.wikipedia.org/wiki/Microsoft_Excel"} Excel + and + %a{:href => "en.wikipedia.org/wiki/Comma-separated_values"} CSV + formats. + +%h3 Excel example + +- n = 0 + +.code + %table + %tr + %td + %th A + %th B + %tr + - n += 1 + %th= n + %td CC(=O)Nc1ccc(O)cc1 + %td 1 + %tr + - n += 1 + %th= n + %td O=c1[nH]cnc2[nH]ncc12 + %td 1 + %tr + - n += 1 + %th= n + %td CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O + %td 1 + %tr + - n += 1 + %th= n + %td CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12 + %td 1 + %tr + - n += 1 + %th= n + %td CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3 + %td 1 + %tr + - n += 1 + %th= n + %td CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13 + %td 0 + %tr + - n += 1 + %th= n + %td CCN(CC)CC(=O)Nc1c(C)cccc1C + %td 0 + %tr + - n += 1 + %th= n + %td CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12 + %td 0 + %tr + - n += 1 + %th= n + %td CN1CCCC1c2cccnc2 + %td 0 + +%p + Excel example file for download: + = link_to "hamster_carcinogenicity.xls", "/hamster_carcinogenicity.xls" + +%h3 CSV example + +.code + %code + %br CC(=O)Nc1ccc(O)cc1, 1 + %br O=c1[nH]cnc2[nH]ncc12, 1 + %br CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O, 1 + %br CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12, 1 + %br CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3, 1 + %br CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13, 0 + %br CCN(CC)CC(=O)Nc1c(C)cccc1C, 0 + %br CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12, 0 + %br CN1CCCC1c2cccnc2, 0 + +%p + CSV example for download: + = link_to "hamster_carcinogenicity.csv", "/hamster_carcinogenicity.csv" + +%p You can create CSV files in Excel: Create a sheet with two columns and export them as CSV file with the "Save As" option from the menu, selecting the CSV (comma delimited) format. diff --git a/views/layout.haml b/views/layout.haml index 012e296..7935b33 100755 --- a/views/layout.haml +++ b/views/layout.haml @@ -23,11 +23,10 @@ = link_to "Inspect", "/models" %li{:class => ("selected" if /predict/ =~ request.path )} = link_to "Predict", "/predict" - %li{:class => ("selected" if /about/ =~ request.path )} - = link_to "About", "/about" + %li{:class => ("selected" if /help/ =~ request.path )} + = link_to "Help", "/help" .content - - if `hostname`.match(/ot-test|ot-dev/) .notice This service is for testing purposes only - once a week all models will be deleted. Please send bug reports and feature requests to our diff --git a/views/model.haml b/views/model.haml index fd1d114..e336de6 100644 --- a/views/model.haml +++ b/views/model.haml @@ -33,6 +33,8 @@ %dd %a{:href => "http://www.in-silico.de/articles/modi020905.pdf"} #{File.basename model.algorithm} -# %a{:href => model.algorithm} RDF/XML + %dt Type: + %dd= model.type %dt Descriptors: %dd %a{:href => 'http://www.maunz.de/libfminer2-bbrc-doc/'} Fminer backbone refinement classes diff --git a/views/prediction.haml b/views/prediction.haml index 993c966..96385ef 100644 --- a/views/prediction.haml +++ b/views/prediction.haml @@ -41,12 +41,12 @@ = activity(p[:prediction]) - else %br - %em= p[:prediction] + %em= sprintf('%.03g', p[:prediction]) - if p[:confidence] %br ( %a{:href => "#", :id => "linkConfidence#{p.object_id}"} Confidence - = ": #{sprintf('%.03f', p[:confidence].to_f.abs)}" + = ": #{sprintf('%.03g', p[:confidence].to_f.abs)}" :javascript $("a#linkConfidence#{p.object_id}").click(function () { $("dl#confidence").toggle(); |