11 files changed, 195 insertions, 159 deletions
diff --git a/application.rb b/application.rb
index fecdb63..40aabef 100644
--- a/application.rb
+++ b/application.rb
@@ -21,9 +21,7 @@ end
 
 get '/models/?' do
 	@models = ToxCreateModel.all(:order => [ :created_at.desc ])
-	@models.each do |model|
-		process_model(model)
-	end
+	@models.each { |model| model.process }
 	haml :models
 end
 
@@ -53,7 +51,7 @@ end
 get '/model/:id/:view/?' do
   response['Content-Type'] = 'text/plain'
 	model = ToxCreateModel.get(params[:id])
-  process_model(model)
+  model.process
 
   begin
     case params[:view]
@@ -79,16 +77,8 @@ get '/create' do
 	haml :create
 end
 
-get '/about' do
-	haml :about
-end
-
-get '/csv_format' do
-	haml :csv_format
-end
-
-get '/excel_format' do
-	haml :excel_format
+get '/help' do # single help page; replaces the former /about, /csv_format and /excel_format routes
+	haml :help
 end
 
 get "/confidence" do
@@ -120,12 +110,12 @@ post '/upload' do # create a new model
 		redirect url_for('/create')
 	end
 
-	#begin
+	begin
 		@model.task_uri = OpenTox::Algorithm::Lazar.create_model(:dataset_uri => parser.dataset_uri, :prediction_feature => feature_uri)
-	#rescue
-	#	flash[:notice] = "Model creation failed. Please check if the input file is in a valid #{link_to "Excel", "/excel_format"} or #{link_to "CSV", "/csv_format"} format."
-	#	redirect url_for('/create')
-	#end
+	rescue # model creation raised; send the user to /help (the /excel_format and /csv_format pages no longer exist)
+		flash[:notice] = "Model creation failed. Please check if the input file is in a valid #{link_to "Excel", "/help"} or #{link_to "CSV", "/help"} format."
+		redirect url_for('/create')
+	end
 
 	validation_task_uri = OpenTox::Validation.crossvalidation(
 		:algorithm_uri => OpenTox::Algorithm::Lazar.uri,
@@ -180,16 +170,19 @@ post '/predict/?' do # post chemical name to model
 		db_activities = []
 		LOGGER.debug "curl -X POST -d 'compound_uri=#{@compound.uri}' -H 'Accept:application/x-yaml' #{model.uri}"
 		prediction = YAML.load(`curl -X POST -d 'compound_uri=#{@compound.uri}' -H 'Accept:application/x-yaml' #{model.uri}`)
+		# TODO check if prediction failed - returns string
 		source = prediction.creator
 		if prediction.data[@compound.uri]
-			if source.to_s.match(/model/)
+			if source.to_s.match(/model/) # real prediction
 				prediction = prediction.data[@compound.uri].first.values.first
-				if prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")]
+				LOGGER.debug prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")]
+				LOGGER.debug prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]
+				if !prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")].nil?
 					@predictions << {:title => model.name, :prediction => prediction[File.join(@@config[:services]["opentox-model"],"lazar#classification")], :confidence => prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]}
-				elsif prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")]
+				elsif !prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")].nil?
 					@predictions << {:title => model.name, :prediction => prediction[File.join(@@config[:services]["opentox-model"],"lazar#regression")], :confidence => prediction[File.join(@@config[:services]["opentox-model"],"lazar#confidence")]}
 				end
-			else
+			else # database value
 				prediction = prediction.data[@compound.uri].first.values
 				@predictions << {:title => model.name, :measured_activities => prediction}
 			end
@@ -197,6 +190,7 @@ post '/predict/?' do # post chemical name to model
 			@predictions << {:title => model.name, :prediction => "not available (no similar compounds in the training dataset)"}
 		end
 	end
+	LOGGER.debug @predictions.inspect
 
 	haml :prediction
 end
diff --git a/helper.rb b/helper.rb
index 90529f6..4a5f739 100644
--- a/helper.rb
+++ b/helper.rb
@@ -10,25 +10,5 @@ helpers do
 		end
 		act
 	end
-
-  def process_model(model)
-    if !model.uri and model.status == "Completed"
-			model.uri = RestClient.get(File.join(model.task_uri, 'resultURI')).body
-			model.save
-		end
-		if !model.validation_uri and model.validation_status == "Completed"
-			begin
-				model.validation_uri = RestClient.get(File.join(model.validation_task_uri, 'resultURI')).body
-				LOGGER.debug "Validation URI: #{model.validation_uri}"
-				model.validation_report_task_uri = RestClient.post(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => model.validation_uri).body
-				LOGGER.debug "Validation Report Task URI: #{model.validation_report_task_uri}"
-				model.save
-			rescue
-			end
-		end
-		if model.validation_report_task_uri and !model.validation_report_uri and model.validation_report_status == 'Completed'
-			model.validation_report_uri = RestClient.get(File.join(model.validation_report_task_uri, 'resultURI')).body
-		end
-  end
 end
 
diff --git a/model.rb b/model.rb
index 36670d9..e35507b 100644
--- a/model.rb
+++ b/model.rb
@@ -1,4 +1,5 @@
 class ToxCreateModel
+
 	include DataMapper::Resource
 	property :id, Serial
 	property :name, String, :length => 255
@@ -56,6 +57,21 @@ class ToxCreateModel
 		end
 	end
 
+	# Kind of lazar model: "classification", "regression" or "unknown" (also when the model cannot be read).
+	def type
+		return "unknown" unless @uri # no model URI stored yet, nothing to fetch
+		# NOTE(review): YAML.load on a remote response - presumably the model service is trusted; confirm
+		lazar = YAML.load(RestClient.get(@uri, :accept => "application/x-yaml").body)
+		case lazar.dependentVariables
+		when /classification/ then "classification"
+		when /regression/ then "regression"
+		else "unknown"
+		end
+	rescue => e # a failing model service must not break the page that displays the type
+		LOGGER.debug "Could not determine model type for #{@uri}: #{e.message}"
+		"unknown"
+	end
+
 	def validation
 		begin
 			uri = File.join(@validation_uri, 'statistics')
@@ -93,8 +109,26 @@ class ToxCreateModel
 		end
 	end
 
+	def process # fetch result URIs of finished tasks; uses property writers because DataMapper does not track plain @ivar writes
+		if !uri and status == "Completed"
+			self.uri = RestClient.get(File.join(task_uri, 'resultURI')).body
+			save
+		end
+		if !validation_uri and validation_status == "Completed"
+			begin
+				self.validation_uri = RestClient.get(File.join(validation_task_uri, 'resultURI')).body
+				LOGGER.debug "Validation URI: #{validation_uri}"
+				self.validation_report_task_uri = RestClient.post(File.join(@@config[:services]["opentox-validation"],"/report/crossvalidation"), :validation_uris => validation_uri).body
+				LOGGER.debug "Validation Report Task URI: #{validation_report_task_uri}"
+				save
+			rescue # best effort: errors are ignored and nothing is saved in that case
+			end
+		end
+		if validation_report_task_uri and !validation_report_uri and validation_report_status == 'Completed'
+			self.validation_report_uri = RestClient.get(File.join(validation_report_task_uri, 'resultURI')).body
+			save # keep the report URI so it is not requested again on every call
+		end
+	end
 end
 
 DataMapper.auto_upgrade!
-
-
diff --git a/parser.rb b/parser.rb
index 040714a..8754531 100644
--- a/parser.rb
+++ b/parser.rb
@@ -37,14 +37,18 @@ class Parser
         @dataset.data[items[0]] = [] unless @dataset.data[items[0]]
 				case @type
 				when "classification"
-					case items[1].to_i.to_s
-					when '1'
+					case items[1].to_s
+					when TRUE_REGEXP
 						@dataset.data[items[0]] << {@feature_uri => true }
-					when '0'
+					when FALSE_REGEXP
 						@dataset.data[items[0]] << {@feature_uri => false }
 					end
 				when "regression"
-					@dataset.data[items[0]] << {@feature_uri => items[1]}
+					if items[1].to_f == 0
+						@activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored."
+					else
+						@dataset.data[items[0]] << {@feature_uri => items[1].to_f}
+					end
 				end
 		end
 		@dataset_uri = @dataset.save
@@ -78,7 +82,7 @@ class Parser
 			book.default_sheet = 0
 			1.upto(book.last_row) do |row|
 				input = validate( book.cell(row,1), book.cell(row,2), row ) # smiles, activity
-				@data << input
+				@data << input if input
 			end
 			File.safe_unlink(@file[:tempfile])
 		rescue
@@ -93,18 +97,23 @@ class Parser
 			@smiles_errors << "Row #{row}: " + [smiles,act].join(", ") 
 			return false
 		end
-		if !numeric?(act)
+		unless numeric?(act) or classification?(act)
 			@activity_errors << "Row #{row}: " + [smiles,act].join(", ")
 			return false
 		end
 		@duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
 		@duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ")
-		@type = "regression" unless act.to_f == 0.0 or act.to_f == 1.0
+		@type = "regression" unless classification?(act)
 		@nr_compounds += 1
-		[ compound.uri, act.to_f ]
+		[ compound.uri, act , row ]
 	end
 
 	def numeric?(object)
-		  true if Float(object) rescue false
+		true if Float(object) rescue false
+	end
+
+	def classification?(object) # true if the stripped string form matches the true or the false pattern
+		[TRUE_REGEXP, FALSE_REGEXP].any? { |pattern| object.to_s.strip.match(pattern) }
 	end
+
 end
diff --git a/views/create.haml b/views/create.haml
index 0f50756..3cf05c1 100644
--- a/views/create.haml
+++ b/views/create.haml
@@ -2,28 +2,32 @@
 
   %p
     This service creates 
-    %a{:href => 'http://lazar.in-silico.de'} lazar
-    %em classification
-    models (i.e. models that discriminate between toxic/nontoxic compounds) from your uploaded datasets. Here are
-    = link_to "instructions", '/excel_format'
-    , for creating training datasets in Excel.
+    %ul
+      %li
+        %a{:href => 'http://lazar.in-silico.de'} lazar
+        %em classification
+        models (i.e. models that discriminate between toxic/nontoxic compounds) and
+      %li
+        %a{:href => 'http://lazar.in-silico.de'} lazar
+        %em regression
+        models (i.e. models that predict quantitative values, e.g. LC50s)
+    from your uploaded datasets. Further modelling algorithms will be added in future versions.
+ 
   %p
-    Facilities to create models for quantitative values (e.g. LC50s) and further modelling algorithms will be added in future versions.
+    Please read the 
+    = link_to "instructions for creating training datasets", '/help'
+    before submitting.
 
   %form{ :action => url_for('/upload'), :method => "post", :enctype => "multipart/form-data" }
     %fieldset
-      -#%legend
-        Upload training data and create a
-        %a{:href => 'http://lazar.in-silico.de'} lazar
-        model
-      %label{:for => 'endpoint'} 1. Enter a name for your endpoint:
+      %label{:for => 'endpoint'} 1. Enter endpoint name and unit (for regression):
       %input{:type => 'text', :name => 'endpoint', :id => 'endpoint', :size => '50'}
       %br
       %label{:for => 'file'}
         2. Upload training data in
-        = link_to "Excel", '/excel_format'
+        = link_to "Excel", '/help'
         or
-        = link_to "CSV", '/csv_format'
+        = link_to "CSV", '/help'
         format:
       %input{:type => 'file', :name => 'file', :id => 'file', :size => '41'}
     %input{ :type => "submit", :value => "Create model"}
diff --git a/views/csv_format.haml b/views/csv_format.haml
deleted file mode 100644
index 999bb68..0000000
--- a/views/csv_format.haml
+++ /dev/null
@@ -1,23 +0,0 @@
-= link_to "Back to model creation", '/create'
-%p
-  The input file should contain two columns, separated by a comma. Enter in the first column the chemical structure in
-  %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES
-  format, in the second column the activity classification (1: active, 0: inactive), e.g.
-
-.code
-  %code
-    %br CC(=O)Nc1ccc(O)cc1, 1  
-    %br O=c1[nH]cnc2[nH]ncc12, 1  
-    %br CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O, 1  
-    %br CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12, 1  
-    %br CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3, 1  
-    %br CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13, 0 
-    %br CCN(CC)CC(=O)Nc1c(C)cccc1C, 0 
-    %br CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12, 0 
-    %br CN1CCCC1c2cccnc2, 0 
-
-%p
-  Here is an example for download:
-  = link_to "hamster_carcinogenicity.csv", "/hamster_carcinogenicity.csv"
-
-%p You can create input files in Excel: Create a sheet with two columns and export them as CSV file with the "Save As" option from the menu, selecting the CSV (comma delimited) format.
diff --git a/views/excel_format.haml b/views/excel_format.haml
deleted file mode 100644
index 4cbbd08..0000000
--- a/views/excel_format.haml
+++ /dev/null
@@ -1,64 +0,0 @@
-= link_to "Back to model creation", '/create'
-%p
-  The Excel input file should contain a single spreadsheet with two columns. Enter in the first column the chemical structure in
-  %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES
-  format, in the second column the activity classification (1: active, 0: inactive), e.g.
-
-- n = 0
-
-.code
-  %table
-    %tr
-      %td
-      %th A
-      %th B
-    %tr
-      - n += 1
-      %th= n
-      %td CC(=O)Nc1ccc(O)cc1
-      %td 1  
-    %tr
-      - n += 1
-      %th= n
-      %td O=c1[nH]cnc2[nH]ncc12
-      %td 1  
-    %tr
-      - n += 1
-      %th= n
-      %td CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O
-      %td 1  
-    %tr
-      - n += 1
-      %th= n
-      %td CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12
-      %td 1  
-    %tr
-      - n += 1
-      %th= n
-      %td CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3
-      %td 1  
-    %tr
-      - n += 1
-      %th= n
-      %td CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13
-      %td 0 
-    %tr
-      - n += 1
-      %th= n
-      %td CCN(CC)CC(=O)Nc1c(C)cccc1C
-      %td 0 
-    %tr
-      - n += 1
-      %th= n
-      %td CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12
-      %td 0 
-    %tr
-      - n += 1
-      %th= n
-      %td CN1CCCC1c2cccnc2
-      %td 0 
-
-%p
-  Here is an example file for download:
-  = link_to "hamster_carcinogenicity.xls", "/hamster_carcinogenicity.xls"
-
diff --git a/views/help.haml b/views/help.haml
new file mode 100644
index 0000000..52339ce
--- /dev/null
+++ b/views/help.haml
@@ -0,0 +1,101 @@
+= link_to "Back to model creation", '/create'
+%p
+  Input files have two columns. Enter in the first column the chemical structure in
+  %a{:href => "http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification"} SMILES
+  format, in the second column the toxic activity.
+%dl
+  %dt Classification datasets 
+  %dd Please use 1/0, active/inactive or true/false to indicate active/inactive compounds.
+  %dt Regression datasets
+  %dd
+    Enter a quantitative value. For optimal performance you should
+    %ul
+      %li use molar units
+      %li enter non-logarithmic values (logarithms are taken internally)
+      %li avoid 0 activities (will be ignored)
+%p 
+  Input files are accepted in 
+  %a{:href => "http://en.wikipedia.org/wiki/Microsoft_Excel"} Excel
+  and
+  %a{:href => "http://en.wikipedia.org/wiki/Comma-separated_values"} CSV
+  formats.
+
+%h3 Excel example
+
+- n = 0
+
+.code
+  %table
+    %tr
+      %td
+      %th A
+      %th B
+    %tr
+      - n += 1
+      %th= n
+      %td CC(=O)Nc1ccc(O)cc1
+      %td 1  
+    %tr
+      - n += 1
+      %th= n
+      %td O=c1[nH]cnc2[nH]ncc12
+      %td 1  
+    %tr
+      - n += 1
+      %th= n
+      %td CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O
+      %td 1  
+    %tr
+      - n += 1
+      %th= n
+      %td CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12
+      %td 1  
+    %tr
+      - n += 1
+      %th= n
+      %td CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3
+      %td 1  
+    %tr
+      - n += 1
+      %th= n
+      %td CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13
+      %td 0 
+    %tr
+      - n += 1
+      %th= n
+      %td CCN(CC)CC(=O)Nc1c(C)cccc1C
+      %td 0 
+    %tr
+      - n += 1
+      %th= n
+      %td CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12
+      %td 0 
+    %tr
+      - n += 1
+      %th= n
+      %td CN1CCCC1c2cccnc2
+      %td 0 
+
+%p
+  Excel example file for download:
+  = link_to "hamster_carcinogenicity.xls", "/hamster_carcinogenicity.xls"
+
+%h3 CSV example
+
+.code
+  %code
+    %br CC(=O)Nc1ccc(O)cc1, 1  
+    %br O=c1[nH]cnc2[nH]ncc12, 1  
+    %br CCCCNc1cc(cc(c1Oc2ccccc2)S(=O)(=O)N)C(=O)O, 1  
+    %br CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12, 1  
+    %br CN(C)CCCC1(OCc2cc(C#N)ccc21)c3ccc(F)cc3, 1  
+    %br CCC(CC)CCN1C(=O)CN=C(C2CCCCC2F)c3cc(Cl)ccc13, 0 
+    %br CCN(CC)CC(=O)Nc1c(C)cccc1C, 0 
+    %br CC(C)(C)NCC(O)COc1cccc2CC(O)C(O)Cc12, 0 
+    %br CN1CCCC1c2cccnc2, 0 
+
+%p
+  CSV example for download:
+  = link_to "hamster_carcinogenicity.csv", "/hamster_carcinogenicity.csv"
+
+%p You can create CSV files in Excel: Create a sheet with two columns and export them as CSV file with the "Save As" option from the menu, selecting the CSV (comma delimited) format.
diff --git a/views/layout.haml b/views/layout.haml
index 012e296..7935b33 100755
--- a/views/layout.haml
+++ b/views/layout.haml
@@ -23,11 +23,10 @@
           = link_to "Inspect", "/models"
         %li{:class => ("selected" if /predict/ =~ request.path )}
           = link_to "Predict", "/predict"
-        %li{:class => ("selected" if /about/ =~ request.path )}
-          = link_to "About", "/about"
+        %li{:class => ("selected" if /help/ =~ request.path )}
+          = link_to "Help", "/help"
 
     .content
-
       - if `hostname`.match(/ot-test|ot-dev/)
         .notice 
           This service is for testing purposes only - once a week all models will be deleted. Please send bug reports and feature requests to our 
diff --git a/views/model.haml b/views/model.haml
index fd1d114..e336de6 100644
--- a/views/model.haml
+++ b/views/model.haml
@@ -33,6 +33,8 @@
         %dd
           %a{:href => "http://www.in-silico.de/articles/modi020905.pdf"} #{File.basename model.algorithm}
           -# %a{:href => model.algorithm} RDF/XML
+        %dt Type:
+        %dd= model.type
         %dt Descriptors:
         %dd
           %a{:href => 'http://www.maunz.de/libfminer2-bbrc-doc/'} Fminer backbone refinement classes
diff --git a/views/prediction.haml b/views/prediction.haml
index 993c966..96385ef 100644
--- a/views/prediction.haml
+++ b/views/prediction.haml
@@ -41,12 +41,12 @@
                 = activity(p[:prediction])
             - else
               %br
-                %em= p[:prediction]
+                %em= p[:prediction].is_a?(Numeric) ? sprintf('%.03g', p[:prediction]) : p[:prediction]
             - if p[:confidence]
               %br
                 (
                 %a{:href => "#", :id => "linkConfidence#{p.object_id}"} Confidence
-                = ": #{sprintf('%.03f', p[:confidence].to_f.abs)}"
+                = ": #{sprintf('%.03g', p[:confidence].to_f.abs)}"
                 :javascript
                   $("a#linkConfidence#{p.object_id}").click(function () {
                     $("dl#confidence").toggle();