From c38f9f2e4c2dec4ef163c6c76d64f5dbf4974a5a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 21 Nov 2012 10:09:48 +0100 Subject: Initial GUI --- Gemfile | 6 + VERSION | 1 + aop.gemspec | 27 ++++ application.rb | 197 ++++++++++++++++++++++++++--- config.ru | 6 + false_positives.rb | 39 ++++++ pubchem-test.rb | 8 +- pubchem.rb | 295 +++++++++++++++++++++++++++++-------------- stat.rb | 30 +++++ unicorn.rb | 1 + validation.rb | 35 +++++ views/assays.haml | 6 + views/compound.haml | 46 +++++++ views/fp.haml | 41 ++++++ views/index.haml | 27 ---- views/layout.haml | 26 ++++ views/neighbors.haml | 43 +++++++ views/predicted_assays.haml | 6 + views/predicted_targets.haml | 6 + views/select.haml | 8 ++ views/targets.haml | 6 + 21 files changed, 718 insertions(+), 142 deletions(-) create mode 100644 Gemfile create mode 100644 VERSION create mode 100644 aop.gemspec create mode 100644 config.ru create mode 100644 false_positives.rb create mode 100755 stat.rb create mode 100644 unicorn.rb create mode 100644 validation.rb create mode 100644 views/assays.haml create mode 100644 views/compound.haml create mode 100644 views/fp.haml create mode 100644 views/layout.haml create mode 100644 views/neighbors.haml create mode 100644 views/predicted_assays.haml create mode 100644 views/predicted_targets.haml create mode 100644 views/select.haml create mode 100644 views/targets.haml diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..48f78eb --- /dev/null +++ b/Gemfile @@ -0,0 +1,6 @@ +source :gemcutter +gemspec +gem "haml" +gem "dalli" +gem "opentox-server", :path => "../opentox-server" +gem "opentox-client", :path => "../opentox-client" diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..3be2757 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.0.1pre1 diff --git a/aop.gemspec b/aop.gemspec new file mode 100644 index 0000000..fe90a75 --- /dev/null +++ b/aop.gemspec @@ -0,0 +1,27 @@ +# -*- encoding: utf-8 -*- +$:.push File.expand_path("../lib", __FILE__) + +Gem::Specification.new do |s| + s.name = "aop" + s.version = File.read("./VERSION") + s.authors = ["Christoph Helma","Denis Gebele","Micha Rautenberg"] + s.email = ["helma@in-silico.ch","gebele@in-silico.ch","rautenenberg@in-silico.ch"] + s.homepage = "http://github.com/OpenTox/task" + s.summary = %q{AOP GUI} + s.description = %q{AOP GUI} + s.license = 'GPL-3' + #s.platform = Gem::Platform::CURRENT + + s.rubyforge_project = "aop" + + s.files = `git ls-files`.split("\n") + #s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") + #s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } + #s.require_paths = ["lib"] + s.required_ruby_version = '>= 1.9.2' + + # specify any dependencies here; for example: + s.add_runtime_dependency "opentox-server" + s.post_install_message = "Please configure your service in ~/.opentox/config/aop.rb" +end + diff --git a/application.rb b/application.rb index 2a77013..a3d46c3 100644 --- a/application.rb +++ b/application.rb @@ -1,22 +1,183 @@ -require 'sinatra' -require "sinatra/reloader" -require "haml" -require 'yaml' require "./pubchem.rb" -also_reload './pubchem.rb' - -get '/?' do - #@neighbors = OpenTox::PubChemNeighbors.new - smiles = "OC(=O)C1=C(C=CC=C1)OC(=O)C" - #smiles = "c1cc(CC)ccc1" - #smiles = "CC(=O)Nc1ccc(O)cc1" - smiles = "C1=CC(=C(C=C1Cl)Cl)OCC(=O)O" - #@compound.from_smiles smiles - #@neighbors.from_smiles smiles +require 'rack/session/dalli' +module OpenTox + class Application < Service + set :static, true + set :root, File.dirname(__FILE__) + also_reload './pubchem.rb' + #enable :sessions + use Rack::Session::Dalli, :cache => Dalli::Client.new + + @@pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" + + helpers do + =begin - File.open("compound.yaml","w+"){|f| f.puts @compound.to_yaml} - @compound = YAML.load_file "compound.yaml" + def pubchem_search url + attempts = 0 + result = nil + begin + attempts += 1 + json = RestClient.get url, :timeout => 90000000 + result = JSON.parse json + while result["Waiting"] do + sleep 2 + listkey = result["Waiting"]["ListKey"] + result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "cids", "JSON") + end + rescue + if $!.message =~ /Timeout/i and attempts < 4 + sleep 2 + retry + elsif $!.message =~ /Timeout/i and attempts >= 4 + File.open("timeouts","a+"){|f| f.puts url} + puts url + puts $!.message + elsif $!.message.match /404/ + #not_found_error #TODO + else + puts url + puts $!.message + end + end + end =end - @neighbors = YAML.load_file "search.yaml" - haml :index + + def image_uri cid + File.join @@pug_uri, "compound", "cid", cid, "PNG"#?record_type=3d&image_size=small" + end + + end + + before '/cid/:cid/*' do + session[:compound] = PubChemCompound.new params[:cid] unless session[:compound] and session[:compound].cid == params[:cid] + end + + get '/?' do + haml :index + end + + get '/cid/:cid/?' do + session[:compound] = PubChemCompound.new params[:cid] + haml :compound + end + + get '/search/?' do + #begin + cids = RestClientWrapper.get(File.join(@@pug_uri,"compound","name",URI.escape(params[:name]),"cids","TXT")).split("\n") + if cids.size == 1 + session[:compound] = PubChemCompound.new cids.first + haml :compound + elsif cids.size > 1 + @compounds = cids.collect{|cid| PubChemCompound.new cid } + haml :select + end + #rescue + #haml :not_found + #end + end + + get '/cid/:cid/targets/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].targets + else + @assays = PubChemCompound.new(params[:cid]).targets + end + haml :targets, :layout => false + end + + get '/cid/:cid/nontargets/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].non_targets + else + @assays = PubChemCompound.new(params[:cid]).non_targets + end + haml :targets, :layout => false + end + + get '/cid/:cid/other_active_assays/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].active_assays - session[:compound].targets + else + compound = PubChemCompound.new(params[:cid]) + @assays = compound.active_assays - compound.targets + end + haml :assays, :layout => false + end + + get '/cid/:cid/other_inactive_assays/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].inactive_assays - session[:compound].non_targets + else + compound = PubChemCompound.new(params[:cid]) + @assays = compound.inactive_assays - compound.non_targets + end + haml :assays, :layout => false + end + + get '/cid/:cid/predicted_targets/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].predicted_targets + else + @assays = PubChemCompound.new(params[:cid]).predicted_targets + end + haml :predicted_targets, :layout => false + end + + get '/cid/:cid/predicted_nontargets/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].predicted_non_targets + else + @assays = PubChemCompound.new(params[:cid]).predicted_non_targets + end + haml :predicted_targets, :layout => false + end + + get '/cid/:cid/other_predicted_active_assays/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].predicted_active_assays - session[:compound].predicted_targets + else + compound = PubChemCompound.new(params[:cid]) + @assays = compound.predicted_active_assays - compound.predicted_targets + end + haml :predicted_assays, :layout => false + end + + get '/cid/:cid/other_predicted_inactive_assays/?' do + if params[:cid] == session[:compound].cid + @assays = session[:compound].predicted_inactive_assays - session[:compound].predicted_non_targets + else + compound = PubChemCompound.new(params[:cid]) + @assays = compound.predicted_inactive_assays - compound.predicted_non_targets + end + haml :assays, :layout => false + end + + get '/cid/:cid/neighbors/?' do + haml :neighbors, :layout => false + end + + + get '/cid/:cid/cosine/:cid2/?' do + session[:compound].cosine(PubChemCompound.new(params[:cid2])).to_s + end + + get '/fp/?' do + @fp = [] + YAML.load_file("false_positives.yaml").each do |pred| + pred[:fp_targets].each do |gi,t| + @fp << { + "CID" => pred[:cid], + "Target GI" => gi, + "p_active" => t[:p][:active].first, + "p_inactive" => t[:p][:inactive].first, + :assays => t[:measured], + :neighbors => t[:neighbors] + } + end + end + @fp.sort!{|a,b| b["p_active"] <=> a["p_active"]} + haml :fp + end + end end diff --git a/config.ru b/config.ru new file mode 100644 index 0000000..4050809 --- /dev/null +++ b/config.ru @@ -0,0 +1,6 @@ +SERVICE = "aop" +require 'bundler' +Bundler.require +timeout = 600 +require './application.rb' +run OpenTox::Application diff --git a/false_positives.rb b/false_positives.rb new file mode 100644 index 0000000..9453679 --- /dev/null +++ b/false_positives.rb @@ -0,0 +1,39 @@ +#!/usr/bin/env ruby +require "./pubchem.rb" + +false_positives = YAML.load_file("false_positives.yaml") +#false_positives = [] +until false_positives.size > 100 do + result = {} + @compound = OpenTox::PubChemCompound.new + # http://www.ncbi.nlm.nih.gov/sites/entrez?term=all%5Bfilt%5D&cmd=search&db=pccompound + @compound.cid = Random.new.rand(1..35611104) + puts @compound.cid + if @compound.targets and @compound.non_targets and !(@compound.targets + @compound.non_targets).empty? + puts "predicting ..." + result[:cid] = @compound.cid + measured_non_targets = @compound.non_targets.collect{|t| t["Target GI"]}.compact.uniq + predicted_targets = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "active"}.compact.uniq + + result[:fp_targets] = {} + (predicted_targets & measured_non_targets).each do |gi| + result[:fp_targets][gi] = {:p => {:active => nil, :inactive => nil}, :measured => [], :neighbors => []} + result[:fp_targets][gi][:measured] = @compound.inactive_assays.select{|a| a["Target GI"] == gi} + result[:fp_targets][gi][:p][:active] = @compound.predicted_targets.collect{|t| t[:p_active] if t[:target_gi] == gi}.compact.uniq + result[:fp_targets][gi][:p][:inactive] = @compound.predicted_targets.collect{|t| t[:p_inactive] if t[:target_gi] == gi}.compact.uniq + + @compound.neighbors.select{|n| n.assays.collect{|a| a["Target GI"]}.include? gi }.each do |neighbor| + result[:fp_targets][gi][:neighbors] << { + :cid => neighbor.cid, + :similarity => neighbor.similarity, + :assays => neighbor.assays.select{|a| a["Target GI"] == gi } + } + end + end + unless result[:fp_targets].empty? + false_positives << result + File.open("false_positives.yaml","w+") {|f| f.puts false_positives.to_yaml} + end + end +end + diff --git a/pubchem-test.rb b/pubchem-test.rb index 273698e..a3e7799 100644 --- a/pubchem-test.rb +++ b/pubchem-test.rb @@ -7,7 +7,7 @@ class AOPTest < Test::Unit::TestCase def setup @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" - @compound = OpenTox::PubChemCompound.new + @compound = OpenTox::PubChemCompound.new #3036 @compound.cid = 1983 #@compound.from_name "2,4-D" end @@ -18,8 +18,9 @@ class AOPTest < Test::Unit::TestCase puts @compound.to_smiles puts "measured targets" puts @compound.targets.collect{|t| t["Target Name"]}.to_yaml +=begin puts "predicted targets" - puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.to_yaml + puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.size puts "predicted non_targets" puts @compound.predicted_targets.select{|t| t[:prediction] == "inactive"}.size @@ -38,8 +39,11 @@ class AOPTest < Test::Unit::TestCase puts (predicted_nontarget_gis - measured_nontarget_gis).size print "incorrect predicted targets: " puts (measured_nontarget_gis & predicted_target_gis).size + puts (measured_nontarget_gis & predicted_target_gis).sort.to_yaml + puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.to_yaml print "incorrect predicted non-targets: " puts (measured_target_gis & predicted_nontarget_gis).size +=end =begin @compound.neighbors.each do |n| #print n.cid diff --git a/pubchem.rb b/pubchem.rb index 891110c..a6893fb 100644 --- a/pubchem.rb +++ b/pubchem.rb @@ -1,154 +1,221 @@ require '../opentox-client/lib/opentox-client.rb' require 'json' +require 'base64' def Math.gauss(x, sigma = 0.3) d = 1.0 - x.to_f Math.exp(-(d*d)/(2*sigma*sigma)) end -module PubChem - - attr_accessor :result - - def initialize - @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" - end - - def pubchem_search url - json = RestClient.get url#, :accept => "application/json", :timeout => 90000000 - @result = JSON.parse json - rescue - puts url - puts $!.message - @result = nil - end - -end - module OpenTox + # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/ class PubChemCompound < Compound - include PubChem - # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/ attr_writer :cid attr_accessor :similarity, :p, :assays - def initialize - super - @summary = [] - @similarity_threshold = 75 - @neighbors = [] - @predicted_targets = [] + def initialize cid=nil + @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" + @cid = cid + @assays = nil + @similarity_threshold = 85 + @neighbors = nil + @predicted_assays = nil + #@predicted_targets = nil + #@priors = {} + #@priors = JSON.parse(File.read("priors.json")) end - def from_name name - @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi") + def fingerprint + unless @fingerprint + begin + # ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt + base64key = `curl http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{cid}/SDF|grep -A1 PUBCHEM_CACTVS_SUBSKEYS|sed '1d'`.chomp + @fingerprint = Base64.decode64(base64key)[4..-1].unpack("B*").first[0..-8].split(//).collect{|c| c == "1"} + rescue + end + end + @fingerprint + end + + def self.from_name name + pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name" + compounds = [] + session[:name] = name + cid = RestClientWrapper.get(File.join(pug_uri,URI.escape(name),"cids","TXT")) + #puts response + #response.split("\n") do |cid| + puts cid + compound = OpenTox::PubChemCompound.new + compound.cid = cid.chomp + compounds << compound + #end + compounds end def neighbors - if @neighbors.empty? - pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100" - listkey = @result["Waiting"]["ListKey"] - while @result["Waiting"] do - sleep 1 - pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON") + unless @neighbors + @neighbors = [] + result = pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100" + while result["Waiting"] do + sleep 2 + listkey = result["Waiting"]["ListKey"] + result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "cids", "JSON") + #result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON") end - columns = @result["Table"]["Columns"]["Column"] - table = @result["Table"]["Row"].collect{|cell| cell.values.flatten} - cid_idx = columns.index("CID") - cids = table.collect{|r| r[cid_idx]}.uniq - cids.each do |cid| + puts "Neighbor CIDs received" + result["IdentifierList"]["CID"].each do |cid| unless cid.to_s == @cid.to_s - tab = table.collect{|r| r if r[cid_idx] == cid}.compact - c = PubChemCompound.new - c.extract_result columns, tab - c.similarity = tanimoto c - @neighbors << c unless (c.targets + c.non_targets).empty? + c = PubChemCompound.new cid.to_s + @neighbors << c if c.assays #and !(c.targets + c.non_targets).empty? + end + end if result and result["IdentifierList"] +=begin + if result and result["Table"] + columns = result["Table"]["Columns"]["Column"] + table = result["Table"]["Row"].collect{|cell| cell.values.flatten} + cid_idx = columns.index("CID") + cids = table.collect{|r| r[cid_idx]}.uniq + cids.each do |cid| + unless cid.to_s == @cid.to_s + tab = table.collect{|r| r if r[cid_idx] == cid}.compact + c = PubChemCompound.new + c.extract_result columns, tab + c.similarity = tanimoto c + @neighbors << c unless (c.targets + c.non_targets).empty? + end end end - @neighbors.sort!{|a,b| b.similarity <=> a.similarity} +=end + #@neighbors.sort!{|a,b| b.similarity <=> a.similarity} end @neighbors end - def summary - if @summary.empty? - pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON") - extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten} + def assays + unless @assays + result = pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON") + extract_result result["Table"]["Columns"]["Column"], result["Table"]["Row"].collect{|cell| cell.values.flatten} if result and result["Table"] end - @summary + @assays end def active_assays - summary.select{|a| a["Activity Outcome"] == "active"} + assays.select{|a| a["Activity Outcome"] == "active"} if assays end def inactive_assays - summary.select{|a| a["Activity Outcome"] == "inactive"} + assays.select{|a| a["Activity Outcome"] == "inactive"} if assays end def targets - active_assays.select{|a| a["Target GI"]} + active_assays.select{|a| a["Target GI"]} if assays end def non_targets - inactive_assays.select{|a| a["Target GI"]} + inactive_assays.select{|a| a["Target GI"]} if assays end - def predicted_targets - if @predicted_targets.empty? - target_gis = neighbors.collect{|n| n.summary.collect{|a| a["Target GI"]}}.flatten.compact.uniq - target_gis.each do |gid| - target = {:target_gi => gid} + def predicted_assays + unless @predicted_assays + @predicted_assays = [] + neighbors.collect{|n| n.assays.collect{|a| a["AID"]}}.flatten.compact.uniq.each do |aid| + predicted_assay = {"AID" => aid} neighbors.each do |neighbor| - if neighbor.similarity > 0.5 # avoid downweighting - search = neighbor.summary.select{|a| a["Target GI"] == gid} - unless search.empty? or search.size == 1 - print "+++ (" - print search.size - puts ")" - puts search.inspect - end + if similarity(neighbor) > 0.5 # avoid downweighting + search = neighbor.assays.select{|a| a["AID"] == aid} search.each do |assay| - target[:aid] ||= assay["AID"] - target[:name] ||= assay["Target Name"] - target[:assay_name] ||= assay["Assay Name"] - target[:active_similarities] ||= [] - target[:inactive_similarities] ||= [] + predicted_assay["Target GI"] ||= assay["Target GI"] + predicted_assay["Target Name"] ||= assay["Target Name"] + predicted_assay["Assay Name"] ||= assay["Assay Name"] + predicted_assay[:active_similarities] ||= [] + predicted_assay[:inactive_similarities] ||= [] if assay["Activity Outcome"] == "active" - target[:p_active] ? target[:p_active] = target[:p_active]*neighbor.similarity : target[:p_active] = neighbor.similarity - target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*(1-neighbor.similarity) : target[:p_inactive] = 1-neighbor.similarity - target[:active_similarities] << neighbor.similarity + predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*similarity(neighbor) : predicted_assay[:p_active] = similarity(neighbor) + predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*(1-similarity(neighbor)) : predicted_assay[:p_inactive] = 1-similarity(neighbor) + predicted_assay[:active_similarities] << similarity(neighbor) elsif assay["Activity Outcome"] == "inactive" - target[:p_active] ? target[:p_active] = target[:p_active]*(1-neighbor.similarity) : target[:p_active] = 1-neighbor.similarity - target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*neighbor.similarity : target[:p_inactive] = neighbor.similarity - target[:inactive_similarities] << neighbor.similarity + predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*(1-similarity(neighbor)) : predicted_assay[:p_active] = 1-similarity(neighbor) + predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*similarity(neighbor) : predicted_assay[:p_inactive] = similarity(neighbor) + predicted_assay[:inactive_similarities] << similarity(neighbor) end end end end - if target[:p_active] and target[:p_inactive] and target[:p_active] + target[:p_inactive] != 0 - target[:p_active] = target[:p_active]/(target[:p_active]+target[:p_inactive]) - target[:p_inactive] = target[:p_inactive]/(target[:p_active]+target[:p_inactive]) - if target[:p_active] > target[:p_inactive] - target[:prediction] = "active" - elsif target[:p_active] < target[:p_inactive] - target[:prediction] = "inactive" + if predicted_assay[:p_active] and predicted_assay[:p_inactive] and predicted_assay[:p_active] != 0 and predicted_assay[:p_inactive] != 0 + predicted_assay[:p_active] = predicted_assay[:p_active]/(predicted_assay[:p_active]+predicted_assay[:p_inactive]) + predicted_assay[:p_inactive] = predicted_assay[:p_inactive]/(predicted_assay[:p_active]+predicted_assay[:p_inactive]) + if predicted_assay[:p_active] > predicted_assay[:p_inactive] + predicted_assay[:prediction] = "active" + elsif predicted_assay[:p_active] < predicted_assay[:p_inactive] + predicted_assay[:prediction] = "inactive" end - @predicted_targets << target + @predicted_assays << predicted_assay end end - @predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]} + #@predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]} end - @predicted_targets + @predicted_assays + end + + def predicted_active_assays + predicted_assays.select{|a| a[:prediction] == "active"} if predicted_assays + end + + def predicted_inactive_assays + predicted_assays.select{|a| a[:prediction] == "inactive"} if predicted_assays + end + + def predicted_targets + predicted_active_assays.select{|a| a[:target_gi]} if predicted_assays + end + + def predicted_non_targets + inactive_assays.select{|a| a[:target_gi]} if predicted_assays end def to_smiles RestClient.get(File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "CanonicalSMILES", "TXT")).strip end + def image_uri + File.join @pug_uri, "compound", "cid", @cid, "PNG"#?record_type=3d&image_size=small" + end + + def similarity compound + cosine compound + end + def tanimoto compound + if fingerprint and compound.fingerprint + m11 = 0.0 + m1 = 0.0 + fingerprint.each_index do |i| + m11 += 1 if (@fingerprint[i] and compound.fingerprint[i]) + m1 += 1 if (@fingerprint[i] or compound.fingerprint[i]) + end + m11/m1 + end + end + + def cosine compound + if fingerprint and compound.fingerprint + m11 = 0.0 + m01 = 0.0 + m10 = 0.0 + m00 = 0.0 + fingerprint.each_index do |i| + m11 += 1 if (@fingerprint[i] and compound.fingerprint[i]) + m01 += 1 if (!@fingerprint[i] and compound.fingerprint[i]) + m10 += 1 if (@fingerprint[i] and !compound.fingerprint[i]) + m00 += 1 if (!@fingerprint[i] and !compound.fingerprint[i]) + end + m11/((m01+m11)*(m10+m11))**0.5 + end + end + +=begin f1 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+") f1.puts to_smiles f1.close @@ -160,20 +227,62 @@ module OpenTox File.delete(f2.path) sim end +=end + + def pubchem_search url + attempts = 0 + begin + attempts += 1 + json = RestClient.get url, :timeout => 90000000 + puts url + JSON.parse json + rescue + if $!.message =~ /Timeout/i and attempts < 4 + sleep 2 + retry + elsif $!.message =~ /Timeout/i and attempts >= 4 + File.open("timeouts","a+"){|f| f.puts url} + puts url + puts $!.message + nil + elsif $!.message.match /404/ + nil + else + puts url + puts $!.message + nil + end + end + end def extract_result columns, table + @assays = [] table.each do |row| - @summary << {} + @assays << {} row.each_with_index do |cell,i| if columns[i] == "CID" @cid = cell if @cid.nil? else - cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell + cell.blank? ? @assays.last[columns[i]] = nil : @assays.last[columns[i]] = cell end end end end + def priors aid + unless @priors[aid] + @priors[aid] = {"nr_active" => 0, "nr_inactive" => 0} + result = nil + result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=active&list_return=listkey") + @priors[aid]["nr_active"] = result["IdentifierList"]["Size"].to_i if result + result = nil + result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=inactive&list_return=listkey") + @priors[aid]["nr_inactive"] = result["IdentifierList"]["Size"].to_i if result + File.open("priors.json","w+"){|f| f.puts @priors.to_json} + end + @priors[aid] + end + =begin def assay_summary assay if assay["Target GI"] and !@assays[assay["AID"]] @@ -262,10 +371,6 @@ module OpenTox def to_name RestClient.get(File.join(@pug_uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip end - - def to_image_uri - File.join @pug_uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small" - end =end end diff --git a/stat.rb b/stat.rb new file mode 100755 index 0000000..9f590d8 --- /dev/null +++ b/stat.rb @@ -0,0 +1,30 @@ +#!/usr/bin/env ruby +require 'yaml' + +stat = {:tp => 0, :tn => 0, :fp => 0, :fn => 0, :tp_p => [], :fp_p => []} +thresh = 0.05 +Dir["./validation/*yaml"].each do |f| + data = YAML.load_file f + pa = data[:predicted][:active].select{|gi| data[:predicted][:p][gi][:p_active] > thresh } + pi = data[:predicted][:inactive].select{|gi| data[:predicted][:p][gi][:p_inactive] > thresh } + stat[:tp] += (pa & data[:measured][:active]).size + stat[:tn] += (pi & data[:measured][:inactive]).size + stat[:fp] += (pa & data[:measured][:inactive]).size + stat[:fn] += (pi & data[:measured][:active]).size + (pa & data[:measured][:active]).each{|gi| stat[:tp_p] << data[:predicted][:p][gi][:p_active] } + (pa & data[:measured][:inactive]).each{|gi| stat[:fp_p] << data[:predicted][:p][gi][:p_active] } +end + +stat[:tp_p].sort! +stat[:fp_p].sort! +puts stat.to_yaml +print "accuracy: " +puts (stat[:tp]+stat[:tn])/(stat[:tp]+stat[:tn]+stat[:fp]+stat[:fn]).to_f +print "sensitivity: " +puts stat[:tp]/(stat[:tp]+stat[:fn]).to_f +print "specificity: " +puts stat[:tn]/(stat[:tn]+stat[:fp]).to_f +print "positive predictive value: " +puts stat[:tp]/(stat[:tp]+stat[:fp]).to_f +print "negative predictive value: " +puts stat[:tn]/(stat[:tn]+stat[:fn]).to_f diff --git a/unicorn.rb b/unicorn.rb new file mode 100644 index 0000000..59b9883 --- /dev/null +++ b/unicorn.rb @@ -0,0 +1 @@ +timeout 600 diff --git a/validation.rb b/validation.rb new file mode 100644 index 0000000..0e95fc3 --- /dev/null +++ b/validation.rb @@ -0,0 +1,35 @@ +#!/usr/bin/env ruby +require "./pubchem.rb" + +until Dir["./validation/*.yaml"].size > 1000 do +#100.times do + result = {} + @compound = OpenTox::PubChemCompound.new + # http://www.ncbi.nlm.nih.gov/sites/entrez?term=all%5Bfilt%5D&cmd=search&db=pccompound + @compound.cid = Random.new.rand(1..35611104) + puts @compound.cid + unless File.exists? "./validation/#{@compound.cid}.yaml" + if (@compound.targets + @compound.non_targets).size > 0 + #if @compound.assays#.empty? + begin + puts "predicting ..." + result[:cid] = @compound.cid + result[:measured] = {} + result[:predicted] = {} + result[:measured][:active] = @compound.targets.collect{|t| t["Target GI"]}.compact.uniq + result[:measured][:inactive] = @compound.non_targets.collect{|t| t["Target GI"]}.compact.uniq + result[:predicted][:active] = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "active"}.compact.uniq + result[:predicted][:inactive] = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "inactive"}.compact.uniq + result[:predicted][:p] = {} + @compound.predicted_targets.each do |t| + result[:predicted][:p][t[:target_gi]] = {} + result[:predicted][:p][t[:target_gi]][:p_active] = t[:p_active] + result[:predicted][:p][t[:target_gi]][:p_inactive] = t[:p_inactive] + end + File.open("./validation/#{@compound.cid}.yaml","w+"){|f| f.puts result.to_yaml} + puts result.to_yaml + rescue + end + end + end +end diff --git a/views/assays.haml b/views/assays.haml new file mode 100644 index 0000000..e995842 --- /dev/null +++ b/views/assays.haml @@ -0,0 +1,6 @@ +%ul + - @assays.each do |assay| + %li + %a{:href => "/aid/#{assay["AID"]}"} #{assay['Assay Name']} + AID: + = assay["AID"] diff --git a/views/compound.haml b/views/compound.haml new file mode 100644 index 0000000..7614142 --- /dev/null +++ b/views/compound.haml @@ -0,0 +1,46 @@ +:javascript + $(document).ready(function() { + display(".targets", "/cid/#{session[:compound].cid}/targets"); + //display(".nontargets", "/cid/#{session[:compound].cid}/targets"); + display(".active_assays", "/cid/#{session[:compound].cid}/other_active_assays"); + //display(".inactive_assays", "/cid/#{session[:compound].cid}/other_active_assays"); + display(".predicted_targets", "/cid/#{session[:compound].cid}/predicted_targets"); + display(".predicted_active_assays", "/cid/#{session[:compound].cid}/other_predicted_active_assays"); + display(".neighbors", "/cid/#{session[:compound].cid}/neighbors"); + }); + +%script{:type => "text/javascript", :src => "sorttable.js"} +%table{:class => "sortable"} + %tr + %th Structure + %th + %th Targets (experimental data) + %th Other assays (experimental data) + %tr + %td{:valign => "top"} + %img{:src => session[:compound].image_uri} + %td + %td{:valign => "top"} + .targets + -# %h2 Non-Targets + .nontargets + %td{:valign => "top"} + .active_assays + -# %h2 Inactive assays + .inactive_assays + %tr + %th + %th + %th Targets (predicted) + %th Other assays (predicted) + %tr + %td + %td + %td{:valign => "top"} + .predicted_targets + %td{:valign => "top"} + .predicted_active_assays + +%h2 Neighbors +.neighbors + diff --git a/views/fp.haml b/views/fp.haml new file mode 100644 index 0000000..4bffba3 --- /dev/null +++ b/views/fp.haml @@ -0,0 +1,41 @@ +!!! 5 +%style{:type => "text/css" } + //dt { float: left; clear: left; width: 100px; text-align: right; font-weight: bold; color: green; } + //dt { float: left; clear: left; text-align: left; font-weight: bold; color: green; } + table { border-top: 2px black } + dl {text-align:left;} + dt { display: inline; text-align: left; font-weight: bold; color: green; } + dt:after { content: ": "; } + dd { display: inline; text-align: left; margin:0 } + dd:after { content: '\A'; white-space: pre; } +- @fp.each do |p| + %table{:class => "sortable"} + %tr + %th + %img{:src => "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{p["CID"]}/PNG"} + %th + %dl + - [ "CID", "Target GI", "p_active", "p_inactive" ].each do |k| + %dt= k + %dd= p[k] + - p[:assays].each do |a| + %th + %dl + - a.each do |k,v| + %dt= k + %dd= v + - p[:neighbors].each do |n| + %tr + %td + %img{:src => "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{n[:cid]}/PNG"} + %td + %dl + - ["CID", "Similarity"].each do |k| + %dt= k + %dd= n[k.downcase.to_sym] + - n[:assays].each do |a| + %td + %dl + - a.each do |k,v| + %dt= k + %dd= v diff --git a/views/index.haml b/views/index.haml index b75a9ab..e69de29 100644 --- a/views/index.haml +++ b/views/index.haml @@ -1,27 +0,0 @@ -!!! 5 -%script{:type => "text/javascript", :src => "sorttable.js"} -%table{:class => "sortable"} - %tr - %th - = @neighbors.query.to_name - %br - %img{:src => @neighbors.query.to_image_uri} - %th Structure - %th Properties - %th Targets - %th Assays - - @neighbors.neighbors.each do |neighbor| - - sim = neighbor.structure_similarity @neighbors.query - %tr{:sorttable_customkey => sim} - %td - = neighbor.to_name - %br - %img{:src => neighbor.to_image_uri} - %td - = sim - %td - = neighbor.property_similarity @neighbors.query - %td - = neighbor.target_similarity @neighbors.query - %td - = neighbor.assay_similarity @neighbors.query diff --git a/views/layout.haml b/views/layout.haml new file mode 100644 index 0000000..8641692 --- /dev/null +++ b/views/layout.haml @@ -0,0 +1,26 @@ +!!! 5 +%html + %head + %script{:type => "text/javascript", :src => "jquery-1.8.2.js"} + :javascript + function display(element,uri) { + $(element).html("\"Searching"); + $.ajax({ + url: uri, + success: function(data){ + $(element).html(data); + }, + error: function(data,textStatus,message){ + $(element).html(message); + } + }); + } + + %body + %h1 adverse outcome pathways + %form{:name => "form", :action => '/search', :method => "GET"} + %fieldset + %label{:for => 'identifier'} Compound name: + %input{:type => 'text', :name => 'name', :id => 'name', :size => '60'} + %input{ :type => "submit", :value => "Predict" } + = yield diff --git a/views/neighbors.haml b/views/neighbors.haml new file mode 100644 index 0000000..54905ce --- /dev/null +++ b/views/neighbors.haml @@ -0,0 +1,43 @@ +%script{:type => "text/javascript", :src => "sorttable.js"} +%table{:class => "sortable"} + - session[:compound].neighbors[0..10].each do |compound| + %tr + %td{:valign => "top"} + %img{:src => compound.image_uri} + %td{:id => "sim#{compound.cid}", :valign => "top"} + %img{:src => "/spinning-wait-icons/wait30trans.gif"} + :javascript + $.ajax({ + url: "/cid/#{session[:compound].cid}/cosine/#{compound.cid}", + success: function(data){ + $("#sim#{compound.cid}").html(data); + }, + error: function(data,textStatus,message){ + $("#sim#{compound.cid}").html(message); + } + }); + %td{:id => "targets#{compound.cid}", :valign => "top"} + %img{:src => "/spinning-wait-icons/wait30trans.gif"} + :javascript + $.ajax({ + url: "/cid/#{compound.cid}/targets", + success: function(data){ + $("#targets#{compound.cid}").html(data); + }, + error: function(data,textStatus,message){ + $("#targets#{compound.cid}").html(message); + } + }); + %td{:id => "assays#{compound.cid}", :valign => "top"} + %img{:src => "/spinning-wait-icons/wait30trans.gif"} + :javascript + $.ajax({ + url: "/cid/#{compound.cid}/other_active_assays", + success: function(data){ + $("#assays#{compound.cid}").html(data); + }, + error: function(data,textStatus,message){ + $("#assays#{compound.cid}").html(message); + } + }); + %td diff --git a/views/predicted_assays.haml b/views/predicted_assays.haml new file mode 100644 index 0000000..e995842 --- /dev/null +++ b/views/predicted_assays.haml @@ -0,0 +1,6 @@ +%ul + - @assays.each do |assay| + %li + %a{:href => "/aid/#{assay["AID"]}"} #{assay['Assay Name']} + AID: + = assay["AID"] diff --git a/views/predicted_targets.haml b/views/predicted_targets.haml new file mode 100644 index 0000000..1a172ff --- /dev/null +++ b/views/predicted_targets.haml @@ -0,0 +1,6 @@ +%ul + - @assays.each do |assay| + %li + %a{:href => "/aid/#{assay["AID"]}"} #{assay['Target Name']} + Target GI: + = assay['Target GI'] diff --git a/views/select.haml b/views/select.haml new file mode 100644 index 0000000..d6e3d20 --- /dev/null +++ b/views/select.haml @@ -0,0 +1,8 @@ +%p + More than one compound found for + = "\"#{params[:name]}\"." + Please select a structure: +- @compounds.each do |compound| + %a{:href => "/cid/#{compound.cid}"} + %img{:src => compound.image_uri } + diff --git a/views/targets.haml b/views/targets.haml new file mode 100644 index 0000000..1a172ff --- /dev/null +++ b/views/targets.haml @@ -0,0 +1,6 @@ +%ul + - @assays.each do |assay| + %li + %a{:href => "/aid/#{assay["AID"]}"} #{assay['Target Name']} + Target GI: + = assay['Target GI'] -- cgit v1.2.3