summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2012-11-21 10:09:48 +0100
committerChristoph Helma <helma@in-silico.ch>2012-11-21 10:09:48 +0100
commitc38f9f2e4c2dec4ef163c6c76d64f5dbf4974a5a (patch)
tree4f4b71056b1e8728720fb2a5f6886dc2dfe634cd
parent9661a67983ffc93ee02bc12b20b9afb38e199d79 (diff)
Initial GUI
-rw-r--r--Gemfile6
-rw-r--r--VERSION1
-rw-r--r--aop.gemspec27
-rw-r--r--application.rb197
-rw-r--r--config.ru6
-rw-r--r--false_positives.rb39
-rw-r--r--pubchem-test.rb8
-rw-r--r--pubchem.rb295
-rwxr-xr-xstat.rb30
-rw-r--r--unicorn.rb1
-rw-r--r--validation.rb35
-rw-r--r--views/assays.haml6
-rw-r--r--views/compound.haml46
-rw-r--r--views/fp.haml41
-rw-r--r--views/index.haml27
-rw-r--r--views/layout.haml26
-rw-r--r--views/neighbors.haml43
-rw-r--r--views/predicted_assays.haml6
-rw-r--r--views/predicted_targets.haml6
-rw-r--r--views/select.haml8
-rw-r--r--views/targets.haml6
21 files changed, 718 insertions, 142 deletions
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000..48f78eb
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,6 @@
+source :gemcutter
+gemspec
+gem "haml"
+gem "dalli"
+gem "opentox-server", :path => "../opentox-server"
+gem "opentox-client", :path => "../opentox-client"
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..3be2757
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.0.1pre1
diff --git a/aop.gemspec b/aop.gemspec
new file mode 100644
index 0000000..fe90a75
--- /dev/null
+++ b/aop.gemspec
@@ -0,0 +1,27 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+
+Gem::Specification.new do |s|
+ s.name = "aop"
+ s.version = File.read("./VERSION")
+ s.authors = ["Christoph Helma","Denis Gebele","Micha Rautenberg"]
+ s.email = ["helma@in-silico.ch","gebele@in-silico.ch","rautenenberg@in-silico.ch"]
+ s.homepage = "http://github.com/OpenTox/task"
+ s.summary = %q{AOP GUI}
+ s.description = %q{AOP GUI}
+ s.license = 'GPL-3'
+ #s.platform = Gem::Platform::CURRENT
+
+ s.rubyforge_project = "aop"
+
+ s.files = `git ls-files`.split("\n")
+ #s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
+ #s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+ #s.require_paths = ["lib"]
+ s.required_ruby_version = '>= 1.9.2'
+
+ # specify any dependencies here; for example:
+ s.add_runtime_dependency "opentox-server"
+ s.post_install_message = "Please configure your service in ~/.opentox/config/aop.rb"
+end
+
diff --git a/application.rb b/application.rb
index 2a77013..a3d46c3 100644
--- a/application.rb
+++ b/application.rb
@@ -1,22 +1,183 @@
-require 'sinatra'
-require "sinatra/reloader"
-require "haml"
-require 'yaml'
require "./pubchem.rb"
-also_reload './pubchem.rb'
-
-get '/?' do
- #@neighbors = OpenTox::PubChemNeighbors.new
- smiles = "OC(=O)C1=C(C=CC=C1)OC(=O)C"
- #smiles = "c1cc(CC)ccc1"
- #smiles = "CC(=O)Nc1ccc(O)cc1"
- smiles = "C1=CC(=C(C=C1Cl)Cl)OCC(=O)O"
- #@compound.from_smiles smiles
- #@neighbors.from_smiles smiles
+require 'rack/session/dalli'
+module OpenTox
+ class Application < Service
+ set :static, true
+ set :root, File.dirname(__FILE__)
+ also_reload './pubchem.rb'
+ #enable :sessions
+ use Rack::Session::Dalli, :cache => Dalli::Client.new
+
+ @@pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
+
+ helpers do
+
=begin
- File.open("compound.yaml","w+"){|f| f.puts @compound.to_yaml}
- @compound = YAML.load_file "compound.yaml"
+ def pubchem_search url
+ attempts = 0
+ result = nil
+ begin
+ attempts += 1
+ json = RestClient.get url, :timeout => 90000000
+ result = JSON.parse json
+ while result["Waiting"] do
+ sleep 2
+ listkey = result["Waiting"]["ListKey"]
+ result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "cids", "JSON")
+ end
+ rescue
+ if $!.message =~ /Timeout/i and attempts < 4
+ sleep 2
+ retry
+ elsif $!.message =~ /Timeout/i and attempts >= 4
+ File.open("timeouts","a+"){|f| f.puts url}
+ puts url
+ puts $!.message
+ elsif $!.message.match /404/
+ #not_found_error #TODO
+ else
+ puts url
+ puts $!.message
+ end
+ end
+ end
=end
- @neighbors = YAML.load_file "search.yaml"
- haml :index
+
+ def image_uri cid
+ File.join @@pug_uri, "compound", "cid", cid, "PNG"#?record_type=3d&image_size=small"
+ end
+
+ end
+
+ before '/cid/:cid/*' do
+ session[:compound] = PubChemCompound.new params[:cid] unless session[:compound] and session[:compound].cid == params[:cid]
+ end
+
+ get '/?' do
+ haml :index
+ end
+
+ get '/cid/:cid/?' do
+ session[:compound] = PubChemCompound.new params[:cid]
+ haml :compound
+ end
+
+ get '/search/?' do
+ #begin
+ cids = RestClientWrapper.get(File.join(@@pug_uri,"compound","name",URI.escape(params[:name]),"cids","TXT")).split("\n")
+ if cids.size == 1
+ session[:compound] = PubChemCompound.new cids.first
+ haml :compound
+ elsif cids.size > 1
+ @compounds = cids.collect{|cid| PubChemCompound.new cid }
+ haml :select
+ end
+ #rescue
+ #haml :not_found
+ #end
+ end
+
+ get '/cid/:cid/targets/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].targets
+ else
+ @assays = PubChemCompound.new(params[:cid]).targets
+ end
+ haml :targets, :layout => false
+ end
+
+ get '/cid/:cid/nontargets/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].non_targets
+ else
+ @assays = PubChemCompound.new(params[:cid]).non_targets
+ end
+ haml :targets, :layout => false
+ end
+
+ get '/cid/:cid/other_active_assays/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].active_assays - session[:compound].targets
+ else
+ compound = PubChemCompound.new(params[:cid])
+ @assays = compound.active_assays - compound.targets
+ end
+ haml :assays, :layout => false
+ end
+
+ get '/cid/:cid/other_inactive_assays/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].inactive_assays - session[:compound].non_targets
+ else
+ compound = PubChemCompound.new(params[:cid])
+ @assays = compound.inactive_assays - compound.non_targets
+ end
+ haml :assays, :layout => false
+ end
+
+ get '/cid/:cid/predicted_targets/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].predicted_targets
+ else
+ @assays = PubChemCompound.new(params[:cid]).predicted_targets
+ end
+ haml :predicted_targets, :layout => false
+ end
+
+ get '/cid/:cid/predicted_nontargets/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].predicted_non_targets
+ else
+ @assays = PubChemCompound.new(params[:cid]).predicted_non_targets
+ end
+ haml :predicted_targets, :layout => false
+ end
+
+ get '/cid/:cid/other_predicted_active_assays/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].predicted_active_assays - session[:compound].predicted_targets
+ else
+ compound = PubChemCompound.new(params[:cid])
+ @assays = compound.predicted_active_assays - compound.predicted_targets
+ end
+ haml :predicted_assays, :layout => false
+ end
+
+ get '/cid/:cid/other_predicted_inactive_assays/?' do
+ if params[:cid] == session[:compound].cid
+ @assays = session[:compound].predicted_inactive_assays - session[:compound].predicted_non_targets
+ else
+ compound = PubChemCompound.new(params[:cid])
+ @assays = compound.predicted_inactive_assays - compound.predicted_non_targets
+ end
+ haml :assays, :layout => false
+ end
+
+ get '/cid/:cid/neighbors/?' do
+ haml :neighbors, :layout => false
+ end
+
+
+ get '/cid/:cid/cosine/:cid2/?' do
+ session[:compound].cosine(PubChemCompound.new(params[:cid2])).to_s
+ end
+
+ get '/fp/?' do
+ @fp = []
+ YAML.load_file("false_positives.yaml").each do |pred|
+ pred[:fp_targets].each do |gi,t|
+ @fp << {
+ "CID" => pred[:cid],
+ "Target GI" => gi,
+ "p_active" => t[:p][:active].first,
+ "p_inactive" => t[:p][:inactive].first,
+ :assays => t[:measured],
+ :neighbors => t[:neighbors]
+ }
+ end
+ end
+ @fp.sort!{|a,b| b["p_active"] <=> a["p_active"]}
+ haml :fp
+ end
+ end
end
diff --git a/config.ru b/config.ru
new file mode 100644
index 0000000..4050809
--- /dev/null
+++ b/config.ru
@@ -0,0 +1,6 @@
+SERVICE = "aop"
+require 'bundler'
+Bundler.require
+timeout = 600
+require './application.rb'
+run OpenTox::Application
diff --git a/false_positives.rb b/false_positives.rb
new file mode 100644
index 0000000..9453679
--- /dev/null
+++ b/false_positives.rb
@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+require "./pubchem.rb"
+
+false_positives = YAML.load_file("false_positives.yaml")
+#false_positives = []
+until false_positives.size > 100 do
+ result = {}
+ @compound = OpenTox::PubChemCompound.new
+ # http://www.ncbi.nlm.nih.gov/sites/entrez?term=all%5Bfilt%5D&cmd=search&db=pccompound
+ @compound.cid = Random.new.rand(1..35611104)
+ puts @compound.cid
+ if @compound.targets and @compound.non_targets and !(@compound.targets + @compound.non_targets).empty?
+ puts "predicting ..."
+ result[:cid] = @compound.cid
+ measured_non_targets = @compound.non_targets.collect{|t| t["Target GI"]}.compact.uniq
+ predicted_targets = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "active"}.compact.uniq
+
+ result[:fp_targets] = {}
+ (predicted_targets & measured_non_targets).each do |gi|
+ result[:fp_targets][gi] = {:p => {:active => nil, :inactive => nil}, :measured => [], :neighbors => []}
+ result[:fp_targets][gi][:measured] = @compound.inactive_assays.select{|a| a["Target GI"] == gi}
+ result[:fp_targets][gi][:p][:active] = @compound.predicted_targets.collect{|t| t[:p_active] if t[:target_gi] == gi}.compact.uniq
+ result[:fp_targets][gi][:p][:inactive] = @compound.predicted_targets.collect{|t| t[:p_inactive] if t[:target_gi] == gi}.compact.uniq
+
+ @compound.neighbors.select{|n| n.assays.collect{|a| a["Target GI"]}.include? gi }.each do |neighbor|
+ result[:fp_targets][gi][:neighbors] << {
+ :cid => neighbor.cid,
+ :similarity => neighbor.similarity,
+ :assays => neighbor.assays.select{|a| a["Target GI"] == gi }
+ }
+ end
+ end
+ unless result[:fp_targets].empty?
+ false_positives << result
+ File.open("false_positives.yaml","w+") {|f| f.puts false_positives.to_yaml}
+ end
+ end
+end
+
diff --git a/pubchem-test.rb b/pubchem-test.rb
index 273698e..a3e7799 100644
--- a/pubchem-test.rb
+++ b/pubchem-test.rb
@@ -7,7 +7,7 @@ class AOPTest < Test::Unit::TestCase
def setup
@pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
- @compound = OpenTox::PubChemCompound.new
+ @compound = OpenTox::PubChemCompound.new #3036
@compound.cid = 1983
#@compound.from_name "2,4-D"
end
@@ -18,8 +18,9 @@ class AOPTest < Test::Unit::TestCase
puts @compound.to_smiles
puts "measured targets"
puts @compound.targets.collect{|t| t["Target Name"]}.to_yaml
+=begin
puts "predicted targets"
- puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.to_yaml
+ puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.size
puts "predicted non_targets"
puts @compound.predicted_targets.select{|t| t[:prediction] == "inactive"}.size
@@ -38,8 +39,11 @@ class AOPTest < Test::Unit::TestCase
puts (predicted_nontarget_gis - measured_nontarget_gis).size
print "incorrect predicted targets: "
puts (measured_nontarget_gis & predicted_target_gis).size
+ puts (measured_nontarget_gis & predicted_target_gis).sort.to_yaml
+ puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.to_yaml
print "incorrect predicted non-targets: "
puts (measured_target_gis & predicted_nontarget_gis).size
+=end
=begin
@compound.neighbors.each do |n|
#print n.cid
diff --git a/pubchem.rb b/pubchem.rb
index 891110c..a6893fb 100644
--- a/pubchem.rb
+++ b/pubchem.rb
@@ -1,154 +1,221 @@
require '../opentox-client/lib/opentox-client.rb'
require 'json'
+require 'base64'
# Gaussian weight of x around 1.0: returns exp(-(1 - x)^2 / (2 * sigma^2)).
# x is converted with to_f; sigma defaults to 0.3. The result is 1.0 for x == 1.
def Math.gauss(x, sigma = 0.3)
  distance = 1.0 - x.to_f
  exponent = -(distance * distance) / (2 * sigma * sigma)
  Math.exp(exponent)
end
-module PubChem
-
- attr_accessor :result
-
- def initialize
- @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
- end
-
- def pubchem_search url
- json = RestClient.get url#, :accept => "application/json", :timeout => 90000000
- @result = JSON.parse json
- rescue
- puts url
- puts $!.message
- @result = nil
- end
-
-end
-
module OpenTox
+ # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
class PubChemCompound < Compound
- include PubChem
- # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
attr_writer :cid
attr_accessor :similarity, :p, :assays
- def initialize
- super
- @summary = []
- @similarity_threshold = 75
- @neighbors = []
- @predicted_targets = []
+ def initialize cid=nil
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
+ @cid = cid
+ @assays = nil
+ @similarity_threshold = 85
+ @neighbors = nil
+ @predicted_assays = nil
+ #@predicted_targets = nil
+ #@priors = {}
+ #@priors = JSON.parse(File.read("priors.json"))
end
- def from_name name
- @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
+ def fingerprint
+ unless @fingerprint
+ begin
+ # ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
+ base64key = `curl http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{cid}/SDF|grep -A1 PUBCHEM_CACTVS_SUBSKEYS|sed '1d'`.chomp
+ @fingerprint = Base64.decode64(base64key)[4..-1].unpack("B*").first[0..-8].split(//).collect{|c| c == "1"}
+ rescue
+ end
+ end
+ @fingerprint
+ end
+
+ def self.from_name name
+ pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name"
+ compounds = []
+ session[:name] = name
+ cid = RestClientWrapper.get(File.join(pug_uri,URI.escape(name),"cids","TXT"))
+ #puts response
+ #response.split("\n") do |cid|
+ puts cid
+ compound = OpenTox::PubChemCompound.new
+ compound.cid = cid.chomp
+ compounds << compound
+ #end
+ compounds
end
def neighbors
- if @neighbors.empty?
- pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100"
- listkey = @result["Waiting"]["ListKey"]
- while @result["Waiting"] do
- sleep 1
- pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
+ unless @neighbors
+ @neighbors = []
+ result = pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100"
+ while result["Waiting"] do
+ sleep 2
+ listkey = result["Waiting"]["ListKey"]
+ result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "cids", "JSON")
+ #result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
end
- columns = @result["Table"]["Columns"]["Column"]
- table = @result["Table"]["Row"].collect{|cell| cell.values.flatten}
- cid_idx = columns.index("CID")
- cids = table.collect{|r| r[cid_idx]}.uniq
- cids.each do |cid|
+ puts "Neighbor CIDs received"
+ result["IdentifierList"]["CID"].each do |cid|
unless cid.to_s == @cid.to_s
- tab = table.collect{|r| r if r[cid_idx] == cid}.compact
- c = PubChemCompound.new
- c.extract_result columns, tab
- c.similarity = tanimoto c
- @neighbors << c unless (c.targets + c.non_targets).empty?
+ c = PubChemCompound.new cid.to_s
+ @neighbors << c if c.assays #and !(c.targets + c.non_targets).empty?
+ end
+ end if result and result["IdentifierList"]
+=begin
+ if result and result["Table"]
+ columns = result["Table"]["Columns"]["Column"]
+ table = result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ cid_idx = columns.index("CID")
+ cids = table.collect{|r| r[cid_idx]}.uniq
+ cids.each do |cid|
+ unless cid.to_s == @cid.to_s
+ tab = table.collect{|r| r if r[cid_idx] == cid}.compact
+ c = PubChemCompound.new
+ c.extract_result columns, tab
+ c.similarity = tanimoto c
+ @neighbors << c unless (c.targets + c.non_targets).empty?
+ end
end
end
- @neighbors.sort!{|a,b| b.similarity <=> a.similarity}
+=end
+ #@neighbors.sort!{|a,b| b.similarity <=> a.similarity}
end
@neighbors
end
- def summary
- if @summary.empty?
- pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON")
- extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ def assays
+ unless @assays
+ result = pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON")
+ extract_result result["Table"]["Columns"]["Column"], result["Table"]["Row"].collect{|cell| cell.values.flatten} if result and result["Table"]
end
- @summary
+ @assays
end
def active_assays
- summary.select{|a| a["Activity Outcome"] == "active"}
+ assays.select{|a| a["Activity Outcome"] == "active"} if assays
end
def inactive_assays
- summary.select{|a| a["Activity Outcome"] == "inactive"}
+ assays.select{|a| a["Activity Outcome"] == "inactive"} if assays
end
def targets
- active_assays.select{|a| a["Target GI"]}
+ active_assays.select{|a| a["Target GI"]} if assays
end
def non_targets
- inactive_assays.select{|a| a["Target GI"]}
+ inactive_assays.select{|a| a["Target GI"]} if assays
end
- def predicted_targets
- if @predicted_targets.empty?
- target_gis = neighbors.collect{|n| n.summary.collect{|a| a["Target GI"]}}.flatten.compact.uniq
- target_gis.each do |gid|
- target = {:target_gi => gid}
+ def predicted_assays
+ unless @predicted_assays
+ @predicted_assays = []
+ neighbors.collect{|n| n.assays.collect{|a| a["AID"]}}.flatten.compact.uniq.each do |aid|
+ predicted_assay = {"AID" => aid}
neighbors.each do |neighbor|
- if neighbor.similarity > 0.5 # avoid downweighting
- search = neighbor.summary.select{|a| a["Target GI"] == gid}
- unless search.empty? or search.size == 1
- print "+++ ("
- print search.size
- puts ")"
- puts search.inspect
- end
+ if similarity(neighbor) > 0.5 # avoid downweighting
+ search = neighbor.assays.select{|a| a["AID"] == aid}
search.each do |assay|
- target[:aid] ||= assay["AID"]
- target[:name] ||= assay["Target Name"]
- target[:assay_name] ||= assay["Assay Name"]
- target[:active_similarities] ||= []
- target[:inactive_similarities] ||= []
+ predicted_assay["Target GI"] ||= assay["Target GI"]
+ predicted_assay["Target Name"] ||= assay["Target Name"]
+ predicted_assay["Assay Name"] ||= assay["Assay Name"]
+ predicted_assay[:active_similarities] ||= []
+ predicted_assay[:inactive_similarities] ||= []
if assay["Activity Outcome"] == "active"
- target[:p_active] ? target[:p_active] = target[:p_active]*neighbor.similarity : target[:p_active] = neighbor.similarity
- target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*(1-neighbor.similarity) : target[:p_inactive] = 1-neighbor.similarity
- target[:active_similarities] << neighbor.similarity
+ predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*similarity(neighbor) : predicted_assay[:p_active] = similarity(neighbor)
+ predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*(1-similarity(neighbor)) : predicted_assay[:p_inactive] = 1-similarity(neighbor)
+ predicted_assay[:active_similarities] << similarity(neighbor)
elsif assay["Activity Outcome"] == "inactive"
- target[:p_active] ? target[:p_active] = target[:p_active]*(1-neighbor.similarity) : target[:p_active] = 1-neighbor.similarity
- target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*neighbor.similarity : target[:p_inactive] = neighbor.similarity
- target[:inactive_similarities] << neighbor.similarity
+ predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*(1-similarity(neighbor)) : predicted_assay[:p_active] = 1-similarity(neighbor)
+ predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*similarity(neighbor) : predicted_assay[:p_inactive] = similarity(neighbor)
+ predicted_assay[:inactive_similarities] << similarity(neighbor)
end
end
end
end
- if target[:p_active] and target[:p_inactive] and target[:p_active] + target[:p_inactive] != 0
- target[:p_active] = target[:p_active]/(target[:p_active]+target[:p_inactive])
- target[:p_inactive] = target[:p_inactive]/(target[:p_active]+target[:p_inactive])
- if target[:p_active] > target[:p_inactive]
- target[:prediction] = "active"
- elsif target[:p_active] < target[:p_inactive]
- target[:prediction] = "inactive"
+ if predicted_assay[:p_active] and predicted_assay[:p_inactive] and predicted_assay[:p_active] != 0 and predicted_assay[:p_inactive] != 0
+ predicted_assay[:p_active] = predicted_assay[:p_active]/(predicted_assay[:p_active]+predicted_assay[:p_inactive])
+ predicted_assay[:p_inactive] = predicted_assay[:p_inactive]/(predicted_assay[:p_active]+predicted_assay[:p_inactive])
+ if predicted_assay[:p_active] > predicted_assay[:p_inactive]
+ predicted_assay[:prediction] = "active"
+ elsif predicted_assay[:p_active] < predicted_assay[:p_inactive]
+ predicted_assay[:prediction] = "inactive"
end
- @predicted_targets << target
+ @predicted_assays << predicted_assay
end
end
- @predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]}
+ #@predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]}
end
- @predicted_targets
+ @predicted_assays
+ end
+
+ def predicted_active_assays
+ predicted_assays.select{|a| a[:prediction] == "active"} if predicted_assays
+ end
+
+ def predicted_inactive_assays
+ predicted_assays.select{|a| a[:prediction] == "inactive"} if predicted_assays
+ end
+
+ def predicted_targets
+ predicted_active_assays.select{|a| a[:target_gi]} if predicted_assays
+ end
+
+ # Assays predicted "inactive" that have a target ("Target GI" is set).
+ # Uses the predicted (not the measured) inactive assays and the string key
+ # "Target GI" under which predicted_assays stores the target.
+ def predicted_non_targets
+ predicted_inactive_assays.select{|a| a["Target GI"]} if predicted_assays
end
def to_smiles
RestClient.get(File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "CanonicalSMILES", "TXT")).strip
end
+ def image_uri
+ File.join @pug_uri, "compound", "cid", @cid, "PNG"#?record_type=3d&image_size=small"
+ end
+
+ def similarity compound
+ cosine compound
+ end
+
def tanimoto compound
+ if fingerprint and compound.fingerprint
+ m11 = 0.0
+ m1 = 0.0
+ fingerprint.each_index do |i|
+ m11 += 1 if (@fingerprint[i] and compound.fingerprint[i])
+ m1 += 1 if (@fingerprint[i] or compound.fingerprint[i])
+ end
+ m11/m1
+ end
+ end
+
+ def cosine compound
+ if fingerprint and compound.fingerprint
+ m11 = 0.0
+ m01 = 0.0
+ m10 = 0.0
+ m00 = 0.0
+ fingerprint.each_index do |i|
+ m11 += 1 if (@fingerprint[i] and compound.fingerprint[i])
+ m01 += 1 if (!@fingerprint[i] and compound.fingerprint[i])
+ m10 += 1 if (@fingerprint[i] and !compound.fingerprint[i])
+ m00 += 1 if (!@fingerprint[i] and !compound.fingerprint[i])
+ end
+ m11/((m01+m11)*(m10+m11))**0.5
+ end
+ end
+
+=begin
f1 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+")
f1.puts to_smiles
f1.close
@@ -160,20 +227,62 @@ module OpenTox
File.delete(f2.path)
sim
end
+=end
+
+ def pubchem_search url
+ attempts = 0
+ begin
+ attempts += 1
+ json = RestClient.get url, :timeout => 90000000
+ puts url
+ JSON.parse json
+ rescue
+ if $!.message =~ /Timeout/i and attempts < 4
+ sleep 2
+ retry
+ elsif $!.message =~ /Timeout/i and attempts >= 4
+ File.open("timeouts","a+"){|f| f.puts url}
+ puts url
+ puts $!.message
+ nil
+ elsif $!.message.match /404/
+ nil
+ else
+ puts url
+ puts $!.message
+ nil
+ end
+ end
+ end
def extract_result columns, table
+ @assays = []
table.each do |row|
- @summary << {}
+ @assays << {}
row.each_with_index do |cell,i|
if columns[i] == "CID"
@cid = cell if @cid.nil?
else
- cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell
+ cell.blank? ? @assays.last[columns[i]] = nil : @assays.last[columns[i]] = cell
end
end
end
end
+ def priors aid
+ unless @priors[aid]
+ @priors[aid] = {"nr_active" => 0, "nr_inactive" => 0}
+ result = nil
+ result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=active&list_return=listkey")
+ @priors[aid]["nr_active"] = result["IdentifierList"]["Size"].to_i if result
+ result = nil
+ result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=inactive&list_return=listkey")
+ @priors[aid]["nr_inactive"] = result["IdentifierList"]["Size"].to_i if result
+ File.open("priors.json","w+"){|f| f.puts @priors.to_json}
+ end
+ @priors[aid]
+ end
+
=begin
def assay_summary assay
if assay["Target GI"] and !@assays[assay["AID"]]
@@ -262,10 +371,6 @@ module OpenTox
def to_name
RestClient.get(File.join(@pug_uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip
end
-
- def to_image_uri
- File.join @pug_uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small"
- end
=end
end
diff --git a/stat.rb b/stat.rb
new file mode 100755
index 0000000..9f590d8
--- /dev/null
+++ b/stat.rb
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+require 'yaml'
+
+stat = {:tp => 0, :tn => 0, :fp => 0, :fn => 0, :tp_p => [], :fp_p => []}
+thresh = 0.05
+Dir["./validation/*yaml"].each do |f|
+ data = YAML.load_file f
+ pa = data[:predicted][:active].select{|gi| data[:predicted][:p][gi][:p_active] > thresh }
+ pi = data[:predicted][:inactive].select{|gi| data[:predicted][:p][gi][:p_inactive] > thresh }
+ stat[:tp] += (pa & data[:measured][:active]).size
+ stat[:tn] += (pi & data[:measured][:inactive]).size
+ stat[:fp] += (pa & data[:measured][:inactive]).size
+ stat[:fn] += (pi & data[:measured][:active]).size
+ (pa & data[:measured][:active]).each{|gi| stat[:tp_p] << data[:predicted][:p][gi][:p_active] }
+ (pa & data[:measured][:inactive]).each{|gi| stat[:fp_p] << data[:predicted][:p][gi][:p_active] }
+end
+
+stat[:tp_p].sort!
+stat[:fp_p].sort!
+puts stat.to_yaml
+print "accuracy: "
+puts (stat[:tp]+stat[:tn])/(stat[:tp]+stat[:tn]+stat[:fp]+stat[:fn]).to_f
+print "sensitivity: "
+puts stat[:tp]/(stat[:tp]+stat[:fn]).to_f
+print "specificity: "
+puts stat[:tn]/(stat[:tn]+stat[:fp]).to_f
+print "positive predictive value: "
+puts stat[:tp]/(stat[:tp]+stat[:fp]).to_f
+print "negative predictive value: "
+puts stat[:tn]/(stat[:tn]+stat[:fn]).to_f
diff --git a/unicorn.rb b/unicorn.rb
new file mode 100644
index 0000000..59b9883
--- /dev/null
+++ b/unicorn.rb
@@ -0,0 +1 @@
+timeout 600
diff --git a/validation.rb b/validation.rb
new file mode 100644
index 0000000..0e95fc3
--- /dev/null
+++ b/validation.rb
@@ -0,0 +1,35 @@
+#!/usr/bin/env ruby
+require "./pubchem.rb"
+
+until Dir["./validation/*.yaml"].size > 1000 do
+#100.times do
+ result = {}
+ @compound = OpenTox::PubChemCompound.new
+ # http://www.ncbi.nlm.nih.gov/sites/entrez?term=all%5Bfilt%5D&cmd=search&db=pccompound
+ @compound.cid = Random.new.rand(1..35611104)
+ puts @compound.cid
+ unless File.exists? "./validation/#{@compound.cid}.yaml"
+ if (@compound.targets + @compound.non_targets).size > 0
+ #if @compound.assays#.empty?
+ begin
+ puts "predicting ..."
+ result[:cid] = @compound.cid
+ result[:measured] = {}
+ result[:predicted] = {}
+ result[:measured][:active] = @compound.targets.collect{|t| t["Target GI"]}.compact.uniq
+ result[:measured][:inactive] = @compound.non_targets.collect{|t| t["Target GI"]}.compact.uniq
+ result[:predicted][:active] = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "active"}.compact.uniq
+ result[:predicted][:inactive] = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "inactive"}.compact.uniq
+ result[:predicted][:p] = {}
+ @compound.predicted_targets.each do |t|
+ result[:predicted][:p][t[:target_gi]] = {}
+ result[:predicted][:p][t[:target_gi]][:p_active] = t[:p_active]
+ result[:predicted][:p][t[:target_gi]][:p_inactive] = t[:p_inactive]
+ end
+ File.open("./validation/#{@compound.cid}.yaml","w+"){|f| f.puts result.to_yaml}
+ puts result.to_yaml
+ rescue
+ end
+ end
+ end
+end
diff --git a/views/assays.haml b/views/assays.haml
new file mode 100644
index 0000000..e995842
--- /dev/null
+++ b/views/assays.haml
@@ -0,0 +1,6 @@
+%ul
+ - @assays.each do |assay|
+ %li
+ %a{:href => "/aid/#{assay["AID"]}"} #{assay['Assay Name']}
+ AID:
+ = assay["AID"]
diff --git a/views/compound.haml b/views/compound.haml
new file mode 100644
index 0000000..7614142
--- /dev/null
+++ b/views/compound.haml
@@ -0,0 +1,46 @@
+:javascript
+ $(document).ready(function() {
+ display(".targets", "/cid/#{session[:compound].cid}/targets");
+ //display(".nontargets", "/cid/#{session[:compound].cid}/targets");
+ display(".active_assays", "/cid/#{session[:compound].cid}/other_active_assays");
+ //display(".inactive_assays", "/cid/#{session[:compound].cid}/other_active_assays");
+ display(".predicted_targets", "/cid/#{session[:compound].cid}/predicted_targets");
+ display(".predicted_active_assays", "/cid/#{session[:compound].cid}/other_predicted_active_assays");
+ display(".neighbors", "/cid/#{session[:compound].cid}/neighbors");
+ });
+
+%script{:type => "text/javascript", :src => "sorttable.js"}
+%table{:class => "sortable"}
+ %tr
+ %th Structure
+ %th
+ %th Targets (experimental data)
+ %th Other assays (experimental data)
+ %tr
+ %td{:valign => "top"}
+ %img{:src => session[:compound].image_uri}
+ %td
+ %td{:valign => "top"}
+ .targets
+ -# %h2 Non-Targets
+ .nontargets
+ %td{:valign => "top"}
+ .active_assays
+ -# %h2 Inactive assays
+ .inactive_assays
+ %tr
+ %th
+ %th
+ %th Targets (predicted)
+ %th Other assays (predicted)
+ %tr
+ %td
+ %td
+ %td{:valign => "top"}
+ .predicted_targets
+ %td{:valign => "top"}
+ .predicted_active_assays
+
+%h2 Neighbors
+.neighbors
+
diff --git a/views/fp.haml b/views/fp.haml
new file mode 100644
index 0000000..4bffba3
--- /dev/null
+++ b/views/fp.haml
@@ -0,0 +1,41 @@
+!!! 5
+%style{:type => "text/css" }
+ //dt { float: left; clear: left; width: 100px; text-align: right; font-weight: bold; color: green; }
+ //dt { float: left; clear: left; text-align: left; font-weight: bold; color: green; }
+ table { border-top: 2px black }
+ dl {text-align:left;}
+ dt { display: inline; text-align: left; font-weight: bold; color: green; }
+ dt:after { content: ": "; }
+ dd { display: inline; text-align: left; margin:0 }
+ dd:after { content: '\A'; white-space: pre; }
+- @fp.each do |p|
+ %table{:class => "sortable"}
+ %tr
+ %th
+ %img{:src => "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{p["CID"]}/PNG"}
+ %th
+ %dl
+ - [ "CID", "Target GI", "p_active", "p_inactive" ].each do |k|
+ %dt= k
+ %dd= p[k]
+ - p[:assays].each do |a|
+ %th
+ %dl
+ - a.each do |k,v|
+ %dt= k
+ %dd= v
+ - p[:neighbors].each do |n|
+ %tr
+ %td
+ %img{:src => "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{n[:cid]}/PNG"}
+ %td
+ %dl
+ - ["CID", "Similarity"].each do |k|
+ %dt= k
+ %dd= n[k.downcase.to_sym]
+ - n[:assays].each do |a|
+ %td
+ %dl
+ - a.each do |k,v|
+ %dt= k
+ %dd= v
diff --git a/views/index.haml b/views/index.haml
index b75a9ab..e69de29 100644
--- a/views/index.haml
+++ b/views/index.haml
@@ -1,27 +0,0 @@
-!!! 5
-%script{:type => "text/javascript", :src => "sorttable.js"}
-%table{:class => "sortable"}
- %tr
- %th
- = @neighbors.query.to_name
- %br
- %img{:src => @neighbors.query.to_image_uri}
- %th Structure
- %th Properties
- %th Targets
- %th Assays
- - @neighbors.neighbors.each do |neighbor|
- - sim = neighbor.structure_similarity @neighbors.query
- %tr{:sorttable_customkey => sim}
- %td
- = neighbor.to_name
- %br
- %img{:src => neighbor.to_image_uri}
- %td
- = sim
- %td
- = neighbor.property_similarity @neighbors.query
- %td
- = neighbor.target_similarity @neighbors.query
- %td
- = neighbor.assay_similarity @neighbors.query
diff --git a/views/layout.haml b/views/layout.haml
new file mode 100644
index 0000000..8641692
--- /dev/null
+++ b/views/layout.haml
@@ -0,0 +1,26 @@
+!!! 5
+%html
+ %head
+ %script{:type => "text/javascript", :src => "jquery-1.8.2.js"}
+ :javascript
+ function display(element,uri) {
+ $(element).html("<img src=\"/spinning-wait-icons/wait30trans.gif\" alt=\"Searching PubChem\">");
+ $.ajax({
+ url: uri,
+ success: function(data){
+ $(element).html(data);
+ },
+ error: function(data,textStatus,message){
+ $(element).html(message);
+ }
+ });
+ }
+
+ %body
+ %h1 adverse outcome pathways
+ %form{:name => "form", :action => '/search', :method => "GET"}
+ %fieldset
+ %label{:for => 'identifier'} Compound name:
+ %input{:type => 'text', :name => 'name', :id => 'name', :size => '60'}
+ %input{ :type => "submit", :value => "Predict" }
+ = yield
diff --git a/views/neighbors.haml b/views/neighbors.haml
new file mode 100644
index 0000000..54905ce
--- /dev/null
+++ b/views/neighbors.haml
@@ -0,0 +1,43 @@
+%script{:type => "text/javascript", :src => "sorttable.js"}
+%table{:class => "sortable"}
+ - session[:compound].neighbors[0..10].each do |compound|
+ %tr
+ %td{:valign => "top"}
+ %img{:src => compound.image_uri}
+ %td{:id => "sim#{compound.cid}", :valign => "top"}
+ %img{:src => "/spinning-wait-icons/wait30trans.gif"}
+ :javascript
+ $.ajax({
+ url: "/cid/#{session[:compound].cid}/cosine/#{compound.cid}",
+ success: function(data){
+ $("#sim#{compound.cid}").html(data);
+ },
+ error: function(data,textStatus,message){
+ $("#sim#{compound.cid}").html(message);
+ }
+ });
+ %td{:id => "targets#{compound.cid}", :valign => "top"}
+ %img{:src => "/spinning-wait-icons/wait30trans.gif"}
+ :javascript
+ $.ajax({
+ url: "/cid/#{compound.cid}/targets",
+ success: function(data){
+ $("#targets#{compound.cid}").html(data);
+ },
+ error: function(data,textStatus,message){
+ $("#targets#{compound.cid}").html(message);
+ }
+ });
+ %td{:id => "assays#{compound.cid}", :valign => "top"}
+ %img{:src => "/spinning-wait-icons/wait30trans.gif"}
+ :javascript
+ $.ajax({
+ url: "/cid/#{compound.cid}/other_active_assays",
+ success: function(data){
+ $("#assays#{compound.cid}").html(data);
+ },
+ error: function(data,textStatus,message){
+ $("#assays#{compound.cid}").html(message);
+ }
+ });
+ %td
diff --git a/views/predicted_assays.haml b/views/predicted_assays.haml
new file mode 100644
index 0000000..e995842
--- /dev/null
+++ b/views/predicted_assays.haml
@@ -0,0 +1,6 @@
+%ul
+ - @assays.each do |assay|
+ %li
+ %a{:href => "/aid/#{assay["AID"]}"} #{assay['Assay Name']}
+ AID:
+ = assay["AID"]
diff --git a/views/predicted_targets.haml b/views/predicted_targets.haml
new file mode 100644
index 0000000..1a172ff
--- /dev/null
+++ b/views/predicted_targets.haml
@@ -0,0 +1,6 @@
+%ul
+ - @assays.each do |assay|
+ %li
+ %a{:href => "/aid/#{assay["AID"]}"} #{assay['Target Name']}
+ Target GI:
+ = assay['Target GI']
diff --git a/views/select.haml b/views/select.haml
new file mode 100644
index 0000000..d6e3d20
--- /dev/null
+++ b/views/select.haml
@@ -0,0 +1,8 @@
+%p
+ More than one compound found for
+ = "\"#{params[:name]}\"."
+ Please select a structure:
+- @compounds.each do |compound|
+ %a{:href => "/cid/#{compound.cid}"}
+ %img{:src => compound.image_uri }
+
diff --git a/views/targets.haml b/views/targets.haml
new file mode 100644
index 0000000..1a172ff
--- /dev/null
+++ b/views/targets.haml
@@ -0,0 +1,6 @@
+%ul
+ - @assays.each do |assay|
+ %li
+ %a{:href => "/aid/#{assay["AID"]}"} #{assay['Target Name']}
+ Target GI:
+ = assay['Target GI']