summaryrefslogtreecommitdiff
path: root/lib/compound.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-03-14 15:25:50 +0100
committerChristoph Helma <helma@in-silico.ch>2016-03-14 15:25:50 +0100
commit0c5d2e678908a2d4aea43efbedbedc2c0439be30 (patch)
tree2a056c6edc088a37b7e489de435706af79f324e7 /lib/compound.rb
parent989f20ae58c3ecb0ce62bc4468c3dab2599637b3 (diff)
descriptor tests
Diffstat (limited to 'lib/compound.rb')
-rw-r--r--lib/compound.rb67
1 files changed, 24 insertions, 43 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 8c11831..2a79fd6 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -1,7 +1,3 @@
-# TODO: check
-# *** Open Babel Error in ParseFile
-# Could not find contribution data file.
-
CACTUS_URI="http://cactus.nci.nih.gov/chemical/structure/"
module OpenTox
@@ -9,7 +5,6 @@ module OpenTox
class Compound
require_relative "unique_descriptors.rb"
include OpenTox
- include OpenTox::Descriptor
DEFAULT_FINGERPRINT = "MP2D"
@@ -22,7 +17,6 @@ module OpenTox
field :png_id, type: BSON::ObjectId
field :svg_id, type: BSON::ObjectId
field :sdf_id, type: BSON::ObjectId
- field :molecular_weight, type: Float
field :fingerprints, type: Hash, default: {}
field :default_fingerprint_size, type: Integer
field :physchem_descriptors, type: Hash, default: {}
@@ -30,7 +24,6 @@ module OpenTox
field :features, type: Hash, default: {}
index({smiles: 1}, {unique: true})
- #index({default_fingerprint: 1}, {unique: false})
# Overwrites standard Mongoid method to create fingerprints before database insertion
def self.find_or_create_by params
@@ -106,7 +99,24 @@ module OpenTox
end
end
save
- physchem_descriptors
+ physchem_descriptors.select{|id,v| descriptors.collect{|d| d.id.to_s}.include? id}
+ end
+
+ def smarts_match smarts, count=false
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('smi')
+ obconversion.read_string(obmol,self.smiles)
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ smarts.collect do |sma|
+ smarts_pattern.init(sma.smarts)
+ if smarts_pattern.match(obmol)
+ count ? value = smarts_pattern.get_map_list.to_a.size : value = 1
+ else
+ value = 0
+ end
+ value
+ end
end
# Create a compound from smiles string
@@ -281,34 +291,16 @@ module OpenTox
training_dataset = Dataset.find(params[:training_dataset_id])
prediction_feature = training_dataset.features.first
training_dataset.compounds.each do |compound|
- #unless self == compound
- candidate_fingerprint = compound.fingerprint params[:type]
- sim = (query_fingerprint & candidate_fingerprint).size/(query_fingerprint | candidate_fingerprint).size.to_f
- feature_values = training_dataset.values(compound,prediction_feature)
- neighbors << {"_id" => compound.id, "features" => {prediction_feature.id.to_s => feature_values}, "tanimoto" => sim} if sim >= params[:min_sim]
- #end
+ candidate_fingerprint = compound.fingerprint params[:type]
+ sim = (query_fingerprint & candidate_fingerprint).size/(query_fingerprint | candidate_fingerprint).size.to_f
+ feature_values = training_dataset.values(compound,prediction_feature)
+ neighbors << {"_id" => compound.id, "features" => {prediction_feature.id.to_s => feature_values}, "tanimoto" => sim} if sim >= params[:min_sim]
end
neighbors.sort!{|a,b| b["tanimoto"] <=> a["tanimoto"]}
end
neighbors
end
- def fminer_neighbors params
- bad_request_error "Incorrect parameters for Compound#fminer_neighbors. Please provide :feature_dataset_id, :min_sim." unless params[:feature_dataset_id] and params[:min_sim]
- feature_dataset = Dataset.find params[:feature_dataset_id]
- query_fingerprint = Algorithm::Descriptor.smarts_match(self, feature_dataset.features)
- neighbors = []
-
- # find neighbors
- feature_dataset.data_entries.each_with_index do |candidate_fingerprint, i|
- sim = Algorithm::Similarity.tanimoto candidate_fingerprint, query_fingerprint
- if sim >= params[:min_sim]
- neighbors << [feature_dataset.compound_ids[i],sim] # use compound_ids, instantiation of Compounds is too time consuming
- end
- end
- neighbors
- end
-
def physchem_neighbors params
feature_dataset = Dataset.find params[:feature_dataset_id]
query_fingerprint = Algorithm.run params[:feature_calculation_algorithm], self, params[:descriptors]
@@ -317,13 +309,7 @@ module OpenTox
# TODO implement pearson and cosine similarity separatly
R.assign "x", query_fingerprint
R.assign "y", candidate_fingerprint
- # pearson r
- #sim = R.eval("cor(x,y,use='complete.obs',method='pearson')").to_ruby
- #p "pearson"
- #p sim
- #p "cosine"
sim = R.eval("x %*% y / sqrt(x%*%x * y%*%y)").to_ruby.first
- #p sim
if sim >= params[:min_sim]
neighbors << [feature_dataset.compound_ids[i],sim] # use compound_ids, instantiation of Compounds is too time consuming
end
@@ -357,9 +343,6 @@ module OpenTox
]
$mongo["compounds"].aggregate(aggregate).select{|r| r["dataset_ids"].include? params[:training_dataset_id]}
-
-
- #$mongo["compounds"].aggregate(aggregate).collect{ |r| [r["_id"], r["tanimoto"]] }
end
@@ -378,10 +361,8 @@ module OpenTox
# Calculate molecular weight of Compound with OB and store it in object
# @return [Float] molecular weight
def molecular_weight
- if self["molecular_weight"]==0.0 || self["molecular_weight"].nil?
- update(:molecular_weight => OpenTox::Algorithm::Descriptor.physchem(self, ["Openbabel.MW"]).first)
- end
- self["molecular_weight"].to_f
+ mw_feature = PhysChem.find_or_create_by(:name => "Openbabel.MW")
+ physchem([mw_feature])[mw_feature.id.to_s]
end
private