7 files changed, 152 insertions, 163 deletions
diff --git a/lib/caret.rb b/lib/caret.rb
index 18bfc41..7e4f771 100644
--- a/lib/caret.rb
+++ b/lib/caret.rb
@@ -12,7 +12,7 @@ module OpenTox
           independent_variables.delete_at i
           query_variables.delete_at i
         end
-        if independent_variables.flatten.uniq == ["NA"] 
+        if independent_variables.flatten.uniq == ["NA"] or independent_variables.flatten.uniq == [] 
           prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
           prediction[:warning] = "No variables for regression model. Using weighted average of similar substances."
         elsif
diff --git a/lib/compound.rb b/lib/compound.rb
index a399169..8a1143b 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -136,9 +136,6 @@ module OpenTox
     # @param inchi [String] smiles InChI string
     # @return [OpenTox::Compound] Compound
     def self.from_inchi inchi
-      # Temporary workaround for OpenBabels Inchi bug
-      # http://sourceforge.net/p/openbabel/bugs/957/
-      # bug has not been fixed in latest git/development version
       #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip
       smiles = obconversion(inchi,"inchi","can")
       if smiles.empty?
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index be680ae..5a05955 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -6,6 +6,7 @@ module OpenTox
       field :folds, type: Integer, default: 10
 
       def self.create model, n=10
+        $logger.debug model.algorithms
         klass = ClassificationCrossValidation if model.is_a? Model::LazarClassification
         klass = RegressionCrossValidation if model.is_a? Model::LazarRegression
         bad_request_error "Unknown model class #{model.class}." unless klass
diff --git a/lib/import.rb b/lib/import.rb
index 8e57401..aa2ee75 100644
--- a/lib/import.rb
+++ b/lib/import.rb
@@ -5,106 +5,95 @@ module OpenTox
     class Enanomapper
       include OpenTox
 
-      def self.mirror dir="."
-        #get list of bundle URIs
+      # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%)
+      def self.import dir="."
+        datasets = {}
         bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"]
-        File.open(File.join(dir,"bundles.json"),"w+"){|f| f.puts JSON.pretty_generate(bundles)}
         bundles.each do |bundle|
+          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
           $logger.debug bundle["title"]
           nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"]+"?media=application%2Fjson"))["dataEntry"]
-          $logger.debug nanoparticles.size
-          nanoparticles.each do |nanoparticle|
-            uuid = nanoparticle["values"]["https://data.enanomapper.net/identifier/uuid"]
-            $logger.debug uuid
-            File.open(File.join(dir,"nanoparticle-#{uuid}.json"),"w+"){|f| f.puts JSON.pretty_generate(nanoparticle)}
-            studies = JSON.parse(RestClientWrapper.get(File.join(nanoparticle["compound"]["URI"],"study")))["study"]
-            $logger.debug uuid if studies.size < 1 
-            studies.each do |study|
-              File.open(File.join(dir,"study-#{study["uuid"]}.json"),"w+"){|f| f.puts JSON.pretty_generate(study)}
-            end
-          end
-        end
-      end
-
-      def self.import dir="."
-        start_time = Time.now
-        t1 = 0
-        t2 = 0
-        datasets = {}
-        JSON.parse(File.read(File.join(dir,"bundles.json"))).each do |bundle|
-          datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"])
-        end
-        Dir[File.join(dir,"study*.json")].each do |s|
-          t = Time.now
-          study = JSON.parse(File.read(s))
-          np = JSON.parse(File.read(File.join(dir,"nanoparticle-#{study['owner']['substance']['uuid']}.json")))
-          core = {}
-          coating = []
-          np["composition"].each do |c|
-            if c["relation"] == "HAS_CORE"
-              core = {
-                :uri => c["component"]["compound"]["URI"],
-                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-              }
-            elsif c["relation"] == "HAS_COATING"
-              coating << {
-                :uri => c["component"]["compound"]["URI"],
-                :name => c["component"]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
-              }
+          nanoparticles.each_with_index do |np,n|
+            core_id = nil
+            coating_ids = []
+            np["composition"].each do |c|
+              uri = c["component"]["compound"]["URI"]
+              uri = CGI.escape File.join(uri,"&media=application/json")
+              data = JSON.parse(RestClientWrapper.get "https://data.enanomapper.net/query/compound/url/all?media=application/json&search=#{uri}")
+              smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"]
+              names = []
+              names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"]
+              names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"]
+              if smiles
+                compound = Compound.find_or_create_by(:smiles => smiles)
+                compound.name = names.first
+                compound.names = names.compact
+              else
+                compound = Compound.find_or_create_by(:name => names.first,:names => names)
+              end
+              compound.save
+              if c["relation"] == "HAS_CORE"
+                core_id = compound.id.to_s
+              elsif c["relation"] == "HAS_COATING"
+                coating_ids << compound.id.to_s
+              end
+            end if np["composition"]
+            nanoparticle = Nanoparticle.find_or_create_by(
+              :name => np["values"]["https://data.enanomapper.net/identifier/name"],
+              :source => np["compound"]["URI"],
+              :core_id => core_id,
+              :coating_ids => coating_ids
+            )
+            np["bundles"].keys.each do |bundle_uri|
+              nanoparticle.dataset_ids << datasets[bundle_uri].id
             end
-          end if np["composition"]
-          nanoparticle = Nanoparticle.find_or_create_by(
-            :name => np["values"]["https://data.enanomapper.net/identifier/name"],
-            :source => np["compound"]["URI"],
-            :core => core,
-            :coating => coating
-          )
-          np["bundles"].keys.each do |bundle_uri|
-            nanoparticle.dataset_ids << datasets[bundle_uri].id
-          end
-
-          dataset = datasets[np["bundles"].keys.first]
-          proteomics_features = {}
-          category = study["protocol"]["topcategory"]
-          source = study["protocol"]["category"]["term"]
 
-          study["effects"].each do |effect|
+            studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study")))["study"]
+            studies.each do |study|
+              dataset = datasets[np["bundles"].keys.first]
+              proteomics_features = {}
+              category = study["protocol"]["topcategory"]
+              source = study["protocol"]["category"]["term"]
+              study["effects"].each do |effect|
 
-            effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
-            effect["conditions"].delete_if { |k, v| v.nil? }
+                effect["result"]["textValue"] ?  klass = NominalFeature : klass = NumericFeature
+                effect["conditions"].delete_if { |k, v| v.nil? }
 
-            if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
+                if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data
 
-              JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
-                proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true)
-                nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
-              end
-            else
-              name = effect["endpoint"]
-              unit = effect["result"]["unit"]
-              warnings = []
-              case name
-              when "Log2 transformed" # use a sensible name
-                name = "log2(Net cell association)"
-                warnings = ["Original name was 'Log2 transformed'"]
-                unit = "log2(mL/ug(Mg))"
-              when "Total protein (BCA assay)"
-                category = "P-CHEM"
-                warnings = ["Category changed from TOX to P-CHEM"]
+                  JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step
+                    proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true)
+                    nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset
+                  end
+                else
+                  name = effect["endpoint"]
+                  unit = effect["result"]["unit"]
+                  warnings = []
+                  case name
+                  when "Log2 transformed" # use a sensible name
+                    name = "log2(Net cell association)"
+                    warnings = ["Original name was 'Log2 transformed'"]
+                    unit = "log2(mL/ug(Mg))"
+                  when "Total protein (BCA assay)"
+                    category = "P-CHEM"
+                    warnings = ["Category changed from TOX to P-CHEM"]
+                  end
+                  feature = klass.find_or_create_by(
+                    :name => name,
+                    :unit => unit,
+                    :category => category,
+                    :conditions => effect["conditions"],
+                    :source => study["protocol"]["category"]["term"],
+                    :measured => true,
+                    :warnings => warnings
+                  )
+                  nanoparticle.parse_ambit_value feature, effect["result"], dataset
+                end
               end
-              feature = klass.find_or_create_by(
-                :name => name,
-                :unit => unit,
-                :category => category,
-                :conditions => effect["conditions"],
-                :source => study["protocol"]["category"]["term"],
-                :measured => true,
-                :warnings => warnings
-              )
-              nanoparticle.parse_ambit_value feature, effect["result"], dataset
             end
+            nanoparticle.save
+            print "#{n}, "
           end
-          nanoparticle.save
         end
         datasets.each { |u,d| d.save }
       end
diff --git a/lib/model.rb b/lib/model.rb
index adcbcc6..e8b30ca 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -81,7 +81,6 @@ module OpenTox
               :method => "properties",
               :categories => ["P-CHEM"],
             },
-            #:descriptors => ["P-CHEM","Proteomics"],
             :similarity => {
               :method => "Algorithm::Similarity.weighted_cosine",
               :min => 0.5
@@ -103,11 +102,12 @@ module OpenTox
             parameters.each do |p,v|
               model.algorithms[type] ||= {}
               model.algorithms[type][p] = v
+              model.algorithms[:descriptors].delete :categories if type == :descriptors and p == :type
             end
           else
             model.algorithms[type] = parameters
           end
-        end
+        end if algorithms
 
         # parse dependent_variables from training dataset
         training_dataset.substances.each do |substance|
@@ -140,10 +140,11 @@ module OpenTox
           model.algorithms[:descriptors].delete(:features)
           model.algorithms[:descriptors].delete(:type)
           model.substances.each_with_index do |s,i|
-            s.calculate_properties(features).each_with_index do |v,j|
+            props = s.calculate_properties(features)
+            props.each_with_index do |v,j|
               model.independent_variables[j] ||= []
               model.independent_variables[j][i] = v
-            end
+            end if props and !props.empty?
           end
         # parse independent_variables
         when "properties"
@@ -152,7 +153,7 @@ module OpenTox
           categories.each do |category|
             Feature.where(category:category).each{|f| feature_ids << f.id.to_s}
           end
-          properties = model.substances.collect { |s| s.properties }
+          properties = model.substances.collect { |s| s.properties  }
           property_ids = properties.collect{|p| p.keys}.flatten.uniq
           model.descriptor_ids = feature_ids & property_ids
           model.independent_variables = model.descriptor_ids.collect{|i| properties.collect{|p| p[i] ? p[i].median : nil}}
@@ -220,10 +221,10 @@ module OpenTox
             prediction[:measurements] << dependent_variables[i]
             prediction[:warning] = "Substance '#{substance.name}, id:#{substance.id}' has been excluded from neighbors, because it is identical with the query substance."
           else
-            next if substance.is_a? Nanoparticle and substance.core != Nanoparticle.find(s).core
             if fingerprints?
               neighbor_descriptors = fingerprints[i]
             else
+              next if substance.is_a? Nanoparticle and substance.core != Nanoparticle.find(s).core # necessary for nanoparticle properties predictions
               neighbor_descriptors = scaled_variables.collect{|v| v[i]}
             end
             sim = Algorithm.run algorithms[:similarity][:method], [similarity_descriptors, neighbor_descriptors, descriptor_weights]
@@ -246,6 +247,7 @@ module OpenTox
         elsif neighbor_similarities.size == 1
           prediction.merge!({:value => dependent_variables.first, :probabilities => nil, :warning => "Only one similar compound in the training set. Predicting its experimental value.", :neighbors => [{:id => neighbor_ids.first, :similarity => neighbor_similarities.first}]})
         else
+          query_descriptors.collect!{|d| d ? 1 : 0} if algorithms[:feature_selection] and algorithms[:descriptors][:method] == "fingerprint"
           # call prediction algorithm
           result = Algorithm.run algorithms[:prediction][:method], dependent_variables:neighbor_dependent_variables,independent_variables:neighbor_independent_variables ,weights:neighbor_similarities, query_variables:query_descriptors
           prediction.merge! result
@@ -328,7 +330,7 @@ module OpenTox
     class LazarRegression < Lazar
     end
 
-    class Prediction
+    class Validation
 
       include OpenTox
       include Mongoid::Document
@@ -340,7 +342,6 @@ module OpenTox
       field :unit, type: String
       field :model_id, type: BSON::ObjectId
       field :repeated_crossvalidation_id, type: BSON::ObjectId
-      field :leave_one_out_validation_id, type: BSON::ObjectId
 
       def predict object
         model.predict object
@@ -354,6 +355,10 @@ module OpenTox
         Lazar.find model_id
       end
 
+      def algorithms
+        model.algorithms
+      end
+
       def prediction_feature
         model.prediction_feature
       end
@@ -366,10 +371,6 @@ module OpenTox
         repeated_crossvalidation.crossvalidations
       end
 
-      def leave_one_out_validation
-        Validation::LeaveOneOut.find leave_one_out_validation_id
-      end
-
       def regression?
         model.is_a? LazarRegression
       end
@@ -381,63 +382,38 @@ module OpenTox
       def self.from_csv_file file
         metadata_file = file.sub(/csv$/,"json")
         bad_request_error "No metadata file #{metadata_file}" unless File.exist? metadata_file
-        prediction_model = self.new JSON.parse(File.read(metadata_file))
+        model_validation = self.new JSON.parse(File.read(metadata_file))
         training_dataset = Dataset.from_csv_file file
         model = Lazar.create training_dataset: training_dataset
-        prediction_model[:model_id] = model.id
-        prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
-        #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
-        prediction_model.save
-        prediction_model
+        model_validation[:model_id] = model.id
+        model_validation[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
+        model_validation.save
+        model_validation
       end
 
-    end
-
-    class NanoPrediction < Prediction
-
-      def self.from_json_dump dir, category
-        Import::Enanomapper.import dir
-        training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
-        unless training_dataset
-          Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+      def self.from_enanomapper training_dataset: nil, prediction_feature:nil, algorithms: nil
+        
+        # find/import training_dataset
+        training_dataset ||= Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+        unless training_dataset # try to import from data.enanomapper.net
+          Import::Enanomapper.import
           training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+          bad_request_error "Cannot import 'Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles' dataset" unless training_dataset
         end
-        prediction_model = self.new(
-          :endpoint => "log2(Net cell association)",
-          :source => "https://data.enanomapper.net/",
-          :species => "A549 human lung epithelial carcinoma cells",
-          :unit => "log2(ug/Mg)"
-        )
-        prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first
-        model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset)
-        prediction_model[:model_id] = model.id
-        repeated_cv = Validation::RepeatedCrossValidation.create model
-        prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
-        #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
-        prediction_model.save
-        prediction_model
-      end
+        prediction_feature ||= Feature.where(name: "log2(Net cell association)", category: "TOX").first
 
-      def self.create dir: dir, algorithms: algorithms
-        training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
-        unless training_dataset
-          Import::Enanomapper.import dir
-          training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
-        end
-        prediction_model = self.new(
-          :endpoint => "log2(Net cell association)",
-          :source => "https://data.enanomapper.net/",
+        model_validation = self.new(
+          :endpoint => prediction_feature.name,
+          :source => prediction_feature.source,
           :species => "A549 human lung epithelial carcinoma cells",
-          :unit => "log2(ug/Mg)"
+          :unit => prediction_feature.unit
         )
-        prediction_feature = Feature.where(name: "log2(Net cell association)", category: "TOX").first
-        model = Model::LazarRegression.create(prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms)
-        prediction_model[:model_id] = model.id
+        model = LazarRegression.create prediction_feature: prediction_feature, training_dataset: training_dataset, algorithms: algorithms
+        model_validation[:model_id] = model.id
         repeated_cv = Validation::RepeatedCrossValidation.create model
-        prediction_model[:repeated_crossvalidation_id] = Validation::RepeatedCrossValidation.create(model).id
-        #prediction_model[:leave_one_out_validation_id] = Validation::LeaveOneOut.create(model).id
-        prediction_model.save
-        prediction_model
+        model_validation[:repeated_crossvalidation_id] = repeated_cv.id
+        model_validation.save
+        model_validation
       end
 
     end
diff --git a/lib/nanoparticle.rb b/lib/nanoparticle.rb
index 23e155c..02d9a89 100644
--- a/lib/nanoparticle.rb
+++ b/lib/nanoparticle.rb
@@ -3,8 +3,30 @@ module OpenTox
   class Nanoparticle < Substance
     include OpenTox
 
-    field :core, type: Hash, default: {}
-    field :coating, type: Array, default: []
+    field :core_id, type: String, default: nil
+    field :coating_ids, type: Array, default: []
+
+    def core
+      Compound.find core_id
+    end
+
+    def coating
+      coating_ids.collect{|i| Compound.find i }
+    end
+
+    def fingerprint type=DEFAULT_FINGERPRINT
+      core_fp = core.fingerprint type
+      coating_fp = coating.collect{|c| c.fingerprint type}.flatten.uniq.compact
+      (core_fp.empty? or coating_fp.empty?) ? [] : (core_fp+coating_fp).uniq.compact
+    end
+
+    def calculate_properties descriptors=PhysChem::OPENBABEL
+      if core.smiles and !coating.collect{|c| c.smiles}.compact.empty?
+        core_prop = core.calculate_properties descriptors
+        coating_prop = coating.collect{|c| c.calculate_properties descriptors if c.smiles}
+        descriptors.collect_with_index{|d,i| [core_prop[i],coating_prop.collect{|c| c[i] if c}]}
+      end
+    end
 
     def add_feature feature, value, dataset
       unless feature.name == "ATOMIC COMPOSITION" or feature.name == "FUNCTIONAL GROUP" # redundand
@@ -37,28 +59,28 @@ module OpenTox
         add_feature feature, v["loValue"], dataset
       elsif v.keys.size == 2 and v["errorValue"]
         add_feature feature, v["loValue"], dataset
-        warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        #warn "Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
       elsif v.keys.size == 2 and v["loQualifier"] == "mean"
         add_feature feature, v["loValue"], dataset
-        warn "'#{feature.name}' is a mean value. Original data is not available."
+        #warn "'#{feature.name}' is a mean value. Original data is not available."
       elsif v.keys.size == 2 and v["loQualifier"] #== ">="
-        warn "Only min value available for '#{feature.name}', entry ignored"
+        #warn "Only min value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 2 and v["upQualifier"] #== ">="
-        warn "Only max value available for '#{feature.name}', entry ignored"
+        #warn "Only max value available for '#{feature.name}', entry ignored"
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 3 and v["loValue"] and v["loQualifier"] == "" and v["upQualifier"] == ""
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.keys.size == 4 and v["loValue"] and v["loQualifier"].nil? and v["upQualifier"].nil?
         add_feature feature, v["loValue"], dataset
-        warn "loQualifier and upQualifier are empty."
+        #warn "loQualifier and upQualifier are empty."
       elsif v.size == 4 and v["loQualifier"] and v["upQualifier"] and v["loValue"] and v["upValue"]
-        add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
-        warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
+        #add_feature feature, [v["loValue"],v["upValue"]].mean, dataset
+        #warn "Using mean value of range #{v["loValue"]} - #{v["upValue"]} for '#{feature.name}'. Original data is not available."
       elsif v.size == 4 and v["loQualifier"] == "mean" and v["errorValue"]
-        warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
+        #warn "'#{feature.name}' is a mean value. Original data is not available. Ignoring errorValue '#{v["errorValue"]}' for '#{feature.name}'."
         add_feature feature, v["loValue"], dataset
       elsif v == {} # do nothing
       else
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 772e812..0901936 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -19,6 +19,10 @@ module OpenTox
         ( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
       end
 
+      #def self.weighted_tanimoto fingerprints
+        #( fingerprints[0] & fingerprints[1]).size/(fingerprints[0]|fingerprints[1]).size.to_f
+      #end
+
       def self.euclid scaled_properties
         sq = scaled_properties[0].zip(scaled_properties[1]).map{|a,b| (a - b) ** 2}
         Math.sqrt(sq.inject(0) {|s,c| s + c})