summaryrefslogtreecommitdiff
path: root/lib/model.rb
blob: 76c5963f788345d251ea40ba8365a48bb99d39ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
=begin
* Name: lazar.rb
* Description: Lazar model representation
* Author: Andreas Maunz <andreas@maunz.de>
* Date: 10/2012
=end

module OpenTox

  class Model

    def initialize(*args)
      if args.size == 2
        super(*args)# We have uri and subjectid
      end
      if args.size == 1
        prepare_prediction_model(args[0]) # We have a hash (prediction time)
      end
    end

    # Internal use only
    def prepare_prediction_model(params)
      params.each {|k,v|
        self.class.class_eval { attr_accessor k.to_sym }
        instance_variable_set(eval(":@"+k), v)
      }
      ["cmpds", "fps", "acts", "n_prop", "q_prop", "neighbors"].each {|k|
        self.class.class_eval { attr_accessor k.to_sym }
        instance_variable_set(eval(":@"+k), [])
      }
    end
    private :prepare_prediction_model

    # Fills model in with data for prediction
    # Avoids associative lookups, since canonization to InChI takes time
    # @param [OpenTox::Dataset] training dataset
    # @param [OpenTox::Dataset] feature dataset
    # @param [OpenTox::Feature] prediction feature
    # @param [Hash] compound fingerprints
    # @param [String] subjectid
    def add_data(training_dataset, feature_dataset, prediction_feature, compound_fingerprints, subjectid)
      training_dataset.build_feature_positions
      prediction_feature_pos = training_dataset.feature_positions[prediction_feature.uri]
      training_dataset.compounds.each_with_index { |cmpd, idx|
        act = training_dataset.data_entries[idx][prediction_feature_pos]
        @acts << training_dataset.value_map(prediction_feature).invert[act]
        @n_prop << feature_dataset.data_entries[idx].collect.to_a
        @cmpds << cmpd.uri
      }
      @q_prop = feature_dataset.features.collect { |f| 
        val = compound_fingerprints[f.title]
        bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric?
        val ? val.to_f : 0.0
      } # query structure
    end


    # Check parameters for plausibility
    # Prepare lazar object (includes graph mining)
    # @param[Array] lazar parameters as strings
    # @param[Hash] REST parameters, as input by user
    def check_params(lazar_params, params)

      unless params[:feature_generation_uri]
        bad_request_error "Please provide a feature generation uri" 
      end
      feature_generation_uri = params[:feature_generation_uri]

      unless training_dataset = OpenTox::Dataset.find(params[:dataset_uri], @subjectid) # AM: find is a shim
        resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." 
      end
      training_dataset_uri = training_dataset.uri

      unless params[:prediction_feature] # try to read prediction_feature from dataset
        resource_not_found_error "Please provide a prediction_feature parameter" unless training_dataset.features.size == 1
        params[:prediction_feature] = training_dataset.features.first.uri
      end

      unless training_dataset.find_feature( params[:prediction_feature] ) # AM: find_feature is a shim
        resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" 
      end
      prediction_feature = OpenTox::Feature.find(params[:prediction_feature], @subjectid) # AM: find is a shim
      prediction_feature_uri = prediction_feature.uri

      if params[:feature_dataset_uri]
        feature_dataset_uri = params[:feature_dataset_uri]
      else
        feature_dataset_uri = OpenTox::Algorithm.new(feature_generation_uri).run(params)
      end

      if (feature_generation_uri =~ /fminer/)
        feature_calculation_algorithm = "match"
        if (params[:nr_hits] == "true")
          feature_calculation_algorithm = "match_hits"
        end
      elsif feature_generation_uri =~ /dataset.*\/pc/
        feature_calculation_algorithm = "lookup"
      end

      if feature_calculation_algorithm == "lookup"
        similarity_algorithm = "cosine"
        min_sim = 0.7
      elsif feature_calculation_algorithm =~ /match/
        similarity_algorithm = "tanimoto"
        min_sim = 0.3
      end
      if params[:min_sim] and params[:min_sim].numeric?
        min_sim = params[:min_sim].to_f # AM: frequent manual option
      end

      if prediction_feature.feature_type == "classification"
        prediction_algorithm = "weighted_majority_vote"
      elsif prediction_feature.feature_type == "regression"
        prediction_algorithm = "local_svm_regression"
      end
      if params[:prediction_algorithm] and OpenTox::Algorithm::Neighbors.respond_to? params[:prediction_algorithm]
        prediction_algorithm = params[:prediction_algorithm] # AM: frequent manual option
      end

      propositionalized = true
      if prediction_algorithm =~ /majority_vote/
        propositionalized = false
      end

      if params[:pc_type]
        pc_type = params[:pc_type] 
      end

      if params[:lib]
        lib = params[:lib]
      end

      min_train_performance = $lazar_min_train_performance_default
      if params[:min_train_performance] and params[:min_train_performance].numeric?
        min_train_performance = params[:min_train_performance].to_f # AM: frequent manual option
      end


      lazar_params.collect { |p|
        val = eval(p)
        { DC.title => p, OT.paramValue => (val.nil? ? "" : val) }
      }.compact
    end


  end

end