diff options
author | Christoph Helma <helma@in-silico.ch> | 2011-05-23 14:33:31 +0000 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2011-05-23 14:33:31 +0000 |
commit | 6cb25029a9e6f8dc397f642892a020ab4de4c0f4 (patch) | |
tree | d341ac0a742efcd96d78b56d75c24b3cff7d1477 /lazar.rb | |
parent | f48e41548ebb693c1abba8ad1b2a671fae0c118a (diff) | |
parent | 5b9783ef4eee9a15e801c5781848d5bc9e488110 (diff) |
fminer.rb
Diffstat (limited to 'lazar.rb')
-rw-r--r-- | lazar.rb | 70 |
1 files changed, 41 insertions, 29 deletions
@@ -35,17 +35,17 @@ post '/lazar/?' do halt 404, "Dataset #{dataset_uri} not found." unless training_activities = OpenTox::Dataset.new(dataset_uri) training_activities.load_all(@subjectid) - prediction_feature = params[:prediction_feature] - unless prediction_feature # try to read prediction_feature from dataset + prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) + unless params[:prediction_feature] # try to read prediction_feature from dataset halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 - prediction_feature = training_activities.features.keys.first - params[:prediction_feature] = prediction_feature + prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) + params[:prediction_feature] = prediction_feature.uri # pass to feature mining service end feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] - halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) + halt 404, "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| @@ -55,7 +55,7 @@ post '/lazar/?' do if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) - case training_features.feature_type + case training_features.feature_type(@subjectid) when "classification" lazar.similarity_algorithm = "Similarity.tanimoto" when "regression" @@ -73,7 +73,6 @@ post '/lazar/?' do if prediction_feature.feature_type == "regression" && feature_generation_uri.match(/fminer/) params[:feature_type] = "paths" end - prediction_feature = prediction_feature.uri #hotfix this will change in future version see development branch 2011/04/06 mr feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s training_features = OpenTox::Dataset.new(feature_dataset_uri) end @@ -82,21 +81,24 @@ post '/lazar/?' do halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? # sorted features for index lookups - lazar.features = training_features.features.sort if training_features.feature_type == "regression" + + lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" and lazar.feature_calculation_algorithm != "Substructure.match" training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] entry.keys.each do |feature| - if feature_generation_uri.match(/fminer/) - smarts = training_features.features[feature][OT.smarts] - lazar.fingerprints[compound] << smarts - unless lazar.features.include? smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature][OT.pValue] - lazar.effects[smarts] = training_features.features[feature][OT.effect] + if lazar.feature_calculation_algorithm == "Substructure.match" + if training_features.features[feature] + smarts = training_features.features[feature][OT.smarts] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.pValue] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end end else - case training_features.feature_type + case training_features.feature_type(@subjectid) when "classification" # fingerprints are sets if entry[feature].flatten.size == 1 @@ -117,27 +119,37 @@ post '/lazar/?' do end end + @training_classes = training_activities.feature_classes(prediction_feature.uri, @subjectid) if prediction_feature.feature_type == "classification" + lazar.prediction_algorithm = "Neighbors.local_svm_regression" if prediction_feature.feature_type == "regression" + training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] - unless entry[params[:prediction_feature]].empty? - entry[params[:prediction_feature]].each do |value| - case value.to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - else + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| + if prediction_feature.feature_type == "classification" + case value.to_s + when "true" + lazar.activities[compound] << true + when "false" + lazar.activities[compound] << false + when /#{@training_classes.last}/ + lazar.activities[compound] << true + when /#{@training_classes.first}/ + lazar.activities[compound] << false + else + LOGGER.warn "Unknown class \"#{value.to_s}\"." + end + elsif prediction_feature.feature_type == "regression" halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0 lazar.activities[compound] << value.to_f - lazar.prediction_algorithm = "Neighbors.local_svm_regression" end end end end - lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}" + lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" # TODO: fix dependentVariable - lazar.metadata[OT.dependentVariables] = params[:prediction_feature] + lazar.metadata[OT.dependentVariables] = prediction_feature.uri lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri if training_activities.feature_type.to_s == "classification" @@ -148,7 +160,7 @@ post '/lazar/?' do lazar.metadata[OT.parameters] = [ {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, - {DC.title => "prediction_feature", OT.paramValue => prediction_feature}, + {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri}, {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} ] |