From 3bb6365594d168281019bdec303e70c123414ce4 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 4 Apr 2011 18:51:11 +0200 Subject: OT.isA substituted by RDF.type, identification of feature_types by RDF.type --- fminer.rb | 76 +++++++++++++++++++++++++++++++++++++++--------------------- last-utils | 2 +- lazar.rb | 74 +++++++++++++++++++++++++++++++++------------------------- libfminer | 2 +- openbabel.rb | 2 +- 5 files changed, 95 insertions(+), 61 deletions(-) diff --git a/fminer.rb b/fminer.rb index 0b18c01..92326b6 100644 --- a/fminer.rb +++ b/fminer.rb @@ -23,7 +23,7 @@ get "/fminer/bbrc/?" do DC.title => 'fminer backbone refinement class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.PatternMiningSupervised, + RDF.type => [OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, @@ -44,7 +44,7 @@ get "/fminer/last/?" do DC.title => 'fminer latent structure class representatives', DC.creator => "andreas@maunz.de, helma@in-silico.ch", DC.contributor => "vorgrimmlerdavid@gmx.de", - OT.isA => OTA.PatternMiningSupervised, + RDF.type => [OTA.PatternMiningSupervised], OT.parameters => [ { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, { DC.description => "Feature URI for dependent variable", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, @@ -70,7 +70,7 @@ post '/fminer/bbrc/?' do halt 404, "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? halt 404, "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil? 
- prediction_feature = params[:prediction_feature] + prediction_feature = OpenTox::Feature.find(params[:prediction_feature], @subjectid) training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", @subjectid halt 404, "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless training_dataset.features and training_dataset.features.include?(params[:prediction_feature]) @@ -88,6 +88,11 @@ post '/fminer/bbrc/?' do @@bbrc.SetBackbone(eval params[:backbone]) if params[:backbone] and ( params[:backbone] == "true" or params[:backbone] == "false" ) # convert string to boolean @@bbrc.SetChisqSig(params[:min_chisq_significance]) if params[:min_chisq_significance] @@bbrc.SetConsoleOut(false) + if prediction_feature.feature_type == "regression" + @@bbrc.SetRegression(true) + else + @training_classes = training_dataset.feature_classes(prediction_feature.uri) + end feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.add_metadata({ @@ -119,21 +124,30 @@ post '/fminer/bbrc/?' do next end entry.each do |feature,values| - if feature == prediction_feature + if feature == prediction_feature.uri values.each do |value| if value.nil? LOGGER.warn "No #{feature} activiity for #{compound.to_s}." else - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - else + if prediction_feature.feature_type == "classification" + case value.to_s + when "true" + nr_active += 1 + activity = 1 + when "false" + nr_inactive += 1 + activity = 0 + when /#{@training_classes.last}/ + nr_active += 1 + activity = 1 + when /#{@training_classes.first}/ + nr_inactive += 1 + activity = 0 + else + LOGGER.warn "Unknown class \"#{value.to_s}\"." + end + elsif prediction_feature.feature_type == "regression" activity = value.to_f - @@bbrc.SetRegression(true) end begin @@bbrc.AddCompound(smiles,id) @@ -192,7 +206,7 @@ post '/fminer/bbrc/?' 
do features << smarts metadata = { OT.hasSource => url_for('/fminer/bbrc', :full), - OT.isA => OT.Substructure, + RDF.type => [OT.Substructure], OT.smarts => smarts, OT.pValue => p_value.to_f, OT.effect => effect, @@ -283,23 +297,31 @@ post '/fminer/last/?' do if value.nil? LOGGER.warn "No #{feature} activiity for #{compound.to_s}." else - case value.to_s - when "true" - nr_active += 1 - activity = 1 - when "false" - nr_inactive += 1 - activity = 0 - else + if prediction_feature.feature_type == "classification" + case value.to_s + when "true" + nr_active += 1 + activity = 1 + when "false" + nr_inactive += 1 + activity = 0 + when /#{@training_classes.last}/ + nr_active += 1 + activity = 1 + when /#{@training_classes.first}/ + nr_inactive += 1 + activity = 0 + else + LOGGER.warn "Unknown class \"#{value.to_s}\"." + end + elsif prediction_feature.feature_type == "regression" activity = value.to_f - @@last.SetRegression(true) end begin - @@last.AddCompound(smiles,id) - @@last.AddActivity(activity, id) + @@last.AddCompound(smiles,id) + @@last.AddActivity(activity, id) all_activities[id]=activity # DV: insert global information compounds[id] = compound - smi[id] = smiles # AM LAST: changed this to store SMILES. id += 1 rescue LOGGER.warn "Could not add " + smiles + "\t" + value.to_s + " to fminer" @@ -340,7 +362,7 @@ post '/fminer/last/?' do unless features.include? smarts features << smarts metadata = { - OT.isA => OT.Substructure, + RDF.type => [OT.Substructure], OT.hasSource => feature_dataset.uri, OT.smarts => smarts, OT.pValue => p_value.to_f, diff --git a/last-utils b/last-utils index daafa32..75bea76 160000 --- a/last-utils +++ b/last-utils @@ -1 +1 @@ -Subproject commit daafa32e330b27111df6dc7193a6ed72fae2be45 +Subproject commit 75bea7645601fd296aa68c6678ee9b0a49a7b918 diff --git a/lazar.rb b/lazar.rb index 2f3ec28..af2740b 100644 --- a/lazar.rb +++ b/lazar.rb @@ -35,17 +35,17 @@ post '/lazar/?' do halt 404, "Dataset #{dataset_uri} not found." 
unless training_activities = OpenTox::Dataset.new(dataset_uri) training_activities.load_all(@subjectid) - prediction_feature = params[:prediction_feature] - unless prediction_feature # try to read prediction_feature from dataset + prediction_feature = OpenTox::Feature.find(params[:prediction_feature],@subjectid) + unless params[:prediction_feature] # try to read prediction_feature from dataset halt 404, "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 - prediction_feature = training_activities.features.keys.first - params[:prediction_feature] = prediction_feature + prediction_feature = OpenTox::Feature.find(training_activities.features.keys.first,@subjectid) + #params[:prediction_feature] = prediction_feature end feature_generation_uri = @@feature_generation_default unless feature_generation_uri = params[:feature_generation_uri] - halt 404, "No feature #{prediction_feature} in dataset #{params[:dataset_uri]}. (features: "+ - training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature) + halt 404, "No feature #{prediction_feature.uri} in dataset #{params[:dataset_uri]}. (features: "+ + training_activities.features.inspect+")" unless training_activities.features and training_activities.features.include?(prediction_feature.uri) task = OpenTox::Task.create("Create lazar model",url_for('/lazar',:full)) do |task| @@ -55,7 +55,7 @@ post '/lazar/?' do if params[:feature_dataset_uri] feature_dataset_uri = params[:feature_dataset_uri] training_features = OpenTox::Dataset.new(feature_dataset_uri) - case training_features.feature_type + case prediction_feature.feature_type when "classification" lazar.similarity_algorithm = "Similarity.tanimoto" when "regression" @@ -77,21 +77,23 @@ post '/lazar/?' do halt 404, "Dataset #{feature_dataset_uri} not found." if training_features.nil? 
# sorted features for index lookups - lazar.features = training_features.features.sort if training_features.feature_type == "regression" + lazar.features = training_features.features.sort if prediction_feature.feature_type == "regression" training_features.data_entries.each do |compound,entry| lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] entry.keys.each do |feature| if feature_generation_uri.match(/fminer/) - smarts = training_features.features[feature][OT.smarts] - lazar.fingerprints[compound] << smarts - unless lazar.features.include? smarts - lazar.features << smarts - lazar.p_values[smarts] = training_features.features[feature][OT.pValue] - lazar.effects[smarts] = training_features.features[feature][OT.effect] + if training_features.features[feature] + smarts = training_features.features[feature][OT.smarts] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.pValue] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end end else - case training_features.feature_type + case prediction_feature.feature_type when "classification" # fingerprints are sets if entry[feature].flatten.size == 1 @@ -107,43 +109,53 @@ post '/lazar/?' do else LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" end + lazar.prediction_algorithm = "Neighbors.local_svm_regression" end end end end + @training_classes = training_activities.feature_classes(prediction_feature.uri) if prediction_feature.feature_type == "classification" + training_activities.data_entries.each do |compound,entry| lazar.activities[compound] = [] unless lazar.activities[compound] - unless entry[params[:prediction_feature]].empty? 
- entry[params[:prediction_feature]].each do |value| - case value.to_s - when "true" - lazar.activities[compound] << true - when "false" - lazar.activities[compound] << false - else + unless entry[prediction_feature.uri].empty? + entry[prediction_feature.uri].each do |value| + if prediction_feature.feature_type == "classification" + case value.to_s + when "true" + lazar.activities[compound] << true + when "false" + lazar.activities[compound] << false + when /#{@training_classes.last}/ + lazar.activities[compound] << true + when /#{@training_classes.first}/ + lazar.activities[compound] << false + else + LOGGER.warn "Unknown class \"#{value.to_s}\"." + end + elsif prediction_feature.feature_type == "regression" halt 404, "0 values not allowed in training dataset. log10 is calculated internally." if value.to_f == 0 lazar.activities[compound] << value.to_f - lazar.prediction_algorithm = "Neighbors.local_svm_regression" end end end end - lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature))}" + lazar.metadata[DC.title] = "lazar model for #{URI.decode(File.basename(prediction_feature.uri))}" # TODO: fix dependentVariable - lazar.metadata[OT.dependentVariables] = params[:prediction_feature] + lazar.metadata[OT.dependentVariables] = prediction_feature.uri lazar.metadata[OT.trainingDataset] = dataset_uri lazar.metadata[OT.featureDataset] = feature_dataset_uri - if training_activities.feature_type.to_s == "classification" - lazar.metadata[OT.isA] = OTA.ClassificationLazySingleTarget - elsif training_activities.feature_type.to_s == "regression" - lazar.metadata[OT.isA] = OTA.RegressionLazySingleTarget + if prediction_feature.feature_type == "classification" + lazar.metadata[RDF.type] = [OTA.ClassificationLazySingleTarget] + elsif prediction_feature.feature_type == "regression" + lazar.metadata[RDF.type] = [OTA.RegressionLazySingleTarget] end lazar.metadata[OT.parameters] = [ {DC.title => "dataset_uri", OT.paramValue => dataset_uri}, - 
{DC.title => "prediction_feature", OT.paramValue => prediction_feature}, + {DC.title => "prediction_feature", OT.paramValue => prediction_feature.uri}, {DC.title => "feature_generation_uri", OT.paramValue => feature_generation_uri} ] diff --git a/libfminer b/libfminer index 01b8e50..d51f5e7 160000 --- a/libfminer +++ b/libfminer @@ -1 +1 @@ -Subproject commit 01b8e50e8e6fb3ce29fc8bf0a65a8c6f6af94b3f +Subproject commit d51f5e784ce0f5b7ef1c47c52ea55d1c874ec2e6 diff --git a/openbabel.rb b/openbabel.rb index 3a873c0..1644455 100644 --- a/openbabel.rb +++ b/openbabel.rb @@ -44,7 +44,7 @@ get '/openbabel/:property' do DC.title => params[:property], DC.creator => "helma@in-silico.ch", DC.description => description, - OT.isA => OTA.DescriptorCalculation, + RDF.type => [OTA.DescriptorCalculation], } response['Content-Type'] = 'application/rdf+xml' algorithm.to_rdfxml -- cgit v1.2.3