From 0f1e80c3dcbbbc0d8b2f916de68d6d0c86b53ec2 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 6 May 2011 19:29:55 +0200 Subject: read acceptValue from ambit datasets, set acceptValue when create dataset from csv/excel --- lib/feature.rb | 13 ------------- lib/parser.rb | 40 ++++++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/lib/feature.rb b/lib/feature.rb index f6e2dfd..b631e46 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -16,19 +16,6 @@ module OpenTox feature end - # provides domain (possible target values) of classification feature - # @return [Array] list with possible target values - def domain - if metadata[OT.acceptValue] - raise "accept value found, remove hack and implement correctly" - else - if @uri=~/feature\/26221/ || @uri=~/feature\/221726/ - return ["mutagen" , "nonmutagen"] - end - return [true, false] - end - end - # provides feature type, possible types are "regression" or "classification" # @return [String] feature type, unknown if OT.isA property is unknown/ not set def feature_type diff --git a/lib/parser.rb b/lib/parser.rb index db746c1..7bdee95 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -163,6 +163,7 @@ module OpenTox data = {} feature_values = {} feature = {} + feature_accept_values = {} other_statements = {} `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line| triple = line.chomp.split(' ',3) @@ -182,6 +183,9 @@ module OpenTox if triple[2]=~/#{OT.Compound}/i and !data[triple[0]] data[triple[0]] = {:compound => triple[0], :values => []} end + when /#{OT.acceptValue}/i # acceptValue in ambit datasets is only provided in dataset/ no in dataset//features + feature_accept_values[triple[0]] = [] unless feature_accept_values[triple[0]] + feature_accept_values[triple[0]] << triple[2] else end end @@ -192,20 +196,25 @@ module OpenTox @dataset.add_compound(entry[:compound]) else entry[:values].each do |value_id| - split = feature_values[value_id].split(/\^\^/) - case split[-1] - when XSD.double, XSD.float - value = split.first.to_f - when XSD.boolean - value = split.first=~/(?i)true/ ? true : false - else - value = split.first + if feature_values[value_id] + split = feature_values[value_id].split(/\^\^/) + case split[-1] + when XSD.double, XSD.float + value = split.first.to_f + when XSD.boolean + value = split.first=~/(?i)true/ ? true : false + else + value = split.first + end end @dataset.add entry[:compound],feature[value_id],value end end end load_features subjectid + feature_accept_values.each do |feature, values| + @dataset.features[feature][OT.acceptValue] = values + end @dataset.metadata = load_metadata(subjectid) @dataset end @@ -348,16 +357,23 @@ module OpenTox when OT.NominalFeature case value.to_s when TRUE_REGEXP - @dataset.add(compound.uri, feature, true ) + val = true when FALSE_REGEXP - @dataset.add(compound.uri, feature, false ) + val = false end when OT.NumericFeature - @dataset.add compound.uri, feature, value.to_f + val = value.to_f when OT.StringFeature - @dataset.add compound.uri, feature, value.to_s + val = value.to_s @activity_errors << smiles+", "+row.join(", ") end + if val!=nil + @dataset.add(compound.uri, feature, val) + if type!=OT.NumericFeature + @dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue] + @dataset.features[feature][OT.acceptValue] << val.to_s unless @dataset.features[feature][OT.acceptValue].include?(val.to_s) + end + end end end -- cgit v1.2.3