From 87eb7cc1e079821c2f7c5e101e7e392e9bd10f00 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 24 May 2011 09:35:11 +0200 Subject: Fixing regression detection --- lib/parser.rb | 66 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 7bdee95..8deaa91 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -277,7 +277,23 @@ module OpenTox def load_spreadsheet(book) book.default_sheet = 0 add_features book.row(1) - 2.upto(book.last_row) { |i| add_values book.row(i) } + + # AM: fix mixed read in + regression_features=false + 2.upto(book.last_row) { |i| + row = book.row(i) + smiles = row.shift + row.each_index do |i| + value = row[i] + type = feature_type(value) + if type == OT.NumericFeature + regression_features=true + break + end + end + } + + 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } warnings @dataset end @@ -289,7 +305,23 @@ module OpenTox row = 0 input = csv.split("\n") add_features split_row(input.shift) - input.each { |row| add_values split_row(row) } + + + # AM: fix mixed read in + regression_features=false + input.each { |row| + row = split_row(row) + smiles = row.shift + row.each_index do |i| + value = row[i] + type = feature_type(value) + if type == OT.NumericFeature + regression_features=true + break + end + end + } + input.each { |row| add_values split_row(row),regression_features } warnings @dataset end @@ -335,7 +367,7 @@ module OpenTox end end - def add_values(row) + def add_values(row, regression_features=false) smiles = row.shift compound = Compound.from_smiles(smiles) @@ -353,19 +385,23 @@ module OpenTox @feature_types[feature] << type - case type - when OT.NominalFeature - case value.to_s - when TRUE_REGEXP - val = true - when FALSE_REGEXP - val = false - end - when OT.NumericFeature + if (regression_features) val = value.to_f - when OT.StringFeature - val = value.to_s - @activity_errors << smiles+", "+row.join(", ") + else + case type + when OT.NominalFeature + case value.to_s + when TRUE_REGEXP + val = true + when FALSE_REGEXP + val = false + end + when OT.NumericFeature + val = value.to_f + when OT.StringFeature + val = value.to_s + @activity_errors << smiles+", "+row.join(", ") + end end if val!=nil @dataset.add(compound.uri, feature, val) -- cgit v1.2.3 From 4a7ba2adb0743cd225ad5c2cf9f71c896d87b157 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 24 May 2011 10:45:53 +0200 Subject: Created dedicated function for value sweeping --- lib/parser.rb | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/lib/parser.rb b/lib/parser.rb index 8deaa91..4984292 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -282,15 +282,8 @@ module OpenTox regression_features=false 2.upto(book.last_row) { |i| row = book.row(i) - smiles = row.shift - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - break - end - end + regression_features = detect_regression_features row + break if regression_features=true } 2.upto(book.last_row) { |i| add_values book.row(i),regression_features } @@ -311,21 +304,15 @@ module OpenTox regression_features=false input.each { |row| row = split_row(row) - smiles = row.shift - row.each_index do |i| - value = row[i] - type = feature_type(value) - if type == OT.NumericFeature - regression_features=true - break - end - end + regression_features = detect_regression_features row + break if regression_features=true } input.each { |row| add_values split_row(row),regression_features } warnings @dataset end + private def warnings @@ -367,6 +354,18 @@ module OpenTox end end + def detect_regression_features row + regression_features=false + row.each_index do |i| + value = row[i] + type = feature_type(value) + if type == OT.NumericFeature + regression_features=true + end + end + regression_features + end + def add_values(row, regression_features=false) smiles = row.shift -- cgit v1.2.3