Skip to content

Commit

Permalink
Updated Regx Options, updated timespan, examples, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sandy ganz committed Jun 20, 2018
1 parent b115dd5 commit ba3477b
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 13 deletions.
7 changes: 7 additions & 0 deletions Examples/Example/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 108,13 @@ private static void Main(string[] args)
translated = st.RegxDefined("ScriptTags", string.Empty).RegxDefined("TagsSimple", string.Empty).ToString();

Console.WriteLine("Strip Script and Tags : >{0}<", translated);

// reset and set up a predefined match pattern and set regx case sensitivity
st.Set("wtf does RemoveWTF do? Is WtF Case SeNsItIvE?");
st.RegxMatchesDefined.Add("RemoveWTF", @"(wtf)|(what the)\s (hell|$hit)");

translated = st.RegxIgnoreCase().RegxDefined("RemoveWTF", "XXX").ToString();
Console.WriteLine("New Pre-defined Match : >{0}<", translated);
}
}
}
46 changes: 39 additions & 7 deletions Source/Scrubbie/Scrub.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 14,16 @@ public class Scrub
public List<(string, string)> RegxTuples { private set; get; }
public Dictionary<char, char> CharTransDict { private set; get; }
private string _translatedStr;
private double _tkoSeconds;
private TimeSpan _tkoSeconds;
private RegexOptions _regxOptions;

/// <summary>
/// Sets the MatchTimeout value for all regx calls
/// </summary>
public double TkoSeconds
{
set => _tkoSeconds = value <= 0.0 ? DefaultTkoSeconds : value;
get => _tkoSeconds;
set => _tkoSeconds = value <= 0.0 ? TimeSpan.FromSeconds(DefaultTkoSeconds) : TimeSpan.FromSeconds(value);
get => _tkoSeconds.TotalSeconds;
}

/// <summary>
Expand Down Expand Up @@ -52,6 53,9 @@ public int CacheSize
{ "NonAscii", @"[^\x00-\x7F] \ *(?:[^\x00-\x7F]| )*" }, // removes all non-Ascii
{ "TagsSimple" , @"\<[^\>]*\>" }, // strip tags, simple version
{ "ScriptTags" , @"<script[^>]*>[\s\S]*?</script>" },
{ "ENNumber", @"[ -]?([0-9] ([.][0-9]*)?|[.][0-9] )"}, // format with a decimal as a period, no commas for 1000's
{ "EUumber", @"[ -]?([0-9] ([,][0-9]*)?|[,][0-9] )"}, // format with a decimal as a comma, no period for 1000's
{ "UniNumber", @"[ -]?([0-9] ([.,][0-9]*)?|[,.][0-9] )"}, // picks up numbers with either comma, period in either place. May not be valid numbers
};

/// <summary>
Expand All @@ -73,7 77,11 @@ public Scrub(string origString)

// set local time out (TKO) for all regx's

TkoSeconds = DefaultTkoSeconds;
_tkoSeconds = TimeSpan.FromSeconds(DefaultTkoSeconds);

// set to match case (do not ignore it)

_regxOptions = RegexOptions.None;
}

/// <summary>
Expand Down Expand Up @@ -145,6 153,30 @@ public void SetRegxTranslator(List<(string, string)> regxTuplesList = null)
}
}

/// <summary>
/// Switches case-insensitive matching on or off for the regx based
/// calls made after it. Only the regx options are changed here; the
/// Map functions typically need their dictionary set up beforehand,
/// so this setting is ONLY for REGX's, not MAP.
/// </summary>
/// <param name="ignoreCase">true (the default) adds RegexOptions.IgnoreCase, false removes it</param>
/// <returns>This Scrub instance so calls can be chained</returns>
public Scrub RegxIgnoreCase(bool ignoreCase = true)
{
    // bitwise: OR the flag in to set it, AND with its complement to clear it;
    // any other option bits stay as they were

    _regxOptions = ignoreCase
        ? _regxOptions | RegexOptions.IgnoreCase
        : _regxOptions & ~RegexOptions.IgnoreCase;

    return this;
}

/// <summary>
/// Translates the given string based on the characters in the dictionary. If a character is
/// not in the dictionary, it is passed through untouched. Size of string is not changed.
Expand Down Expand Up @@ -178,7 210,7 @@ public Scrub Strip(string matchRegx)
{
// Call static replace method, strip and save

_translatedStr = Regex.Replace(_translatedStr, matchRegx, String.Empty, RegexOptions.None, TimeSpan.FromSeconds(TkoSeconds));
_translatedStr = Regex.Replace(_translatedStr, matchRegx, String.Empty, _regxOptions, _tkoSeconds);

return this;
}
Expand Down Expand Up @@ -270,7 302,7 @@ public Scrub RegxTranslate()
{
// static will compile and cache the regx for each one

_translatedStr = Regex.Replace(_translatedStr, regxTuple.Item1, regxTuple.Item2, RegexOptions.None, TimeSpan.FromSeconds(TkoSeconds));
_translatedStr = Regex.Replace(_translatedStr, regxTuple.Item1, regxTuple.Item2, _regxOptions, _tkoSeconds);
}

return this;
Expand All @@ -297,7 329,7 @@ public Scrub RegxDefined(string preDefined, string replacement = "")

// static will compile and cache the regx for each one

_translatedStr = Regex.Replace(_translatedStr, RegxMatchesDefined[preDefined], replacement, RegexOptions.None, TimeSpan.FromSeconds(TkoSeconds));
_translatedStr = Regex.Replace(_translatedStr, RegxMatchesDefined[preDefined], replacement, _regxOptions, _tkoSeconds);

return this;
}
Expand Down
12 changes: 6 additions & 6 deletions Tests/IntegrationTests/ScrubbieIntegrationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 17,10 @@ public void Predefined_CompactWhitespace_Compacted()
Scrub st = new Scrub(sentence);

// Compact whitespaces to one space, note does not imply trim!
// overrides default empty string replace to replace with single space
// note trailing space at end of string

st.RegxDefined("CompactWhitespace");
st.RegxDefined("WhitespaceCompact", " ");

Assert.AreEqual(expectedSentance, st.ToString());
}
Expand All @@ -27,15 29,13 @@ public void Predefined_CompactWhitespace_Compacted()
public void Predefined_InvalidName_Untouched()
{
string sentence = "¿¡Señor, the Chevrolet guys don't like Dodge guys, and and no one like MaZdA, Ola Senor?! ";
string expectedSentance = "¿¡Señor, the Chevrolet guys don't like Dodge guys, and and no one like MaZdA, Ola Senor?! ";
string expectedSentance = "¿¡Señor,theChevroletguysdon'tlikeDodgeguys,andandnoonelikeMaZdA,OlaSenor?!";

Scrub st = new Scrub(sentence);

// Compact whitespaces to one space, note does not imply trim!

st.RegxDefined("NotInTheListOfDefined");
// Invalid pre-defined pattern, should throw

Assert.AreEqual(expectedSentance, st.ToString());
Assert.ThrowsException<KeyNotFoundException>(() => st.RegxDefined("NotInTheListOfDefined"));
}

[TestMethod]
Expand Down
10 changes: 10 additions & 0 deletions Tests/UnitTests/ScrubbieTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -211,5 211,15 @@ public void SetRegxTimeOut_MatchTimeOut_Matches()

Assert.AreEqual(expectedTKO, st.TkoSeconds);
}

[TestMethod]
public void SetRegxOptions_MatchSetting_Matches()
{
    // Verifies that RegxIgnoreCase() controls whether the regx calls match
    // case-insensitively, using Strip() as the regx call under test.
    // NOTE(review): assumes the constructor seeds the working string from
    // its argument and that ToString() returns the working string - confirm.

    const string sentence = "Hello WORLD";

    // default options are case sensitive, a lower-case pattern must not match

    Scrub st = new Scrub(sentence);

    Assert.AreEqual(sentence, st.Strip("world").ToString());

    // ignore case switched on, the same pattern now strips the upper-case text

    Assert.AreEqual("Hello ", st.RegxIgnoreCase().Strip("world").ToString());

    // ignore case switched on then off again, back to case-sensitive matching

    st = new Scrub(sentence);

    Assert.AreEqual(sentence, st.RegxIgnoreCase().RegxIgnoreCase(false).Strip("world").ToString());
}

}
}

0 comments on commit ba3477b

Please sign in to comment.