mIRC Home    About    Download    Register    News    Help

Topic Options
#18237 - 05/04/03 11:20 PM $regsub matching to remove duplicate color codes.
Tat Offline
Babel fish

Registered: 12/01/03
Posts: 87
... Continued from previous posting.

The following alias removes almost flawlessly duplicate color codes from any text given to it.

The almost flawlessly is based on the fact that:

11,2031234 this bugger's broke a little more.

Fails to work properly. It's not that severe, seeing as I actually spent time thinking up something that would break it. But basically I think it needs a quick addition to delete one-digit color codes.

I think:
$regsub(%a, /(\d)([^\d])/,0\1\2, %a)
$regsub(%a,/(\d\d?\x2C)(\d)([^\d])/, \10\2\3, %a)
will catch and kill single-digit color codes, actually making a few lines in the alias unneeded.

Just tested, and I'm wrong. Also needs to check all the way back to the front.

Also, perhaps the color codes should all be converted to hex. A raw color control character is hard to tell from a raw bold one, but \x03 is easy to tell from \x02 - not that the readability is that high to begin with.

[code]
; ddc - takes a line of text as its first parameter, runs it through a fixed series of
; $regsub passes that strip redundant (duplicate) colour codes, and returns the result.
; NOTE(review): several patterns below contain empty character classes ([^] and [])
; and the $remove calls have an empty second argument. The raw IRC control characters
; were presumably lost when this listing was pasted - restore them (or use \x03-style
; escapes) before loading the alias. TODO confirm against a working copy.
alias ddc {
; Work on a local copy of the input text.
var %a = $1
; First pass. Every $regsub reads %a and stores its result back into %a. The calls are
; given as arguments to a quiet echo only so that they are evaluated in one statement.
; The first call rebuilds each run of colour codes with $cr and $lf inserted between the
; captured parts - presumably as temporary markers for the following calls, which match
; on \r and \n (TODO confirm).
!.echo -q $&
$regsub(%a,/(?:\d\d?(\x2C\d\d?)?)+(?(1)|())(\d\d?)(\x2C\d\d?|)/g,$+(\2,$cr,\1,$lf,\3,$lf),%a) $&
$regsub(%a,/\r(?:\x2C\d\d?)?\n(\x2C\d\d?)\n|\r(\x2C\d\d?|)\n\n/g,\1,%a) $&
$regsub(%a,/(\d[^]*[^\d][^]*)(?=\D|$)/g,\1,%a) $&
$regsub(%a,/(?<=(\d\d)(\x2C\d\d))([^]+)\1\2?/g,\3,%a) $&
$regsub(%a,/(?<=(\d\d)(\x2C\d))([^\d][^]*)\1\2?(?!\d)/g,\3,%a) $&
$regsub(%a,/(?<=(\d)(\x2C\d\d))([^]+)\1\2?/g,\3,%a) $&
$regsub(%a,/(?<=(\d)(\x2C\d))([^\d][^]*)\1\2?(?!\d)/g,\3,%a) $&
$regsub(%a,/(?<=(\d\d))(\x2C(?:[^\d][^]*)?|[^\x2C][^]*)\1/g,\2,%a) $&
$regsub(%a,/(?<=(\d))(\x2C(?:[^\d][^]*)?|[^\x2C\d][^]*)\1(?!\d)/g,\2,%a) $&
$regsub(%a,/\d\d?(\x2C\d\d?)?(?!\d)/g,,%a)
; Second pass: three further $regsub calls on %a, again evaluated through a quiet echo.
; NOTE(review): the three patterns are identical as shown; presumably each one handled a
; different toggle code (bold, underline, reverse) before the control characters were
; lost - confirm.
!.echo -q $&
$regsub(%a,/(^|(?:(?:(?:(?:[^]\d|[^\D])\d|\D)\x2C|[^\x2C])\d|[^\d\x2C]))(?=\d)|(^|(?:\d\d?(?:\x2C\d\d?)?)?)(?!\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)*[])(?=\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+)(?!\d)/g,\1,%a) $&
$regsub(%a,/(^|(?:(?:(?:(?:[^]\d|[^\D])\d|\D)\x2C|[^\x2C])\d|[^\d\x2C]))(?=\d)|(^|(?:\d\d?(?:\x2C\d\d?)?)?)(?!\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)*[])(?=\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+)(?!\d)/g,\1,%a) $&
$regsub(%a,/(^|(?:(?:(?:(?:[^]\d|[^\D])\d|\D)\x2C|[^\x2C])\d|[^\d\x2C]))(?=\d)|(^|(?:\d\d?(?:\x2C\d\d?)?)?)(?!\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)*[])(?=\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+)(?!\d)/g,\1,%a)
; If the result contains a digit, $remove is applied only to the part before the first
; digit and the remainder is appended unchanged.
if $regex(%a,/(\d)/) {
return $remove($left(%a,$calc($regml(1).pos - 1)),) $+ $mid(%a,$regml(1).pos)
}
; No digit anywhere: $remove is applied to the whole string.
return $remove(%a,)
}
[/code]

Top
#18238 - 05/04/03 11:31 PM Re: $regsub matching to remove duplicate color codes.
Collective Offline
Planetary brain

Registered: 10/12/02
Posts: 3138
Loc: London, UK
Please fix the code tags, stretches the page...

Top
#18239 - 06/04/03 02:12 AM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
Good catch, I didn't think of this case. Here's the fix (guess what... one more $regsub):
Code:
; ddc - takes a line of text as its first parameter, runs it through a fixed series of
; $regsub passes that strip redundant (duplicate) colour codes, and returns the result.
; NOTE(review): the sequences &amp; and &lt; in this listing look like HTML escapes of the
; ampersand and the less-than sign; decode them before loading the alias.
; NOTE(review): several patterns contain empty character classes ([^] and []) and the
; $remove calls have an empty second argument. The raw IRC control characters were
; presumably lost when this listing was pasted - TODO confirm against a working copy.
alias ddc {
  ; %a holds the working text; it is filled by the first $regsub, which reads $1 directly.
  var %a
  ; First pass. Each $regsub stores its result into %a; the calls are given as arguments
  ; to a quiet echo only so that they are evaluated in one statement.
  ; The first call rebuilds each run of colour codes with $cr and $lf inserted between
  ; the captured parts. The second call inserts a 0 at every position that is preceded
  ; by CR plus comma and followed by one digit, LF, any text, LF and another digit, i.e.
  ; it zero-pads a one-digit background number when the text after it starts with a digit.
  !.echo -q $&amp;
    $regsub($1,/(?:\d\d?(\x2C\d\d?)?)+(?(1)|())(\d\d?)(\x2C\d\d?|)/g,$+(\2,$cr,\1,$lf,\3,$lf),%a) $&amp;
    $regsub(%a,/(?&lt;=\r\x2C)(?=\d\n.*\n\d)/g,0,%a) $&amp;
    $regsub(%a,/\r(?:\x2C\d\d?)?\n(\x2C\d\d?)\n|\r(\x2C\d\d?|)\n\n/g,\1,%a) $&amp;
    $regsub(%a,/(\d[^]*[^\d][^]*)(?=\D|$)/g,\1,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d\d)(\x2C\d\d))([^]+)\1\2?/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d\d)(\x2C\d))([^\d][^]*)\1\2?(?!\d)/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d)(\x2C\d\d))([^]+)\1\2?/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d)(\x2C\d))([^\d][^]*)\1\2?(?!\d)/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d\d))(\x2C(?:[^\d][^]*)?|[^\x2C][^]*)\1/g,\2,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d))(\x2C(?:[^\d][^]*)?|[^\x2C\d][^]*)\1(?!\d)/g,\2,%a) $&amp;
    $regsub(%a,/\d\d?(\x2C\d\d?)?(?!\d)/g,,%a)
  ; Second pass: three further $regsub calls on %a.
  ; NOTE(review): the three patterns are identical as shown; presumably each one handled
  ; a different toggle code (bold, underline, reverse) before the control characters
  ; were lost - confirm.
  !.echo -q $&amp;
    $regsub(%a,/(^|(?:(?:(?:(?:[^]\d|[^\D])\d|\D)\x2C|[^\x2C])\d|[^\d\x2C]))(?=\d)|(^|(?:\d\d?(?:\x2C\d\d?)?)?)(?!\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)*[])(?=\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+)(?!\d)/g,\1,%a) $&amp;
    $regsub(%a,/(^|(?:(?:(?:(?:[^]\d|[^\D])\d|\D)\x2C|[^\x2C])\d|[^\d\x2C]))(?=\d)|(^|(?:\d\d?(?:\x2C\d\d?)?)?)(?!\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)*[])(?=\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+)(?!\d)/g,\1,%a) $&amp;
    $regsub(%a,/(^|(?:(?:(?:(?:[^]\d|[^\D])\d|\D)\x2C|[^\x2C])\d|[^\d\x2C]))(?=\d)|(^|(?:\d\d?(?:\x2C\d\d?)?)?)(?!\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)*[])(?=\d)|((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+)(?!\d)/g,\1,%a)
  ; If the result contains a digit, $remove is applied only to the part before the first
  ; digit and the remainder is appended unchanged.
  if $regex(%a,/(\d)/) { return $remove($left(%a,$calc($regml(1).pos - 1)),) $+ $mid(%a,$regml(1).pos) }
  ; No digit anywhere: $remove is applied to the whole string.
  return $remove(%a,)
}
What that extra $regsub does is check if the background colour is in one-digit form and if the text afterwards begins with a number. If both conditions are met, a 0 is added in front of the (one-digit) background colour number.
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top
#18240 - 06/04/03 07:19 AM Re: $regsub matching to remove duplicate color codes.
codemastr Offline
Hoopy frood

Registered: 12/12/02
Posts: 2809
Holy God! I think you've just managed to create the most complex regex ever used to complete such a simple task!

Top
#18241 - 06/04/03 07:50 AM Re: $regsub matching to remove duplicate color codes.
Chimera Offline
Ameglian cow

Registered: 13/12/02
Posts: 17
Loc: Australia
Can you add the ability for it to change the color code (for the text) if it matches $color(background) to $color(own) if there is either no background code set or the background code is also the same as $color(background), or has someone already done this separately?
_________________________
LUA scripting forum

Top
#18242 - 06/04/03 05:12 PM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
You simply don't know what this alias does exactly. The "simple" task it performs is explained here. If you think there's a simpler/shorter way, I'd be more than happy to see it.
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top
#18243 - 06/04/03 06:47 PM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
This is what I came up with:
Code:
; bg2own - returns the first parameter with every colour number that equals
; $color(background), or that value plus a multiple of 16 below 99, replaced by
; $color(own).dd. A number is only replaced when it is not followed by a comma and a
; digit, or when the number after the comma is itself one of the background values.
; NOTE(review): the sequence &lt; in this listing looks like an HTML escape of the
; less-than sign; decode it before loading the alias.
alias bg2own {
  ; %b is the background colour number, %i the first cycled equivalent, %a the token list.
  var %b = $color(background), %i = %b + 16, %a 
  ; Collect %b + 16, %b + 32 and so on into %a, separated by the pipe character
  ; (token separator 124), so the list can be used as a regex alternation.
  while %i &lt; 99 {
    %a = $addtok(%a,%i,124)
    inc %i 16
  }
  ; Put the background number itself in front of the alternation. For a one-digit number
  ; the zero-padded form is accepted too, and the bare digit only when no digit follows.
  %a = $iif(%b &lt; 10,(?:0 $+ $+(%b,|,%b,(?!\d)) $+ )| $+ %a,$+(%b,|,%a))
  ; Substitute and store the result back into %a, evaluated through a quiet echo.
  ; NOTE(review): the lookbehind at the start of the pattern is empty as shown; presumably
  ; it held the colour control character, lost when the listing was pasted - confirm.
  !.echo -q $regsub($1,/(?&lt;=)(?: $+ %a $+ )(?:(?!\x2C\d)|(?=\x2C(?: $+ %a $+ )))/g,$color(own).dd,%a)
  return %a
}
The loop is there so that the alias catches the background colour in the form
$color(background) + N*16
for N >= 0 (i.e. catch the cycling of colours). The performance wouldn't be affected in the majority of cases, since very few people use 'cycling' colours, I guess.

However, there is one little problem, which is hard to overcome with regular expressions. Consider this case: my $color(background) is white and $color(own) is black. In the following string
0,0one 4two 0,3three 00four
the alias will correctly replace the first "0" with "01", but will also replace the "00" (right before "four") with "01". This isn't necessary though, because the previous colour code has specified background "3" (green), and the green is preserved after the 4th colour code. Unfortunately, the regex cannot "look behind" in this way and see the previous bg colour.

I could try some tricks, similar to the ones applied in the $ddc alias, although I'm afraid it would become pretty ugly. I'll let you know if I find an acceptable way to solve this.

Of course, you could do it in pure scripting, parsing the entire string and storing stuff in vars etc, but I guess this is out of the question (it would get pretty slow).
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top
#18244 - 06/04/03 06:49 PM Re: $regsub matching to remove duplicate color codes.
codemastr Offline
Hoopy frood

Registered: 12/12/02
Posts: 2809
I do know exactly what it does, and I also know that rather than using a series of several regexes that each need to be compiled, matched, and replaced (slow), I'd simply go through the string sequentially and do the replacement char-by-char by hand.

Top
#18245 - 06/04/03 07:05 PM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
And you think that with a char-by-char loop, you could achieve the same thing? I think you don't realise the complexity of the problem (I'm pretty sure the lower bound is above O(n) for one!) and the fact that looping through chars in mirc is slower than calling a huge $regsub (where everything is done in C level).
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top
#18246 - 06/04/03 07:40 PM Re: $regsub matching to remove duplicate color codes.
codemastr Offline
Hoopy frood

Registered: 12/12/02
Posts: 2809
I take it you don't know anything about regex? The fastest regex library in existence (which mIRC does NOT use) has a complexity of O(M^2N) where M is the length of the regex and N is the length of the text to match. So, if the bound is O(N) per char, that would mean the complexity is O(N^2). If that lib were used, it would be slower, seeing as how the length of the regexes, for most strings, is going to be greater than the length of the actual text.

PCRE, which is what mIRC uses, does not have a deterministic mechanism, but for most of the regexes you have there, the complexity is O(N^2) but since you have more than one, that would make the complexity greater than a single O(N^2) because it would be approximately O(MN^2) where M is the number of regsub calls. Therefore the regex would be M times slower than doing a char-by-char comparison.

Top
#18247 - 06/04/03 09:30 PM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
What you said might be true but is totally beside the point here. When I was talking about complexity, I was referring to the scripting language*. My alias is 'constant-time' in the scripting language, it's a fixed number of $regsub calls. You must allow the freedom to consider 1 $regsub() call 'constant-time' here, because compared to the scripting language speed, $regsub CAN be considered instant and independent of the input or regex string length. This will always be faster, even if the underlying C code that does the job uses an algorithm as complex as O(N^3) for example.

Theoretically, if N was big enough (this means VERY big), $ddc() could get slower than a scripting loop, although it's really hard to tell when we don't even know what mirc does internally. But N will never be big enough for this to happen, first of all because of the ~950-char limit in mirc. Likewise, $ddc() could be slower than a scripting loop for small strings (fex a pure-scripting alias would terminate quickly for the string "abcde", perhaps quicker than $ddc(), which has to evaluate 13 $regsub's). But this is a price we are willing to pay: we are interested in making potentially very slow things (fex strings of 400 chars or higher) go fast, not make fast things (fex strings of 5 chars) go even faster. A scripter could even check $len($1) beforehand and use the appropriate method.

A char-by-char loop would be definitely faster than all those $regsub's IF it was written in C (fex a dll that does it). In scripting though, this is far from true.


* On second thought, I'm not sure that the lower bound for this problem is above O(N). What I'm sure of though, is that, even if one found an algorithm in mirc that's O(N), it would require a hell of a lot of constants (ie, in mirc terms, storing a lot of stuff in variables or a hash table and doing comparisons). And constants in mirc are quite expensive.
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top
#18248 - 10/04/03 06:02 AM Re: $regsub matching to remove duplicate color codes.
Tat Offline
Babel fish

Registered: 12/01/03
Posts: 87
The whole alias also doesn't just do the background or the
given colors. It might be possible to go through it char by char
recording the bold, reverse, underline, foreground color,
background color. But, regexs are pretty much instant.
Standard time complexity breaks down with scripting
languages because standardly the scripting language is
about two orders of magnitude slower. So, you can't
optimally write your own sorting routine, because you can put the
variables into a window and filter and be done much much
faster. In this case the same thing is true. I've run tests it
takes 10ms for about the longest most colorful string I could
find. I ran a blank loop 900 times (length of the string) it took
56ms. You can't do it faster than regexs. And the current
regex cover the zillion different cases. Although, I would like
to see a mIRC alias do the same thing. Other people might like
not wasting characters on extra color codes that aren't going
to affect the actual output.

Top
#18249 - 10/04/03 06:14 AM Re: $regsub matching to remove duplicate color codes.
Tat Offline
Babel fish

Registered: 12/01/03
Posts: 87
Cases are as follows:
Color Codes:
12,1312,1403Hi returns 03,14Hi
12Hi13Hello returns 12Hi13Hello
12Hello 12There 12You returns 12Hello There You
12,4051234Hey returns 05,041234Hey

Bold Codes:
Hello Hi returns Hello Hi
Hello returns Hello
Hello returns Hello

Underline and Reverse codes work the same as bold.
And all codes work fine interleaved with each other.

Any code, unneeded for the actual color display is removed.
In some cases redundant parts of display codes are moved.

In a standard colored bot log message, about 20
characters are removed. In certain codes about 60 are
removed. Depending on how the color was implemented.

Top
#18250 - 10/04/03 10:00 AM Re: $regsub matching to remove duplicate color codes.
Tat Offline
Babel fish

Registered: 12/01/03
Posts: 87
Not sure why, but somehow it had trouble killing double bolds.
It might be advisable to use the last three lines as they were
before they were changed.

Code:
    $regsub(%a,/((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+|\s)?/g,\1,%a) $&amp;
    $regsub(%a,/((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+|\s)?/g,\1,%a) $&amp;
    $regsub(%a,/((?:[]|(?:\d\d?(?:\x2C\d\d?)?)?)+|\s)?/g,\1,%a)

Top
#18251 - 10/04/03 10:47 AM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
Can you give me an example that breaks it?
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top
#18252 - 11/04/03 04:22 AM Re: $regsub matching to remove duplicate color codes.
qwerty Offline
Hoopy frood

Registered: 07/01/03
Posts: 2523
Nevermind, I see what you mean and you're right. It was a stupid mistake, one of those bugs that are born when you try to fix other bugs. Anyway, I fixed it and at the same time used a cleaner and more readable method to do it, so it will be easier to update if you find another bug :tongue:
Code:
; ddc - takes a line of text as its first parameter, runs it through a fixed series of
; $regsub passes that strip redundant (duplicate) colour codes, and returns the result.
; NOTE(review): the sequences &amp; and &lt; in this listing look like HTML escapes of the
; ampersand and the less-than sign; decode them before loading the alias.
; NOTE(review): several patterns contain empty character classes such as [^], and the
; $replacecs and $remove calls have empty arguments. The raw IRC control characters
; were presumably lost when this listing was pasted - TODO confirm against a working copy.
alias ddc {
  ; %a holds the working text. %b is a pattern template in which the capital letters
  ; B, U and R are placeholders; the last three $regsub calls pass it through $replacecs
  ; with the letters listed in a different order each time before using it.
  ; NOTE(review): presumably the placeholders stand for the bold, underline and reverse
  ; control characters, so that one template serves all three toggle codes - confirm.
  var %a, %b = /^()BB|()BB(?=[^,\d]|,\D)|(\D|(?:[^\d]|[^]\d)\d)BB(?=,\d)|((?:(?:(?:(?:[^]\d|[^\D])\d|\D),|[^,])\d|(?:\D|(?:[^\d]|[^]\d)\d),|[^\d,]))BB(?=\d)|B((?:[UR]|(?:\d\d?(?:,\d\d?)?)?)*(?:[UR]|\d\d(?:,\d\d)?))B(?=\d)|B((?:[UR]|(?:\d\d?(?:,\d\d?)?)?)*(?:[UR]|(?:\d\d?,\d\d?)?))B(?=,\d)|B((?:[UR]|(?:\d\d?(?:,\d\d?)?)?)+)B(?=[^,\d]|,\D)/g
  ; All passes run in one statement: each $regsub stores its result into %a, and the
  ; calls are given as arguments to a quiet echo only so that they are evaluated.
  ; The first call reads $1 and rebuilds each run of colour codes with $cr and $lf
  ; inserted between the captured parts. The second call inserts a 0 at every position
  ; preceded by CR plus comma and followed by one digit, LF, text without LF, LF and
  ; another digit, i.e. it zero-pads a one-digit background number when the text after it
  ; starts with a digit.
  !.echo -q $&amp;
    $regsub($1,/(?:\d\d?(\x2C\d\d?)?)+(?(1)|())(\d\d?)(\x2C\d\d?|)/g,$+(\2,$cr,\1,$lf,\3,$lf),%a) $&amp;
    $regsub(%a,/(?&lt;=\r\x2C)(?=\d\n[^\n]*\n\d)/g,0,%a) $&amp;
    $regsub(%a,/\r(?:\x2C\d\d?)?\n(\x2C\d\d?)\n|\r(\x2C\d\d?|)\n\n/g,\1,%a) $&amp;
    $regsub(%a,/(\d[^]*[^\d][^]*)(?=\D|$)/g,\1,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d\d)(\x2C\d\d))([^]+)\1\2?/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d\d)(\x2C\d))([^\d][^]*)\1\2?(?!\d)/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d)(\x2C\d\d))([^]+)\1\2?/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d)(\x2C\d))([^\d][^]*)\1\2?(?!\d)/g,\3,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d\d))(\x2C(?:[^\d][^]*)?|[^\x2C][^]*)\1/g,\2,%a) $&amp;
    $regsub(%a,/(?&lt;=(\d))(\x2C(?:[^\d][^]*)?|[^\x2C\d][^]*)\1(?!\d)/g,\2,%a) $&amp;
    $regsub(%a,/\d\d?(\x2C\d\d?)?(?!\d)/g,,%a) $&amp;
    $regsub(%a,$replacecs(%b,B,,U,,R,),\1,%a) $&amp;
    $regsub(%a,$replacecs(%b,U,,B,,R,),\1,%a) $&amp;
    $regsub(%a,$replacecs(%b,R,,B,,U,),\1,%a)
  ; If the result contains a digit, $remove is applied only to the part before the first
  ; digit and the remainder is appended unchanged.
  if $regex(%a,/(\d)/) { return $remove($left(%a,$calc($regml(1).pos - 1)),) $+ $mid(%a,$regml(1).pos) }
  ; No digit anywhere: $remove is applied to the whole string.
  return $remove(%a,)
}
_________________________
/.timerQ 1 0 echo /.timerQ 1 0 $timer(Q).com

Top