Thursday, October 13, 2011

Reasons why ChessKISS 64 is slower than 32, part 2

 While debugging I've found this, quite interesting...

Search.pas.898: if board.LastPieceCaptured <> nil then //same value and index
0000000000615F3D 488B8338040000   mov rax,[rbx+$00000438]
0000000000615F44 90               nop
0000000000615F45 4885C0           test rax,rax
0000000000615F48 745D             jz $0000000000615fa7
Search.pas.900: recaptures[side] := board.LastPieceCaptured.GetMiddleValue + TMoveHelper.GetTo(aMove);
0000000000615F4A 488B8338040000   mov rax,[rbx+$00000438]

LastPieceCaptured is loaded into RAX and tested; the pointer is valid, so the code goes on to use it. But then the compiler loads the same value into the same register again. And what is the NOP instruction in between for?

Testing ChessKISS

 I'm making some changes to see whether I can improve the performance of the 64-bit version, but of course this needs to be tested, so here we go...


Arena tournament

Rank Engine Score Me Ch Bi Mi Ro Pr ZO Pu Ts S-B
1 Mediocre 14,0/16 · · 11 =1 11 01 1= 11 11 11 102,50
2 ChessKISS 11,5/16 00 · · =1 01 11 1= 1= 11 11 73,50
3 BigLion 9,0/16 =0 =0 · · 10 =1 10 11 =1 01 62,00
4 Micromax48_c2d 9,0/16 00 10 01 · · 10 11 01 11 01 60,50
5 Roce38 6,0/16 10 00 =0 01 · · =0 01 == 10 47,00
6 Predateur 2.0 6,0/16 0= 0= 01 00 =1 · · 0= 10 10 44,25
7 ZOIDBERG 6,0/16 00 0= 00 10 10 1= · · 01 10 40,25
8 Pulsar2009-9b 5,5/16 00 00 =0 00 == 01 10 · · 11 32,50
9 Tscp181 5,0/16 00 00 10 10 01 01 01 00 · · 36,00



Second after Mediocre, not bad at all; the engine seems to behave identically.

Wednesday, October 12, 2011

Reasons why ChessKISS 64 is slower than 32

Optimized assembler in 32 bits mode:

Board.pas.1234: data.EnPassant := nil;
005412BF 8B45C4           mov eax,[ebp-$3c]
005412C2 33D2             xor edx,edx
005412C4 899008020000     mov [eax+$00000208],edx
Board.pas.1237: data.LastPieceCaptured := nil;
005412CA 8B45C4           mov eax,[ebp-$3c]
005412CD 33D2             xor edx,edx
005412CF 899024020000     mov [eax+$00000224],edx
Board.pas.1238: data.LastPieceMoved := nil;
005412D5 8B45C4           mov eax,[ebp-$3c]
005412D8 33D2             xor edx,edx
005412DA 899020020000     mov [eax+$00000220],edx
Board.pas.1239: data.LastMove := aMove;
005412E0 8B45C4           mov eax,[ebp-$3c]
005412E3 89B828020000     mov [eax+$00000228],edi
Board.pas.1240: piece := nil;
005412E9 33F6             xor esi,esi

Optimized assembler in 64 bits mode:

Board.pas.1234: data.EnPassant := nil;
00000000005E423F 488B8518030000   mov rax,[rbp+$00000318]
00000000005E4246 48C7800804000000000000 mov qword ptr [rax+$00000408],$0000000000000000
Board.pas.1237: data.LastPieceCaptured := nil;
00000000005E4251 488B8518030000   mov rax,[rbp+$00000318]
00000000005E4258 48C7803004000000000000 mov qword ptr [rax+$00000430],$0000000000000000
Board.pas.1238: data.LastPieceMoved := nil;
00000000005E4263 488B8518030000   mov rax,[rbp+$00000318]
00000000005E426A 48C7802804000000000000 mov qword ptr [rax+$00000428],$0000000000000000
Board.pas.1239: data.LastMove := aMove;
00000000005E4275 488B8518030000   mov rax,[rbp+$00000318]
00000000005E427C 8B8D50030000     mov ecx,[rbp+$00000350]
00000000005E4282 898838040000     mov [rax+$00000438],ecx

Both versions suffer from the same issue: the same value is loaded into the same register over and over (the compiler keeps loading the data variable into EAX or RAX). How can this still be happening?

Thursday, October 6, 2011

Creating chaining tasks

 In the unit BB.Task there is a class called TTask that helps create tasks; it exposes this interface:

// ITask: the task interface exposed by BB.Task. It groups named string
// parameters, an attached object, continuations, messaging between tasks and
// run/stop/wait control.
// NOTE(review): no GUID is declared, so `as` casts and Supports() cannot be
// used with ITask — confirm this is intended.
ITask = interface
    // Named string parameters (name/value pairs); read back with GetParam.
    procedure AddParam(const aName, aValue: string);
    procedure AttachObject(aObject: TObject);
    function Available: boolean;
    procedure ClearParams;
    // Continuations: the overloads accept another task, an anonymous
    // procedure (TProc) or a TTaskEvent. The TProc/TTaskEvent overloads
    // return an ITask, which allows calls to be chained.
    procedure ContinueWith(aTask: ITask); overload;
    function ContinueWith(aProc: TProc): ITask; overload;
    function ContinueWith(aEvent: TTaskEvent): ITask; overload;
    // Accessors.
    function GetAttachedTask: ITask;
    function GetAttachedObject: TObject;
    function GetEvent: TTaskEvent;
    function GetProc: TProc;
    function GetExceptionObject: TObject;
    function GetExceptionEvent: TExceptionEvent;
    function GetLock: ILock;
    function GetName: string;
    function GetParam(const aName: string): string;
    function GetTerminateEvent: TNotifyEvent;
    function GetAttachedObjectBehaviour: TAttachedObjectBehaviour;
    // Execution control.
    procedure Run;
    procedure Stop;
    // Messaging: a message can be sent either as a TMessage or as plain text.
    procedure SendMessage(aTask: ITask; aMessage: TMessage); overload;
    procedure SendMessage(aTask: ITask; const aText: string); overload;
    procedure SetLock(aLock: ILock);
    procedure SetAttachedObjectBehaviour(aValue: TAttachedObjectBehaviour);
    procedure ReceiveMessage(aMessage: TMessage);
    procedure SetPriority(aValue: TThreadPriority);
    function Terminated: boolean;
    // NOTE(review): aTime is presumably a timeout in milliseconds and the
    // result presumably tells whether the task finished in time — confirm.
    function Wait(aTime: cardinal): boolean;
  end;

This makes it really easy to create tasks based on plain methods, other tasks or closures. Let's see an easy example:

unit Unit52;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, BB.Task;

type
  // Demo form. FormCreate wires the three private worker methods into a
  // chain of tasks.
  TForm51 = class(TForm)
    procedure FormCreate(Sender: TObject);
  private
    { Private declarations }
    procedure One;
    procedure Three;
    procedure Two;
  public
    { Public declarations }
  end;

var
  Form51: TForm51;

implementation

{$R *.dfm}

// Builds and starts the demo chain:
//   One -> Two -> anonymous procedure (Beep) -> FinalTask (Three).
procedure TForm51.FormCreate(Sender: TObject);
var
  FirstTask, FinalTask: ITask;
  AfterTwo, AfterBeep: ITask;
begin
  // The last link is created first so it can be appended at the end of the chain.
  FinalTask := TTask.Create(Three);
  FirstTask := TTask.Create(One);

  // Each ContinueWith returns the task it created; every link is kept in its
  // own variable so all references live until this method returns.
  AfterTwo := FirstTask.ContinueWith(Two);
  AfterBeep := AfterTwo.ContinueWith(
    procedure
    begin
      Beep;
    end);
  AfterBeep.ContinueWith(FinalTask);

  FirstTask.Run;
end;

// Body of the first task in the demo chain: pauses for one second.
procedure TForm51.One;
const
  PauseMs = 1000;
begin
  Sleep(PauseMs);
end;

// Body of the task that FormCreate appends last: pauses for three seconds.
procedure TForm51.Three;
const
  PauseMs = 3000;
begin
  Sleep(PauseMs);
end;

// Body of the first continuation in the demo chain: pauses for two seconds.
procedure TForm51.Two;
const
  PauseMs = 2000;
begin
  Sleep(PauseMs);
end;

end.

  We create task A, which executes method One. When One finishes, the chain continues with method Two, then with the closure, and finally the closure is followed by task B, which executes method Three. It is also possible to send messages and objects between different tasks.

Quite handy.

Monday, October 3, 2011

New class TCountdownEventEx for versions prior to XE2

I've discovered some new handy classes in XE2 and I said to myself why not implement them into prior versions via my own class?

// Countdown event for Delphi versions prior to XE2.
// The counter starts at the size passed to Create; Signal decrements it and
// sets the internal event when it reaches zero.
TCountdownEventEx = class
  private
    FCurrent: integer;  // remaining count
    FInitial: integer;  // size passed to the constructor, restored by Reset
    FEvent: TEventEx;   // event that Wait blocks on
  public
    constructor Create(aSize: integer);
    destructor Destroy; override;

    procedure Add(aSize: integer);
    function IsFinish: boolean;
    procedure Reset;
    procedure Signal;
    procedure Wait(aTimeout: cardinal);

    property Current: integer read FCurrent;
    property Initial: integer read FInitial;
  end;


{ TCountdownEventEx }

// Increases the remaining count by aSize.
// Raises Exception when the countdown has already reached zero (or below).
procedure TCountdownEventEx.Add(aSize: integer);
begin
  // Same test as IsFinish, written out inline.
  if FCurrent <= 0 then
    raise Exception.Create('Already signalled');

  TInterlockedEx.Add(FCurrent, aSize);
end;

// Creates the countdown with aSize as both the initial and the current count.
constructor TCountdownEventEx.Create(aSize: integer);
begin
  FEvent := TEventEx.Create;
  FInitial := aSize;

  // Reset clears the event and copies FInitial into FCurrent.
  Reset;
end;

// Releases the internal event, then runs the inherited destructor.
destructor TCountdownEventEx.Destroy;
begin
  FEvent.Free;
  inherited Destroy;
end;

// Returns the countdown to its initial state: resets the internal event and
// restores FCurrent to FInitial (the size given to the constructor).
// NOTE(review): the two steps are not performed as one atomic operation, so
// Reset presumably must not run concurrently with Signal/Add — confirm
// against callers.
procedure TCountdownEventEx.Reset;
begin
  FEvent.Reset;
  FCurrent := FInitial;
end;


// Decrements the remaining count by one and sets the internal event when the
// count reaches exactly zero.
procedure TCountdownEventEx.Signal;
begin
  // Decide on the value returned by the atomic decrement instead of reading
  // FCurrent again: a second, separate read can observe another thread's
  // decrement, so two threads could both see 0 (event set twice) or neither
  // could (event never set).
  if TInterlockedEx.Dec(FCurrent) = 0 then
    FEvent.Sign;
end;


// True once the remaining count has dropped to zero or below.
function TCountdownEventEx.IsFinish: boolean;
begin
  Result := not (FCurrent > 0);
end;

// Waits on the internal event, passing aTimeout straight through.
// Whatever FEvent.Wait reports is discarded, so callers cannot tell through
// this method whether the wait ended by signal or by timeout.
// NOTE(review): aTimeout is presumably in milliseconds (the example passes
// INFINITE) — confirm against TEventEx.
procedure TCountdownEventEx.Wait(aTimeout: cardinal);
begin
  FEvent.Wait(aTimeout);
end;

I've used the same example as in Delphi help and it seems to work...
program TCountdownEvent_example;

{$APPTYPE CONSOLE}

uses
  SysUtils,
  BB.Sync,
  Classes;

var
  RunwayFlag: TCountdownEventEx;

type
  // Worker thread used by the example program; it overrides Execute only.
  // NOTE(review): Execute is overridden in the private section, while TThread
  // declares it protected — consider matching that visibility.
  TThreadIgnite = class(TThread)
  private
    procedure Execute; override;
  end;

// Thread body: waits on the global RunwayFlag with an INFINITE timeout, then
// writes 'Ignited' to the console.
procedure TThreadIgnite.Execute;
begin
  RunwayFlag.Wait(INFINITE);
  Writeln('Ignited');
end;

var
  Thread: TThreadIgnite;

begin
  // Count down from 200 on the main thread while the worker thread waits for
  // the countdown to finish.
  RunwayFlag := TCountdownEventEx.Create(200);
  try
    Thread := TThreadIgnite.Create(True);
    try
      Thread.Start;

      while not RunwayFlag.IsFinish do
      begin
        RunwayFlag.Signal;
        Writeln(RunwayFlag.Current);
      end;

      // Join the worker before leaving: otherwise the process can end before
      // 'Ignited' is printed, and RunwayFlag could be released while the
      // thread is still using it.
      Thread.WaitFor;
    finally
      Thread.Free;
    end;
  finally
    RunwayFlag.Free;
  end;
end. { Put breakpoint here to see the console output. }

TInterlocked

 Delphi XE2 (if I'm not mistaken) has the TInterlocked helper functions out of the box, but prior to that there is only the Windows API; that's why I have my own version, TInterlockedEx:


{ TInterlockedEx }

// Atomic compare-and-swap on an integer.
// Stores aNewVal in aTarget only when aTarget currently equals aCurrentVal;
// returns True when the swap took place.
class function TInterlockedEx.CAS(var aTarget: integer; aCurrentVal, aNewVal: integer): boolean;
var
  Previous: integer;
begin
  {
  Equivalent logic, performed as a single atomic step:

  if aTarget = aCurrentVal then
  begin
    aTarget := aNewVal;
    result := true;
  end
  else
    result := false;
  }

  // InterlockedCompareExchange returns the value aTarget held before the call.
  Previous := InterlockedCompareExchange(aTarget, aNewVal, aCurrentVal);
  Result := Previous = aCurrentVal;
end;

// Atomically adds aCounter to aValue.
class procedure TInterlockedEx.Add(var aValue: integer; aCounter: integer);
{$IFDEF WIN32}
// Register calling convention for a class method: EAX = class reference,
// EDX = address of aValue, ECX = aCounter.
ASM
  // This is a procedure, so nothing is returned in EAX; the former
  // non-atomic re-read of [EDX] into EAX was dead code and has been dropped.
  LOCK  XADD [EDX], ECX
END;
{$ELSE}
begin
  // A private lock around a plain Inc is not atomic with respect to
  // InterlockedIncrement/InterlockedDecrement (used by Inc/Dec) running on the
  // same variable from another thread, so use the interlocked primitive here.
  InterlockedExchangeAdd(aValue, aCounter);
end;
{$ENDIF}

{$IFDEF WIN32}
// Pointer-sized compare-and-swap overloads. They are compiled for WIN32 only,
// where a pointer or object reference has the same size as an integer, so the
// operands can be reinterpreted as integers.

// Atomic compare-and-swap on an untyped pointer; True when the swap happened.
class function TInterlockedEx.CAS(var aTarget: pointer; aCurrentVal, aNewVal: pointer): boolean;
var
  Previous: integer;
begin
  Previous := InterlockedCompareExchange(integer(aTarget), integer(aNewVal), integer(aCurrentVal));
  Result := Previous = integer(aCurrentVal);
end;

// Atomic compare-and-swap on an object reference; True when the swap happened.
class function TInterlockedEx.CAS(var aTarget: TObject; aCurrentVal, aNewVal: TObject): boolean;
var
  Previous: integer;
begin
  Previous := InterlockedCompareExchange(integer(aTarget), integer(aNewVal), integer(aCurrentVal));
  Result := Previous = integer(aCurrentVal);
end;
{$ENDIF}

// Atomically decrements aValue by one and returns what InterlockedDecrement
// reports (the decremented value).
class function TInterlockedEx.Dec(var aValue: integer): integer;
begin
  Exit(InterlockedDecrement(aValue));
end;

// Increments a 64-bit value under _Lock and returns the new value.
// The int64 value is assigned to an integer result, exactly as before.
// NOTE(review): that assignment truncates (or range-checks) values outside
// the integer range — confirm callers only need the low 32 bits.
class function TInterlockedEx.Inc(var aValue: int64): integer;
var
  Locked: boolean;
begin
  // This is also called by _Lock.Create(), so _Lock may still be nil here.
  // Remember whether the lock was actually taken: re-testing _Lock in the
  // finally block could call Unlock without a matching Lock if _Lock was
  // assigned in between.
  Locked := _Lock <> nil;
  if Locked then
    _Lock.Lock;
  try
    System.Inc(aValue);
    result := aValue;
  finally
    if Locked then
      _Lock.Unlock;
  end;
end;

// Atomically stores aValB into aValA and returns what InterlockedExchange
// reports (the value aValA held before the call).
class function TInterlockedEx.Exchange(var aValA, aValB: integer): integer;
begin
  Exit(InterlockedExchange(aValA, aValB));
end;

// Atomically subtracts aCounter from aValue.
class procedure TInterlockedEx.Sub(var aValue: integer; aCounter: integer);
{$IFDEF WIN32}
// Register calling convention for a class method: EAX = class reference,
// EDX = address of aValue, ECX = aCounter.
ASM
  NEG   ECX //1-3 = 1+(-3)
  // This is a procedure, so nothing is returned in EAX; the former
  // non-atomic re-read of [EDX] into EAX was dead code and has been dropped.
  LOCK  XADD [EDX], ECX
END;
{$ELSE}
begin
  // A private lock around a plain Dec is not atomic with respect to
  // InterlockedIncrement/InterlockedDecrement (used by Inc/Dec) running on the
  // same variable from another thread, so add the negated amount with the
  // interlocked primitive instead.
  InterlockedExchangeAdd(aValue, -aCounter);
end;
{$ENDIF}

// Atomically increments aValue by one and returns what InterlockedIncrement
// reports (the incremented value).
class function TInterlockedEx.Inc(var aValue: integer): integer;
begin
  Exit(InterlockedIncrement(aValue));
end;

// Atomic compare-and-swap on a LongBool, performed on its integer
// representation; returns True when the swap took place.
class function TInterlockedEx.CAS(var aTarget: LongBool; aCurrentVal, aNewVal: LongBool): boolean;
var
  Previous: integer;
begin
  Previous := InterlockedCompareExchange(integer(aTarget), integer(aNewVal), integer(aCurrentVal));
  Result := Previous = integer(aCurrentVal);
end;

// Atomic compare-and-swap on a cardinal, performed on its integer
// representation; returns True when the swap took place.
class function TInterlockedEx.CAS(var aTarget: cardinal; aCurrentVal, aNewVal: cardinal): boolean;
var
  Previous: integer;
begin
  Previous := InterlockedCompareExchange(integer(aTarget), integer(aNewVal), integer(aCurrentVal));
  Result := Previous = integer(aCurrentVal);
end;

// Decrements a 64-bit value while holding _Lock and returns the new value.
// NOTE(review): the int64 value is assigned to an integer result, so values
// outside the integer range are truncated (or range-checked) — confirm that
// callers only need the low 32 bits.
// NOTE(review): unlike Inc(int64), this does not test _Lock for nil first —
// confirm it is never reached before _Lock is assigned.
class function TInterlockedEx.Dec(var aValue: int64): integer;
begin
  _Lock.Lock;
  try
    System.Dec(aValue);
    result := aValue;
  finally
    // Always release the lock, even if the decrement raises.
    _Lock.Unlock;
  end;
end;

It works for 32 bits (with optimized assembler) and also for 64 bits (not so optimized yet...)

Saturday, October 1, 2011