{$INCLUDE ..\cDefines.inc}
unit cUnicodeReader;

{                                                                              }
{                        Unicode Reader class v3.03                            }
{                                                                              }
{         This unit is copyright (c) 2002 by David Butler (david@e.co.za)      }
{                                                                              }
{                  This unit is part of Delphi Fundamentals.                   }
{                Its original file name is cUnicodeReader.pas                  }
{       The latest version is available from the Fundamentals home page        }
{                     http://fundementals.sourceforge.net/                     }
{                                                                              }
{                I invite you to use this unit, free of charge.                }
{        I invite you to distribute this unit, but it must be for free.        }
{             I also invite you to contribute to its development,              }
{             but do not distribute a modified copy of this file.              }
{                                                                              }
{          A forum is available on SourceForge for general discussion          }
{             http://sourceforge.net/forum/forum.php?forum_id=2117             }
{                                                                              }
{                                                                              }
{ Description:                                                                 }
{   Unicode reader class.                                                      }
{                                                                              }
{ Revision history:                                                            }
{   19/04/2002  0.01  Initial version                                          }
{   28/10/2002  3.02  Refactored for Fundamentals 3.                           }
{   29/10/2002  3.03  Bug fixes and improvements.                              }
{                                                                              }

interface

uses
  // Fundamentals
  cUtils,
  cReaders,
  cUnicodeChar,
  cUnicodeCodecs,
  cUnicode;



{                                                                              }
{ TUnicodeReader                                                               }
{                                                                              }
type
  // Buffered character reader layered on an AReader byte source.
  // Raw bytes are read in blocks and turned into WideChar data, either through
  // the supplied codec or, when no codec is assigned, by copying the bytes as
  // 16-bit characters. All Match/Locate/Read methods work on this buffer.
  TUnicodeReader = class
  protected
    // Underlying byte source.
    FReader      : AReader;
    // When True the destructor frees FReader.
    FReaderOwner : Boolean;
    // Position of FReader at construction time; Reset seeks back to it.
    FReaderPos   : Int64;
    // Optional decoder; nil means the input is copied as raw 16-bit characters.
    FCodec       : AUnicodeCodec;
    // When True the destructor frees FCodec.
    FCodecOwner  : Boolean;
    // Decoded characters waiting to be consumed.
    FBuffer      : WideString;
    // Zero-based index into FBuffer of the next unread character.
    FBufPos      : Integer;
    // Number of characters in FBuffer (kept equal to Length(FBuffer)).
    FBufLen      : Integer;
    // Staging block for bytes read from FReader that are not yet decoded.
    FRawBuf      : Pointer;
    // Number of bytes currently held in FRawBuf.
    FRawSize     : Integer;

    // Raises EUnicodeReaderReadError.
    procedure ReadError;
    // Tries to make at least Count unread characters available in FBuffer;
    // returns the number of unread characters it ended up with.
    function  BufferChars(const Count: Integer): Integer;
    // True if at least Count unread characters are (or can be made) available.
    function  GetBuffer(const Count: Integer): Boolean;

  public
    // Reader must be assigned. The owner flags decide whether Reader and Codec
    // are freed together with this object.
    constructor Create(const Reader: AReader; const ReaderOwner: Boolean = True;
                const Codec: AUnicodeCodec = nil; const CodecOwner: Boolean = True);
    destructor Destroy; override;

    property  Codec: AUnicodeCodec read FCodec;

    // Seeks the underlying reader back to its initial position and empties the
    // character buffer.
    procedure Reset;
    // True when no buffered characters remain and the underlying reader
    // reports EOF.
    function  EOF: Boolean;

    // Consumes Count characters; raises EUnicodeReaderReadError if fewer are
    // available. Count <= 0 does nothing.
    procedure Skip(const Count: Integer);
    // Consumes every leading character accepted by CharMatchFunc and returns
    // how many were consumed.
    function  SkipAll(const CharMatchFunc: WideCharMatchFunction): Integer;

    // Tests the next character; when it matches and Skip is True it is
    // consumed. Returns False when no character is available.
    function  MatchChar(const CharMatchFunc: WideCharMatchFunction;
              const Skip: Boolean): Boolean;
    function  MatchWideChar(const Ch: WideChar; const Skip: Boolean): Boolean;

    // Tests whether the upcoming characters match S (via WideMatchAnsiStr);
    // when they do and Skip is True, Length(S) characters are consumed.
    function  MatchAnsiStr(const S: String; const CaseSensitive: Boolean;
              const Skip: Boolean): Boolean;
    // As MatchAnsiStr, but the character following S must also be accepted by
    // Delimiter. Only S is consumed on a match, never the delimiter.
    function  MatchAnsiStrDelimited(const S: String; const CaseSensitive: Boolean;
              const Delimiter: WideCharMatchFunction;
              const Skip: Boolean): Boolean;

    // Count the leading characters that match, without consuming them.
    function  MatchChars(const CharMatchFunc: WideCharMatchFunction): Integer;
    function  MatchAnsiChars(const C: CharSet): Integer;

    // Return the offset (in characters, from the current position) of the
    // first match, or -1 when the input ends without a match. Nothing is
    // consumed.
    function  LocateAnsiChar(const C: CharSet): Integer;
    function  LocateAnsiStr(const S: String; const CaseSensitive: Boolean): Integer;

    // Return the next character without / with consuming it. Both raise
    // EUnicodeReaderReadError when no character is available.
    function  PeekChar: WideChar;
    function  ReadChar: WideChar;
    // Consumes one character and returns the one after it in Ch. Returns
    // False, with Ch set to #0, when no character follows.
    function  SkipAndPeek(var Ch: WideChar): Boolean;

    // Read and consume the leading run of matching characters.
    function  ReadChars(const CharMatchFunc: WideCharMatchFunction): WideString;
    function  ReadAnsiChars(const C: CharSet): String;

    // Read and consume the text preceding the delimiter; when no delimiter is
    // found the remaining buffered text is returned. With SkipDelimiter the
    // delimiter itself is consumed as well.
    function  ReadToAnsiChar(const C: CharSet;
              const SkipDelimiter: Boolean = False): WideString;
    function  ReadToAnsiStr(const S: String; const CaseSensitive: Boolean = True;
              const SkipDelimiter: Boolean = False): WideString;
  end;
  // Base class for errors raised by TUnicodeReader.
  EUnicodeReader = class(EUnicode);
  // Raised by TUnicodeReader.ReadError, i.e. when a required character is not
  // available.
  EUnicodeReaderReadError = class(EUnicodeReader);



{                                                                              }
{ TUnicodeMemoryReader                                                         }
{                                                                              }
type
  // TUnicodeReader over a block of memory. The constructor wraps Data/Size in
  // a TMemoryReader that this object owns.
  TUnicodeMemoryReader = class(TUnicodeReader)
  public
    constructor Create(const Data: Pointer; const Size: Integer;
                const Codec: AUnicodeCodec = nil; const CodecOwner: Boolean = True);
  end;



{                                                                              }
{ TUnicodeFileReader                                                           }
{                                                                              }
type
  // TUnicodeReader over a file. The constructor opens FileName with a
  // TFileReader that this object owns.
  TUnicodeFileReader = class(TUnicodeReader)
  public
    constructor Create(const FileName: String;
                const Codec: AUnicodeCodec = nil; const CodecOwner: Boolean = True);
  end;



implementation

uses
  // Delphi
  SysUtils;



{                                                                              }
{ TUnicodeReader                                                               }
{                                                                              }
const
  // Size in bytes of the raw staging buffer (FRawBuf); also the upper bound
  // on a single read from the underlying reader.
  ReaderBlockSize = 1024;
  // BufferChars compacts FBuffer (drops already consumed characters) once
  // FBufPos has reached this many characters.
  BufferPackLevel = ReaderBlockSize * 4;

// Creates a reader on top of Reader, optionally decoding through Codec.
//   Reader       byte source; must be assigned.
//   ReaderOwner  when True, Reader is freed with this object.
//   Codec        decoder, or nil to treat the input as raw 16-bit characters.
//   CodecOwner   when True, Codec is freed with this object.
// The current position of Reader is remembered so that Reset can return to it.
constructor TUnicodeReader.Create(const Reader: AReader; const ReaderOwner: Boolean;
    const Codec: AUnicodeCodec; const CodecOwner: Boolean);
begin
  inherited Create;
  // Record ownership before anything that can raise: when a constructor
  // raises, Destroy runs automatically and must be able to release the owned
  // codec as well as the owned reader.
  FCodec := Codec;
  FCodecOwner := CodecOwner;
  Assert(Assigned(Reader), 'Assigned(Reader)');
  FReader := Reader;
  FReaderOwner := ReaderOwner;
  FReaderPos := Reader.Position;
  GetMem(FRawBuf, ReaderBlockSize);
end;

// Releases the raw staging block, then the reader and the codec when this
// object owns them.
destructor TUnicodeReader.Destroy;
begin
  if FRawBuf <> nil then
    begin
      FreeMem(FRawBuf);
    end;
  if FReaderOwner then
    begin
      FreeAndNil(FReader);
    end;
  if FCodecOwner then
    begin
      FreeAndNil(FCodec);
    end;
  inherited Destroy;
end;

// Raises the reader's read-error exception; called when a required character
// is not available.
procedure TUnicodeReader.ReadError;
const
  ReadErrorMsg = 'Read error';
begin
  raise EUnicodeReaderReadError.Create(ReadErrorMsg);
end;

// Returns to the position the underlying reader had when this object was
// created and discards all buffered data.
procedure TUnicodeReader.Reset;
begin
  FReader.Position := FReaderPos;
  FBuffer := '';
  FBufPos := 0;
  FBufLen := 0;
  // Also drop raw bytes that were read but not yet decoded; otherwise they
  // would be prepended to the data read again from the start.
  FRawSize := 0;
end;

// True when the character buffer is exhausted and the underlying reader
// reports end of data.
function TUnicodeReader.EOF: Boolean;
begin
  if FBufPos >= FBufLen then
    Result := FReader.EOF
  else
    Result := False;
end;

// Tries to make at least Count unread characters available in FBuffer.
// Returns the number of unread characters now buffered; this is less than
// Count when the underlying reader delivers no more data.
// May compact FBuffer, so callers must re-derive any pointer into FBuffer
// (from FBufPos) after calling this.
function TUnicodeReader.BufferChars(const Count: Integer): Integer;
var I, J: Integer;
    P: PByte;
    S: WideString;
    Q: PWideChar;
begin
  // Check available characters
  Result := FBufLen - FBufPos;
  if Result >= Count then
    exit;
  // Pack buffer
  if FBufLen > 0 then
    if Result <= 0 then
      begin
        // Everything consumed: start over with an empty buffer
        FBuffer := '';
        FBufPos := 0;
        FBufLen := 0;
        Result := 0;
      end else
    if FBufPos >= BufferPackLevel then
      begin
        // Slide the unread tail to the front and drop the consumed part
        Move(FBuffer[FBufPos + 1], Pointer(FBuffer)^, Sizeof(WideChar) * Result);
        FBufPos := 0;
        SetLength(FBuffer, Result);
        FBufLen := Result;
      end;
  // Fill buffer
  Repeat
    // Append new bytes after any undecoded bytes left from the previous read
    P := FRawBuf;
    Inc(P, FRawSize);
    J := FReader.Read(P^, ReaderBlockSize - FRawSize);
    if J <= 0 then
      exit;
    Inc(FRawSize, J);
    if Assigned(FCodec) then
      begin
        // use codec to decode; I receives the number of bytes consumed
        if FBufLen = 0 then
          FCodec.DecodeStr(FRawBuf, FRawSize, FBuffer, I) else
          begin
            FCodec.DecodeStr(FRawBuf, FRawSize, S, I);
            FBuffer := FBuffer + S;
          end;
      end else
      begin
        // read raw 16-bit unicode; only whole characters are taken
        I := FRawSize div Sizeof(WideChar);
        SetLength(FBuffer, FBufLen + I);
        Q := Pointer(FBuffer);
        Inc(Q, FBufLen);
        I := I * Sizeof(WideChar);
        Move(FRawBuf^, Q^, I);
      end;
    FBufLen := Length(FBuffer);
    // Keep the unconsumed bytes for the next round. They start at offset I of
    // the raw buffer (not at the position the last read was appended to).
    if (I > 0) and (I < FRawSize) then
      begin
        P := FRawBuf;
        Inc(P, I);
        Move(P^, FRawBuf^, FRawSize - I);
      end;
    Dec(FRawSize, I);
    Result := FBufLen - FBufPos;
  Until Result >= Count;
end;

// Reports whether Count unread characters are available, reading more input
// through BufferChars only when the buffer does not already hold enough.
function TUnicodeReader.GetBuffer(const Count: Integer): Boolean;
begin
  if FBufLen - FBufPos >= Count then
    Result := True
  else
    Result := BufferChars(Count) >= Count;
end;

// Consumes Count characters. Raises a read error when fewer than Count
// characters are available; a non-positive Count is ignored.
procedure TUnicodeReader.Skip(const Count: Integer);
begin
  if Count > 0 then
    begin
      if not GetBuffer(Count) then
        ReadError;
      Inc(FBufPos, Count);
    end;
end;

// Consumes the leading run of characters accepted by CharMatchFunc and
// returns the number of characters consumed.
function TUnicodeReader.SkipAll(const CharMatchFunc: WideCharMatchFunction): Integer;
var Cur: PWideChar;
    Avail: Integer;
begin
  Result := 0;
  while True do
    begin
      // Make sure there is something to look at; stop at end of input
      Avail := BufferChars(1);
      if Avail <= 0 then
        exit;
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos);
      // Walk the characters that are buffered right now
      while Avail > 0 do
        begin
          if not CharMatchFunc(Cur^) then
            exit;
          Inc(Result);
          Inc(FBufPos);
          Inc(Cur);
          Dec(Avail);
        end;
    end;
end;

// Tests the next character with CharMatchFunc. A matching character is
// consumed when Skip is True. Returns False when no character is available.
function TUnicodeReader.MatchChar(const CharMatchFunc: WideCharMatchFunction; const Skip: Boolean): Boolean;
var Cur: PWideChar;
begin
  Result := False;
  if not GetBuffer(1) then
    exit;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := CharMatchFunc(Cur^);
  if Result and Skip then
    Inc(FBufPos);
end;

// Tests whether the next character equals Ch. A matching character is
// consumed when Skip is True. Returns False when no character is available.
function TUnicodeReader.MatchWideChar(const Ch: WideChar; const Skip: Boolean): Boolean;
var Cur: PWideChar;
begin
  Result := False;
  if not GetBuffer(1) then
    exit;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := Cur^ = Ch;
  if Result and Skip then
    Inc(FBufPos);
end;

// Tests whether the upcoming characters match S (compared by
// WideMatchAnsiStr). On a match with Skip set, Length(S) characters are
// consumed. Returns False when fewer than Length(S) characters are available.
function TUnicodeReader.MatchAnsiStr(const S: String; const CaseSensitive: Boolean; const Skip: Boolean): Boolean;
var Len: Integer;
    Cur: PWideChar;
begin
  Len := Length(S);
  Result := GetBuffer(Len);
  if not Result then
    exit;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := WideMatchAnsiStr(S, Cur, CaseSensitive);
  if Result and Skip then
    Inc(FBufPos, Len);
end;

// Like MatchAnsiStr, but the character right after S must additionally be
// accepted by Delimiter. Needs Length(S) + 1 available characters, otherwise
// returns False. On a match with Skip set only S is consumed; the delimiter
// stays in the buffer.
function TUnicodeReader.MatchAnsiStrDelimited(const S: String; const CaseSensitive: Boolean; const Delimiter: WideCharMatchFunction; const Skip: Boolean): Boolean;
var Len: Integer;
    Cur: PWideChar;
begin
  Len := Length(S);
  Result := GetBuffer(Len + 1);
  if not Result then
    exit;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  if WideMatchAnsiStr(S, Cur, CaseSensitive) then
    begin
      // Text matched; now look at the character that follows it
      Inc(Cur, Len);
      Result := Delimiter(Cur^);
      if Result and Skip then
        Inc(FBufPos, Len);
    end
  else
    Result := False;
end;

// Counts the leading characters accepted by CharMatchFunc without consuming
// them, buffering more input as long as every character seen so far matched.
function TUnicodeReader.MatchChars(const CharMatchFunc: WideCharMatchFunction): Integer;
var Cur: PWideChar;
    Avail: Integer;
begin
  Result := 0;
  while True do
    begin
      // Need at least one character beyond those already matched
      Avail := BufferChars(Result + 1);
      if Avail < Result + 1 then
        exit;
      // Re-derive the pointer: BufferChars may have moved the buffer
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result < Avail do
        begin
          if not CharMatchFunc(Cur^) then
            exit;
          Inc(Result);
          Inc(Cur);
        end;
    end;
end;

// Counts the leading characters whose code is <= $FF and which are members of
// C, without consuming them.
function TUnicodeReader.MatchAnsiChars(const C: CharSet): Integer;
var Cur: PWideChar;
    Avail: Integer;
begin
  Result := 0;
  while True do
    begin
      // Need at least one character beyond those already matched
      Avail := BufferChars(Result + 1);
      if Avail < Result + 1 then
        exit;
      // Re-derive the pointer: BufferChars may have moved the buffer
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result < Avail do
        begin
          if (Ord(Cur^) <= $FF) and (Char(Byte(Cur^)) in C) then
            begin
              Inc(Result);
              Inc(Cur);
            end
          else
            exit;
        end;
    end;
end;

// Searches forward for the first character that is a member of C.
// Returns its offset in characters from the current position, or -1 when the
// input ends first. Nothing is consumed.
function TUnicodeReader.LocateAnsiChar(const C: CharSet): Integer;
var Cur: PWideChar;
    Avail: Integer;
begin
  Result := 0;
  while True do
    begin
      // Need at least one character beyond those already examined
      Avail := BufferChars(Result + 1);
      if Avail < Result + 1 then
        begin
          Result := -1;
          exit;
        end;
      // Re-derive the pointer: BufferChars may have moved the buffer
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result < Avail do
        begin
          if (Ord(Cur^) <= $FF) and (Char(Byte(Cur^)) in C) then
            exit;
          Inc(Result);
          Inc(Cur);
        end;
    end;
end;

// Searches forward for the first occurrence of S (compared by
// WideMatchAnsiStr). Returns its offset in characters from the current
// position, or -1 when the input ends first. Nothing is consumed.
function TUnicodeReader.LocateAnsiStr(const S: String; const CaseSensitive: Boolean): Integer;
var Cur: PWideChar;
    Len, Avail, LastStart: Integer;
begin
  Result := 0;
  Len := Length(S);
  while True do
    begin
      // A candidate at offset Result needs Len characters behind it
      Avail := BufferChars(Result + Len);
      if Avail < Result + Len then
        begin
          Result := -1;
          exit;
        end;
      // Re-derive the pointer: BufferChars may have moved the buffer
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      // Last offset at which a full-length comparison still fits
      LastStart := Avail - Len;
      while Result <= LastStart do
        begin
          if WideMatchAnsiStr(S, Cur, CaseSensitive) then
            exit;
          Inc(Result);
          Inc(Cur);
        end;
    end;
end;

// Returns the next character without consuming it.
// Raises a read error when no character is available.
function TUnicodeReader.PeekChar: WideChar;
var Cur: PWideChar;
begin
  if not GetBuffer(1) then
    begin
      ReadError;
    end;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := Cur^;
end;

// Returns the next character and consumes it.
// Raises a read error when no character is available.
function TUnicodeReader.ReadChar: WideChar;
var Cur: PWideChar;
begin
  if not GetBuffer(1) then
    begin
      ReadError;
    end;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Inc(FBufPos);
  Result := Cur^;
end;

// Consumes the current character and returns the character after it in Ch
// without consuming that one. Returns False, with Ch set to #0, when no
// character follows; a single remaining character is still consumed.
function TUnicodeReader.SkipAndPeek(var Ch: WideChar): Boolean;
var Cur: PWideChar;
begin
  // Skip
  if FBufLen - FBufPos >= 2 then
    begin
      // Fast path: both characters are already buffered
      Result := True;
      Inc(FBufPos);
    end
  else
    begin
      Result := GetBuffer(2);
      if FBufPos < FBufLen then
        Inc(FBufPos);
    end;
  // Peek
  if not Result then
    begin
      Ch := WideChar(#0);
      exit;
    end;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Ch := Cur^;
end;

// Reads and consumes the leading run of characters accepted by
// CharMatchFunc. Returns an empty string when the next character does not
// match or no input is left.
function TUnicodeReader.ReadChars(const CharMatchFunc: WideCharMatchFunction): WideString;
var Src: PWideChar;
    Len: Integer;
begin
  Result := '';
  Len := MatchChars(CharMatchFunc);
  if Len = 0 then
    exit;
  SetLength(Result, Len);
  Src := Pointer(FBuffer);
  Inc(Src, FBufPos);
  Move(Src^, Pointer(Result)^, Sizeof(WideChar) * Len);
  Inc(FBufPos, Len);
end;

// Reads and consumes the leading run of characters that are members of C and
// returns them as a long string (converted by WideToLongString). Returns an
// empty string when the next character is not in C or no input is left.
function TUnicodeReader.ReadAnsiChars(const C: CharSet): String;
var P : PWideChar;
    L : Integer;
begin
  L := MatchAnsiChars(C);
  if L = 0 then
    Result := '' else
    begin
      // No SetLength here: the conversion result replaces Result anyway, so
      // pre-sizing it would only be a wasted allocation.
      P := Pointer(FBuffer);
      Inc(P, FBufPos);
      Result := WideToLongString(P, L);
      Inc(FBufPos, L);
    end;
end;

// Reads and consumes the text up to (not including) the first character that
// is a member of C. When no such character exists, the remaining buffered
// text is returned. With SkipDelimiter set, a delimiter that was found is
// consumed as well; nothing extra is skipped when none was found.
function TUnicodeReader.ReadToAnsiChar(const C: CharSet; const SkipDelimiter: Boolean): WideString;
var P: PWideChar;
    L: Integer;
    Found: Boolean;
begin
  L := LocateAnsiChar(C);
  // Remember the search outcome before L is reused as a length
  Found := L >= 0;
  if not Found then
    L := FBufLen - FBufPos;
  if L <= 0 then
    Result := '' else
    begin
      SetLength(Result, L);
      P := Pointer(FBuffer);
      Inc(P, FBufPos);
      Move(P^, Pointer(Result)^, Sizeof(WideChar) * L);
      Inc(FBufPos, L);
    end;
  if Found and SkipDelimiter then
    Inc(FBufPos);
end;

// Reads and consumes the text up to (not including) the first occurrence of
// S. When S does not occur, the remaining buffered text is returned. With
// SkipDelimiter set, an occurrence of S that was found is consumed as well;
// nothing extra is skipped when S was not found.
function TUnicodeReader.ReadToAnsiStr(const S: String; const CaseSensitive: Boolean;
    const SkipDelimiter: Boolean): WideString;
var P: PWideChar;
    L: Integer;
    Found: Boolean;
begin
  L := LocateAnsiStr(S, CaseSensitive);
  // Remember the search outcome before L is reused as a length
  Found := L >= 0;
  if not Found then
    L := FBufLen - FBufPos;
  if L <= 0 then
    Result := '' else
    begin
      SetLength(Result, L);
      P := Pointer(FBuffer);
      Inc(P, FBufPos);
      Move(P^, Pointer(Result)^, Sizeof(WideChar) * L);
      Inc(FBufPos, L);
    end;
  if Found and SkipDelimiter then
    Inc(FBufPos, Length(S));
end;



{                                                                              }
{ TUnicodeMemoryReader                                                         }
{                                                                              }
// Wraps the memory block Data/Size in a TMemoryReader and hands it to the
// base class as an owned reader.
constructor TUnicodeMemoryReader.Create(const Data: Pointer; const Size: Integer;
    const Codec: AUnicodeCodec; const CodecOwner: Boolean);
var Source: AReader;
begin
  Source := TMemoryReader.Create(Data, Size);
  inherited Create(Source, True, Codec, CodecOwner);
end;



{                                                                              }
{ TUnicodeFileReader                                                           }
{                                                                              }
// Opens FileName through a TFileReader and hands it to the base class as an
// owned reader.
constructor TUnicodeFileReader.Create(const FileName: String;
    const Codec: AUnicodeCodec; const CodecOwner: Boolean);
var Source: AReader;
begin
  Source := TFileReader.Create(FileName);
  inherited Create(Source, True, Codec, CodecOwner);
end;



end.

