xidelbase.pas 147 KB
Newer Older
Benito van der Zander's avatar
Benito van der Zander committed
1 2 3 4
{
Copyright (C) 2012 - 2017 Benito van der Zander (BeniBela)
                          benito@benibela.de
                          www.benibela.de
5

Benito van der Zander's avatar
Benito van der Zander committed
6 7 8 9
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
10 11 12 13 14 15 16

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
Benito van der Zander's avatar
Benito van der Zander committed
17 18
along with this program.  If not, see <http://www.gnu.org/licenses/>.

19 20
}

21
unit xidelbase;
22 23 24 25 26 27 28

{$mode objfpc}{$H+}
{$modeswitch advancedrecords}

interface

uses
29
  Classes,         {$ifdef win32} windows, {$endif}
30
  extendedhtmlparser,  xquery, sysutils, bbutils, simplehtmltreeparser, multipagetemplate,
31
  internetaccess, contnrs, simplexmltreeparserfpdom, xquery_module_file, xquery_module_math,
Benito van der Zander's avatar
Benito van der Zander committed
32
  rcmdline,math
33 34 35 36
  ;

var cgimode: boolean = false;
    allowInternetAccess: boolean = true;
Benito van der Zander's avatar
Benito van der Zander committed
37
    allowFileAccess: boolean = true;
38
    xqueryDefaultCollation: string = '';
39
    mycmdline: TCommandLineReader;
40
    defaultUserAgent: string = 'Mozilla/5.0 (compatible; Xidel)';
41

42
    majorVersion: integer = 0;
43
    minorVersion: integer = 9;
44
    buildVersion: integer = 7;
45

46

47 48
type EXidelException = class(Exception);

49
var
50
    onPostParseCmdLine: procedure ();
51
    onPrepareInternet: function (const useragent, proxy: string; onReact: TTransferReactEvent): tinternetaccess;
52
    onRetrieve: function (const method, url, postdata, headers: string): string;
53
    onPreOutput: procedure (extractionKind: TExtractionKind);
54

55

56 57 58 59
procedure perform;

implementation

60
uses process, strutils, bigdecimalmath, xquery_json, xquery__regex, xquery_utf8 {$ifdef unix},termio{$endif};
61
//{$R xidelbase.res}
62

63 64
///////////////LCL IMPORT
//uses lazutf8;
65
{$ifdef windows}
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
function WinCPToUTF8(const s: string): string; {$ifdef WinCe}inline;{$endif}
// result has codepage CP_ACP
var
  UTF16WordCnt: SizeInt;
  UTF16Str: UnicodeString;
begin
  {$ifdef WinCE}
  Result := SysToUtf8(s);
  {$else}
  Result:=s;(*
  if IsASCII(Result) then begin
    {$ifdef FPC_HAS_CPSTRING}
    // prevent codepage conversion magic
    SetCodePage(RawByteString(Result), CP_ACP, False);
    {$endif}
    exit;
  end;        *)
  UTF16WordCnt:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), nil, 0);
  // this will null-terminate
  if UTF16WordCnt>0 then
  begin
    setlength(UTF16Str, UTF16WordCnt);
    MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), @UTF16Str[1], UTF16WordCnt);
    Result:=UTF8Encode(UTF16Str);
    {$ifdef FPC_HAS_CPSTRING}
    // prevent codepage conversion magic
    SetCodePage(system.RawByteString(Result), CP_ACP, False);
    {$endif}
  end;
  {$endif}
end;

function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
{$ifNdef WinCE}
var
  Dst: PChar;
{$endif}
begin
  {$ifdef WinCE}
  Result := SysToUTF8(s);
  {$else}
  Dst := AllocMem((Length(s) + 1) * SizeOf(Char));
  if OemToChar(PChar(s), Dst) then
    Result := StrPas(Dst)
  else
    Result := s;
  FreeMem(Dst);
  Result := WinCPToUTF8(Result);
  {$endif}
end;
(*
function UTF8ToConsole(const s: string): string;
{$ifNdef WinCE}
var
  Dst: PChar;
{$endif}
begin
  {$ifdef WinCE}
  Result := UTF8ToSys(s);
  {$else WinCE}
  {$ifndef NO_CP_RTL}
  Result := UTF8ToWinCP(s);
  {$else NO_CP_RTL}
  Result := UTF8ToSys(s); // Kept for compatibility
  {$endif NO_CP_RTL}
  Dst := AllocMem((Length(Result) + 1) * SizeOf(Char));
  if CharToOEM(PChar(Result), Dst) then
    Result := StrPas(Dst);
  FreeMem(Dst);
  {$ifndef NO_CP_RTL}
  SetCodePage(RawByteString(Result), CP_OEMCP, False);
  {$endif NO_CP_RTL}
  {$endif WinCE}
end;*)

141
{$endif}
142 143 144 145 146 147 148 149 150

function GetEnvironmentVariableUTF8(const EnvVar: string): String;
begin
  {$IFDEF FPC_RTL_UNICODE}
  Result:=UTF16ToUTF8(SysUtils.GetEnvironmentVariable(UTF8ToUTF16(EnvVar)));
  {$ELSE}
  // on Windows SysUtils.GetEnvironmentString returns OEM encoded string
  // so ConsoleToUTF8 function should be used!
  // RTL issue: http://bugs.freepascal.org/view.php?id=15233
151
  Result:={$ifdef windows}ConsoleToUTF8{$endif}(SysUtils.GetEnvironmentVariable({UTF8ToSys}(EnvVar)));
152 153 154 155 156 157
  {$ENDIF}
end;


/////////////////////////////////////////////////////

158
type TOutputFormat = (ofAdhoc, ofJsonWrapped, ofXMLWrapped, ofRawXML, ofRawHTML, ofBash, ofWindowsCmd);
159 160 161 162
     TColorOptions = (cAuto, cNever, cAlways, cJSON, cXML);
     TMyConsoleColors = (ccNormal, ccRedBold, ccGreenBold, ccBlueBold, ccPurpleBold, ccYellowBold, ccCyanBold,
                                   ccRed, ccGreen, ccBlue, ccPurple, ccYellow
      );
163 164
var //output options
    outputFormat: TOutputFormat;
165
    windowsCmdPercentageEscape: string;
166
    hasOutputEncoding: (oeAbsent,oeConvert,oePassRaw) = oeAbsent;
167 168
    outputHeader, outputFooter, outputSeparator: string;
    //outputArraySeparator: array[toutputformat] of string = ('',  ', ', '</e><e>', '', '', '', '');
169
    {$ifdef win32}systemEncodingIsUTF8: boolean = true;{$endif}
170 171 172
    colorizing: TColorOptions;

    lastConsoleColor: TMyConsoleColors = ccNormal;
173
    isStdinTTY: boolean = false;
174 175
    isStderrTTY: boolean = false;
    isStdoutTTY: boolean = false;
176

177
    internet: TInternetAccess;
178

179
type TInputFormat = (ifAuto, ifXML, ifHTML, ifXMLStrict, ifJSON, ifJSONStrict);
180

181 182
var
    globalDefaultInputFormat: TInputFormat;
183

184
type
185 186 187 188 189
IData = interface //data interface, so we do not have to care about memory managment
function rawData: string;
function baseUri: string;
function displayBaseUri: string;
function contenttype: string;
190
function headers: TStringList;
191 192 193 194
function recursionLevel: integer;
function inputFormat: TInputFormat;
end;

195 196
{ THtmlTemplateParserBreaker }

197
THtmlTemplateParserBreaker = class(THtmlTemplateParser)
198 199
  ignorenamespaces: boolean;

200 201 202
  procedure initParsingModel(const data: IData);
  procedure parseHTML(const data: IData);
  procedure parseHTMLSimple(const data: IData);
203
  procedure closeVariableLog;
204 205

  procedure parseDoc(sender: TXQueryEngine; html,uri,contenttype: string; var node: TTreeNode);
206 207
end;

208 209
 { TTemplateReaderBreaker }

210 211 212 213 214
 TTemplateReaderBreaker = class(TMultipageTemplateReader)
   constructor create();
   procedure setTemplate(atemplate: TMultiPageTemplate);
   procedure perform(actions: TStringArray);
   procedure selfLog(sender: TMultipageTemplateReader; logged: string; debugLevel: integer);
215

216 217
 end;

218

219 220 221 222 223
    //data processing classes
var htmlparser:THtmlTemplateParserBreaker;
    xpathparser: TXQueryEngine;
    multipage: TTemplateReaderBreaker;
    multipagetemp: TMultiPageTemplate;
224
    currentRoot: TTreeNode;
225

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
procedure setTerminalColor(err: boolean; color: TMyConsoleColors);
{$ifdef unix}
const colorCodes: array[TMyConsoleColors] of string = (
   #27'[0m', #27'[1;31m', #27'[1;32m', #27'[1;34m', #27'[1;35m', #27'[1;33m', #27'[1;36m',
             #27'[0;31m', #27'[0;32m', #27'[0;34m', #27'[0;35m', #27'[0;33m'
   );
var
  f: TextFile;
{$endif}
{$ifdef windows}
const colorCodes: array[TMyConsoleColors] of integer = (
   FOREGROUND_RED or FOREGROUND_GREEN or FOREGROUND_BLUE,
     FOREGROUND_RED or FOREGROUND_INTENSITY, FOREGROUND_GREEN or FOREGROUND_INTENSITY, FOREGROUND_BLUE or FOREGROUND_INTENSITY, FOREGROUND_RED or FOREGROUND_BLUE or FOREGROUND_INTENSITY, FOREGROUND_RED or FOREGROUND_GREEN or FOREGROUND_INTENSITY, FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_INTENSITY,
     FOREGROUND_RED, FOREGROUND_GREEN, FOREGROUND_BLUE, FOREGROUND_RED or FOREGROUND_BLUE, FOREGROUND_RED or FOREGROUND_GREEN
   );
var handle: Integer;
{$endif}
begin
  if err and not isStderrTTY then exit;
  if not err and not isStdoutTTY then exit;
  if color <> lastConsoleColor then begin
    if err then Flush(stderr) else flush(StdOut);
    {$ifdef unix}
    if err then f := stderr else f := stdout;
    write(f, colorCodes[color]);
    {$endif}
    {$ifdef windows}
    if err then handle := StdErrorHandle else handle := StdOutputHandle;
    SetConsoleTextAttribute(handle, colorCodes[color]);
    {$endif}
    lastConsoleColor := color;
  end;
end;

260
procedure w(const s: string);
261
{$ifdef win32}
262 263
var
  temp, temp2: String;
264
{$endif}
265 266
begin
  if s = '' then exit;
267 268 269 270
  {$IFDEF FPC_HAS_CPSTRING}
  write(s);
  {$ELSE}
  fpc 3 is required now
271 272 273 274 275 276 277
  if (outputEncoding = eUTF8) or (outputEncoding = eUnknown) then write(s)
  {$ifdef win32}
  else if outputEncoding = eUnknownUser1 then begin
    if systemEncodingIsUTF8 then temp := s
    else temp := Utf8ToAnsi(s);
    SetLength(temp2, length(temp)+1);
    if charToOEM(pchar(temp), pchar(temp2)) then
278
      write(pchar(temp2));
279 280 281
  end
  {$endif}
  else write(strConvertFromUtf8(s, outputEncoding));
282
  {$ENDIF}
283 284 285 286 287 288 289 290
end;

procedure wln(const s: string = '');
begin
  w(s);
  w(LineEnding);
end;

291
var stacklen: SizeInt;
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
    stack: TLongintArray;

procedure wcolor(const s: string; color: TColorOptions);
const JSON_COLOR_OBJECT_PAREN: TMyConsoleColors = ccYellowBold;
      JSON_COLOR_OBJECT_KEY: TMyConsoleColors = ccPurpleBold;
      JSON_COLOR_ARRAY_PAREN: TMyConsoleColors = ccGreenBold;
{$ifdef windows}
      JSON_COLOR_STRING: TMyConsoleColors = ccCyanBold; //green is ugly on windows
{$else}
      JSON_COLOR_STRING: TMyConsoleColors = ccGreen;
{$endif}

      JSON_STATE_ARRAY = 1;
      JSON_STATE_OBJECTVALUE = 2;
      JSON_STATE_OBJECTKEY = 3;

      XML_COLOR_COMMENT: TMyConsoleColors = ccBlue;
      XML_COLOR_TAG: TMyConsoleColors = ccYellowBold;
      XML_COLOR_ATTRIB_NAME: TMyConsoleColors = ccPurpleBold;
      XML_COLOR_ATTRIB_VALUE: TMyConsoleColors = ccGreenBold;


var pos, lastpos: integer;

  procedure colorChange(c: TMyConsoleColors);
  begin
    w(copy(s, lastpos, pos - lastpos));
    setTerminalColor(false, c);
    lastpos:=pos;
  end;

var    quote: Char;
    scriptSpecialCase: Boolean;
begin
  case color of
    cJSON: begin
      if stacklen = 0 then arrayAddFast(stack, stacklen, 0);
      pos := 1;
      lastpos := 1;

      while pos <= length(s) do begin
        case s[pos] of
          '{', '}': begin
            if s[pos] = '{' then arrayAddFast(stack, stacklen, JSON_STATE_OBJECTKEY)
            else if stacklen > 1 then dec(stacklen);
            colorChange(JSON_COLOR_OBJECT_PAREN);
            inc(pos);
            colorChange(ccNormal);
          end;
          '[', ']': begin
            if s[pos] = '[' then arrayAddFast(stack, stacklen, JSON_STATE_ARRAY)
            else if stacklen > 1 then dec(stacklen);
            colorChange(JSON_COLOR_ARRAY_PAREN);
            inc(pos);
            colorChange(ccNormal);
          end;
          ',', ':': begin
            case stack[stacklen-1] of
              JSON_STATE_OBJECTKEY, JSON_STATE_OBJECTVALUE: begin
                colorChange(JSON_COLOR_OBJECT_PAREN);
                if s[pos] = ',' then stack[stacklen-1] := JSON_STATE_OBJECTKEY
                else stack[stacklen-1] := JSON_STATE_OBJECTVALUE;
              end;
              JSON_STATE_ARRAY: colorChange(JSON_COLOR_ARRAY_PAREN);
            end;
            inc(pos);
            colorChange(ccNormal);
          end;
          '"': begin
            case stack[stacklen-1] of
              JSON_STATE_OBJECTKEY: colorChange(JSON_COLOR_OBJECT_KEY);
              else colorChange(JSON_COLOR_STRING);
            end;
            inc(pos);
            while (pos <= length(s)) and (s[pos] <> '"') do begin
              if s[pos] = '\' then inc(pos);
              inc(pos);
            end;
            inc(pos);
          end
          else inc(pos);
        end;
      end;
      colorChange(ccNormal)
    end;
    cXML: begin
      pos := 1;
      lastpos := 1;
      scriptSpecialCase := false;
      while pos <= length(s) do begin
        case s[pos] of
          '<': if scriptSpecialCase and not striBeginsWith(@s[pos], '</script') then inc(pos)
          else begin
            colorChange(XML_COLOR_TAG);
            if (pos + 1) <= length(s) then begin
              case s[pos+1] of
                '/', '?': inc(pos,2);
                '!': if strBeginsWith(@s[pos], '<!--') then begin
                  colorChange(XML_COLOR_COMMENT);
                  inc(pos,3);
                  while (pos + 3 <= length(s)) and ((s[pos] <> '-') or (s[pos+1] <> '-')or (s[pos+2] <> '>')) do inc(pos);
                  inc(pos);
                  continue;
                end;
              end;
            end;
            scriptSpecialCase := striBeginsWith(@s[pos], '<script');
            while (pos <= length(s)) and not (s[pos] in ['>','/','?',#0..#32]) do inc(pos);
            while (pos <= length(s)) do begin
              case s[pos] of
                '>','/','?': begin
                  colorChange(XML_COLOR_TAG);
                  if s[pos] <> '>' then inc(pos);
                  break;
                end;
                 #0..#32: ;
                 else begin
                   colorChange(XML_COLOR_ATTRIB_NAME);
                   while (pos <= length(s)) and not (s[pos] in ['=','/','>']) do inc(pos);
                   colorChange(XML_COLOR_TAG);
                   if s[pos] <> '=' then break;
                   inc(pos);
                   while (pos <= length(s)) and (s[pos] in [#0..#32]) do inc(pos);
                   colorChange(XML_COLOR_ATTRIB_VALUE);
                   if (pos <= length(s)) then
                     case s[pos] of
                       '''', '"': begin
                         quote := s[pos];
                         inc(pos);
                         while (pos <= length(s)) and (s[pos] <> quote) do inc(pos);
                         inc(pos);
                       end;
                       else while (pos <= length(s)) and not (s[pos] in [#0..#32]) do inc(pos);
                     end;
                   continue;
                 end;
              end;
              inc(pos);
            end;
            inc(pos);
            colorChange(ccNormal);
          end;
          else inc(pos);
        end;
      end;
      colorChange(ccNormal)
    end;
    else w(s);
  end;
end;

443 444
var firstItem: boolean = true;

445
procedure writeItem(const s: string; color: TColorOptions = cNever);
446 447 448 449
begin
  if not firstItem then begin
    w(outputSeparator);
  end;
450
  wcolor(s, color);
451 452 453
  firstItem := false;
end;

454
procedure writeVarName(const s: string; color: TColorOptions = cNever);
455
begin
456
  writeItem(s, color);
457 458 459 460 461 462 463 464 465
  firstItem := true; //prevent another line break / separator
end;

var firstGroup: boolean = true;

procedure writeBeginGroup;
begin
  case outputFormat of
    ofXMLWrapped: begin
466
      wcolor('<e>', cXML);
467
    end;
468
    ofJsonWrapped: if not firstGroup then wcolor(', ' + LineEnding, cJSON);
469 470 471 472 473 474 475
  end;
  firstGroup := false;
end;

procedure writeEndGroup;
begin
  case outputFormat of
476 477 478
    ofXMLWrapped: begin
      wcolor('</e>' + LineEnding, cXML);
    end;
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
  end;
end;

{procedure printBeginValueGroup;
begin

end;

procedure printBeginValue(varname: string);
begin
  case outputFormat of
    ofXMLWrapped: w('<e>');
  end;
  firstValue := false;
end;

procedure printInnerValueSeparator;
begin
  if not firstValue then begin
    w(outputSeparator);
    //w(outputArraySeparator[outputFormat]);
  end;
  firstValue := false;
end;

procedure printEndValue;
begin
end;

procedure printEndValueGroup;
begin

end;                             }


function joined(s: array of string): string; //for command line help
515 516 517 518 519 520 521 522
var
  i: Integer;
begin
  if length(s) = 0 then exit('');
  result := s[0];
  for i:=1 to high(s) do result := result + LineEnding + s[i];
end;

523 524
function strLoadFromFileChecked(const fn: string): string;
begin
525
  result := strLoadFromFileUTF8(fn);
526
  if strBeginsWith(result, '#!') then result := strAfter(result, #10);
527
  if Result = '' then raise EXidelException.Create('File '+fn+' is empty.');
528 529 530 531 532 533 534 535 536 537 538 539 540
end;

function strReadFromStdin: string;
var s:string;
begin
  result:='';
  while not EOF(Input) do begin
    ReadLn(s);
    result+=s+LineEnding;
  end;
end;


541
function setTextEncoding(var t: TextFile; e: string): integer;
542
var
543
  codepage: Integer;
544
  str: String;
545
begin
546 547
  codepage := strEncodingFromName(e);
  if codepage = CP_NONE then begin
548
    if striEqual(e, 'input') then codepage := -1
549 550
    else writeln(stderr, 'Unknown encoding: ',e)
  end;
551 552 553 554 555 556 557 558 559 560
  result := codepage;
  if codepage <> -1 then
    SetTextCodePage(t, codepage);
end;

procedure setOutputEncoding(e: string);
var
  codepage: Integer;
begin
  codepage := setTextEncoding(output, e);
561
  if codepage <> -1 then begin
562
    hasOutputEncoding := oeConvert;
563 564
    //SetTextCodePage(StdErr, codepage);
  end else begin
565
    hasOutputEncoding := oePassRaw;
566 567
    SetTextCodePage(Output, CP_ACP); //all our strings claim to be ACP (=UTF8) so there should be no conversion?
    //SetTextCodePage(StdErr, CP_ACP);
568 569 570
  end;
end;

571 572

type
573 574 575

  { TOptionReaderWrapper }

576 577 578 579 580
  TOptionReaderWrapper = class
    function read(const name: string; out value: string): boolean; virtual; abstract;
    function read(const name: string; out value: integer): boolean; virtual; abstract;
    function read(const name: string; out value: boolean): boolean; virtual; abstract;
    function read(const name: string; out value: Extended): boolean; virtual; abstract;
581
    function read(const name: string; out value: IXQValue): boolean; virtual;
582
    function read(const name: string; out inputformat: TInputFormat): boolean; virtual;
583
  end;
584

585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
  { TOptionReaderFromCommandLine }

  TOptionReaderFromCommandLine = class(TOptionReaderWrapper)
    constructor create(cmdLine: TCommandLineReader);
    function read(const name: string; out value: string): boolean; override;
    function read(const name: string; out value: integer): boolean; override;
    function read(const name: string; out value: boolean): boolean; override;
    function read(const name: string; out value: Extended): boolean; override;
  private
    acmdLine: TCommandLineReader;
  end;

  { TOptionReaderFromObject }

  TOptionReaderFromObject = class(TOptionReaderWrapper)
    constructor create(aobj: TXQValueObject);
    function read(const name: string; out value: string): boolean; override;
    function read(const name: string; out value: integer): boolean; override;
    function read(const name: string; out value: boolean): boolean; override;
    function read(const name: string; out value: Extended): boolean; override;
605
    function read(const name: string; out value: IXQValue): boolean; override;
606 607 608 609 610 611 612 613
  private
    obj: TXQValueObject;
  end;

type

{ TData }

614 615 616
{ TDataObject }

TDataObject = class(TInterfacedObject, IData)
617 618 619 620
{private todo: optimize
  fparsed: TTreeDocument;
  function GetParsed: TTreeDocument;
public}
621 622
private
  frawdata: string;
623
  fbaseurl, fdisplaybaseurl: string;
624
  fcontenttype: string;
625
  frecursionLevel: integer;
626
  finputformat: TInputFormat;
627
  fheaders: TStringList;
628 629
public
  function rawData: string;
630 631
  function baseUri: string;
  function displayBaseUri: string;
632
  function contentType: string;
633
  function headers: TStringList;
634
  function recursionLevel: integer;
635
  function inputFormat: TInputFormat;
636
  constructor create(somedata: string; aurl: string; acontenttype: string = '');
637
  destructor Destroy; override;
638 639 640 641 642 643 644 645 646
  //property parsed:TTreeDocument read GetParsed;
end;

TDataProcessing = class;
TProcessingContext = class;

{ TFollowTo }

TFollowTo = class
647
  nextAction: integer; //the next action after the action yielding the data, so an action does not process its own follows
648
  inputFormat: TInputFormat;
649
  class function createFromRetrievalAddress(data: string): TFollowTo;
650

651
  function clone: TFollowTo; virtual; abstract;
652
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; virtual; abstract;
653 654
  procedure replaceVariables; virtual;
  function equalTo(ft: TFollowTo): boolean; virtual; abstract;
Benito van der Zander's avatar
Benito van der Zander committed
655
  procedure readOptions(reader: TOptionReaderWrapper); virtual;
656
  procedure assign(other: TFollowTo); virtual;
657 658 659 660 661
end;

{ THTTPRequest }

THTTPRequest = class(TFollowTo)
662 663 664
private
  variablesReplaced: boolean;
public
665 666 667
  url: string;
  method: string;
  data: string;
668
  header: string;
669
  multipart: string;
670
  rawURL: boolean;
671 672
  constructor create(aurl: string);
  function clone: TFollowTo; override;
673
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
674 675
  procedure replaceVariables; override;
  function equalTo(ft: TFollowTo): boolean; override;
Benito van der Zander's avatar
Benito van der Zander committed
676
  procedure readOptions(reader: TOptionReaderWrapper); override;
677 678 679 680 681 682 683 684
end;

{ TFileRequest }

TFileRequest = class(TFollowTo)
  url: string;
  constructor create(aurl: string);
  function clone: TFollowTo; override;
685
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
686 687 688 689 690 691 692 693 694 695
  procedure replaceVariables; override;
  function equalTo(ft: TFollowTo): boolean; override;
end;

{ TDirectDataRequest }

TDirectDataRequest = class(TFollowTo)
  data: string;
  constructor create(adata: string);
  function clone: TFollowTo; override;
696
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
697 698 699 700 701 702 703 704
  function equalTo(ft: TFollowTo): boolean; override;
  //procedure replaceVariables;  do not replace vars in direct data
end;

{ TStdinDataRequest }

TStdinDataRequest = class(TFollowTo)
  function clone: TFollowTo; override;
705
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
706 707 708
  function equalTo(ft: TFollowTo): boolean; override;
end;

709 710 711
{ TFollowToProcessedData }

TFollowToProcessedData = class(TFollowTo)
712 713
  data: IData;
  constructor create(d: IData);
714
  function clone: TFollowTo; override;
715 716 717 718 719 720 721 722 723 724
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
  function equalTo(ft: TFollowTo): boolean; override;
end;

TFollowToXQVObject = class(TFollowTo)
  v: IXQValue;
  basedata: IData;
  constructor create(const abasedata: IData; const av: IXQValue);
  function clone: TFollowTo; override;
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
725 726 727
  function equalTo(ft: TFollowTo): boolean; override;
end;

728 729 730 731 732 733 734 735 736 737 738 739
{TFollowXQV = class(TFollowTo)
  xqv: TXQValue;
  //can be url/http-request, file(?), data
  //object with arbitrary options
  //sequence of previous
end;}


{ TFollowToList }

TFollowToList = class(TFpObjectList)
  constructor Create;
740
  procedure merge(l: TFollowToList; nextAction: integer = 0);
741 742 743
  function first: TFollowTo;

  procedure add(ft: TFollowTo);
744
  procedure merge(dest: IXQValue; basedata: IData; parent: TProcessingContext);
745 746 747

  function containsEqual(ft: TFollowTo): boolean;
private
748 749
  procedure addBasicUrl(absurl: string; baseurl: string; inputFormat: TInputFormat);
  procedure addObject(absurl: string; baseurl: string; options: TXQValueObject; fallBackInputFormat: TInputFormat);
750 751 752 753 754 755 756 757
end;



{ TDataProcessing }

TDataProcessing = class
  parent: TProcessingContext;
758
  function process(data: IData): TFollowToList; virtual; abstract;
759 760 761 762 763

  procedure readOptions(reader: TOptionReaderWrapper); virtual;
  procedure initFromCommandLine(cmdLine: TCommandLineReader); virtual;
  procedure mergeWithObject(obj: TXQValueObject); virtual;

764
  function clone(newparent: TProcessingContext): TDataProcessing; virtual; abstract;
765 766 767 768 769 770 771
end;


{ TDownload }

TDownload = class(TDataProcessing)
  downloadTarget: string;
772
  function process(data: IData): TFollowToList; override;
773
  procedure readOptions(reader: TOptionReaderWrapper); override;
774
  function clone(newparent: TProcessingContext): TDataProcessing; override;
775
end;
776

777
{ TExtraction }
778
TExtraction = class(TDataProcessing)
779
 extract: string;
780
 extractQueryCache: IXQuery;
781
 extractExclude, extractInclude: TStringArray;
782
 extractKind: TExtractionKind;
783

784 785
 templateActions: TStringArray;

786 787
 defaultName: string;
 printVariables: set of (pvLog, pvCondensedLog, pvFinal);
788 789
 printTypeAnnotations,  hideVariableNames: boolean;
 printedNodeFormat: TTreeNodeSerialization;
790
 printedJSONFormat: (jisDefault, jisPretty, jisCompact);
791

792 793
 inputFormat: TInputFormat;

794
 constructor create;
795

796
 procedure readOptions(reader: TOptionReaderWrapper); override;
797 798 799

 procedure setVariables(v: string);

800
 procedure printExtractedValue(value: IXQValue; invariable: boolean);
801
 procedure printCmdlineVariable(const name: string; const value: IXQValue);
802
 procedure printExtractedVariables(vars: TXQVariableChangeLog; state: string; showDefaultVariable: boolean);
803
 procedure printExtractedVariables(parser: THtmlTemplateParser; showDefaultVariableOverride: boolean);
804

805
 function process(data: IData): TFollowToList; override;
806 807

 procedure assignOptions(other: TExtraction);
808
 function clone(newparent: TProcessingContext): TDataProcessing; override;
809 810
private
 currentFollowList: TFollowToList;
811
 currentData: IData;
812
 procedure pageProcessed(unused: TMultipageTemplateReader; parser: THtmlTemplateParser);
813 814
end;

815

816 817 818 819
{ TFollowToWrapper }

TFollowToWrapper = class(TDataProcessing)
  followTo: TFollowTo;
Benito van der Zander's avatar
Benito van der Zander committed
820
  procedure readOptions(reader: TOptionReaderWrapper); override;
821
  function process(data: IData): TFollowToList; override;
822
  function clone(newparent: TProcessingContext): TDataProcessing; override;
823
  destructor Destroy; override;
824 825 826 827
end;

{ TProcessingContext }

828 829 830 831 832 833
//Processing is done in processing contexts
//A processing context can have its own data sources (TFollowTo or data sources of a nested processing context) or receive the data from its parent
//To every data source actions are applied (e.g. tdownload or textraction). These actions can also yield new data sources (e.g. follow := assignments or nested processing contexts with yieldDataToParent)
//The expression in follow is evaluated and the resulting data processed in the context followTo
//Then processing continues in nextSibling
//Remaining unprocessed data is passed to the parent
834
TProcessingContext = class(TDataProcessing)
835 836
  dataSources: array of TDataProcessing; //data sources, e.g. a list of URLs
  actions: array of TDataProcessing;     //actions e.g. a download target
837 838

  follow: string;
839
  followKind: TExtractionKind;
840
  followQueryCache: IXQuery;
841
  followExclude, followInclude: TStringArray;
842
  followTo: TProcessingContext;
843
  followMaxLevel: integer;
844
  followInputFormat: TInputFormat;
845 846

  nextSibling: TProcessingContext;
847 848 849 850

  wait: Extended;
  userAgent: string;
  proxy: string;
851
  printReceivedHeaders: boolean;
852
  errorHandling: string;
853
  loadCookies, saveCookies: string;
854

Benito van der Zander's avatar
Benito van der Zander committed
855
  silent, printPostData: boolean;
856

857
  ignoreNamespace: boolean;
Benito van der Zander's avatar
Benito van der Zander committed
858
  compatibilityNoExtendedStrings,compatibilityNoJSON, compatibilityNoJSONliterals, compatibilityOnlyJSONObjects, compatibilityNoExtendedJson, compatibilityStrictTypeChecking, compatibilityStrictNamespaces: boolean;
859
  compatibilityDotNotation: TXQPropertyDotNotation;
860
  noOptimizations: boolean;
861

862 863
  yieldDataToParent: boolean;

864 865 866 867 868
  procedure printStatus(s: string);

  procedure readOptions(reader: TOptionReaderWrapper); override;
  procedure mergeWithObject(obj: TXQValueObject); override;

869
  procedure addNewDataSource(source: TDataProcessing);
870
  procedure readNewDataSource(data: TFollowTo; options: TOptionReaderWrapper);
871
  procedure addNewAction(action: TDataProcessing);
872 873 874 875 876
  procedure readNewAction(action: TDataProcessing; options: TOptionReaderWrapper);

  procedure assignOptions(other: TProcessingContext);
  procedure assignActions(other: TProcessingContext);

877
  function clone(newparent: TProcessingContext): TDataProcessing; override;
878

879 880
  function last: TProcessingContext; //returns the last context in this sibling/follow chain

881
  procedure insertFictiveDatasourceIfNeeded; //if no data source is given in an expression (or an subexpression), but an aciton is there, <empty/> is added as data source
882

883 884
  function process(data: IData): TFollowToList; override;

885
  class function replaceEnclosedExpressions(expr: string): string;
886 887
  function replaceEnclosedExpressions(data: IData; expr: string): string;

888
  destructor destroy; override;
889
private
890
  stupidHTTPReactionHackFlag: integer;
891
  procedure loadDataForQueryPreParse(const data: IData);
892
  procedure loadDataForQuery(const data: IData; const query: IXQuery);
893
  function evaluateQuery(const query: IXQuery; const data: IData; const allowWithoutReturnValue: boolean = false): IXQValue;
894
  procedure httpReact (sender: TInternetAccess; var method: string; var url: TDecodedUrl; var data:TInternetAccessDataBlock; var reaction: TInternetAccessReaction);
895 896 897 898
end;

type EInvalidArgument = Exception;

899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942
constructor TFollowToXQVObject.create(const abasedata: IData; const av: IXQValue);
begin
  basedata := abasedata;
  v := av;
end;

function TFollowToXQVObject.clone: TFollowTo;
begin
  result := TFollowToXQVObject.create(basedata, v);
end;

function TFollowToXQVObject.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
var
  temp: TProcessingContext;
  fl: TFollowToList;
begin
  if parent = nil then exit(nil);
  temp := TProcessingContext.Create();
  fl := TFollowToList.Create;
  temp.assignOptions(parent); //do not copy actions/data sources. they would apply to basedata, not to dest
  temp.parent := parent;
  temp.follow := parent.follow; //need to copy follow and follow-to, so it follows to the new data
  temp.followKind := parent.followKind;
  temp.followTo := parent.followTo;
  temp.followInputFormat := parent.followInputFormat;
  temp.nextSibling := parent.nextSibling;
  temp.mergeWithObject(v as TXQValueObject);
  fl := temp.process(basedata);
  case fl.count of
    0: ;
    1: result := fl.first.retrieve(temp, arecursionLevel );
    else raise Exception.Create('Invalid follow to count: ' + inttostr(fl.Count));
  end;
  temp.followTo := nil;
  temp.nextSibling := nil;
  temp.Free;
end;

function TFollowToXQVObject.equalTo(ft: TFollowTo): boolean;
begin
  if not (ft is TFollowToXQVObject) then exit(false);
  result := false;//not working: xpathparser.StaticContext.compareDeepAtomic(v, TFollowToXQVObject(ft).v, xpathparser.StaticContext.collation) = 0;
end;

943 944 945 946 947 948 949
{ TOptionReaderWrapper }

function TOptionReaderWrapper.read(const name: string; out value: IXQValue): boolean;
begin
  result := false;
end;

950 951 952 953 954 955 956 957 958 959 960
function TOptionReaderWrapper.read(const name: string; out inputformat: TInputFormat): boolean;
var
  temp: String;
begin
  result := read('input-format', temp);
  if result then
    case temp of
      'auto': inputFormat:=ifAuto;
      'xml': inputFormat:=ifXML;
      'html': inputFormat:=ifHTML;
      'xml-strict': inputFormat:=ifXMLStrict;
961 962
      'json': inputFormat := ifJSON;
      'json-strict': inputFormat := ifJSONStrict
963 964 965 966
      else raise EXidelException.Create('Invalid input-format: '+temp);
    end;
end;

967 968 969 970 971 972 973
{ TDataObject }

function TDataObject.rawData: string;
begin
  result := frawdata;
end;

974
function TDataObject.baseUri: string;
975
begin
976 977 978 979 980 981
  result := fbaseurl;
end;

function TDataObject.displayBaseUri: string;
begin
  result := fdisplaybaseurl;
982 983 984 985 986 987 988
end;

function TDataObject.contentType: string;
begin
  result := fcontenttype;
end;

989 990 991 992 993
function TDataObject.headers: TStringList;
begin
  result := fheaders;
end;

994 995 996 997 998
function TDataObject.recursionLevel: integer;
begin
  result := frecursionLevel;
end;

999
function TDataObject.inputFormat: TInputFormat;
1000
const FormatMap: array[TInternetToolsFormat] of TInputFormat = ( ifXML, ifHTML, ifJSON, ifXML );
1001
var
1002
  enc: TSystemCodePage;
1003 1004 1005 1006
begin
  if finputformat = ifAuto then begin
    finputformat := FormatMap[guessFormat(rawData, baseUri, contentType)];

1007
    if (finputformat in [ifJSON,ifJSONStrict]) and (hasOutputEncoding <> oePassRaw) then begin
1008 1009 1010
      //convert json to utf-8, because the regex parser does not match non-utf8 (not even with . escape)
      //it might be useful to convert other data, but the x/html parser does its own encoding detection
      enc := strEncodingFromContentType(contentType);
1011 1012 1013
      if enc = CP_NONE then
        if isInvalidUTF8(frawData) and not strContains(frawData, #0) then enc := CP_WINDOWS1252;
      if (enc <> CP_UTF8) and (enc <> CP_NONE) then frawdata := strConvertToUtf8(frawData, enc);
1014 1015
    end;
  end;
1016 1017 1018
  result := finputFormat;
end;

1019

1020 1021
{ TFollowToProcessedData }

1022
constructor TFollowToProcessedData.create(d: IData);
1023 1024 1025 1026 1027 1028 1029
begin
  data := d;
end;

function TFollowToProcessedData.clone: TFollowTo;
begin
  result :=  TFollowToProcessedData.Create(data);
1030
  result.inputFormat := inputFormat;
1031 1032
end;

1033
function TFollowToProcessedData.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1034 1035
begin
  result := data;
1036 1037 1038 1039
  if data <> nil then begin
    (result as TDataObject).finputFormat := self.inputFormat;
    (result as TDataObject).frecursionLevel := arecursionLevel;
  end;
1040 1041 1042 1043 1044 1045 1046
end;

function TFollowToProcessedData.equalTo(ft: TFollowTo): boolean;
begin
  result := (ft is TFollowToProcessedData) and (TFollowToProcessedData(ft).data = data);
end;

1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
{ TOptionReaderFromObject }

constructor TOptionReaderFromObject.create(aobj: TXQValueObject);
begin
  obj := aobj;
end;

function TOptionReaderFromObject.read(const name: string; out value: string): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp.toString;
end;

function TOptionReaderFromObject.read(const name: string; out value: integer): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp.toInt64;
end;

function TOptionReaderFromObject.read(const name: string; out value: boolean): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp.toBoolean;
end;

function TOptionReaderFromObject.read(const name: string; out value: Extended): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
1083
  if result then value := temp.toFloat;
1084 1085
end;

1086 1087 1088 1089 1090 1091 1092 1093
function TOptionReaderFromObject.read(const name: string; out value: IXQValue): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp as TXQValue;
end;

1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126
{ TOptionReaderFromCommandLine }

constructor TOptionReaderFromCommandLine.create(cmdLine: TCommandLineReader);
begin
  acmdLine := cmdLine;
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: string): boolean;
begin
  value := acmdLine.readString(name);
  result := acmdLine.existsProperty(name);
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: integer): boolean;
begin
  value := acmdLine.readInt(name);
  result := acmdLine.existsProperty(name);
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: boolean): boolean;
begin
  value := acmdLine.readFlag(name);
  result := acmdLine.existsProperty(name);
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: Extended): boolean;
begin
  value := acmdLine.readFloat(name);
  result := acmdLine.existsProperty(name);
end;

{ TDownload }

1127

1128
function TDownload.process(data: IData): TFollowToList;
1129
var
1130
  temp, realUrl: String;
1131 1132 1133 1134
  j: LongInt;
  realPath: String;
  realFile: String;
  downloadTo: String;
1135
  color: TColorOptions;
1136 1137 1138
begin
  result := nil;
  if cgimode or not allowFileAccess then
1139
    raise EXidelException.Create('Download not permitted');
1140

1141
  realUrl := data.baseUri;
1142
  if guessType(realUrl) = rtRemoteURL then realurl := decodeURL(realUrl).path;
1143

1144 1145 1146 1147 1148 1149 1150 1151
  j := strRpos('/', realUrl);
  if j = 0 then begin
    realPath := '';
    realFile := realUrl;
  end else begin
    realPath := copy(realUrl, 1, j);
    realFile := copy(realUrl, j + 1, length(realUrl) - j)
  end;
1152
  while strBeginsWith(realPath, '/') do delete(realPath,1,1);
1153

1154
  downloadTo := parent.replaceEnclosedExpressions(data, Self.downloadTarget);
1155 1156
  if striBeginsWith(downloadTo, 'http://') then delete(downloadTo, 1, length('http://'));
  if striBeginsWith(downloadTo, 'https://') then delete(downloadTo, 1, length('https://'));
1157 1158 1159 1160
  {$ifdef win32}
  downloadTo := StringReplace(downloadTo, '\' , '/', [rfReplaceAll]);
  {$endif}

1161 1162 1163 1164 1165 1166
  //If downloadTo is a file                               : save with that name
  //If downloadTo is a directory and does not end with /  : save with basename
  //If downloadTo is a directory and does     end with /  : save with path and basename
  //If downloadTo is -                                    : print to stdout

  //example: Download abc/def/index.html
1167 1168 1169
  //    foo/bar/xyz   save in directory foo/bar with name xyz
  //    foo/bar/      save in directory foo/bar/abc/def with name index.html
  //    foo/bar/.     save in directory foo/bar with name index.html
1170
  //    foo           save in current directory with name foo
1171 1172
  //    ./            save in current directory/abc/def with name index.html
  //    ./.           save in current directory with name index.html
1173
  //    .             save in current directory with name index.html
1174 1175
  //    -             print to stdout
  if downloadTo = '-' then begin
1176 1177 1178 1179
    color := colorizing;
    if color in [cAlways, cAuto] then
      case data.inputFormat of
        ifHTML, ifXML, ifXMLStrict: color := cXML;
1180
        ifJSON, ifJSONStrict: color := cJSON;
1181 1182
      end;
    wcolor(data.rawdata, color);
1183 1184
    exit;
  end;
1185 1186 1187 1188
  if strEndsWith(downloadTo, '/.') then downloadTo := downloadTo + '/' + realFile
  else if strEndsWith(downloadTo, '/') then downloadTo := downloadTo + '/' + realPath + realFile
  else if DirectoryExists(downloadTo) or (downloadTo = '.' { <- redunant check, but safety first }) then downloadTo := downloadTo + '/' + realFile;
  if strEndsWith(downloadTo, '/') or (downloadTo = '') then downloadTo += 'index.html'; //sometimes realFile is empty
1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
  parent.printStatus('**** Save as: '+downloadTo+' ****');
  if pos('/', downloadTo) > 0 then
    ForceDirectories(StringReplace(StringReplace(copy(downloadTo, 1, strRpos('/', downloadTo)-1), '//', '/', [rfReplaceAll]), '/', DirectorySeparator, [rfReplaceAll]));
  strSaveToFileUTF8(StringReplace(downloadTo, '/', DirectorySeparator, [rfReplaceAll]), data.rawdata);
end;

procedure TDownload.readOptions(reader: TOptionReaderWrapper);
begin
  reader.read('download', downloadTarget);
end;

1200
function TDownload.clone(newparent: TProcessingContext): TDataProcessing;
1201 1202
begin
  result := TDownload.Create;
1203
  result.parent := newparent;
1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219
  TDownload(result).downloadTarget:=downloadTarget;
end;


{ THTTPRequest }

constructor THTTPRequest.create(aurl: string);
begin
  url := aurl;
end;

function THTTPRequest.clone: TFollowTo;
begin
  result := THTTPRequest.create(url);
  THTTPRequest(result).method:=method;
  THTTPRequest(result).data:=data;
1220
  THTTPRequest(result).header:=header;
1221 1222
  THTTPRequest(result).multipart:=multipart;
  THTTPRequest(result).variablesReplaced:=variablesReplaced;
1223
  THTTPRequest(result).rawURL:=rawURL;
1224
  result.assign(self);
1225 1226
end;

1227
function THTTPRequest.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1228
var escapedURL: string;
1229

1230 1231
var
  i: Integer;
1232
  d: TDataObject;
1233
begin
1234
  if not allowInternetAccess then raise EXidelException.Create('Internet access not permitted');
1235
  if assigned(onPrepareInternet) then  internet := onPrepareInternet(parent.userAgent, parent.proxy, @parent.httpReact);
1236 1237 1238 1239
  if (parent.loadCookies <> '') then begin
    internet.cookies.loadFromFile(parent.loadCookies);
    parent.loadCookies := ''; //only need to load them once?
  end;
1240
  escapedURL := url;
1241
  if not rawURL then escapedURL := TInternetAccess.urlEncodeData(url, ueXPathHTML4);
1242
  parent.printStatus('**** Retrieving ('+method+'): '+escapedURL+' ****');
1243
  if parent.printPostData and (data <> '') then parent.printStatus(data);
1244
  result := TDataObject.create('', escapedURL);
1245
  if assigned(onRetrieve) then begin
1246 1247 1248 1249 1250 1251
    parent.stupidHTTPReactionHackFlag := 0;
    (result as TDataObject).frawdata := onRetrieve(method, escapedURL, data, header);
    case parent.stupidHTTPReactionHackFlag of
      1: (result as TDataObject).frawdata := '';
      2: exit(nil);
    end;
1252 1253 1254 1255 1256
    if assigned(internet) then begin
      (result as TDataObject).fbaseurl := internet.lastUrl;
      (result as TDataObject).fdisplaybaseurl := internet.lastUrl;
    end;
  end;
1257 1258 1259 1260 1261
  if parent.printReceivedHeaders and assigned(internet) then begin
    parent.printStatus('** Headers: (status: '+inttostr(internet.lastHTTPResultCode)+')**');
    for i:=0 to internet.lastHTTPHeaders.Count-1 do
      wln(internet.lastHTTPHeaders[i]);
  end;
1262 1263
  if Assigned(internet) then begin
    d := (result as TDataObject);
1264
    d.fcontenttype := internet.getLastContentType;
1265 1266 1267 1268
    d.fheaders := TStringList.Create;
    for i := 0 to internet.lastHTTPHeaders.count - 1 do
      d.fheaders.Add(internet.lastHTTPHeaders[i]);
  end;
1269 1270 1271 1272
  with result as TDataObject do begin
    finputFormat := self.inputFormat;
    frecursionLevel := arecursionLevel;
  end;
1273 1274 1275
end;

procedure THTTPRequest.replaceVariables;
1276 1277 1278 1279
  procedure parseFormMime();
  var mime: TMIMEMultipartData;
    forms: TStringArray;
    i: Integer;
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289
    p: SizeInt;
    name: String;
    value: String;
    paren: Char;
    nvalue: String;
    temp: String;
    filename: String;
    contenttype: String;
    kind: Char;
    t: Integer;
1290
  begin
1291 1292
    if data <> '' then raise EXidelException.Create('Cannot mix urlencoded and multipart data');
    forms := strSplit(multipart, #0, false);
1293
    for i := 0 to high(forms) do begin
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
      p := pos('=', forms[i]);
      name := copy(forms[i], 1, p-1);
      value := strCopyFrom(forms[i], p+1);
      filename := '';
      contenttype := '';
      kind := 'x';
      if length(value) > 0 then begin
        if value[1] in ['<','@'] then begin
          kind := value[1];
          delete(value, 1, 1);
        end else kind := 'x';
        if value[1] in ['"', ''''] then begin
          paren := value[1];
          nvalue := '';
          t := 2;
          while (t <= length(value)) do begin
            if value[t] = '\' then begin
              inc(t);
              nvalue += value[t];
            end else if value[t] = paren then break
            else nvalue += value[t];
            inc(t);
          end;
          delete(value, 1, t+1);
        end else begin
          p := pos(';', value);
          if p = 0 then p := length(value) + 1;
          nvalue := copy(value, 1, p-1);
          delete(value, 1, p);
        end;
        if kind in ['<', '@'] then begin
          if kind = '@' then filename := nvalue;
          nvalue := strLoadFromFileUTF8(nvalue);
        end;
        if value <> '' then begin
          for temp in strSplit(value, ';', false) do begin
1330 1331 1332 1333
            value := temp;
            case strSplitGet('=', value) of
              'filename': filename := value;
              'type':     contenttype := value;
1334 1335 1336 1337 1338 1339
              else raise EXidelException.Create('Unknown option in '+forms[i]);
            end;
          end;
        end;
      end;
      mime.addFormData(name, nvalue, filename, contenttype, '');
1340
    end;
1341
    data := mime.compose(header);
1342
    header := TMIMEMultipartData.HeaderForBoundary(header);
1343
  end;
1344
begin
1345
  if variablesReplaced then exit; //this method is still supposed to be only called once
1346 1347 1348
  url := TProcessingContext.replaceEnclosedExpressions(url);
  method := TProcessingContext.replaceEnclosedExpressions(method);
  data := TProcessingContext.replaceEnclosedExpressions(data);
1349
  header := TProcessingContext.replaceEnclosedExpressions(header);
1350 1351 1352 1353
  multipart := TProcessingContext.replaceEnclosedExpressions(multipart);

  if multipart <> '' then parseFormMime();
  variablesReplaced := true;
1354 1355 1356 1357
end;

function THTTPRequest.equalTo(ft: TFollowTo): boolean;
begin
1358
  result := (ft is THTTPRequest) and (THTTPRequest(ft).url = url) and (THTTPRequest(ft).method = method) and (THTTPRequest(ft).data = data) and (THTTPRequest(ft).header = header) and (THTTPRequest(ft).multipart = multipart);
1359 1360
end;

1361 1362 1363 1364 1365
function isStdin(s: string): boolean;
begin
  result := (s = '-') or (s = 'stdin:///') or (s = 'stdin:') or (s = 'stdin://');
end;

1366 1367
procedure closeMultiArgs(var oldValue: string; separator: string); forward;

Benito van der Zander's avatar
Benito van der Zander committed
1368 1369
procedure THTTPRequest.readOptions(reader: TOptionReaderWrapper);
var temp: string;
1370 1371
  tempxq: IXQValue;
  h: IXQValue;
Benito van der Zander's avatar
Benito van der Zander committed
1372
begin
1373
  inherited;
1374
  if method <> '' then exit; //already initialized, must abort to keep stdin working (todo: allow postfix data/method options?)
1375
  reader.read('raw-url', rawURL);
1376
  reader.read('header', header);
1377 1378 1379 1380 1381 1382 1383 1384 1385
  if reader is TOptionReaderFromObject then begin
    variablesReplaced := true;
    if reader.read('headers', tempxq) then begin
      for h in tempxq  do begin
        if header <> '' then header := header + #13#10;
        header += h.toString;
      end;
    end;
  end;
1386 1387 1388
  method:='GET';
  if reader.read('post', data) then
    method:='POST';
1389 1390
  if reader.read('form', multipart) then
    method:='POST';
1391 1392
  if reader.read('method', temp) then begin
    method:=temp;
1393
    if isStdin(method) then
1394 1395
      method := trim(strReadFromStdin);
  end;
1396 1397
  if reader is TOptionReaderFromCommandLine then closeMultiArgs(data, '&');
  header := trim(header);
1398
  if isStdin(data) then
1399
    data := strReadFromStdin;
Benito van der Zander's avatar
Benito van der Zander committed
1400 1401
end;

1402 1403 1404 1405 1406 1407 1408 1409 1410 1411
{ TFileRequest }

constructor TFileRequest.create(aurl: string);
begin
  url := aurl;
end;

function TFileRequest.clone: TFollowTo;
begin
  result := TFileRequest.create(url);
1412
  result.assign(self);
1413 1414
end;

1415
function TFileRequest.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1416
begin
1417
  if not allowFileAccess then raise EXidelException.Create('File access not permitted');
1418
  parent.printStatus('**** Retrieving: '+url+' ****');
1419
  result := TDataObject.create(strLoadFromFileUTF8(url), url);
1420
  with result as TDataObject do begin
1421
    fbaseurl:=fileNameExpandToURI(fbaseurl);
1422 1423 1424
    finputFormat := self.inputFormat;
    frecursionLevel := arecursionLevel;
  end;
1425 1426 1427 1428
end;

procedure TFileRequest.replaceVariables;
begin
1429
  url := TProcessingContext.replaceEnclosedExpressions(url);
1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446
end;

function TFileRequest.equalTo(ft: TFollowTo): boolean;
begin
  result := (ft is TFileRequest) and (TFileRequest(ft).url = url);
end;

{ TDirectDataRequest }

constructor TDirectDataRequest.create(adata: string);
begin
  data := adata;
end;

function TDirectDataRequest.clone: TFollowTo;
begin
  result := TDirectDataRequest.create(data);
1447
  result.assign(self);
Benito van der Zander's avatar