xidelbase.pas 145 KB
Newer Older
1
{Copyright (C) 2012-2015  Benito van der Zander
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
}

18
unit xidelbase;
19 20 21 22 23 24 25

{$mode objfpc}{$H+}
{$modeswitch advancedrecords}

interface

uses
26
  Classes,         {$ifdef win32} windows, {$endif}
27
  extendedhtmlparser,  xquery, sysutils, bbutils, simplehtmltreeparser, multipagetemplate,
28
  internetaccess, contnrs, simplexmltreeparserfpdom, xquery_module_file, xquery_module_math,
Benito van der Zander's avatar
Benito van der Zander committed
29
  rcmdline,math
30 31 32 33
  ;

var cgimode: boolean = false;
    allowInternetAccess: boolean = true;
Benito van der Zander's avatar
Benito van der Zander committed
34
    allowFileAccess: boolean = true;
35
    xqueryDefaultCollation: string = '';
36
    mycmdline: TCommandLineReader;
37
    defaultUserAgent: string = 'Mozilla/3.0 (compatible; Xidel)';
38

39
    majorVersion: integer = 0;
40
    minorVersion: integer = 9;
Benito van der Zander's avatar
Benito van der Zander committed
41
    buildVersion: integer = 5;
42

43

44 45
type EXidelException = class(Exception);

46
var
47
    onPostParseCmdLine: procedure ();
48
    onPrepareInternet: function (const useragent, proxy: string; onReact: TTransferReactEvent): tinternetaccess;
49
    onRetrieve: function (const method, url, postdata, headers: string): string;
50
    onPreOutput: procedure (extractionKind: TExtractionKind);
51

52

53 54 55 56
procedure perform;

implementation

57
uses process, strutils, bigdecimalmath, xquery_json, xquery__regex, xquery_utf8 {$ifdef unix},termio{$endif};
58
//{$R xidelbase.res}
59

60 61
///////////////LCL IMPORT
//uses lazutf8;
62
{$ifdef windows}
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
function WinCPToUTF8(const s: string): string; {$ifdef WinCe}inline;{$endif}
// result has codepage CP_ACP
var
  UTF16WordCnt: SizeInt;
  UTF16Str: UnicodeString;
begin
  {$ifdef WinCE}
  Result := SysToUtf8(s);
  {$else}
  Result:=s;(*
  if IsASCII(Result) then begin
    {$ifdef FPC_HAS_CPSTRING}
    // prevent codepage conversion magic
    SetCodePage(RawByteString(Result), CP_ACP, False);
    {$endif}
    exit;
  end;        *)
  UTF16WordCnt:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), nil, 0);
  // this will null-terminate
  if UTF16WordCnt>0 then
  begin
    setlength(UTF16Str, UTF16WordCnt);
    MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), @UTF16Str[1], UTF16WordCnt);
    Result:=UTF8Encode(UTF16Str);
    {$ifdef FPC_HAS_CPSTRING}
    // prevent codepage conversion magic
    SetCodePage(system.RawByteString(Result), CP_ACP, False);
    {$endif}
  end;
  {$endif}
end;

function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
{$ifNdef WinCE}
var
  Dst: PChar;
{$endif}
begin
  {$ifdef WinCE}
  Result := SysToUTF8(s);
  {$else}
  Dst := AllocMem((Length(s) + 1) * SizeOf(Char));
  if OemToChar(PChar(s), Dst) then
    Result := StrPas(Dst)
  else
    Result := s;
  FreeMem(Dst);
  Result := WinCPToUTF8(Result);
  {$endif}
end;
(*
function UTF8ToConsole(const s: string): string;
{$ifNdef WinCE}
var
  Dst: PChar;
{$endif}
begin
  {$ifdef WinCE}
  Result := UTF8ToSys(s);
  {$else WinCE}
  {$ifndef NO_CP_RTL}
  Result := UTF8ToWinCP(s);
  {$else NO_CP_RTL}
  Result := UTF8ToSys(s); // Kept for compatibility
  {$endif NO_CP_RTL}
  Dst := AllocMem((Length(Result) + 1) * SizeOf(Char));
  if CharToOEM(PChar(Result), Dst) then
    Result := StrPas(Dst);
  FreeMem(Dst);
  {$ifndef NO_CP_RTL}
  SetCodePage(RawByteString(Result), CP_OEMCP, False);
  {$endif NO_CP_RTL}
  {$endif WinCE}
end;*)

138
{$endif}
139 140 141 142 143 144 145 146 147

function GetEnvironmentVariableUTF8(const EnvVar: string): String;
begin
  {$IFDEF FPC_RTL_UNICODE}
  Result:=UTF16ToUTF8(SysUtils.GetEnvironmentVariable(UTF8ToUTF16(EnvVar)));
  {$ELSE}
  // on Windows SysUtils.GetEnvironmentString returns OEM encoded string
  // so ConsoleToUTF8 function should be used!
  // RTL issue: http://bugs.freepascal.org/view.php?id=15233
148
  Result:={$ifdef windows}ConsoleToUTF8{$endif}(SysUtils.GetEnvironmentVariable({UTF8ToSys}(EnvVar)));
149 150 151 152 153 154
  {$ENDIF}
end;


/////////////////////////////////////////////////////

155
type TOutputFormat = (ofAdhoc, ofJsonWrapped, ofXMLWrapped, ofRawXML, ofRawHTML, ofBash, ofWindowsCmd);
156 157 158 159
     TColorOptions = (cAuto, cNever, cAlways, cJSON, cXML);
     TMyConsoleColors = (ccNormal, ccRedBold, ccGreenBold, ccBlueBold, ccPurpleBold, ccYellowBold, ccCyanBold,
                                   ccRed, ccGreen, ccBlue, ccPurple, ccYellow
      );
160 161
var //output options
    outputFormat: TOutputFormat;
162
    windowsCmdPercentageEscape: string;
163
    hasOutputEncoding: (oeAbsent,oeConvert,oePassRaw) = oeAbsent;
164 165
    outputHeader, outputFooter, outputSeparator: string;
    //outputArraySeparator: array[toutputformat] of string = ('',  ', ', '</e><e>', '', '', '', '');
166
    {$ifdef win32}systemEncodingIsUTF8: boolean = true;{$endif}
167 168 169
    colorizing: TColorOptions;

    lastConsoleColor: TMyConsoleColors = ccNormal;
170
    isStdinTTY: boolean = false;
171 172
    isStderrTTY: boolean = false;
    isStdoutTTY: boolean = false;
173

174
    internet: TInternetAccess;
175

176
type TInputFormat = (ifAuto, ifXML, ifHTML, ifXMLStrict, ifJSON);
177

178 179
var
    globalDefaultInputFormat: TInputFormat;
180

181
type
182 183 184 185 186
IData = interface //data interface, so we do not have to care about memory managment
function rawData: string;
function baseUri: string;
function displayBaseUri: string;
function contenttype: string;
187
function headers: TStringList;
188 189 190 191
function recursionLevel: integer;
function inputFormat: TInputFormat;
end;

192 193
{ THtmlTemplateParserBreaker }

194
THtmlTemplateParserBreaker = class(THtmlTemplateParser)
195 196
  ignorenamespaces: boolean;

197 198 199
  procedure initParsingModel(const data: IData);
  procedure parseHTML(const data: IData);
  procedure parseHTMLSimple(const data: IData);
200
  procedure closeVariableLog;
201 202

  procedure parseDoc(sender: TXQueryEngine; html,uri,contenttype: string; var node: TTreeNode);
203 204
end;

205 206
 { TTemplateReaderBreaker }

207 208 209 210 211
 TTemplateReaderBreaker = class(TMultipageTemplateReader)
   constructor create();
   procedure setTemplate(atemplate: TMultiPageTemplate);
   procedure perform(actions: TStringArray);
   procedure selfLog(sender: TMultipageTemplateReader; logged: string; debugLevel: integer);
212

213 214
 end;

215

216 217 218 219 220
    //data processing classes
var htmlparser:THtmlTemplateParserBreaker;
    xpathparser: TXQueryEngine;
    multipage: TTemplateReaderBreaker;
    multipagetemp: TMultiPageTemplate;
221
    currentRoot: TTreeNode;
222

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
procedure setTerminalColor(err: boolean; color: TMyConsoleColors);
{$ifdef unix}
const colorCodes: array[TMyConsoleColors] of string = (
   #27'[0m', #27'[1;31m', #27'[1;32m', #27'[1;34m', #27'[1;35m', #27'[1;33m', #27'[1;36m',
             #27'[0;31m', #27'[0;32m', #27'[0;34m', #27'[0;35m', #27'[0;33m'
   );
var
  f: TextFile;
{$endif}
{$ifdef windows}
const colorCodes: array[TMyConsoleColors] of integer = (
   FOREGROUND_RED or FOREGROUND_GREEN or FOREGROUND_BLUE,
     FOREGROUND_RED or FOREGROUND_INTENSITY, FOREGROUND_GREEN or FOREGROUND_INTENSITY, FOREGROUND_BLUE or FOREGROUND_INTENSITY, FOREGROUND_RED or FOREGROUND_BLUE or FOREGROUND_INTENSITY, FOREGROUND_RED or FOREGROUND_GREEN or FOREGROUND_INTENSITY, FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_INTENSITY,
     FOREGROUND_RED, FOREGROUND_GREEN, FOREGROUND_BLUE, FOREGROUND_RED or FOREGROUND_BLUE, FOREGROUND_RED or FOREGROUND_GREEN
   );
var handle: Integer;
{$endif}
begin
  if err and not isStderrTTY then exit;
  if not err and not isStdoutTTY then exit;
  if color <> lastConsoleColor then begin
    if err then Flush(stderr) else flush(StdOut);
    {$ifdef unix}
    if err then f := stderr else f := stdout;
    write(f, colorCodes[color]);
    {$endif}
    {$ifdef windows}
    if err then handle := StdErrorHandle else handle := StdOutputHandle;
    SetConsoleTextAttribute(handle, colorCodes[color]);
    {$endif}
    lastConsoleColor := color;
  end;
end;

257
procedure w(const s: string);
258
{$ifdef win32}
259 260
var
  temp, temp2: String;
261
{$endif}
262 263
begin
  if s = '' then exit;
264 265 266 267
  {$IFDEF FPC_HAS_CPSTRING}
  write(s);
  {$ELSE}
  fpc 3 is required now
268 269 270 271 272 273 274
  if (outputEncoding = eUTF8) or (outputEncoding = eUnknown) then write(s)
  {$ifdef win32}
  else if outputEncoding = eUnknownUser1 then begin
    if systemEncodingIsUTF8 then temp := s
    else temp := Utf8ToAnsi(s);
    SetLength(temp2, length(temp)+1);
    if charToOEM(pchar(temp), pchar(temp2)) then
275
      write(pchar(temp2));
276 277 278
  end
  {$endif}
  else write(strConvertFromUtf8(s, outputEncoding));
279
  {$ENDIF}
280 281 282 283 284 285 286 287
end;

procedure wln(const s: string = '');
begin
  w(s);
  w(LineEnding);
end;

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
var stacklen: integer;
    stack: TLongintArray;

procedure wcolor(const s: string; color: TColorOptions);
const JSON_COLOR_OBJECT_PAREN: TMyConsoleColors = ccYellowBold;
      JSON_COLOR_OBJECT_KEY: TMyConsoleColors = ccPurpleBold;
      JSON_COLOR_ARRAY_PAREN: TMyConsoleColors = ccGreenBold;
{$ifdef windows}
      JSON_COLOR_STRING: TMyConsoleColors = ccCyanBold; //green is ugly on windows
{$else}
      JSON_COLOR_STRING: TMyConsoleColors = ccGreen;
{$endif}

      JSON_STATE_ARRAY = 1;
      JSON_STATE_OBJECTVALUE = 2;
      JSON_STATE_OBJECTKEY = 3;

      XML_COLOR_COMMENT: TMyConsoleColors = ccBlue;
      XML_COLOR_TAG: TMyConsoleColors = ccYellowBold;
      XML_COLOR_ATTRIB_NAME: TMyConsoleColors = ccPurpleBold;
      XML_COLOR_ATTRIB_VALUE: TMyConsoleColors = ccGreenBold;


var pos, lastpos: integer;

  procedure colorChange(c: TMyConsoleColors);
  begin
    w(copy(s, lastpos, pos - lastpos));
    setTerminalColor(false, c);
    lastpos:=pos;
  end;

var    quote: Char;
    scriptSpecialCase: Boolean;
begin
  case color of
    cJSON: begin
      if stacklen = 0 then arrayAddFast(stack, stacklen, 0);
      pos := 1;
      lastpos := 1;

      while pos <= length(s) do begin
        case s[pos] of
          '{', '}': begin
            if s[pos] = '{' then arrayAddFast(stack, stacklen, JSON_STATE_OBJECTKEY)
            else if stacklen > 1 then dec(stacklen);
            colorChange(JSON_COLOR_OBJECT_PAREN);
            inc(pos);
            colorChange(ccNormal);
          end;
          '[', ']': begin
            if s[pos] = '[' then arrayAddFast(stack, stacklen, JSON_STATE_ARRAY)
            else if stacklen > 1 then dec(stacklen);
            colorChange(JSON_COLOR_ARRAY_PAREN);
            inc(pos);
            colorChange(ccNormal);
          end;
          ',', ':': begin
            case stack[stacklen-1] of
              JSON_STATE_OBJECTKEY, JSON_STATE_OBJECTVALUE: begin
                colorChange(JSON_COLOR_OBJECT_PAREN);
                if s[pos] = ',' then stack[stacklen-1] := JSON_STATE_OBJECTKEY
                else stack[stacklen-1] := JSON_STATE_OBJECTVALUE;
              end;
              JSON_STATE_ARRAY: colorChange(JSON_COLOR_ARRAY_PAREN);
            end;
            inc(pos);
            colorChange(ccNormal);
          end;
          '"': begin
            case stack[stacklen-1] of
              JSON_STATE_OBJECTKEY: colorChange(JSON_COLOR_OBJECT_KEY);
              else colorChange(JSON_COLOR_STRING);
            end;
            inc(pos);
            while (pos <= length(s)) and (s[pos] <> '"') do begin
              if s[pos] = '\' then inc(pos);
              inc(pos);
            end;
            inc(pos);
          end
          else inc(pos);
        end;
      end;
      colorChange(ccNormal)
    end;
    cXML: begin
      pos := 1;
      lastpos := 1;
      scriptSpecialCase := false;
      while pos <= length(s) do begin
        case s[pos] of
          '<': if scriptSpecialCase and not striBeginsWith(@s[pos], '</script') then inc(pos)
          else begin
            colorChange(XML_COLOR_TAG);
            if (pos + 1) <= length(s) then begin
              case s[pos+1] of
                '/', '?': inc(pos,2);
                '!': if strBeginsWith(@s[pos], '<!--') then begin
                  colorChange(XML_COLOR_COMMENT);
                  inc(pos,3);
                  while (pos + 3 <= length(s)) and ((s[pos] <> '-') or (s[pos+1] <> '-')or (s[pos+2] <> '>')) do inc(pos);
                  inc(pos);
                  continue;
                end;
              end;
            end;
            scriptSpecialCase := striBeginsWith(@s[pos], '<script');
            while (pos <= length(s)) and not (s[pos] in ['>','/','?',#0..#32]) do inc(pos);
            while (pos <= length(s)) do begin
              case s[pos] of
                '>','/','?': begin
                  colorChange(XML_COLOR_TAG);
                  if s[pos] <> '>' then inc(pos);
                  break;
                end;
                 #0..#32: ;
                 else begin
                   colorChange(XML_COLOR_ATTRIB_NAME);
                   while (pos <= length(s)) and not (s[pos] in ['=','/','>']) do inc(pos);
                   colorChange(XML_COLOR_TAG);
                   if s[pos] <> '=' then break;
                   inc(pos);
                   while (pos <= length(s)) and (s[pos] in [#0..#32]) do inc(pos);
                   colorChange(XML_COLOR_ATTRIB_VALUE);
                   if (pos <= length(s)) then
                     case s[pos] of
                       '''', '"': begin
                         quote := s[pos];
                         inc(pos);
                         while (pos <= length(s)) and (s[pos] <> quote) do inc(pos);
                         inc(pos);
                       end;
                       else while (pos <= length(s)) and not (s[pos] in [#0..#32]) do inc(pos);
                     end;
                   continue;
                 end;
              end;
              inc(pos);
            end;
            inc(pos);
            colorChange(ccNormal);
          end;
          else inc(pos);
        end;
      end;
      colorChange(ccNormal)
    end;
    else w(s);
  end;
end;

440 441
var firstItem: boolean = true;

442
procedure writeItem(const s: string; color: TColorOptions = cNever);
443 444 445 446
begin
  if not firstItem then begin
    w(outputSeparator);
  end;
447
  wcolor(s, color);
448 449 450
  firstItem := false;
end;

451
procedure writeVarName(const s: string; color: TColorOptions = cNever);
452
begin
453
  writeItem(s, color);
454 455 456 457 458 459 460 461 462
  firstItem := true; //prevent another line break / separator
end;

var firstGroup: boolean = true;

procedure writeBeginGroup;
begin
  case outputFormat of
    ofXMLWrapped: begin
463
      wcolor('<e>', cXML);
464
    end;
465
    ofJsonWrapped: if not firstGroup then wcolor(', ' + LineEnding, cJSON);
466 467 468 469 470 471 472
  end;
  firstGroup := false;
end;

procedure writeEndGroup;
begin
  case outputFormat of
473 474 475
    ofXMLWrapped: begin
      wcolor('</e>' + LineEnding, cXML);
    end;
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
  end;
end;

{procedure printBeginValueGroup;
begin

end;

procedure printBeginValue(varname: string);
begin
  case outputFormat of
    ofXMLWrapped: w('<e>');
  end;
  firstValue := false;
end;

procedure printInnerValueSeparator;
begin
  if not firstValue then begin
    w(outputSeparator);
    //w(outputArraySeparator[outputFormat]);
  end;
  firstValue := false;
end;

procedure printEndValue;
begin
end;

procedure printEndValueGroup;
begin

end;                             }


function joined(s: array of string): string; //for command line help
512 513 514 515 516 517 518 519
var
  i: Integer;
begin
  if length(s) = 0 then exit('');
  result := s[0];
  for i:=1 to high(s) do result := result + LineEnding + s[i];
end;

520 521
function strLoadFromFileChecked(const fn: string): string;
begin
522
  result := strLoadFromFileUTF8(fn);
523
  if strBeginsWith(result, '#!') then result := strAfter(result, #10);
524
  if Result = '' then raise EXidelException.Create('File '+fn+' is empty.');
525 526 527 528 529 530 531 532 533 534 535 536 537
end;

function strReadFromStdin: string;
var s:string;
begin
  result:='';
  while not EOF(Input) do begin
    ReadLn(s);
    result+=s+LineEnding;
  end;
end;


538
function setTextEncoding(var t: TextFile; e: string): integer;
539
var
540
  codepage: Integer;
541
  str: String;
542
begin
543
  codepage := -1;
544
  str:=UpperCase(e);
545
  case str of
546 547 548 549 550 551 552
    'UTF-8', 'UTF8': codepage := CP_UTF8;
    'CP1252', 'ISO-8859-1', 'LATIN1', 'ISO-8859-15': codepage := 1252;
    'UTF-16BE', 'UTF16BE': codepage := CP_UTF16BE;
    'UTF16', 'UTF-16', 'UTF-16LE', 'UTF16LE': codepage := CP_UTF16;
    'UTF-32BE', 'UTF32BE': codepage := CP_UTF32BE;
    'UTF32', 'UTF-32', 'UTF-32LE', 'UTF32LE': codepage := CP_UTF32;
    'OEM': codepage := CP_OEMCP;
Benito van der Zander's avatar
Benito van der Zander committed
553
    'INPUT': ;//none
554
    else if strBeginsWith(str, 'CP') then codepage := StrToIntDef(strAfter(str, 'CP'), -1)
555 556
    else writeln(stderr, 'Unknown encoding: ',e)
  end;
557 558 559 560 561 562 563 564 565 566
  result := codepage;
  if codepage <> -1 then
    SetTextCodePage(t, codepage);
end;

procedure setOutputEncoding(e: string);
var
  codepage: Integer;
begin
  codepage := setTextEncoding(output, e);
567
  if codepage <> -1 then begin
568
    hasOutputEncoding := oeConvert;
569 570
    //SetTextCodePage(StdErr, codepage);
  end else begin
571
    hasOutputEncoding := oePassRaw;
572 573
    SetTextCodePage(Output, CP_ACP); //all our strings claim to be ACP (=UTF8) so there should be no conversion?
    //SetTextCodePage(StdErr, CP_ACP);
574 575 576
  end;
end;

577 578

type
579 580 581

  { TOptionReaderWrapper }

582 583 584 585 586
  TOptionReaderWrapper = class
    function read(const name: string; out value: string): boolean; virtual; abstract;
    function read(const name: string; out value: integer): boolean; virtual; abstract;
    function read(const name: string; out value: boolean): boolean; virtual; abstract;
    function read(const name: string; out value: Extended): boolean; virtual; abstract;
587
    function read(const name: string; out value: IXQValue): boolean; virtual;
588
    function read(const name: string; out inputformat: TInputFormat): boolean; virtual;
589
  end;
590

591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
  { TOptionReaderFromCommandLine }

  TOptionReaderFromCommandLine = class(TOptionReaderWrapper)
    constructor create(cmdLine: TCommandLineReader);
    function read(const name: string; out value: string): boolean; override;
    function read(const name: string; out value: integer): boolean; override;
    function read(const name: string; out value: boolean): boolean; override;
    function read(const name: string; out value: Extended): boolean; override;
  private
    acmdLine: TCommandLineReader;
  end;

  { TOptionReaderFromObject }

  TOptionReaderFromObject = class(TOptionReaderWrapper)
    constructor create(aobj: TXQValueObject);
    function read(const name: string; out value: string): boolean; override;
    function read(const name: string; out value: integer): boolean; override;
    function read(const name: string; out value: boolean): boolean; override;
    function read(const name: string; out value: Extended): boolean; override;
611
    function read(const name: string; out value: IXQValue): boolean; override;
612 613 614 615 616 617 618 619
  private
    obj: TXQValueObject;
  end;

type

{ TData }

620 621 622
{ TDataObject }

TDataObject = class(TInterfacedObject, IData)
623 624 625 626
{private todo: optimize
  fparsed: TTreeDocument;
  function GetParsed: TTreeDocument;
public}
627 628
private
  frawdata: string;
629
  fbaseurl, fdisplaybaseurl: string;
630
  fcontenttype: string;
631
  frecursionLevel: integer;
632
  finputformat: TInputFormat;
633
  fheaders: TStringList;
634 635
public
  function rawData: string;
636 637
  function baseUri: string;
  function displayBaseUri: string;
638
  function contentType: string;
639
  function headers: TStringList;
640
  function recursionLevel: integer;
641
  function inputFormat: TInputFormat;
642
  constructor create(somedata: string; aurl: string; acontenttype: string = '');
643
  destructor Destroy; override;
644 645 646 647 648 649 650 651 652
  //property parsed:TTreeDocument read GetParsed;
end;

TDataProcessing = class;
TProcessingContext = class;

{ TFollowTo }

TFollowTo = class
653
  nextAction: integer; //the next action after the action yielding the data, so an action does not process its own follows
654
  inputFormat: TInputFormat;
655
  class function createFromRetrievalAddress(data: string): TFollowTo;
656

657
  function clone: TFollowTo; virtual; abstract;
658
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; virtual; abstract;
659 660
  procedure replaceVariables; virtual;
  function equalTo(ft: TFollowTo): boolean; virtual; abstract;
Benito van der Zander's avatar
Benito van der Zander committed
661
  procedure readOptions(reader: TOptionReaderWrapper); virtual;
662
  procedure assign(other: TFollowTo); virtual;
663 664 665 666 667
end;

{ THTTPRequest }

THTTPRequest = class(TFollowTo)
668 669 670
private
  variablesReplaced: boolean;
public
671 672 673
  url: string;
  method: string;
  data: string;
674
  header: string;
675
  multipart: string;
676
  rawURL: boolean;
677 678
  constructor create(aurl: string);
  function clone: TFollowTo; override;
679
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
680 681
  procedure replaceVariables; override;
  function equalTo(ft: TFollowTo): boolean; override;
Benito van der Zander's avatar
Benito van der Zander committed
682
  procedure readOptions(reader: TOptionReaderWrapper); override;
683 684 685 686 687 688 689 690
end;

{ TFileRequest }

TFileRequest = class(TFollowTo)
  url: string;
  constructor create(aurl: string);
  function clone: TFollowTo; override;
691
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
692 693 694 695 696 697 698 699 700 701
  procedure replaceVariables; override;
  function equalTo(ft: TFollowTo): boolean; override;
end;

{ TDirectDataRequest }

TDirectDataRequest = class(TFollowTo)
  data: string;
  constructor create(adata: string);
  function clone: TFollowTo; override;
702
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
703 704 705 706 707 708 709 710
  function equalTo(ft: TFollowTo): boolean; override;
  //procedure replaceVariables;  do not replace vars in direct data
end;

{ TStdinDataRequest }

TStdinDataRequest = class(TFollowTo)
  function clone: TFollowTo; override;
711
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
712 713 714
  function equalTo(ft: TFollowTo): boolean; override;
end;

715 716 717
{ TFollowToProcessedData }

TFollowToProcessedData = class(TFollowTo)
718 719
  data: IData;
  constructor create(d: IData);
720
  function clone: TFollowTo; override;
721 722 723 724 725 726 727 728 729 730
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
  function equalTo(ft: TFollowTo): boolean; override;
end;

TFollowToXQVObject = class(TFollowTo)
  v: IXQValue;
  basedata: IData;
  constructor create(const abasedata: IData; const av: IXQValue);
  function clone: TFollowTo; override;
  function retrieve(parent: TProcessingContext; arecursionLevel: integer): IData; override;
731 732 733
  function equalTo(ft: TFollowTo): boolean; override;
end;

734 735 736 737 738 739 740 741 742 743 744 745
{TFollowXQV = class(TFollowTo)
  xqv: TXQValue;
  //can be url/http-request, file(?), data
  //object with arbitrary options
  //sequence of previous
end;}


{ TFollowToList }

TFollowToList = class(TFpObjectList)
  constructor Create;
746
  procedure merge(l: TFollowToList; nextAction: integer = 0);
747 748 749
  function first: TFollowTo;

  procedure add(ft: TFollowTo);
750
  procedure merge(dest: IXQValue; basedata: IData; parent: TProcessingContext);
751 752 753

  function containsEqual(ft: TFollowTo): boolean;
private
754 755
  procedure addBasicUrl(absurl: string; baseurl: string; inputFormat: TInputFormat);
  procedure addObject(absurl: string; baseurl: string; options: TXQValueObject; fallBackInputFormat: TInputFormat);
756 757 758 759 760 761 762 763
end;



{ TDataProcessing }

TDataProcessing = class
  parent: TProcessingContext;
764
  function process(data: IData): TFollowToList; virtual; abstract;
765 766 767 768 769

  procedure readOptions(reader: TOptionReaderWrapper); virtual;
  procedure initFromCommandLine(cmdLine: TCommandLineReader); virtual;
  procedure mergeWithObject(obj: TXQValueObject); virtual;

770
  function clone(newparent: TProcessingContext): TDataProcessing; virtual; abstract;
771 772 773 774 775 776 777
end;


{ TDownload }

TDownload = class(TDataProcessing)
  downloadTarget: string;
778
  function process(data: IData): TFollowToList; override;
779
  procedure readOptions(reader: TOptionReaderWrapper); override;
780
  function clone(newparent: TProcessingContext): TDataProcessing; override;
781
end;
782

783
{ TExtraction }
784
TExtraction = class(TDataProcessing)
785
 extract: string;
786
 extractQueryCache: IXQuery;
787
 extractExclude, extractInclude: TStringArray;
788
 extractKind: TExtractionKind;
789

790 791
 templateActions: TStringArray;

792 793
 defaultName: string;
 printVariables: set of (pvLog, pvCondensedLog, pvFinal);
794 795
 printTypeAnnotations,  hideVariableNames: boolean;
 printedNodeFormat: TTreeNodeSerialization;
796

797 798
 inputFormat: TInputFormat;

799
 constructor create;
800

801
 procedure readOptions(reader: TOptionReaderWrapper); override;
802 803 804

 procedure setVariables(v: string);

805
 procedure printExtractedValue(value: IXQValue; invariable: boolean);
806
 procedure printCmdlineVariable(const name: string; const value: IXQValue);
807
 procedure printExtractedVariables(vars: TXQVariableChangeLog; state: string; showDefaultVariable: boolean);
808
 procedure printExtractedVariables(parser: THtmlTemplateParser; showDefaultVariableOverride: boolean);
809

810
 function process(data: IData): TFollowToList; override;
811 812

 procedure assignOptions(other: TExtraction);
813
 function clone(newparent: TProcessingContext): TDataProcessing; override;
814 815
private
 currentFollowList: TFollowToList;
816
 currentData: IData;
817
 procedure pageProcessed(unused: TMultipageTemplateReader; parser: THtmlTemplateParser);
818 819
end;

820

821 822 823 824
{ TFollowToWrapper }

TFollowToWrapper = class(TDataProcessing)
  followTo: TFollowTo;
Benito van der Zander's avatar
Benito van der Zander committed
825
  procedure readOptions(reader: TOptionReaderWrapper); override;
826
  function process(data: IData): TFollowToList; override;
827
  function clone(newparent: TProcessingContext): TDataProcessing; override;
828
  destructor Destroy; override;
829 830 831 832
end;

{ TProcessingContext }

833 834 835 836 837 838
//Processing is done in processing contexts
//A processing context can have its own data sources (TFollowTo or data sources of a nested processing context) or receive the data from its parent
//To every data source actions are applied (e.g. tdownload or textraction). These actions can also yield new data sources (e.g. follow := assignments or nested processing contexts with yieldDataToParent)
//The expression in follow is evaluated and the resulting data processed in the context followTo
//Then processing continues in nextSibling
//Remaining unprocessed data is passed to the parent
839
TProcessingContext = class(TDataProcessing)
840 841
  dataSources: array of TDataProcessing; //data sources, e.g. a list of URLs
  actions: array of TDataProcessing;     //actions e.g. a download target
842 843

  follow: string;
844
  followKind: TExtractionKind;
845
  followQueryCache: IXQuery;
846
  followExclude, followInclude: TStringArray;
847
  followTo: TProcessingContext;
848
  followMaxLevel: integer;
849
  followInputFormat: TInputFormat;
850 851

  nextSibling: TProcessingContext;
852 853 854 855

  wait: Extended;
  userAgent: string;
  proxy: string;
856
  printReceivedHeaders: boolean;
857
  errorHandling: string;
858

Benito van der Zander's avatar
Benito van der Zander committed
859
  silent, printPostData: boolean;
860

861
  ignoreNamespace: boolean;
Benito van der Zander's avatar
Benito van der Zander committed
862
  compatibilityNoExtendedStrings,compatibilityNoJSON, compatibilityNoJSONliterals, compatibilityOnlyJSONObjects, compatibilityNoExtendedJson, compatibilityStrictTypeChecking, compatibilityStrictNamespaces: boolean;
863
  compatibilityDotNotation: TXQPropertyDotNotation;
864
  noOptimizations: boolean;
865

866 867
  yieldDataToParent: boolean;

868 869 870 871 872
  procedure printStatus(s: string);

  procedure readOptions(reader: TOptionReaderWrapper); override;
  procedure mergeWithObject(obj: TXQValueObject); override;

873
  procedure addNewDataSource(source: TDataProcessing);
874
  procedure readNewDataSource(data: TFollowTo; options: TOptionReaderWrapper);
875
  procedure addNewAction(action: TDataProcessing);
876 877 878 879 880
  procedure readNewAction(action: TDataProcessing; options: TOptionReaderWrapper);

  procedure assignOptions(other: TProcessingContext);
  procedure assignActions(other: TProcessingContext);

881
  function clone(newparent: TProcessingContext): TDataProcessing; override;
882

883 884
  function last: TProcessingContext; //returns the last context in this sibling/follow chain

885
  procedure insertFictiveDatasourceIfNeeded; //if no data source is given in an expression (or an subexpression), but an aciton is there, <empty/> is added as data source
886

887 888
  function process(data: IData): TFollowToList; override;

889
  class function replaceEnclosedExpressions(expr: string): string;
890 891
  function replaceEnclosedExpressions(data: IData; expr: string): string;

892
  destructor destroy; override;
893
private
894
  stupidHTTPReactionHackFlag: integer;
895
  procedure loadDataForQueryPreParse(const data: IData);
896
  procedure loadDataForQuery(const data: IData; const query: IXQuery);
897
  function evaluateQuery(const query: IXQuery; const data: IData; const allowWithoutReturnValue: boolean = false): IXQValue;
898
  procedure httpReact (sender: TInternetAccess; var method: string; var url: TDecodedUrl; var data:string; var reaction: TInternetAccessReaction);
899 900 901 902
end;

type EInvalidArgument = Exception;

903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
constructor TFollowToXQVObject.create(const abasedata: IData; const av: IXQValue);
begin
  basedata := abasedata;
  v := av;
end;

function TFollowToXQVObject.clone: TFollowTo;
begin
  result := TFollowToXQVObject.create(basedata, v);
end;

function TFollowToXQVObject.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
var
  temp: TProcessingContext;
  fl: TFollowToList;
begin
  if parent = nil then exit(nil);
  temp := TProcessingContext.Create();
  fl := TFollowToList.Create;
  temp.assignOptions(parent); //do not copy actions/data sources. they would apply to basedata, not to dest
  temp.parent := parent;
  temp.follow := parent.follow; //need to copy follow and follow-to, so it follows to the new data
  temp.followKind := parent.followKind;
  temp.followTo := parent.followTo;
  temp.followInputFormat := parent.followInputFormat;
  temp.nextSibling := parent.nextSibling;
  temp.mergeWithObject(v as TXQValueObject);
  fl := temp.process(basedata);
  case fl.count of
    0: ;
    1: result := fl.first.retrieve(temp, arecursionLevel );
    else raise Exception.Create('Invalid follow to count: ' + inttostr(fl.Count));
  end;
  temp.followTo := nil;
  temp.nextSibling := nil;
  temp.Free;
end;

function TFollowToXQVObject.equalTo(ft: TFollowTo): boolean;
begin
  if not (ft is TFollowToXQVObject) then exit(false);
  result := false;//not working: xpathparser.StaticContext.compareDeepAtomic(v, TFollowToXQVObject(ft).v, xpathparser.StaticContext.collation) = 0;
end;

947 948 949 950 951 952 953
{ TOptionReaderWrapper }

function TOptionReaderWrapper.read(const name: string; out value: IXQValue): boolean;
begin
  result := false;
end;

954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969
function TOptionReaderWrapper.read(const name: string; out inputformat: TInputFormat): boolean;
var
  temp: String;
begin
  result := read('input-format', temp);
  if result then
    case temp of
      'auto': inputFormat:=ifAuto;
      'xml': inputFormat:=ifXML;
      'html': inputFormat:=ifHTML;
      'xml-strict': inputFormat:=ifXMLStrict;
      'json': inputFormat := ifJSON
      else raise EXidelException.Create('Invalid input-format: '+temp);
    end;
end;

970 971 972 973 974 975 976
{ TDataObject }

function TDataObject.rawData: string;
begin
  result := frawdata;
end;

977
function TDataObject.baseUri: string;
978
begin
979 980 981 982 983 984
  result := fbaseurl;
end;

function TDataObject.displayBaseUri: string;
begin
  result := fdisplaybaseurl;
985 986 987 988 989 990 991
end;

function TDataObject.contentType: string;
begin
  result := fcontenttype;
end;

992 993 994 995 996
function TDataObject.headers: TStringList;
begin
  result := fheaders;
end;

997 998 999 1000 1001
function TDataObject.recursionLevel: integer;
begin
  result := frecursionLevel;
end;

1002
function TDataObject.inputFormat: TInputFormat;
1003
const FormatMap: array[TInternetToolsFormat] of TInputFormat = ( ifXML, ifHTML, ifJSON, ifXML );
1004
var
1005
  enc: TSystemCodePage;
1006 1007 1008 1009
begin
  if finputformat = ifAuto then begin
    finputformat := FormatMap[guessFormat(rawData, baseUri, contentType)];

1010
    if (finputformat = ifJSON) and (hasOutputEncoding <> oePassRaw) then begin
1011 1012 1013
      //convert json to utf-8, because the regex parser does not match non-utf8 (not even with . escape)
      //it might be useful to convert other data, but the x/html parser does its own encoding detection
      enc := strEncodingFromContentType(contentType);
1014 1015 1016
      if enc = CP_NONE then
        if isInvalidUTF8(frawData) and not strContains(frawData, #0) then enc := CP_WINDOWS1252;
      if (enc <> CP_UTF8) and (enc <> CP_NONE) then frawdata := strConvertToUtf8(frawData, enc);
1017 1018
    end;
  end;
1019 1020 1021
  result := finputFormat;
end;

1022

1023 1024
{ TFollowToProcessedData }

1025
constructor TFollowToProcessedData.create(d: IData);
1026 1027 1028 1029 1030 1031 1032
begin
  data := d;
end;

function TFollowToProcessedData.clone: TFollowTo;
begin
  result :=  TFollowToProcessedData.Create(data);
1033
  result.inputFormat := inputFormat;
1034 1035
end;

1036
function TFollowToProcessedData.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1037 1038
begin
  result := data;
1039 1040 1041 1042
  if data <> nil then begin
    (result as TDataObject).finputFormat := self.inputFormat;
    (result as TDataObject).frecursionLevel := arecursionLevel;
  end;
1043 1044 1045 1046 1047 1048 1049
end;

function TFollowToProcessedData.equalTo(ft: TFollowTo): boolean;
begin
  result := (ft is TFollowToProcessedData) and (TFollowToProcessedData(ft).data = data);
end;

1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
{ TOptionReaderFromObject }

constructor TOptionReaderFromObject.create(aobj: TXQValueObject);
begin
  obj := aobj;
end;

function TOptionReaderFromObject.read(const name: string; out value: string): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp.toString;
end;

function TOptionReaderFromObject.read(const name: string; out value: integer): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp.toInt64;
end;

function TOptionReaderFromObject.read(const name: string; out value: boolean): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp.toBoolean;
end;

function TOptionReaderFromObject.read(const name: string; out value: Extended): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
1086
  if result then value := temp.toFloat;
1087 1088
end;

1089 1090 1091 1092 1093 1094 1095 1096
function TOptionReaderFromObject.read(const name: string; out value: IXQValue): boolean;
var
  temp: TXQValue;
begin
  result := obj.hasProperty(name, @temp);
  if result then value := temp as TXQValue;
end;

1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129
{ TOptionReaderFromCommandLine }

constructor TOptionReaderFromCommandLine.create(cmdLine: TCommandLineReader);
begin
  acmdLine := cmdLine;
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: string): boolean;
begin
  value := acmdLine.readString(name);
  result := acmdLine.existsProperty(name);
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: integer): boolean;
begin
  value := acmdLine.readInt(name);
  result := acmdLine.existsProperty(name);
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: boolean): boolean;
begin
  value := acmdLine.readFlag(name);
  result := acmdLine.existsProperty(name);
end;

function TOptionReaderFromCommandLine.read(const name: string; out value: Extended): boolean;
begin
  value := acmdLine.readFloat(name);
  result := acmdLine.existsProperty(name);
end;

{ TDownload }

1130

1131
function TDownload.process(data: IData): TFollowToList;
1132
var
1133
  temp, realUrl: String;
1134 1135 1136 1137
  j: LongInt;
  realPath: String;
  realFile: String;
  downloadTo: String;
1138
  color: TColorOptions;
1139 1140 1141
begin
  result := nil;
  if cgimode or not allowFileAccess then
1142
    raise EXidelException.Create('Download not permitted');
1143

1144
  realUrl := data.baseUri;
1145
  if guessType(realUrl) = rtRemoteURL then realurl := decodeURL(realUrl).path;
1146

1147 1148 1149 1150 1151 1152 1153 1154
  j := strRpos('/', realUrl);
  if j = 0 then begin
    realPath := '';
    realFile := realUrl;
  end else begin
    realPath := copy(realUrl, 1, j);
    realFile := copy(realUrl, j + 1, length(realUrl) - j)
  end;
1155
  while strBeginsWith(realPath, '/') do delete(realPath,1,1);
1156

1157
  downloadTo := parent.replaceEnclosedExpressions(data, Self.downloadTarget);
1158 1159
  if striBeginsWith(downloadTo, 'http://') then delete(downloadTo, 1, length('http://'));
  if striBeginsWith(downloadTo, 'https://') then delete(downloadTo, 1, length('https://'));
1160 1161 1162 1163
  {$ifdef win32}
  downloadTo := StringReplace(downloadTo, '\' , '/', [rfReplaceAll]);
  {$endif}

1164 1165 1166 1167 1168 1169
  //If downloadTo is a file                               : save with that name
  //If downloadTo is a directory and does not end with /  : save with basename
  //If downloadTo is a directory and does     end with /  : save with path and basename
  //If downloadTo is -                                    : print to stdout

  //example: Download abc/def/index.html
1170 1171 1172
  //    foo/bar/xyz   save in directory foo/bar with name xyz
  //    foo/bar/      save in directory foo/bar/abc/def with name index.html
  //    foo/bar/.     save in directory foo/bar with name index.html
1173
  //    foo           save in current directory with name foo
1174 1175
  //    ./            save in current directory/abc/def with name index.html
  //    ./.           save in current directory with name index.html
1176
  //    .             save in current directory with name index.html
1177 1178
  //    -             print to stdout
  if downloadTo = '-' then begin
1179 1180 1181 1182 1183 1184 1185
    color := colorizing;
    if color in [cAlways, cAuto] then
      case data.inputFormat of
        ifHTML, ifXML, ifXMLStrict: color := cXML;
        ifJSON: color := cJSON;
      end;
    wcolor(data.rawdata, color);
1186 1187
    exit;
  end;
1188 1189 1190 1191
  if strEndsWith(downloadTo, '/.') then downloadTo := downloadTo + '/' + realFile
  else if strEndsWith(downloadTo, '/') then downloadTo := downloadTo + '/' + realPath + realFile
  else if DirectoryExists(downloadTo) or (downloadTo = '.' { <- redunant check, but safety first }) then downloadTo := downloadTo + '/' + realFile;
  if strEndsWith(downloadTo, '/') or (downloadTo = '') then downloadTo += 'index.html'; //sometimes realFile is empty
1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202
  parent.printStatus('**** Save as: '+downloadTo+' ****');
  if pos('/', downloadTo) > 0 then
    ForceDirectories(StringReplace(StringReplace(copy(downloadTo, 1, strRpos('/', downloadTo)-1), '//', '/', [rfReplaceAll]), '/', DirectorySeparator, [rfReplaceAll]));
  strSaveToFileUTF8(StringReplace(downloadTo, '/', DirectorySeparator, [rfReplaceAll]), data.rawdata);
end;

procedure TDownload.readOptions(reader: TOptionReaderWrapper);
begin
  reader.read('download', downloadTarget);
end;

1203
function TDownload.clone(newparent: TProcessingContext): TDataProcessing;
1204 1205
begin
  result := TDownload.Create;
1206
  result.parent := newparent;
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222
  TDownload(result).downloadTarget:=downloadTarget;
end;


{ THTTPRequest }

constructor THTTPRequest.create(aurl: string);
begin
  url := aurl;
end;

function THTTPRequest.clone: TFollowTo;
begin
  result := THTTPRequest.create(url);
  THTTPRequest(result).method:=method;
  THTTPRequest(result).data:=data;
1223
  THTTPRequest(result).header:=header;
1224 1225
  THTTPRequest(result).multipart:=multipart;
  THTTPRequest(result).variablesReplaced:=variablesReplaced;
1226
  THTTPRequest(result).rawURL:=rawURL;
1227
  result.assign(self);
1228 1229
end;

1230
function THTTPRequest.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1231
var escapedURL: string;
1232

1233 1234
var
  i: Integer;
1235
  d: TDataObject;
1236
begin
1237
  if not allowInternetAccess then raise EXidelException.Create('Internet access not permitted');
1238
  if assigned(onPrepareInternet) then  internet := onPrepareInternet(parent.userAgent, parent.proxy, @parent.httpReact);
1239 1240 1241
  escapedURL := url;
  if not rawURL then escapedURL := urlHexEncode(url, [#32..#126]); //    fn:escape-html-uri
  parent.printStatus('**** Retrieving ('+method+'): '+escapedURL+' ****');
1242
  if parent.printPostData and (data <> '') then parent.printStatus(data);
1243
  result := TDataObject.create('', escapedURL);
1244
  if assigned(onRetrieve) then begin
1245 1246 1247 1248 1249 1250
    parent.stupidHTTPReactionHackFlag := 0;
    (result as TDataObject).frawdata := onRetrieve(method, escapedURL, data, header);
    case parent.stupidHTTPReactionHackFlag of
      1: (result as TDataObject).frawdata := '';
      2: exit(nil);
    end;
1251 1252 1253 1254 1255
    if assigned(internet) then begin
      (result as TDataObject).fbaseurl := internet.lastUrl;
      (result as TDataObject).fdisplaybaseurl := internet.lastUrl;
    end;
  end;
1256 1257 1258 1259 1260
  if parent.printReceivedHeaders and assigned(internet) then begin
    parent.printStatus('** Headers: (status: '+inttostr(internet.lastHTTPResultCode)+')**');
    for i:=0 to internet.lastHTTPHeaders.Count-1 do
      wln(internet.lastHTTPHeaders[i]);
  end;
1261 1262
  if Assigned(internet) then begin
    d := (result as TDataObject);
1263
    d.fcontenttype := internet.getLastContentType;
1264 1265 1266 1267
    d.fheaders := TStringList.Create;
    for i := 0 to internet.lastHTTPHeaders.count - 1 do
      d.fheaders.Add(internet.lastHTTPHeaders[i]);
  end;
1268 1269 1270 1271
  with result as TDataObject do begin
    finputFormat := self.inputFormat;
    frecursionLevel := arecursionLevel;
  end;
1272 1273 1274
end;

procedure THTTPRequest.replaceVariables;
1275 1276 1277 1278
  procedure parseFormMime();
  var mime: TMIMEMultipartData;
    forms: TStringArray;
    i: Integer;
1279 1280 1281 1282 1283 1284 1285 1286 1287 1288
    p: SizeInt;
    name: String;
    value: String;
    paren: Char;
    nvalue: String;
    temp: String;
    filename: String;
    contenttype: String;
    kind: Char;
    t: Integer;
1289
  begin
1290 1291
    if data <> '' then raise EXidelException.Create('Cannot mix urlencoded and multipart data');
    forms := strSplit(multipart, #0, false);
1292
    for i := 0 to high(forms) do begin
1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
      p := pos('=', forms[i]);
      name := copy(forms[i], 1, p-1);
      value := strCopyFrom(forms[i], p+1);
      filename := '';
      contenttype := '';
      kind := 'x';
      if length(value) > 0 then begin
        if value[1] in ['<','@'] then begin
          kind := value[1];
          delete(value, 1, 1);
        end else kind := 'x';
        if value[1] in ['"', ''''] then begin
          paren := value[1];
          nvalue := '';
          t := 2;
          while (t <= length(value)) do begin
            if value[t] = '\' then begin
              inc(t);
              nvalue += value[t];
            end else if value[t] = paren then break
            else nvalue += value[t];
            inc(t);
          end;
          delete(value, 1, t+1);
        end else begin
          p := pos(';', value);
          if p = 0 then p := length(value) + 1;
          nvalue := copy(value, 1, p-1);
          delete(value, 1, p);
        end;
        if kind in ['<', '@'] then begin
          if kind = '@' then filename := nvalue;
          nvalue := strLoadFromFileUTF8(nvalue);
        end;
        if value <> '' then begin
          for temp in strSplit(value, ';', false) do begin
1329 1330 1331 1332
            value := temp;
            case strSplitGet('=', value) of
              'filename': filename := value;
              'type':     contenttype := value;
1333 1334 1335 1336 1337 1338
              else raise EXidelException.Create('Unknown option in '+forms[i]);
            end;
          end;
        end;
      end;
      mime.addFormData(name, nvalue, filename, contenttype, '');
1339
    end;
1340
    data := mime.compose(header);
1341
    header := TMIMEMultipartData.HeaderForBoundary(header);
1342
  end;
1343
begin
1344
  if variablesReplaced then exit; //this method is still supposed to be only called once
1345 1346 1347
  url := TProcessingContext.replaceEnclosedExpressions(url);
  method := TProcessingContext.replaceEnclosedExpressions(method);
  data := TProcessingContext.replaceEnclosedExpressions(data);
1348
  header := TProcessingContext.replaceEnclosedExpressions(header);
1349 1350 1351 1352
  multipart := TProcessingContext.replaceEnclosedExpressions(multipart);

  if multipart <> '' then parseFormMime();
  variablesReplaced := true;
1353 1354 1355 1356
end;

function THTTPRequest.equalTo(ft: TFollowTo): boolean;
begin
1357
  result := (ft is THTTPRequest) and (THTTPRequest(ft).url = url) and (THTTPRequest(ft).method = method) and (THTTPRequest(ft).data = data) and (THTTPRequest(ft).header = header) and (THTTPRequest(ft).multipart = multipart);
1358 1359
end;

1360 1361 1362 1363 1364
function isStdin(s: string): boolean;
begin
  result := (s = '-') or (s = 'stdin:///') or (s = 'stdin:') or (s = 'stdin://');
end;

1365 1366
procedure closeMultiArgs(var oldValue: string; separator: string); forward;

Benito van der Zander's avatar
Benito van der Zander committed
1367 1368
procedure THTTPRequest.readOptions(reader: TOptionReaderWrapper);
var temp: string;
1369 1370
  tempxq: IXQValue;
  h: IXQValue;
Benito van der Zander's avatar
Benito van der Zander committed
1371
begin
1372
  inherited;
1373
  if method <> '' then exit; //already initialized, must abort to keep stdin working (todo: allow postfix data/method options?)
1374
  reader.read('raw-url', rawURL);
1375
  reader.read('header', header);
1376 1377 1378 1379 1380 1381 1382 1383 1384
  if reader is TOptionReaderFromObject then begin
    variablesReplaced := true;
    if reader.read('headers', tempxq) then begin
      for h in tempxq  do begin
        if header <> '' then header := header + #13#10;
        header += h.toString;
      end;
    end;
  end;
1385 1386 1387
  method:='GET';
  if reader.read('post', data) then
    method:='POST';
1388 1389
  if reader.read('form', multipart) then
    method:='POST';
1390 1391
  if reader.read('method', temp) then begin
    method:=temp;
1392
    if isStdin(method) then
1393 1394
      method := trim(strReadFromStdin);
  end;
1395 1396
  if reader is TOptionReaderFromCommandLine then closeMultiArgs(data, '&');
  header := trim(header);
1397
  if isStdin(data) then
1398
    data := strReadFromStdin;
Benito van der Zander's avatar
Benito van der Zander committed
1399 1400
end;

1401 1402 1403 1404 1405 1406 1407 1408 1409 1410
{ TFileRequest }

constructor TFileRequest.create(aurl: string);
begin
  url := aurl;
end;

function TFileRequest.clone: TFollowTo;
begin
  result := TFileRequest.create(url);
1411
  result.assign(self);
1412 1413
end;

1414
function TFileRequest.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1415
begin
1416
  if not allowFileAccess then raise EXidelException.Create('File access not permitted');
1417
  parent.printStatus('**** Retrieving: '+url+' ****');
1418
  result := TDataObject.create(strLoadFromFileUTF8(url), url);
1419
  with result as TDataObject do begin
1420
    fbaseurl:=fileNameExpandToURI(fbaseurl);
1421 1422 1423
    finputFormat := self.inputFormat;
    frecursionLevel := arecursionLevel;
  end;
1424 1425 1426 1427
end;

procedure TFileRequest.replaceVariables;
begin
1428
  url := TProcessingContext.replaceEnclosedExpressions(url);
1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445
end;

function TFileRequest.equalTo(ft: TFollowTo): boolean;
begin
  result := (ft is TFileRequest) and (TFileRequest(ft).url = url);
end;

{ TDirectDataRequest }

constructor TDirectDataRequest.create(adata: string);
begin
  data := adata;
end;

function TDirectDataRequest.clone: TFollowTo;
begin
  result := TDirectDataRequest.create(data);
1446
  result.assign(self);
1447 1448
end;

1449
function TDirectDataRequest.retrieve(parent: TProcessingContext; arecursionLevel: integer): IData;
1450 1451
var
  partialData: String;
1452
begin
1453 1454 1455
  partialData := data;
  if length(partialData) > 80 then begin SetLength(partialData, 80); partialData += '...'; end;
  result := TDataObject.Create(data, 'data:,'+partialData);
1456