xidelcgi.lpr 17.1 KB
Newer Older
Benito van der Zander's avatar
Benito van der Zander committed
1
program xidelcgi;
2 3 4 5

{$mode objfpc}{$H+}

uses
6
  xidelbase, simplehtmltreeparser,
7
  rcmdlinecgi, {utf8tools, }sysutils, strutils, math, bbutils, extendedhtmlparser, xquery.internals.common, xidelcrt
8 9
  { you can add units after this };

10
const
  // Sample input document shown in the "data" textarea when the request
  // carries no data of its own. Lines are joined with CR LF.
  ExampleHTML: string =
    '<html><body>'#13#10 +
    '<table id="t1"><tbody>'#13#10 +
    '<tr><td>Hello</td></tr>'#13#10 +
    '</tbody></table>'#13#10 +
    '<table id="t2"><tbody>'#13#10 +
    '<tr><td>123</td><td>other</td></tr>'#13#10 +
    '<tr><td>foo</td><td>columns</td></tr>'#13#10 +
    '<tr><td>bar</td><td>are</td></tr>'#13#10 +
    '<tr><td>xyz</td><td>ignored</td></tr>'#13#10 +
    '</tbody></table>'#13#10 +
    '</body></html>';

  // Default query for the "template" and "auto" kinds.
  ExampleTemplate: string =
    '<table id="t2">'#13#10 +
    '<template:loop>'#13#10 +
    '<tr><td>{col:=text()}</td></tr>'#13#10 +
    '</template:loop>'#13#10 +
    '</table>';

  // Default query for the "css" kind.
  ExampleCSS: string = '#t2 tr td:first-child';

  // Default query for all XPath kinds.
  ExampleXPath: string = 'id("t2") / tbody / tr / td[1]';

  // Default queries for the XQuery kinds; they differ only in the version
  // declaration on the first line.
  ExampleXQuery1: string =
    'xquery version "1.0";'#13#10 +
    'declare function local:test($table as element()){'#13#10 +
    '  $table / tbody / tr / td[1]'#13#10 +
    '};'#13#10 +
    'local:test(id("t2"))';
  ExampleXQuery3_0: string =
    'xquery version "3.0";'#13#10 +
    'declare function local:test($table as element()){'#13#10 +
    '  $table / tbody / tr / td[1]'#13#10 +
    '};'#13#10 +
    'local:test(id("t2"))';
  ExampleXQuery3_1: string =
    'xquery version "3.1";'#13#10 +
    'declare function local:test($table as element()){'#13#10 +
    '  $table / tbody / tr / td[1]'#13#10 +
    '};'#13#10 +
    'local:test(id("t2"))';

  // Canned result printed for the template/auto example when the request has
  // neither data nor a query.
  ExampleTemplateResult: string =
    'col: 123'#13#10 +
    'col: foo'#13#10 +
    'col: bar'#13#10 +
    'col: xyz';

  // Canned result printed for every other example kind in the same situation.
  ExampleOtherResult: string =
    '123'#13#10 +
    'foo'#13#10 +
    'bar'#13#10 +
    'xyz';
55 56 57 58


var
  // Set to true by printPre when the request asked for raw output; the main
  // program then skips printPost.
  wasRaw: Boolean = false;
  // Links to the current request, built in printPre and emitted by printPost.
  permalink: String;
  rawpermalink: String;
60 61 62

// Shorthand used by the page-building code: forwards one line of output to
// xidelcrt.wln.
procedure w(const s: string);
begin
  xidelcrt.wln(s);
end;

66 67 68 69
// Maps an extraction kind to the short name used in the
// "Xidel-Detected-Extraction-Kind" header and in the page script.
// ekAuto and any kind that is not listed yield 'auto'.
function extractKindToString(kind: TExtractionKind): string;
begin
  result := 'auto';
  case kind of
    ekXPath2:      result := 'xpath2';
    ekXPath3_0:    result := 'xpath3';
    ekXPath3_1:    result := 'xpath3_1';
    ekPatternHTML: result := 'html-pattern';
    ekPatternXML:  result := 'xml-pattern';
    ekCSS:         result := 'css';
    ekXQuery1:     result := 'xquery1';
    ekXQuery3_0:   result := 'xquery3';
    ekXQuery3_1:   result := 'xquery3_1';
  end;
end;

83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
// Original text-driver callbacks of xidelOutputFile. printPre saves them
// before installing HTMLEscapedFileWriteFunc and printPost restores them.
var
  oldinoutfunc, oldflushfunc: CodePointer;

// Text-driver hook that printPre installs as both inoutfunc and flushfunc of
// xidelOutputFile while the result is being written into the <textarea>.
// It inspects the pending buffer (the first t.bufpos bytes of t.bufptr^): if
// it contains none of '<', '>' or '&' the buffer is handed unchanged to the
// saved oldinoutfunc; otherwise an escaped copy is written instead.
// NOTE(review): relies on oldinoutfunc consuming t.bufpos bytes from
// t.bufptr^ on each call, as the stock file write driver does - confirm.
Procedure HTMLEscapedFileWriteFunc(var t:TextRec);
type
  // Signature of a text-driver callback; used to call the saved CodePointer.
  FileFunc = Procedure(var t : TextRec);
  // Builds the escaped form of the pending buffer and pushes it through the
  // original driver in pieces that fit into the text buffer.
  procedure writeEscaped;
  var helper: TXHTMLStrBuilder;
    buffer: string;
    bufferptr, bufferend: PChar;
    size: integer;
  begin
    // presumably the second argument is an initial capacity - verify against
    // TXHTMLStrBuilder.init
    helper.init(@buffer, 2*t.bufpos);
    // presumably appends the HTML-escaped form of the bytes - verify
    helper.appendHTMLText(@t.bufptr^[0], t.bufpos);
    helper.final;
    bufferptr := pchar(buffer);
    bufferend := bufferptr + length(buffer);
    while bufferptr < bufferend do begin
      // The escaped text can be longer than the text buffer, so copy at most
      // t.bufsize bytes back per round.
      size := min(t.bufsize, bufferend - bufferptr);
      move(bufferptr^, t.bufptr^, size);
      t.bufpos := size;
      FileFunc(oldinoutfunc)(t);
      bufferptr += size;
    end;
  end;

var
  needescape: Boolean;
  i: Integer;
begin
  // Stop scanning at the first byte that needs escaping.
  needescape := false;
  for i := 0 to t.bufpos - 1 do begin
    needescape := t.bufptr^[i] in ['<','>','&'];
    if needescape then break;
  end;
  //writeln(stderr, needescape, ' ', t.bufpos);
  if not needescape then FileFunc(oldinoutfunc)(t)
  else writeEscaped;
end;

123 124 125 126 127 128 129 130 131
type

{ TCommandLineReaderBreaker }

 // Helper descendant of TCommandLineReaderCGI that adds two setters for
 // already declared options. onPostParseCmdLine casts mycmdline to this class
 // to overwrite option values after parsing.
 // NOTE(review): mycmdline is created as a plain TCommandLineReaderCGI, so the
 // cast presumably only serves to reach findProperty - confirm its visibility.
 TCommandLineReaderBreaker = class(TCommandLineReaderCGI)
  // Overwrites the string value of option n with v.
  procedure setString(const n,v: string);
  // Overwrites the flag value of option n with v.
  procedure setFlag(const n: string; v: boolean);
end;

132
// Extraction kind reported to the page script as lastQueryEditMode.
// Assigned in printPre, emitted in printPost.
var firstExtractionKind: string;
133

134
procedure printPre(extractionKind: TExtractionKind);
135 136 137 138 139
  // Returns the query text to associate with extraction kind t: the query
  // from the request when t is the requested kind and a query was sent,
  // otherwise the built-in example for t. Kinds without their own example
  // (including 'template' and 'auto') get the template example.
  function example(t: string): string;
  var
    userQuery: string;
  begin
    userQuery := mycmdline.readString('extract');
    if (userQuery <> '') and (mycmdline.readString('extract-kind') = t) then begin
      result := userQuery;
      exit;
    end;
    case t of
      'xpath', 'xpath2', 'xpath3', 'xpath3.0', 'xpath3.1': result := ExampleXPath;
      'xquery1': result := ExampleXQuery1;
      'xquery', 'xquery3', 'xquery3.0': result := ExampleXQuery3_0;
      'xquery3.1': result := ExampleXQuery3_1;
      'css': result := ExampleCSS;
      else result := ExampleTemplate;
    end;
  end;

  // Builds one radio button of the "extract-kind" group.
  //   t: value of the radio button (extraction kind name)
  //   n: caption written after the button
  // The button is checked when t is the requested kind; the requests 'xpath'
  // and 'xquery' select the 'xpath3.0' and 'xquery3.0' buttons.
  // The onclick handler passes the query for t (see example) to the page's
  // changeexample function as a single-quoted JavaScript string inside a
  // double-quoted HTML attribute, so the text is escaped for both layers.
  function kind(t, n: string): string;
  var
    requested: string;
    isChecked: boolean;
  begin
    requested := mycmdline.readString('extract-kind');
    isChecked := (requested = t)
              or ((requested = 'xpath') and (t = 'xpath3.0'))
              or ((requested = 'xquery') and (t = 'xquery3.0'));

    result := '<input type="radio" name="extract-kind" value="'+t+'"';
    if isChecked then result += ' checked';
    // '&' must become the complete reference '&amp;'. Without the semicolon a
    // query such as "a &lt; b" would reach the script in a corrupted form.
    // CR LF is listed before the lone CR / LF patterns so that a pair still
    // becomes a single \n. The lone forms are escaped as well, because a raw
    // line break inside the JavaScript string literal is a syntax error.
    result += ' onclick="changeexample('''+t+''', '''
            + StringsReplace(example(t),
                ['\',  #13#10, #13,  #10,  '''',  '&',     '"',      '<',    '>'],
                ['\\', '\n',   '\r', '\n', '\''', '&amp;', '&quot;', '&lt;', '&gt;'],
                [rfReplaceAll])
            + '''); update();"';
    result += '/> '+ n;
  end;
  // Builds a checkbox for flag option t followed by caption n. The box is
  // checked when the flag is set in the current request.
  function checkbox(t, n: string): string;
  var
    checkedAttr: string;
  begin
    if mycmdline.readFlag(t) then checkedAttr := ' checked'
    else checkedAttr := '';
    result := '<input type="checkbox" name="' + t + '" value="true"' + checkedAttr + '/> ' + n;
  end;
  // Builds a drop-down list for string option t.
  //   t:    option name, used as the name of the <select>
  //   n:    caption written in front of the list; '' for none. A colon is
  //         appended only when the caption does not already end with one.
  //   list: the selectable values, in display order
  // The entry equal to the option's current value is marked as selected; if
  // there is no exact match the comparison is repeated ignoring case.
  function select(t, n: string; list: array of string): string;
  var
    cur: String;
    i: Integer;
    s: Integer;
  begin
    // Some callers pass captions that already end in ':' or ': '.
    // Normalise them so the page does not show a doubled colon.
    n := TrimRight(n);
    if n <> '' then begin
      if n[length(n)] <> ':' then n += ':';
      n += ' ';
    end;
    // <select> is not a void element, so it is opened with '>' rather than
    // '/>' and closed explicitly below.
    result := n + '<select name="'+t+'">';
    cur := mycmdline.readString(t);

    s := -1;
    for i := 0 to high(list) do
      if list[i] = cur then begin s := i; break; end;
    if s = -1 then
      for i := 0 to high(list) do
        if lowercase(list[i]) = lowercase(cur) then begin s := i; break; end;

    // The request value itself is deliberately not written to the page: it is
    // user-controlled, and echoing it (even inside an HTML comment) would
    // allow markup injection.
    for i := 0 to high(list) do
      result += '<option value="'+list[i]+'"'+ifthen(i = s, ' selected') +'>'+list[i]+'</option>';
    result += '</select> ';
  end;

// Main body of printPre: writes the HTTP header and everything of the page
// that precedes the query result.
// In raw mode only the header is written and wasRaw is set, so the result
// follows unmodified. Otherwise the form is written, the result <textarea> is
// opened, and the output driver of xidelOutputFile is replaced by
// HTMLEscapedFileWriteFunc so that the result is escaped while it is written
// into the textarea. printPost undoes the hook.
begin
  outputHeader := '';
  setOutputFileName('stdout:///', mycmdline);

  if (mycmdline.readFlag('case-sensitive')) then
    xqueryDefaultCollation:='http://www.w3.org/2005/xpath-functions/collation/codepoint';

  if mycmdline.readFlag('raw') then begin
    case mycmdLine.readString('output-format') of
      //'xml', 'xml-wrapped': w('Content-Type: application/xml');
      //'html': w('Content-Type: text/html');
      'json', 'json-wrapped': w('Content-Type: application/json');
      {'adhoc':} else w('Content-Type: text/plain');
    end;
    w('Xidel-Detected-Extraction-Kind: '+extractKindToString(extractionKind));
    w('');
    wasRaw := true;
    exit;
  end;

  // Kind reported to the page script: the explicitly requested one, else the
  // detected one when a query was sent, else none.
  if mycmdline.readString('extract-kind') <> 'auto' then firstExtractionKind := mycmdline.readString('extract-kind')
  else if mycmdline.readString('extract') <> '' then firstExtractionKind := extractKindToString(extractionKind)
  else firstExtractionKind:='';

  w('Content-Type: text/html');
  w('');

  w('<!DOCTYPE html><html><head>');
  w('<title>Template / XPath 3.0 / XQuery 3.0 / CSS 3 Selector / JSONiq Online Tester</title>');
  w('<link rel="stylesheet" href="../codemirror/codemirror.css">');
  w('<link rel="stylesheet" href="http://code.jquery.com/ui/1.10.2/themes/smoothness/jquery-ui.css">');
  w('<link rel="stylesheet" type="text/css" href="../cgi.css" />');
  w('<link rel="stylesheet" type="text/css" href="cgi.css" />');
  w('<meta charset="utf-8" /> ');
  w('<script src="../cgi.js"></script>');
  w('<script src="cgi.js"></script>');
  w('</head><body onload="init()">');
  w('<h1>Template / XPath 3.0 / XQuery 3.0 / CSS 3 Selector / JSONiq Online Tester</h1>');
  w('(You can find the documentation below)<br><br>');
  w('<form method="POST" action="./xidelcgi">');
  w('<div id="html">'+select('input-format', 'HTML/XML-Input file', ['auto', 'html', 'xml', 'xml-strict'])
    + '<br><textarea name="data" rows="18" cols="80"  >'+xmlStrEscape(IfThen(mycmdline.readString('data') <> '', mycmdline.readString('data'), ExampleHTML))+'</textarea></div>');
  w('<div id="template">'+kind('template', 'Template')+kind('xpath3.0', 'XPath 3.0')+ kind('xpath2', '2.0')+kind('xquery3.0', 'XQuery 3.0')+kind('xquery1', '1')+kind('css', 'CSS 3.0 selectors')+kind('auto', 'Autodetect'));
  w('<br><textarea name="extract" rows=18 cols=80 >');
  // Both branches are escaped: character references are still decoded inside
  // a <textarea>, so the example text is treated like the user's query and
  // like the data textarea above.
  if mycmdline.readString('extract') <> '' then w(xmlStrEscape(mycmdline.readString('extract')))
  else w(xmlStrEscape(example(mycmdline.readString('extract-kind'))));
  w('</textarea></div>');
  w('<br><br><input type="submit"> '+checkbox('no-auto-update', 'disable auto refresh')+' <span >' {id="codemirrorspan"} + checkbox('no-highlighting', 'disable syntax highlighting') +'</span>');
  w('<br><span class="options"><b>Output Options</b>: ');
  w(  select('printed-node-format', 'Node format:', ['text', 'xml', 'html']) +  select('output-format', 'Output format:', ['adhoc', 'html', 'xml', 'xml-wrapped', 'json-wrapped', 'bash', 'cmd']));
  w(checkbox('print-type-annotations', 'Show types') + checkbox('hide-variable-names', 'Hide variable names') );
  w('<br><b>Compatibility</b>: '+select('compatibility', '', ['Standard XQuery', 'Standard XQuery+JSONiq', 'Enable all extensions', 'Custom'])
    + '<span id="compatibilityOptions">'+ checkbox('no-extended-strings', 'Disable extended strings (e.g. x"{$varname}") ')
    + checkbox('no-json', 'Disable JSONiq (e.g. {"a": 1}("a"))') + checkbox('no-json-literals', 'Disable JSONiq literals (true,false,null)')
    + checkbox('only-json-objects', 'Only JSON types in objects (e.g. {"a": null} != {"a": ()})')
    + select('dot-notation', '&nbsp;&nbsp;&nbsp; Allow dot notation (e.g. {"a": 1}.a): ', ['off', 'unambiguous', 'on'])
    + checkbox('strict-type-checking', 'Strict type checking') + checkbox('strict-namespaces', 'Strict namespaces')
    + checkbox('case-sensitive', 'case sensitive'));
  w('</span>');

  w('</span>');
  w('<br>Work in progress: ' + kind('xpath3.1', 'XPath 3.1') + kind('xquery3.1', 'XQuery 3.1')+'<br>');
  w('</form>');

 { w('<script src="../codemirror/codemirror.js"></script>');
  w('<script src="../codemirror/javascript/javascript.js"></script>');
  w('<script src="../codemirror/css/css.js"></script>');
  w('<script src="../codemirror/xml/xml.js"></script>');
  w('<script src="../codemirror/htmlmixed/htmlmixed.js"></script>');}
  w('<script src="../codemirror/codemirror-compressed-js-html-xml-css.js"></script>');
  w('<script src="../codemirror/xquery/xquery.js"></script>');
  w('<script src="../codemirror/jquery-1.9.1.js"></script>');
  w('<script src="../codemirror/jquery-ui-1.10.2.custom.min.js"></script>');

  w('<hr>');
  w('Result of the above expression applied to the above HTML file:<br>');
  w('<textarea id="result" rows="30" cols="100">');

  // Nothing was submitted: show the canned result of the selected example.
  if  (mycmdline.readString('data') = '') and (mycmdline.readString('extract') = '') then
    case mycmdline.readString('extract-kind') of
    'template', 'auto', '':  w(ExampleTemplateResult);
    else w(ExampleOtherResult);
    end;

  permalink := 'http://www.videlibri.de/cgi-bin/xidelcgi?'+TCommandLineReaderCGI(mycmdline).urlEncodeParams;
  rawpermalink := 'http://www.videlibri.de/cgi-bin/xidelcgi?raw=true&'+TCommandLineReaderCGI(mycmdline).urlEncodeParams;

  // Push out everything written so far before the driver is swapped, so that
  // the page markup above is not escaped.
  flush(xidelOutputFile);

  oldinoutfunc := TextRec(xidelOutputFile).inoutfunc;
  oldflushfunc := TextRec(xidelOutputFile).flushfunc;
  TextRec(xidelOutputFile).inoutfunc := @HTMLEscapedFileWriteFunc;
  TextRec(xidelOutputFile).flushfunc := @HTMLEscapedFileWriteFunc;
end;

280
// Compatibility presets. Index 1 = 'Standard XQuery', 2 = 'Standard
// XQuery+JSONiq', 3 = 'Enable all extensions' (see onPostParseCmdLine).
// Each entry is a ';'-separated list; the same arrays are also written into
// the page script by printPost.
var
  // Flags that the preset switches on.
  compatibiltiyOptionsOn: array[1..3] of string = (
    'no-extended-strings;no-json;no-json-literals;only-json-objects;strict-type-checking;strict-namespaces;case-sensitive',
    'no-extended-strings;only-json-objects;strict-type-checking;strict-namespaces;case-sensitive',
    ''
  );
  // Flags that the preset switches off.
  compatibiltiyOptionsOff: array[1..3] of string = (
    '',
    'no-json;no-json-literals',
    'no-extended-strings;no-json;no-json-literals;only-json-objects;strict-type-checking;strict-namespaces;case-sensitive'
  );
  // String options that the preset assigns, as name=value pairs.
  compatibiltiyOptionsChange: array[1..3] of string = (
    'dot-notation=off',
    'dot-notation=off',
    'dot-notation=unambiguous'
  );

296
// Writes everything of the page that follows the query result: closes the
// result <textarea>, prints the permalinks, the description and link list,
// and the script block that hands the request state to the page's JavaScript.
// Must only be called after printPre has run in non-raw mode, because it
// restores the output driver that printPre replaced.
procedure printPost;

  // Formats one entry of the link list.
  function link(ref, title: string; desc: string = ''; attribs: string = ''): string;
  begin
    result := '&nbsp;&nbsp;&nbsp;&nbsp;<a href="'+ref+'"'+attribs+'>'+title+'</a>'+desc+'.<br>';
  end;

  // Escapes s for use between double quotes in an inline <script> block.
  // '<' and '>' are written as hex escapes so the value can neither close the
  // script element nor open a comment.
  function jsEscape(const s: string): string;
  begin
    if s = '' then exit('');
    result := StringsReplace(s,
                ['\',  '"',  '<',    '>',    #13,  #10],
                ['\\', '\"', '\x3C', '\x3E', '\r', '\n'],
                [rfReplaceAll]);
  end;

begin
  // Flush while the escaping driver is still installed, so result text that
  // is still buffered gets escaped; then restore the original driver.
  flush(xidelOutputFile);
  TextRec(xidelOutputFile).inoutfunc := oldinoutfunc;
  TextRec(xidelOutputFile).flushfunc := oldflushfunc;

  //rest of the page
  w('</textarea><br>');
  // NOTE(review): the links are inserted as built from urlEncodeParams;
  // presumably that percent-encodes quotes - confirm.
  w('<a id="permalink" href="'+permalink+'">permalink</a>, ');
  w('<a id="rawpermalink" href="'+rawpermalink+'">result-only</a>');
  //w('</div>');

  w('<br><br><br><hr>');

  //w(cgi.QueryString);

  w('<h2>What is this about?</h2>');
  w('Here you can test HTML templates, CSS 3 selectors, standard XPath 2.0 / 3.0 / XQuery 1.0 / 3.0 and JSONiq expressions.<br>');
  w('It is an example for my Pascal Internet Tools library written for VideLibri and implementing these queries.<br>');
  //w('The template example shows the two most basic template commands (read/loop) and copies the first column of a table.'+' .<br>');

  w('<br>You can find more details in the corresponding unit documentation:<br>');
  w(link('http://benibela.de/documentation/internettools/xquery.TXQueryEngine.html', 'Documentation of the XQuery / XPath / CSS 3 selector implementation'));
  w(link('http://benibela.de/documentation/internettools/extendedhtmlparser.THtmlTemplateParser.html', 'Documentation of the template syntax'));
  w('<br>Other related links:<br>');
  w(link('http://www.benibela.de/sources_en.html#internettools', 'Internet Tools library', ', the library page'));
  w(link('http://www.benibela.de/documentation/internettools/xqts.html', 'XQuery Test Suite Results'));
  w(link('http://www.videlibri.de/xidel.html', 'Xidel command line tool', ', a little tool using this library for web page downloading / scraping'));
  w(link('https://sourceforge.net/p/videlibri/code/ci/tip/tree/', 'Source repository', '', ' rel="nofollow"'));
  w(link('https://github.com/benibela/xidel', 'Github mirror (Xidel excluding library)', '', ' rel="nofollow"'));
  w(link('https://bitbucket.org/benibela/xidel', 'Bitbucket mirror (Xidel excluding library)', '', ' rel="nofollow"'));


  // firstExtractionKind can hold the raw 'extract-kind' request value (see
  // printPre), so it is escaped before it is placed in the script.
  w('<script>lastQueryEditMode="'+jsEscape(firstExtractionKind)+'"; ');
  w('compatibilityOn = ["'+compatibiltiyOptionsOn[1]+'", "'+compatibiltiyOptionsOn[2]+'", "'+compatibiltiyOptionsOn[3]+'"];');
  w('compatibilityOff = ["'+compatibiltiyOptionsOff[1]+'", "'+compatibiltiyOptionsOff[2]+'", "'+compatibiltiyOptionsOff[3]+'"];');
  w('compatibilityChange = ["'+compatibiltiyOptionsChange[1]+'", "'+compatibiltiyOptionsChange[2]+'", "'+compatibiltiyOptionsChange[3]+'"];');
  w('activateCodeMirrors(); </script>');

  w('<div id="sf-logo"><a href="http://sourceforge.net/projects/videlibri"><img src="http://sflogo.sourceforge.net/sflogo.php?group_id=359854&amp;type=1" width="125" height="37" border="0" alt="SourceForge.net Logo" /></a></div>');

  {sl := tstringlist.create;
  cgi.AddResponseLn('reqvar:');
  cgi.GetRequestVarList(sl);
  for i:=0 to sl.Count-1 do
    cgi.AddResponseLn(sl[i]+'<br>');

  cgi.AddResponseLn('cgivar:');
  cgi.GetCGIVarList(sl);
  for i:=0 to sl.Count-1 do
    cgi.AddResponseLn(sl[i]+'<br>');
  sl.free;}

  w('</body></html>');
end;

358 359 360 361 362 363 364 365
{ TCommandLineReaderBreaker }



// Hook run after the command line / request has been parsed. Expands the
// 'compatibility' preset into the individual options: the preset's "on"
// flags are set, its "off" flags are cleared and its name=value pairs are
// assigned. Any other value of 'compatibility' (e.g. 'Custom') leaves the
// individual options as sent.
procedure onPostParseCmdLine;
var
  preset: Integer;
  k: Integer;
  names: TStringArray;
  pair: TStringArray;
  breaker: TCommandLineReaderBreaker;
begin
  case lowercase(mycmdline.readString('compatibility')) of
    'standard xquery': preset := 1;
    'standard xquery+jsoniq': preset := 2;
    'enable all extensions': preset := 3;
    else exit; //'Custom'
  end;

  breaker := TCommandLineReaderBreaker(mycmdline);

  names := strSplit(compatibiltiyOptionsOn[preset], ';', false);
  for k := 0 to high(names) do
    breaker.setFlag(names[k], true);

  names := strSplit(compatibiltiyOptionsOff[preset], ';', false);
  for k := 0 to high(names) do
    breaker.setFlag(names[k], false);

  names := strSplit(compatibiltiyOptionsChange[preset], ';', false);
  for k := 0 to high(names) do begin
    pair := strSplit(names[k], '=');
    breaker.setString(pair[0], pair[1]);
  end;
end;

// Overwrites the stored string value of the option named n with v.
// NOTE(review): what happens for an undeclared n depends on findProperty,
// which is not visible here - confirm it raises rather than returning nil.
procedure TCommandLineReaderBreaker.setString(const n, v: string);
begin
  findProperty(n)^.strvalue:=v;
end;

// Overwrites the stored flag value of the option named n with v.
// NOTE(review): what happens for an undeclared n depends on findProperty,
// which is not visible here - confirm it raises rather than returning nil.
procedure TCommandLineReaderBreaker.setFlag(const n: string; v: boolean);
begin
  findProperty(n)^.flagvalue:=v;
end;

400
// Program entry: configures xidel for CGI use, declares the options that only
// exist in the CGI front end, installs the page hooks and runs the query.
begin
  // CGI mode with network and file access switched off.
  xidelbase.cgimode := true;
  xidelbase.allowInternetAccess := false;
  xidelcrt.allowFileAccess := false;

  xidelbase.mycmdline := TCommandLineReaderCGI.create;

  // Hooks: expand the compatibility preset after parsing, and print the page
  // head right before the first output.
  xidelbase.onPostParseCmdLine := @onPostParseCmdLine;
  xidelbase.onPreOutput := @printPre;

  mycmdline.beginDeclarationCategory('CGI Only options');
  mycmdline.declareFlag('raw', 'Only prints the output of the expression');
  mycmdline.declareFlag('no-auto-update', 'No automatical javascript based autoupdate');
  mycmdline.declareFlag('no-highlighting', 'No syntax highlighting');
  mycmdline.declareFlag('case-sensitive', 'Case sensitive');
  mycmdline.declareString('compatibility', 'XQuery compatibility options', 'Enable all extensions');

  xidelbase.perform;

  // Raw responses consist of the result only; otherwise finish the page.
  if not wasRaw then printPost;
end.