...
 
Commits (4)
......@@ -147,6 +147,7 @@ src)
tar -cvzf /tmp/xidel-$VERSION.src.tar.gz --exclude=.hgtags --exclude=.hg xidel-$VERSION-src
fileUpload xidel-$VERSION.src.tar.gz "$UPLOAD_PATH"
cp /tmp/xidel-$VERSION.src.tar.gz .
;;
......@@ -178,12 +179,12 @@ downloadTable)
else "E"
};
<table class="downloadTable">
<tr><th>Operating System</th><th>Filename</th><th>Size</th><th>SHA-1</th></tr>
<tr><th>Operating System</th><th>Filename</th><th>Size</th><th>SHA-256</th></tr>
{ for <TABLE id="files_list"><t:loop><TR class="file"><TH>
{link := {"verboseName": verboseName(.), "url": resolve-uri(.) || "/download", "name": ./data()}}</TH><td/><td>{link.size := .}</td></TR></t:loop></TABLE> in (/)
order by ordering($link.name)||$link.verboseName
return <tr><td>{$link.verboseName}</td><td><a href="{$link.url}">{$link.name}</a></td><td>{$link.size/text()}</td>
<td>{if (file:exists($link.name)) then extract(system("sha1sum "||$link.name), " *[0-9A-Fa-f]+") else ""}</td>
<td>{if (file:exists($link.name)) then extract(system("sha256sum "||$link.name), " *[0-9A-Fa-f]+") else ""}</td>
</tr>}
<tr><td>Mac 10.8</td><td colspan="2"><a href="https://www.evernote.com/shard/s69/sh/ff1e78f3-a369-4855-b18f-6184ce789c45/f3511927d0fb356ce883835f2eb712e0">externally prebuilt version</a> and compile instructions.</td></tr>
</table>' --printed-node-format xml > /tmp/downloadTable.html;
......
......@@ -164,6 +164,7 @@ var //output options
outputFormat: TOutputFormat;
windowsCmdPercentageEscape: string;
hasOutputEncoding: (oeAbsent,oeConvert,oePassRaw) = oeAbsent;
outputEncoding: TSystemCodePage;
outputHeader, outputFooter, outputSeparator: string;
//outputArraySeparator: array[toutputformat] of string = ('', ', ', '</e><e>', '', '', '', '');
{$ifdef win32}systemEncodingIsUTF8: boolean = true;{$endif}
......@@ -223,6 +224,8 @@ var htmlparser:THtmlTemplateParserBreaker;
multipage: TTemplateReaderBreaker;
multipagetemp: TMultiPageTemplate;
currentRoot: TTreeNode;
xidelOutputFile: TextFile;
xidelOutputFileName: string;
{$ifdef windows}
backgroundColor: integer = 0;
stdoutTextAttributes: integer = 0;
......@@ -249,9 +252,9 @@ begin
if err and not isStderrTTY then exit;
if not err and not isStdoutTTY then exit;
if color <> lastConsoleColor then begin
if err then Flush(stderr) else flush(StdOut);
if err then Flush(stderr) else flush(xidelOutputFile);
{$ifdef unix}
if err then f := stderr else f := stdout;
if err then f := stderr else f := xidelOutputFile;
write(f, colorCodes[color]);
{$endif}
{$ifdef windows}
......@@ -262,35 +265,28 @@ begin
end;
end;
function setTextEncoding(var t: TextFile; e: string): integer;
var
codepage: Integer;
function strEncodingFromNameXidel(const e: string): integer;
begin
codepage := strEncodingFromName(e);
if codepage = CP_NONE then begin
if striEqual(e, 'input') then codepage := -1
else writeln(stderr, 'Unknown encoding: ',e)
end;
result := codepage;
if codepage <> -1 then
SetTextCodePage(t, codepage);
result := strEncodingFromName(e);
if result = CP_NONE then
writeln(stderr, 'Unknown encoding: ',e)
end;
procedure setOutputEncoding(e: string);
var
codepage: Integer;
begin
codepage := setTextEncoding(output, e);
if codepage <> -1 then begin
if e <> 'input' then begin
hasOutputEncoding := oeConvert;
//SetTextCodePage(StdErr, codepage);
outputEncoding := strEncodingFromNameXidel(e);
end else begin
hasOutputEncoding := oePassRaw;
SetTextCodePage(Output, CP_ACP); //all our strings claim to be ACP (=UTF8) so there should be no conversion?
//SetTextCodePage(StdErr, CP_ACP);
outputEncoding := CP_ACP; //all our strings claim to be ACP (=UTF8) so there should be no conversion?
end;
if xidelOutputFileName <> '' then
SetTextCodePage(xidelOutputFile, outputEncoding);
end;
procedure setOutputFileName(n: string); forward;
procedure initTerminalAndColoring;
{$ifdef windows}
var
......@@ -317,7 +313,7 @@ begin
isStderrTTY := getfiletype(StdErrorHandle) = FILE_TYPE_CHAR;
{$endif}
if not isStdoutTTY and (hasOutputEncoding = oeAbsent) then setOutputEncoding('utf-8');
if not isStdinTTY or mycmdline.existsProperty('stdin-encoding') then SetTextEncoding(input, mycmdline.readString('stdin-encoding'));
if not isStdinTTY or mycmdline.existsProperty('stdin-encoding') then SetTextCodePage(input, strEncodingFromNameXidel(mycmdline.readString('stdin-encoding')));
end;
case colorizing of
......@@ -361,7 +357,7 @@ var
begin
if s = '' then exit;
{$IFDEF FPC_HAS_CPSTRING}
write(s);
write(xidelOutputFile, s);
{$ELSE}
fpc 3 is required now
if (outputEncoding = eUTF8) or (outputEncoding = eUnknown) then write(s)
......@@ -856,6 +852,7 @@ TExtraction = class(TDataProcessing)
printTypeAnnotations, hideVariableNames: boolean;
printedNodeFormat: TTreeNodeSerialization;
printedJSONFormat: (jisDefault, jisPretty, jisCompact);
inplaceOverride: boolean;
inputFormat: TInputFormat;
......@@ -878,6 +875,7 @@ private
currentFollowList: TFollowToList;
currentData: IData;
procedure pageProcessed({%H-}unused: TMultipageTemplateReader; parser: THtmlTemplateParser);
procedure prepareForOutput(const data: IData);
end;
......@@ -1251,6 +1249,7 @@ begin
ifHTML, ifXML, ifXMLStrict: color := cXML;
ifJSON, ifJSONStrict: color := cJSON;
end;
if xidelOutputFileName = '' then setOutputFileName('stdout:///');
wcolor(data.rawdata, color);
exit;
end;
......@@ -1857,6 +1856,8 @@ begin
end;
reader.read('input-format', inputFormat);
reader.read('in-place', inplaceOverride);
end;
procedure TExtraction.setVariables(v: string);
......@@ -2381,6 +2382,36 @@ begin
end;
end;
// Redirects all regular output to the target named by n.
//
//   n = 'stdout:...'  -> write to standard output
//   n = ''            -> finish the current output (footer, flush, close) and
//                        fall back to standard output without emitting a header
//   n = 'http(s)://'  -> rejected with an exception
//   anything else     -> treated as a file name (a file: URL prefix is removed),
//                        the file is created/truncated and colorizing is disabled
//
// If the requested target is the one already in use nothing happens. Otherwise
// the previous target receives the output footer (or the default trailing line
// break), is flushed and, unless it is stdout, closed. A new non-empty target
// then receives the output header and, for the wrapped formats, the raw wrapper.
procedure setOutputFileName(n: string);
var
  target: string;
  targetIsWeb, targetIsStdout: boolean;
begin
  targetIsWeb := striBeginsWith(n, 'http://') or striBeginsWith(n, 'https://');
  targetIsStdout := (n = '') or striBeginsWith(n, 'stdout:');

  // xidelOutputFileName stores file targets without their file: URL prefix, so
  // the request has to be normalized the same way before comparing. Comparing
  // the raw n would never match for file: URLs and the same file would be
  // reopened with Rewrite, discarding everything written to it so far.
  if targetIsWeb or targetIsStdout then target := n
  else target := strRemoveFileURLPrefix(n);
  if xidelOutputFileName = target then exit;

  // Finish the output that is currently open.
  if xidelOutputFileName <> '' then begin
    if outputfooter <> '' then wcolor(outputFooter, colorizing)
    else if not mycmdline.existsProperty('output-footer') and not firstItem then wln();
    flush(xidelOutputFile);
    if not striBeginsWith(xidelOutputFileName, 'stdout:') then CloseFile(xidelOutputFile);
  end;

  if targetIsWeb then begin
    // The previous output has been finished and nothing new was opened, so
    // do not keep a name around that has no open file behind it.
    xidelOutputFileName := '';
    raise Exception.Create('Cannot output to webpage');
  end;

  xidelOutputFileName := target;
  if targetIsStdout then begin
    // The empty name means "no explicit output": bind to stdout directly and
    // leave colorizing untouched. Treating '' as a regular file name would set
    // colorizing to cNever, and the caller that ends the output with '' checks
    // colorizing afterwards to decide whether to restore the console attributes.
    xidelOutputFile := StdOut;
  end else begin
    colorizing := cNever; // no color codes in files
    AssignFile(xidelOutputFile, xidelOutputFileName);
    Rewrite(xidelOutputFile);
  end;

  if hasOutputEncoding <> oeAbsent then
    SetTextCodePage(xidelOutputFile, outputEncoding);

  if n = '' then exit; // output was only being closed, no header for it
  if outputHeader <> '' then wcolor(outputHeader, colorizing);
  if outputFormat in [ofJsonWrapped, ofXMLWrapped] then needRawWrapper;
end;
function bashStrEscape(s: string): string;
begin
if not strContains(s, #13) and not strContains(s, #10) then
......@@ -2571,6 +2602,12 @@ begin
THtmlTemplateParserBreaker(htmlparser).closeVariableLog;
end;
// Selects the output target before the results for data are printed.
// With the in-place flag set, output goes to the location given by the base
// URI of data; otherwise stdout is selected, but only if no output target
// has been chosen so far.
procedure TExtraction.prepareForOutput(const data: IData);
begin
  if inplaceOverride then begin
    setOutputFileName(data.baseUri);
    exit;
  end;

  // An output target is already active: keep it.
  if xidelOutputFileName <> '' then exit;

  setOutputFileName('stdout:///');
end;
function TExtraction.process(data: IData): TFollowToList;
function termContainsVariableDefinition(term: TXQTerm): boolean;
......@@ -2614,6 +2651,7 @@ begin
else htmlparser.TemplateParser.parsingModel := pmStrict;
htmlparser.parseTemplate(extract); //todo reuse existing parser
htmlparser.parseHTML(data); //todo: full url is abs?
prepareForOutput(data);
pageProcessed(nil,htmlparser);
end;
ekXPath2, ekXPath3, ekCSS, ekXQuery1, ekXQuery3: begin
......@@ -2633,6 +2671,7 @@ begin
end;
parent.loadDataForQuery(data, extractQueryCache);
THtmlTemplateParserBreaker(htmlparser).closeVariableLog;
prepareForOutput(data);
if termContainsVariableDefinition(extractQueryCache.Term) then begin
parent.evaluateQuery(extractQueryCache, data, true);
printExtractedVariables(htmlparser, true);
......@@ -2656,6 +2695,7 @@ begin
if extract = '' then raise Exception.Create('Multipage-action-template is empty');
multipagetemp.loadTemplateFromString(extract, ExtractFileName(extractBaseUri), ExtractFileDir(extractBaseUri));
multipage.setTemplate(multipagetemp);
prepareForOutput(data);
multipage.perform(templateActions);
end
else raise EXidelException.Create('Impossible');
......@@ -3593,6 +3633,10 @@ begin
end;
end;
// Handler for deprecation warnings: prints the warning text as one line on
// stderr. pseudoSelf and sender are not used.
procedure OnWarningDeprecated({%H-}pseudoSelf: tobject; {%H-}sender: TXQueryEngine; warning: string);
begin
  WriteLn(StdErr, warning);
end;
function encodingName(e: TSystemCodePage): string;
begin
......@@ -3719,6 +3763,7 @@ begin
mycmdLine.declareString('input-format', 'Input format: auto, html, xml, xml-strict, json, json-strict', 'auto');
mycmdLine.declareFlag('xml','Abbreviation for --input-format=xml --output-format=xml');
mycmdLine.declareFlag('html','Abbreviation for --input-format=html --output-format=html');
mycmdLine.declareFlag('in-place', 'Override the input file');
mycmdLine.beginDeclarationCategory('Debug options:');
......@@ -3753,6 +3798,7 @@ begin
contextStack[0] := baseContext;
xpathparser := htmlparser.QueryEngine;
xpathparser.OnWarningDeprecated := TXQWarningEvent(procedureToMethod(TProcedure(@onWarningDeprecated)));
if xpathparser.StaticContext.namespaces = nil then htmlparser.QueryEngine.StaticContext.namespaces := TNamespaceList.Create;
xpathparser.StaticContext.namespaces.add(XMLNamespace_Expath_File);
xpathparser.StaticContext.namespaces.add(TNamespace.create(XMLNamespaceURL_XQTErrors, 'err'));
......@@ -3902,9 +3948,6 @@ begin
if assigned(onPreOutput) then onPreOutput(guessExtractionKind(mycmdline.readString('extract')));
if outputHeader <> '' then wcolor(outputHeader, colorizing);
if outputFormat in [ofJsonWrapped, ofXMLWrapped] then needRawWrapper;
htmlparser.TemplateParser.repairMissingEndTags:=false;
htmlparser.TemplateParser.repairMissingStartTags:=false;
......@@ -3976,8 +4019,7 @@ begin
writeItem('SET #'+usedCmdlineVariables[i].name +'='+ inttostr(usedCmdlineVariables[i].count));
end;
if outputfooter <> '' then wcolor(outputFooter, colorizing)
else if not mycmdline.existsProperty('output-footer') and not firstItem then wln();
setOutputFileName('');
{$ifdef windows}if colorizing <> cNever then begin
if (stdoutTextAttributes <> 0) and isStdoutTTY then
SetConsoleTextAttribute(StdOutputHandle, stdoutTextAttributes);
......