Commit 531bd4e8 authored by Alberto Mardegan
Browse files

Add HTML parser for RSS discovery

parent 06bbf698
Pipeline #385775408 failed with stages
in 3 minutes and 26 seconds
import qbs 1.0
import qbs.Environment
import "src/qhtmlparser/sources.qbs" as HtmlParserSources
Project {
name: "MiTubo"
......@@ -22,6 +23,9 @@ Project {
'QT_DISABLE_DEPRECATED_BEFORE=0x050900',
'QT_DEPRECATED_WARNINGS=0',
]
cpp.includePaths: [
htmlParserSources.prefix,
]
cpp.enableExceptions: false
Group {
......@@ -39,6 +43,8 @@ Project {
"feed_parser.h",
"feed_store.cpp",
"feed_store.h",
"html_parser.cpp",
"html_parser.h",
"main.cpp",
"playlist.cpp",
"playlist.h",
......@@ -81,6 +87,10 @@ Project {
fileTags: "freedesktop.appIcon"
}
HtmlParserSources {
id: htmlParserSources
}
Depends { name: "cpp" }
Depends { name: "Qt.core" }
Depends { name: "Qt.quick" }
......
......@@ -19,6 +19,8 @@
#include "feed_discoverer.h"
#include "html_parser.h"
#include <QDebug>
#include <QRegularExpression>
#include <QUrl>
......@@ -50,6 +52,7 @@ private:
QHash<int, QByteArray> m_roles;
QString m_inputText;
QRegularExpression m_ytChannelRegexp;
HtmlParser m_htmlParser;
FeedDiscoverer *q_ptr;
};
......@@ -62,6 +65,16 @@ FeedDiscovererPrivate::FeedDiscovererPrivate(FeedDiscoverer *q):
m_roles[FeedDiscoverer::UrlRole] = "url";
m_roles[FeedDiscoverer::TitleRole] = "title";
m_roles[FeedDiscoverer::MimeTypeRole] = "mimeType";
QObject::connect(&m_htmlParser, &HtmlParser::gotAlternate,
q, [this](const QString &title,
const QString &type,
const QUrl &url) {
if (type == "application/rss+xml" ||
type == "application/atom+xml") {
addFeed({ url, title, type });
}
});
}
void FeedDiscovererPrivate::clear()
......@@ -111,6 +124,8 @@ void FeedDiscoverer::setInputText(const QString &text)
"YouTube",
"application/atom+xml"
});
} else {
d->m_htmlParser.parseUrl(QUrl::fromUserInput(text));
}
}
......
/*
* Copyright (C) 2021 Alberto Mardegan <mardy@users.sourceforge.net>
*
* This file is part of MiTubo.
*
* MiTubo is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MiTubo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MiTubo. If not, see <http://www.gnu.org/licenses/>.
*/
#include "html_parser.h"
#include <Html/Parser>
#include <QByteArray>
#include <QDebug>
#include <QIODevice>
#include <QNetworkAccessManager>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <QUrl>
using namespace MiTubo;
using namespace it::mardy::Html;
namespace MiTubo {
/*
 * Private implementation of HtmlParser. It derives from Parser and overrides
 * the tag callbacks in order to look for <link rel="alternate"> elements
 * found inside the <head> of the document; it also owns the network objects
 * used to download the page.
 */
class HtmlParserPrivate: public Parser
{
// Position within the document, as tracked by the tag handlers
enum State {
NotStarted = 0, // no <html> start tag seen yet
InHtml, // <html> seen, waiting for <head>
InHead, // inside <head>: <link> elements are examined
};
public:
HtmlParserPrivate(HtmlParser *q);
// Starts downloading `url` and feeds the received data to the parser
void parseUrl(const QUrl &url);
// Tag callbacks overridden from Parser
void handleStartTag(const QString &tag, const Parser::Attributes &attrs) override;
void handleEndTag(const QString &tag) override;
private:
Q_DECLARE_PUBLIC(HtmlParser)
QNetworkAccessManager m_nam;
// Reply of the request in progress; the handlers release it (leaving this
// pointer null) when the download finishes or is no longer needed
QScopedPointer<QNetworkReply> m_reply;
// Set to NotStarted by parseUrl() at the beginning of every download
State m_state;
// Back-pointer to the public object, used by Q_Q()
HtmlParser *q_ptr;
};
} // namespace
/*
 * Creates the private object backing the public instance `q`.
 *
 * The base Parser is constructed with the Parser::ConvertCharRefs flag.
 * m_state is explicitly initialized so that the tag handlers never read an
 * indeterminate value, even if they happen to be invoked before parseUrl().
 */
HtmlParserPrivate::HtmlParserPrivate(HtmlParser *q):
    Parser(Parser::ConvertCharRefs),
    m_state(NotStarted),
    q_ptr(q)
{
}
void HtmlParserPrivate::parseUrl(const QUrl &url)
{
Q_Q(HtmlParser);
m_state = NotStarted;
QNetworkRequest req(url);
req.setAttribute(QNetworkRequest::FollowRedirectsAttribute, true);
m_reply.reset(m_nam.get(req));
QObject::connect(m_reply.data(), &QIODevice::readyRead,
q, [this]() {
if (!m_reply) return;
int statusCode =
m_reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).
toInt();
if (statusCode == 0) return;
if (statusCode < 200 || statusCode >= 300) {
qDebug() << "Aborting because of status code" << statusCode;
QNetworkReply *reply = m_reply.take();
reply->deleteLater();
return;
}
QByteArray data = m_reply->readAll();
feed(QString::fromUtf8(data));
});
QObject::connect(m_reply.data(), &QNetworkReply::finished,
q, [this]() {
if (!m_reply) return;
m_reply->deleteLater();
m_reply.take();
});
QObject::connect(m_reply.data(), &QNetworkReply::errorOccurred,
q, [this](QNetworkReply::NetworkError code) {
qWarning() << "Network error:" << code;
});
}
/*
 * Start-tag callback: tracks the <html> -> <head> nesting and, while inside
 * <head>, reports every <link rel="alternate"> through
 * HtmlParser::gotAlternate().
 *
 * @param tag    name of the element being opened
 * @param attrs  attributes of the element; "rel", "href", "title" and "type"
 *               are the ones read here
 */
void HtmlParserPrivate::handleStartTag(const QString &tag,
                                       const Parser::Attributes &attrs)
{
    Q_Q(HtmlParser);

    if (m_state == NotStarted) {
        if (tag == "html") {
            m_state = InHtml;
        }
    } else if (m_state == InHtml) {
        if (tag == "head") {
            m_state = InHead;
        }
    } else if (m_state == InHead) {
        if (tag == "link" && attrs.value("rel") == "alternate") {
            // handleEndTag() releases the reply as soon as </head> is seen,
            // but the rest of the same data chunk is still parsed: without a
            // reply there is no base URL, and nothing is wanted anymore.
            if (m_reply.isNull()) return;

            QUrl url = attrs.value("href");
            // Use the reply's URL rather than the request's: redirects are
            // followed, and relative links must be resolved against the
            // address the document was actually loaded from.
            const QUrl baseUrl = m_reply->url();
            Q_EMIT q->gotAlternate(attrs.value("title"),
                                   attrs.value("type"),
                                   baseUrl.resolved(url));
        }
    }
}
void HtmlParserPrivate::handleEndTag(const QString &tag)
{
if (m_state == InHead) {
if (tag == "head") {
// Nothing interesting for us anymore
QNetworkReply *reply = m_reply.take();
reply->deleteLater();
}
}
}
/*
 * Creates the parser as a child of `parent` and allocates its private
 * implementation, which keeps a pointer back to this object.
 */
HtmlParser::HtmlParser(QObject *parent):
QObject(parent),
d_ptr(new HtmlParserPrivate(this))
{
}
// Defined here, where HtmlParserPrivate is a complete type, so that the
// QScopedPointer member can delete it.
HtmlParser::~HtmlParser() = default;
void HtmlParser::parseUrl(const QUrl &url)
{
Q_D(HtmlParser);
d->parseUrl(url);
}
/*
* Copyright (C) 2021 Alberto Mardegan <mardy@users.sourceforge.net>
*
* This file is part of MiTubo.
*
* MiTubo is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MiTubo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MiTubo. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MITUBO_HTML_PARSER_H
#define MITUBO_HTML_PARSER_H
#include <QObject>
#include <QScopedPointer>
#include <QString>
class QUrl;
namespace MiTubo {
class HtmlParserPrivate;
/**
 * @brief Downloads an HTML page and reports the alternate representations
 * declared in its head.
 *
 * Call parseUrl() to start; gotAlternate() is emitted once for each
 * <link rel="alternate"> element found inside the <head> of the document.
 */
class HtmlParser: public QObject
{
    Q_OBJECT

public:
    /**
     * @param parent  optional QObject parent
     *
     * The constructor is explicit so that a QObject pointer is never
     * implicitly converted into an HtmlParser.
     */
    explicit HtmlParser(QObject *parent = nullptr);
    virtual ~HtmlParser();

    /**
     * @brief Starts downloading and parsing the page at @p url.
     * @param url  address of the HTML document to inspect
     */
    void parseUrl(const QUrl &url);

Q_SIGNALS:
    /**
     * @brief Emitted for each alternate link found in the document head.
     * @param title  value of the link's "title" attribute
     * @param type   value of the link's "type" attribute
     * @param url    the link's "href", resolved against the page address
     */
    void gotAlternate(const QString &title, const QString &type,
                      const QUrl &url);

private:
    Q_DECLARE_PRIVATE(HtmlParser)
    QScopedPointer<HtmlParserPrivate> d_ptr;
};
} // namespace
#endif // MITUBO_HTML_PARSER_H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment