Skip to content

Commit

Permalink
use the latest radar rule parser
Browse files Browse the repository at this point in the history
  • Loading branch information
yindaheng98 committed Mar 30, 2024
1 parent 62b7c96 commit f0632cd
Show file tree
Hide file tree
Showing 8 changed files with 267 additions and 444 deletions.
1 change: 0 additions & 1 deletion config.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ config = {
logger_level: envs.LOGGER_LEVEL || 'info',
no_logfiles: envs.NO_LOGFILES,
is_package: envs.IS_PACKAGE,
rsshub_parser: envs.RSSHUB_PARSER || 'rssaid',
reply_received: envs.REPLY_RECEIVED || "false",
valid_username: envs.VALID_USERNAME,
unsubscribe_db_path: envs.UNSUB_DB_PATH || 'db/unsubscribe.json',
Expand Down
34 changes: 32 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "rss-telegram-bot",
"version": "1.6.8",
"version": "1.7",
"description": "RSSBot 是一个能帮你订阅 RSSHub 的 Telegram Bot",
"main": "index.js",
"scripts": {
Expand Down Expand Up @@ -29,6 +29,7 @@
"node-telegram-bot-api": "^0.56.0",
"psl": "^1.8.0",
"route-recognizer": "^0.3.4",
"tldts": "^6.1.11",
"winston": "^3.6.0"
}
}
22 changes: 2 additions & 20 deletions rsshub/index.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,9 @@
const got = require('../utils/got');
const logger = require('../utils/logger');
const config = require('../config')
const { getPageRSSHub: rssaidGet } = require('./rule-driver/rssaid');
const { getPageRSSHub: radarGet } = require('./rule-driver/radar');
const { getPageRSSHub } = require('./rsshub');
const { getRules } = require("./rules");

/**
 * Look up RSSHub feeds for a page using the rule driver selected by
 * `config.rsshub_parser`.
 *
 * When the setting is exactly 'radar' the radar driver is awaited and its
 * result returned as-is. For any other value the rssaid driver is awaited
 * and its result is passed through `JSON.parse` before being returned.
 *
 * @param {{url: *, host: *, pathname: *, html: *, rules: *}} data - Page
 *   description; `host` and `pathname` are only forwarded to the rssaid driver
 *   (`pathname` is passed on under the key `path`).
 * @returns {Promise<*>} Whatever the selected driver produced (parsed from JSON
 *   in the rssaid case).
 */
async function getPageRSSHub(data) {
    const { url, host, pathname, html, rules } = data;
    const useRadar = config.rsshub_parser === 'radar';
    if (!useRadar) {
        const serialized = await rssaidGet({ url, host, path: pathname, html, rules });
        return JSON.parse(serialized);
    }
    return await radarGet({ url, html, rules });
}

async function getRSSHubLink(url) {
const { host, pathname } = new URL(url);
const rules = await getRules();
Expand All @@ -32,7 +14,7 @@ async function getRSSHubLink(url) {
} catch (e) {
logger.warn(`Cannot get html from ${url}`);
}
const feeds = await getPageRSSHub(
const feeds = getPageRSSHub(
{ url, host, pathname, html, rules }
);
for (let feed of feeds) {
Expand Down
222 changes: 222 additions & 0 deletions rsshub/rsshub.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Generate: npx tsc src/lib/rsshub.ts --target es5
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getWebsiteRSSHub = exports.getPageRSSHub = void 0;
var route_recognizer_1 = require("route-recognizer");
var tldts_1 = require("tldts");
var rules_1 = require("./rules");
/**
 * Turn a rule's target into a concrete route by substituting `/:name` and
 * `/:name?` placeholders with values from `params`.
 *
 * @param {Object} rule - Rule whose `target` is a route template string or a
 *   function `(params, url)` that returns one.
 * @param {Object} params - Parameter values keyed by placeholder name.
 * @param {string} url - Page URL, forwarded to function targets.
 * @returns {string} The resolved route, or "" when the rule cannot be resolved.
 */
function resolveRuleTarget(rule, params, url) {
    var template;
    if (typeof rule.target === "function") {
        try {
            template = rule.target(params, url);
        }
        catch (error) {
            // Deliberate best-effort: a rule whose target throws simply does not match.
            return "";
        }
    }
    else if (typeof rule.target === "string") {
        template = rule.target;
    }
    // A function target may return anything; only a non-empty string is a usable route.
    if (!template || typeof template !== "string") {
        return "";
    }
    // No `:param` segments means the template already is the final route.
    var placeholders = template.match(/\/:\w+\??(?=\/|$)/g);
    if (!placeholders) {
        return template;
    }
    var route = template;
    for (var i = 0; i < placeholders.length; i++) {
        var placeholder = placeholders[i];
        var optional = placeholder.endsWith("?");
        var name = placeholder.slice(2).replace(/\?$/, "");
        var value = params[name];
        if (value) {
            // Use a replacer function so `$&`, `$1`, `$'`... inside the value are
            // inserted literally instead of being read as replacement patterns.
            route = route.replace(new RegExp("/:" + name + "\\??(?=/|$)"), function () {
                return "/" + value;
            });
        }
        else if (optional) {
            // Missing optional parameter: drop it and everything after it,
            // otherwise the remaining route would be invalid.
            route = route.replace(new RegExp("/:" + name + "\\?(/.*)?$"), "");
            break;
        }
        else {
            // Missing required parameter: the rule does not apply.
            return "";
        }
    }
    // No final "contains ':'" double-check on purpose: `:` may legitimately be
    // part of a parameter value.
    return route;
}
/**
 * Resolve one rule against the recognized URL parameters and report the
 * outcome through callbacks.
 *
 * @param {Object} rule - Rule with a string or function `target`.
 * @param {Object} params - Parameter values keyed by name.
 * @param {string} url - Page URL, forwarded to function targets.
 * @param {string} html - Page HTML; accepted for interface compatibility, not read here.
 * @param {function(string): void} success - Called with the resolved, non-empty route.
 * @param {function(): void} fail - Called when no route could be resolved.
 */
function ruleHandler(rule, params, url, html, success, fail) {
    var route = resolveRuleTarget(rule, params, url);
    if (route) {
        success(route);
    }
    else {
        fail();
    }
}
/**
 * Join two text fragments, inserting a single space between them when the
 * character on either side of the seam is an ASCII letter or digit.
 *
 * If either fragment is empty or missing, the other one is returned unchanged
 * (or "" when both are missing).
 *
 * @param {string} str1 - Leading fragment.
 * @param {string} str2 - Trailing fragment.
 * @returns {string} The joined text.
 */
function formatBlank(str1, str2) {
    if (!str1 || !str2) {
        return (str1 || "") + (str2 || "");
    }
    var asciiAlnum = /[a-zA-Z0-9]/;
    var seamLeft = str1[str1.length - 1];
    var seamRight = str2[0];
    var needsSpace = asciiAlnum.test(seamLeft) || asciiAlnum.test(seamRight);
    var separator = needsSpace ? " " : "";
    return str1 + separator + str2;
}
/**
 * Pick the rule list that applies to a subdomain of a site.
 *
 * Only real arrays are accepted, so metadata entries (e.g. `_name`) and
 * inherited properties such as `constructor` can never be mistaken for rules
 * when the subdomain comes from an arbitrary URL.
 *
 * @param {Object} domainRules - Rules of one domain, keyed by subdomain ("." = none).
 * @param {string} subdomain - Subdomain of the page, possibly empty.
 * @returns {Array|undefined} The matching rule list, if any.
 */
function selectSubdomainRules(domainRules, subdomain) {
    var ruleListAt = function (key) {
        return Array.isArray(domainRules[key]) ? domainRules[key] : undefined;
    };
    var list = ruleListAt(subdomain || ".");
    if (!list) {
        // "www" and the bare domain are treated as interchangeable.
        if (subdomain === "www") {
            list = ruleListAt(".");
        }
        else if (!subdomain) {
            list = ruleListAt("www");
        }
    }
    return list;
}
/**
 * Expand a rule's `source` into every path pattern that should be tried.
 *
 * route-recognizer supports neither optional segments nor partial matching, so
 * both are emulated here: the `?` marker is stripped (route-recognizer would
 * treat it as part of the segment name) and trailing `/:param` segments are
 * peeled off one at a time, because many rule authors do not mark optional
 * segments.
 *
 * @param {string|string[]|*} source - Rule source(s); other types yield no patterns.
 * @returns {string[]} De-duplicated patterns in discovery order.
 */
function expandRuleSources(source) {
    var declared = Array.isArray(source)
        ? source
        : typeof source === "string"
            ? [source]
            : [];
    var expanded = [];
    declared.forEach(function (pattern) {
        var current = pattern.replace(/(\/:\w+)\?(?=\/|$)/g, "$1");
        expanded.push(current);
        var tail = current.match(/\/:\w+$/);
        while (tail) {
            current = current.slice(0, current.length - tail[0].length);
            expanded.push(current);
            tail = current.match(/\/:\w+$/);
        }
    });
    // Some rule authors already list the shortened variants themselves.
    return expanded.filter(function (item, index) { return expanded.indexOf(item) === index; });
}
/**
 * Find the RSSHub routes that the radar rules offer for a specific page.
 *
 * @param {{url: string, html: string, rules: *}} data - Page URL, page HTML
 *   (forwarded to the rule handler) and the raw rules handed to `parseRules`.
 * @returns {Array<Object>} One entry per resolved rule, in match order:
 *   `{ title, url, path }` where `url` is "{rsshubDomain}" + path, or
 *   `{ title, url, isDocs: true }` pointing at the rule docs when the handler
 *   reports success without a route. Empty when the URL is invalid or no rule
 *   applies.
 */
function getPageRSSHub(data) {
    var url = data.url, html = data.html;
    var rules = (0, rules_1.parseRules)(data.rules);
    var pageUrl;
    var parsedDomain;
    try {
        pageUrl = new URL(url);
        parsedDomain = (0, tldts_1.parse)(pageUrl.hostname);
    }
    catch (error) {
        // Not a parseable URL: nothing can match.
        return [];
    }
    if (!parsedDomain || !parsedDomain.domain) {
        return [];
    }
    var domainRules = rules && rules[parsedDomain.domain];
    if (!domainRules) {
        return [];
    }
    var pageRules = selectSubdomainRules(domainRules, parsedDomain.subdomain);
    if (!pageRules) {
        return [];
    }
    // Parsed once up front; the trailing slash is dropped before matching.
    var pathname = pageUrl.pathname.replace(/\/$/, "");
    var recognized = [];
    pageRules.forEach(function (rule, index) {
        expandRuleSources(rule.source).forEach(function (source) {
            // One router per pattern so that every matching pattern is reported.
            var router = new route_recognizer_1();
            router.add([
                {
                    path: source,
                    handler: index,
                },
            ]);
            var matches = router.recognize(pathname);
            if (matches && matches[0]) {
                recognized.push(matches[0]);
            }
        });
    });
    // ruleHandler invokes its callbacks synchronously, so results are collected
    // directly; any exception propagates to the caller instead of being lost in
    // an un-awaited promise.
    var result = [];
    recognized.forEach(function (recog) {
        var rule = pageRules[recog.handler];
        ruleHandler(rule, recog.params, url, html, function (parsed) {
            var title = formatBlank(domainRules._name ? "Current" : "", rule.title);
            if (parsed) {
                result.push({
                    title: title,
                    url: "{rsshubDomain}" + parsed,
                    path: parsed,
                });
            }
            else {
                result.push({
                    title: title,
                    url: rule.docs,
                    isDocs: true,
                });
            }
        }, function () {
            // Rule matched the path but could not be resolved: contributes nothing.
        });
    });
    return result;
}
exports.getPageRSSHub = getPageRSSHub;
/**
 * List a documentation entry for every rule registered under the domain of
 * `data.url`, across all of its subdomain keys and independent of the path.
 *
 * Keys of the domain's rule object that start with "_" (such as `_name`) are
 * skipped; the values of all other keys are concatenated in key order.
 *
 * @param {{url: string, rules: *}} data - Page URL and the raw rules handed to
 *   `parseRules`.
 * @returns {Array<{title: string, url: *, isDocs: boolean}>} One entry per
 *   rule, titled with the domain's `_name` joined to the rule title and linking
 *   to the rule's `docs`. Empty when the URL cannot be parsed, has no domain,
 *   or the domain has no rules.
 */
function getWebsiteRSSHub(data) {
    var pageUrl = data.url;
    var allRules = (0, rules_1.parseRules)(data.rules);
    var domainInfo;
    try {
        domainInfo = (0, tldts_1.parse)(new URL(pageUrl).hostname);
    }
    catch (error) {
        return [];
    }
    var siteRules = domainInfo && domainInfo.domain
        ? allRules[domainInfo.domain]
        : undefined;
    if (!siteRules) {
        return [];
    }
    // Gather the rules of every subdomain key into one flat list.
    var collected = [];
    for (var key in siteRules) {
        if (key[0] === "_") {
            continue;
        }
        Array.prototype.push.apply(collected, siteRules[key]);
    }
    var entries = [];
    for (var i = 0; i < collected.length; i++) {
        entries.push({
            title: formatBlank(siteRules._name, collected[i].title),
            url: collected[i].docs,
            isDocs: true,
        });
    }
    return entries;
}
exports.getWebsiteRSSHub = getWebsiteRSSHub;
Loading

0 comments on commit f0632cd

Please sign in to comment.