-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeneral-conference-talks.js
246 lines (212 loc) · 6.84 KB
/
general-conference-talks.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
"use strict";
/**
 * Look up the first node matching a CSS selector.
 *
 * @param {string} sel - CSS selector passed straight to `querySelector`.
 * @param {object} [$el] - Root to search in; any falsy value falls back to
 *   the global `document`.
 * @returns {*} Whatever `querySelector` on the chosen root returns.
 */
function $(sel, $el) {
  const $root = $el || document;
  return $root.querySelector(sel);
}
/**
 * Look up every node matching a CSS selector.
 *
 * @param {string} sel - CSS selector passed straight to `querySelectorAll`.
 * @param {object} [$el] - Root to search in; any falsy value falls back to
 *   the global `document`.
 * @returns {*} Whatever `querySelectorAll` on the chosen root returns.
 */
function $$(sel, $el) {
  const $root = $el || document;
  return $root.querySelectorAll(sel);
}
// There should be 5 sessions with 6-9 talks per session
// (plus one header per each), so anything below 5 * 6 matches is treated
// as a sign that the selectors no longer fit the page.
var CARDS_MINIMUM = 5 * 6;
// Matches both the session header links and the talk links in the nav list.
var CARDS_SELECTOR = "nav ul.doc-map ul.doc-map li a";
// Element inside a card whose text is used as the title.
var TITLE_SELECTOR = "h4";
// A card whose title ends with this text is a session header, not a talk.
var SESSION_SUFFIX = "Session";
// Captures the quoted value assigned to window.__INITIAL_STATE__ in the
// talk page's HTML. The dot is escaped so only the literal property access
// matches.
var MP3_PAGE_STATE_RE = /window\.__INITIAL_STATE__\s*=\s*"([^"]+)"/;
// Captures each double-quoted https URL that contains "cdn" followed later
// by "mp3"; the surrounding quotes are part of the capture.
var MP3_CDN_URL_RE = /("https:[^"]*cdn[^"]*mp3[^"]*")/g;
/**
 * Build a talk record from one talk link in the navigation list.
 *
 * Every failure is reported with `window.alert` and then thrown, so the
 * problem is visible even when the console is closed.
 *
 * @param {object} $session - Session header the talk belongs to; only its
 *   `index` property is read here.
 * @param {object} $talk - Talk link element; title, speaker and description
 *   are parsed from it and its `href` becomes `talk_url`.
 * @param {number} talkNumber - Number of the talk within its session;
 *   stored zero-padded to two digits.
 * @param {number} domListIndex - Stored unchanged as `_domListIndex`.
 * @returns {object} Record with `session_number`, `talk_number`, `title`,
 *   `speaker`, `description`, `talk_url`, `_domListIndex`, `$session` and
 *   `$talk`.
 * @throws {Error} When there is no session, no title, no speaker, or no
 *   link target.
 */
function parseTalk($session, $talk, talkNumber, domListIndex) {
  // TODO add session info
  if (!$session) {
    // Without this guard the `$session.index` reads below would fail with
    // an opaque TypeError.
    let err = new Error(`a talk was found before any session header`);
    console.warn($talk);
    window.alert(`Error: ${err.message}`);
    throw err;
  }

  let title = parseTitle($talk);
  if (!title) {
    let err = new Error(`the title selectors have changed`);
    console.warn($talk);
    window.alert(`Error: ${err.message}`);
    throw err;
  }

  let speaker;
  try {
    speaker = parseSpeaker($talk);
  } catch (e) {
    console.warn($talk);
    // Include the underlying reason in the message and keep the original
    // error reachable through `cause`.
    let err = new Error(
      `Could not parse speaker info for '${title}': ${e.message}`,
      { cause: e }
    );
    window.alert(err.message);
    throw err;
  }

  let description = parseDescription($talk);
  let url = $talk.href;

  let sessionNumberStr = $session.index.toString();
  let talkNumberStr = talkNumber.toString().padStart(2, "0");

  let talk = {
    session_number: sessionNumberStr,
    talk_number: talkNumberStr,
    title: title,
    speaker: speaker,
    description: description,
    talk_url: url,
    _domListIndex: domListIndex,
    $session: $session,
    $talk: $talk,
  };
  console.info(
    `✅ was able to parse talk ${$session.index}.${talkNumber}'s description:`,
    talk
  );

  if (!talk.talk_url) {
    let err = new Error(`the talk details link has changed`);
    console.warn($talk);
    window.alert(`Error: ${err.message}`);
    throw err;
  }
  console.info(
    `✅ Found "${talk.title}" by ${talk.speaker}'s description with link:`
  );
  console.info(`   ${talk.talk_url}`);

  return talk;
}
/**
 * Extract the talk's mp3 URL from the HTML of its detail page.
 *
 * The page state is matched with `MP3_PAGE_STATE_RE`, decoded with `atob`,
 * and searched with `MP3_CDN_URL_RE`; the first match is JSON-parsed to
 * strip its quotes. Every failure is reported with `window.alert` and then
 * thrown.
 *
 * @param {string} html - Full HTML text of the talk page.
 * @returns {string} The first matching mp3 URL.
 * @throws {Error} When any step fails; if `atob` or `JSON.parse` threw, the
 *   original exception is attached as `cause`.
 */
function parseMp3Link(html) {
  // Un-react-ify the page data
  var m = html.match(MP3_PAGE_STATE_RE);
  if (!m) {
    let err = new Error(
      `could not get talk description page state: ${MP3_PAGE_STATE_RE} is no longer the valid selector`
    );
    window.alert(`Error: ${err.message}`);
    throw err;
  }

  var jsonText;
  var decodeError;
  try {
    jsonText = atob(m[1]);
  } catch (e) {
    // reported just below together with the "decoded to nothing" case
    decodeError = e;
  }
  if (!jsonText) {
    let err = new Error(
      `could not parse talk description page state: ${MP3_PAGE_STATE_RE} is no longer the valid selector`,
      decodeError ? { cause: decodeError } : undefined
    );
    console.warn(html);
    window.alert(`Error: ${err.message}`);
    throw err;
  }

  var mp3Match = jsonText.match(MP3_CDN_URL_RE);
  if (!mp3Match?.[0]) {
    let err = new Error(
      `could not find talk's mp3 link: ${MP3_CDN_URL_RE} is no longer the valid selector`
    );
    console.warn(jsonText);
    window.alert(`Error: ${err.message}`);
    throw err;
  }

  var mp3Url;
  var parseError;
  try {
    // The match still includes its surrounding double quotes, so parsing it
    // as JSON yields the bare URL string.
    mp3Url = JSON.parse(mp3Match[0]);
  } catch (e) {
    // reported just below together with the "parsed to nothing" case
    parseError = e;
  }
  if (!mp3Url) {
    let err = new Error(
      `could not parse talk's mp3 link: ${MP3_CDN_URL_RE} is no longer the valid selector`,
      parseError ? { cause: parseError } : undefined
    );
    console.warn(mp3Match);
    window.alert(`Error: ${err.message}`);
    throw err;
  }

  return mp3Url;
}
/**
 * Read a card's title text.
 *
 * @param {object} $card - Card element to search with `TITLE_SELECTOR`.
 * @returns {string} The trimmed `innerText` of the title element, or "" when
 *   the element or its text is missing.
 */
function parseTitle($card) {
  const $title = $(TITLE_SELECTOR, $card);
  const rawText = $title?.innerText;
  return rawText?.trim() ?? "";
}
/**
 * Read a card's speaker name from its `h6` element.
 *
 * Unlike the title and description parsers, this one has no fallback: a
 * missing element or text surfaces as a thrown TypeError.
 *
 * @param {object} $card - Card element to search.
 * @returns {string} The trimmed `innerText` of the `h6` element.
 */
function parseSpeaker($card) {
  const $speaker = $("h6", $card);
  const rawName = $speaker.innerText;
  return rawName.trim();
}
/**
 * Read a card's description text.
 *
 * @param {object} $card - Card element to search for `.description`.
 * @returns {string} The trimmed `innerText` of the description element, or
 *   "" when the element or its text is missing.
 */
function parseDescription($card) {
  // sustainings do not have a description, so absence is not an error
  const $description = $(".description", $card);
  const rawText = $description?.innerText;
  return rawText?.trim() ?? "";
}
/**
 * Derive a download filename from an mp3 URL.
 *
 * @param {string} mp3Url - Absolute URL; parsed with `new URL`, so an
 *   invalid URL throws.
 * @param {string} prefix - Text placed before the file name, joined by "_".
 * @returns {string} `<prefix>_<last path segment of the URL>`.
 */
function parseNameByIndex(mp3Url, prefix) {
  const { pathname } = new URL(mp3Url);
  const segments = pathname.split("/");
  const basename = segments[segments.length - 1];
  return `${prefix}_${basename}`;
}
/**
 * Escape text so it can be interpolated into HTML text or a quoted
 * attribute value.
 *
 * @param {*} text - Value to escape; converted with `String()` first.
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by
 *   character references.
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Scrape the talk list on the current page, fetch each talk's page to find
 * its mp3 URL, replace the first child of each talk link with a download
 * link, and print a curl script for all mp3s to the console.
 *
 * Talk pages are fetched one at a time. Every failure is reported with
 * `window.alert` and then thrown.
 *
 * @returns {Promise<void>}
 * @throws {Error} When too few cards or talks are found, when a talk page
 *   cannot be fetched, or when any parser fails.
 */
async function main() {
  // using var on purpose because it's easier to test in the console
  var $$cards = $$(CARDS_SELECTOR);
  var hasCards = $$cards?.length >= CARDS_MINIMUM;
  if (!hasCards) {
    let err = new Error(
      `the talk card selector has changed: '${CARDS_SELECTOR}' is no longer correct.`
    );
    window.alert(`Error: ${err.message}`);
    throw err;
  }
  $$cards = Array.from($$cards);
  console.info(`✅ selected ${$$cards.length} talk cards`);
  console.log($$cards);

  var $$sessions = [];
  var $session;
  var talks = [];
  var sessionTalkNumber = 0;
  $$cards.forEach(function ($card, i) {
    let title = parseTitle($card);
    console.log(`Title: ${title}`);

    // Session headers share the card selector; they start a new session
    // and reset the per-session talk counter instead of producing a talk.
    if (title.endsWith(SESSION_SUFFIX)) {
      $session = $card;
      $$sessions.push($card);
      // TODO parse session info
      $session.index = $$sessions.length;
      sessionTalkNumber = 0;
      return;
    }

    console.log($card);
    sessionTalkNumber += 1;
    let talk = parseTalk($session, $card, sessionTalkNumber, i);
    talks.push(talk);
    console.info("");
  });

  var hasTalks = talks.length >= CARDS_MINIMUM;
  if (!hasTalks) {
    let err = new Error(
      `the talk card parser has changed: not enough talks found`
    );
    window.alert(`Error: ${err.message}`);
    throw err;
  }
  console.log(talks);
  console.info("");
  console.info(`✅✅ Parsed Descriptions of All Talks!`);
  console.info("");

  let curls = [];
  for (let talk of talks) {
    let page = await window.fetch(talk.talk_url);
    if (!page.ok) {
      // An error page would otherwise be misreported further down as a
      // changed selector.
      let err = new Error(
        `could not fetch talk page '${talk.talk_url}': HTTP ${page.status}`
      );
      window.alert(`Error: ${err.message}`);
      throw err;
    }
    let html = await page.text();
    let mp3Url = parseMp3Link(html);
    console.info(
      `✅ Found ${talk.session_number}.${talk.talk_number} "${talk.title}" by ${talk.speaker}'s mp3 url:`
    );
    console.info(`   ${mp3Url}`);

    let filename = parseNameByIndex(
      mp3Url,
      `${talk.session_number}-${talk.talk_number}`
    );
    // Store both on the record so the final console.log(talks) shows them.
    talk.mp3Url = mp3Url;
    talk.filename = filename;

    let curl = [
      `echo Downloading ${filename} ...`,
      `curl -fsSL -A 'ChJCDev/1.0' -o "${filename}" '${mp3Url}'`,
    ].join("\n");
    curls.push(curl);

    // Values scraped from the page are escaped before being put back into
    // markup.
    let downloadHtml = `<h6 style="font-size: 8pt;">${escapeHtml(talk.session_number)}.${escapeHtml(talk.talk_number)} Download <a href="${escapeHtml(mp3Url)}" target="_blank" download="${escapeHtml(filename)}">"${escapeHtml(talk.title)}" by ${escapeHtml(talk.speaker)} ⬇️</a></h6>`;
    talk.$talk.insertAdjacentHTML("beforeend", downloadHtml);
    // Drop the link's original first child now that the download line has
    // been appended after it.
    talk.$talk.children[0].remove();
  }

  let sh = curls.join("\n\n");
  console.log(talks);
  console.info("");
  console.info(`✅✅ Fetched MP3s URLs for of All Talks!`);
  console.info("");
  console.info(sh);
}
// Entry point: run the scraper and print any failure to the console so the
// rejected promise does not go unhandled.
main().catch((err) => {
  console.error("Error:");
  console.error(err);
});