From 6910811dfdf4f736ae997f68b9fc52f7338ec2dc Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Tue, 3 Mar 2026 22:31:49 -0500 Subject: [PATCH 1/5] Update pollcats --- dbreps2/src/general/pollcats.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dbreps2/src/general/pollcats.rs b/dbreps2/src/general/pollcats.rs index 434b7b2..2e5fa94 100644 --- a/dbreps2/src/general/pollcats.rs +++ b/dbreps2/src/general/pollcats.rs @@ -63,8 +63,10 @@ WHERE FROM page AS p2 JOIN categorylinks ON cl_from = p2.page_id + JOIN linktarget ON cl_target_id = lt_id WHERE - cl_to = p1.page_title + lt_namespace = 14 + AND lt_title = p1.page_title AND p2.page_namespace IN (2, 3) ) AND EXISTS( @@ -73,8 +75,10 @@ WHERE FROM page AS p3 JOIN categorylinks ON cl_from = p3.page_id + JOIN linktarget ON cl_target_id = lt_id WHERE - cl_to = p1.page_title + lt_namespace = 14 + AND lt_title = p1.page_title AND p3.page_namespace = 0 ) LIMIT From c9b490aa0811a754ef8e7794724b63c9601b5fd1 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Tue, 3 Mar 2026 22:38:50 -0500 Subject: [PATCH 2/5] Remove conflictedfiles, switched to on-wiki --- dbreps2/src/enwiki.rs | 4 +- dbreps2/src/enwiki/conflictedfiles.rs | 77 --------------------------- dbreps2/src/main.rs | 3 -- 3 files changed, 1 insertion(+), 83 deletions(-) delete mode 100644 dbreps2/src/enwiki/conflictedfiles.rs diff --git a/dbreps2/src/enwiki.rs b/dbreps2/src/enwiki.rs index e6fd247..3d32404 100644 --- a/dbreps2/src/enwiki.rs +++ b/dbreps2/src/enwiki.rs @@ -16,7 +16,6 @@ along with this program. If not, see . */ mod boteditcount; mod brokenwikiprojtemps; -mod conflictedfiles; mod editcount; mod emptycats; mod featuredbysize; @@ -56,8 +55,7 @@ mod webhostpages; pub use { boteditcount::BotEditCount, brokenwikiprojtemps::BrokenWikiProjTemps, - conflictedfiles::ConflictedFiles, editcount::EditCount, - emptycats::EmptyCats, featuredbysize::FeaturedBySize, + editcount::EditCount, emptycats::EmptyCats, featuredbysize::FeaturedBySize, goodarticlesbysize::GoodArticlesBySize, linkedmiscapitalizations::LinkedMiscapitalizations, linkedmisspellings::LinkedMisspellings, longstubs::LongStubs, diff --git a/dbreps2/src/enwiki/conflictedfiles.rs b/dbreps2/src/enwiki/conflictedfiles.rs deleted file mode 100644 index 50094a1..0000000 --- a/dbreps2/src/enwiki/conflictedfiles.rs +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright 2010, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, -} - -pub struct ConflictedFiles {} - -impl Report for ConflictedFiles { - fn title(&self) -> &'static str { - "Files with conflicting categorization" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* conflictedfiles.rs SLOW_OK */ -SELECT - page_title -FROM - page - JOIN categorylinks AS c1 ON c1.cl_from = page_id - JOIN categorylinks AS c2 ON c2.cl_from = page_id -WHERE - page_namespace = 6 - AND c1.cl_to = 'All_free_media' - AND c2.cl_to = 'All_non-free_media'; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |page_title| Row { page_title }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Files that are categorized in [[:Category:All non-free media]] and [[:Category:All free media]]" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["File"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[:File:{}|{}]]", row.page_title, row.page_title)] - } - - fn code(&self) -> &'static str { - include_str!("conflictedfiles.rs") - } -} diff --git a/dbreps2/src/main.rs b/dbreps2/src/main.rs index 9e233f7..8b6e880 100644 --- a/dbreps2/src/main.rs +++ b/dbreps2/src/main.rs @@ -66,9 +66,6 @@ async fn main() -> Result<()> { .really_run(&enwiki_runner) .await; (enwiki::BotEditCount {}).really_run(&enwiki_runner).await; - (enwiki::ConflictedFiles {}) - .really_run(&enwiki_runner) - .await; (enwiki::EditCount {}).really_run(&enwiki_runner).await; (enwiki::EmptyCats {}).really_run(&enwiki_runner).await; (enwiki::FeaturedBySize { From 1881d89be3a5f0b37e21343fea56093e943a17b6 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Tue, 3 Mar 2026 22:39:02 -0500 Subject: [PATCH 3/5] Updating *bysize reports --- dbreps2/src/enwiki/featuredbysize.rs | 12 +++++++----- dbreps2/src/enwiki/goodarticlesbysize.rs | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dbreps2/src/enwiki/featuredbysize.rs b/dbreps2/src/enwiki/featuredbysize.rs index fdfbd01..a4f8433 100644 --- a/dbreps2/src/enwiki/featuredbysize.rs +++ b/dbreps2/src/enwiki/featuredbysize.rs @@ -45,13 +45,15 @@ impl Report for FeaturedBySize { r#" /* featuredbysize.rs SLOW_OK */ SELECT - page_title + page_title FROM - page - JOIN categorylinks ON cl_from = page_id + page + JOIN categorylinks ON cl_from = page_id + JOIN linktarget ON cl_target_id = lt_id WHERE - cl_to = "Featured_articles" - AND page_namespace = 0 + lt_namespace = 14 + AND lt_title = 'Featured_articles' + AND page_namespace = 0 "# } diff --git a/dbreps2/src/enwiki/goodarticlesbysize.rs b/dbreps2/src/enwiki/goodarticlesbysize.rs index 44565a5..3a7cd82 100644 --- a/dbreps2/src/enwiki/goodarticlesbysize.rs +++ b/dbreps2/src/enwiki/goodarticlesbysize.rs @@ -49,13 +49,15 @@ impl Report for GoodArticlesBySize { r#" /* goodarticlesbysize.rs SLOW_OK */ SELECT - page_title + page_title FROM - page - JOIN categorylinks ON cl_from = page_id + page + JOIN categorylinks ON cl_from = page_id + JOIN linktarget ON cl_target_id = lt_id WHERE - cl_to = "Good_articles" - AND page_namespace = 0 + lt_namespace = 14 + AND lt_title = 'Good_articles' + AND page_namespace = 0 "# } From 0bd09c1bb7043321cc85a86f3b65726dca8c1f0d Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Tue, 3 Mar 2026 22:52:10 -0500 Subject: [PATCH 4/5] Remove a bunch of reports migrated on-wiki --- dbreps2/src/enwiki.rs | 26 ++--- .../src/enwiki/linkedmiscapitalizations.rs | 91 --------------- dbreps2/src/enwiki/linkedmisspellings.rs | 91 --------------- dbreps2/src/enwiki/longstubs.rs | 87 -------------- dbreps2/src/enwiki/lotnonfree.rs | 97 ---------------- dbreps2/src/enwiki/olddeletiondiscussions.rs | 107 ------------------ dbreps2/src/enwiki/stickyprodblps.rs | 2 +- dbreps2/src/main.rs | 11 -- 8 files changed, 10 insertions(+), 502 deletions(-) delete mode 100644 dbreps2/src/enwiki/linkedmiscapitalizations.rs delete mode 100644 dbreps2/src/enwiki/linkedmisspellings.rs delete mode 100644 dbreps2/src/enwiki/longstubs.rs delete mode 100644 dbreps2/src/enwiki/lotnonfree.rs delete mode 100644 dbreps2/src/enwiki/olddeletiondiscussions.rs diff --git a/dbreps2/src/enwiki.rs b/dbreps2/src/enwiki.rs index 3d32404..f0cb651 100644 --- a/dbreps2/src/enwiki.rs +++ b/dbreps2/src/enwiki.rs @@ -20,12 +20,7 @@ mod editcount; mod emptycats; mod featuredbysize; mod goodarticlesbysize; -mod linkedmiscapitalizations; -mod linkedmisspellings; -mod longstubs; -mod lotnonfree; mod newprojects; -mod olddeletiondiscussions; mod orphanedafds; mod orphanedsubtalks; mod overusednonfree; @@ -56,18 +51,15 @@ mod webhostpages; pub use { boteditcount::BotEditCount, brokenwikiprojtemps::BrokenWikiProjTemps, editcount::EditCount, emptycats::EmptyCats, featuredbysize::FeaturedBySize, - goodarticlesbysize::GoodArticlesBySize, - linkedmiscapitalizations::LinkedMiscapitalizations, - linkedmisspellings::LinkedMisspellings, longstubs::LongStubs, - lotnonfree::LotNonFree, newprojects::NewProjects, - olddeletiondiscussions::OldDeletionDiscussions, orphanedafds::OrphanedAfds, - orphanedsubtalks::OrphanedSubTalks, overusednonfree::OverusedNonFree, - polltemps::PollTemps, potenshbdps1::Potenshbdps1, - potenshbdps3::Potenshbdps3, potenshbdps4::Potenshbdps4, - potenshblps1::Potenshblps1, potenshblps2::Potenshblps2, - potenshblps3::Potenshblps3, projectchanges::ProjectChanges, - shortestbios::ShortestBios, stickyprodblps::StickyProdBLPs, - templatedisambigs::TemplateDisambigs, templatesnonfree::TemplatesNonFree, + goodarticlesbysize::GoodArticlesBySize, newprojects::NewProjects, + orphanedafds::OrphanedAfds, orphanedsubtalks::OrphanedSubTalks, + overusednonfree::OverusedNonFree, polltemps::PollTemps, + potenshbdps1::Potenshbdps1, potenshbdps3::Potenshbdps3, + potenshbdps4::Potenshbdps4, potenshblps1::Potenshblps1, + potenshblps2::Potenshblps2, potenshblps3::Potenshblps3, + projectchanges::ProjectChanges, shortestbios::ShortestBios, + stickyprodblps::StickyProdBLPs, templatedisambigs::TemplateDisambigs, + templatesnonfree::TemplatesNonFree, unbelievablelifespans::UnbelievableLifeSpans, uncatunrefblps::UncatUnrefBLPs, unsourcedblps::UnsourcedBLPs, untaggedblps::UntaggedBLPs, untaggedstubs::UntaggedStubs, diff --git a/dbreps2/src/enwiki/linkedmiscapitalizations.rs b/dbreps2/src/enwiki/linkedmiscapitalizations.rs deleted file mode 100644 index 1df4766..0000000 --- a/dbreps2/src/enwiki/linkedmiscapitalizations.rs +++ /dev/null @@ -1,91 +0,0 @@ -/* -Copyright 2018 MZMcBride, WBM -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{dbr_link, str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, - count: u32, -} - -pub struct LinkedMiscapitalizations {} - -impl Report for LinkedMiscapitalizations { - fn title(&self) -> &'static str { - "Linked miscapitalizations" - } - - fn frequency(&self) -> Frequency { - Frequency::Daily - } - - fn query(&self) -> &'static str { - r#" -/* linkedmiscapitalizations.rs SLOW_OK */ -SELECT - p1.page_title, - COUNT(*) -FROM - page AS p1 - JOIN categorylinks ON p1.page_id = cl_from - JOIN linktarget on p1.page_title = lt_title - AND lt_namespace = 0 - JOIN pagelinks ON pl_target_id = lt_id - JOIN page AS p2 ON pl_from = p2.page_id - AND p2.page_namespace = 0 -WHERE - p1.page_namespace = 0 - AND p1.page_is_redirect = 1 - AND cl_to = 'Redirects_from_miscapitalisations' - AND NOT(p1.page_id = p2.page_id) -GROUP BY - 1 -LIMIT - 1000; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |(page_title, count)| Row { - page_title, - count, - }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Linked miscapitalizations (limited to the first 1000 entries)" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Article", "Incoming links"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![dbr_link(&row.page_title), row.count] - } - - fn code(&self) -> &'static str { - include_str!("linkedmiscapitalizations.rs") - } -} diff --git a/dbreps2/src/enwiki/linkedmisspellings.rs b/dbreps2/src/enwiki/linkedmisspellings.rs deleted file mode 100644 index c432d8e..0000000 --- a/dbreps2/src/enwiki/linkedmisspellings.rs +++ /dev/null @@ -1,91 +0,0 @@ -/* -Copyright 2012-2013 MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{dbr_link, str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, - count: u32, -} - -pub struct LinkedMisspellings {} - -impl Report for LinkedMisspellings { - fn title(&self) -> &'static str { - "Linked misspellings" - } - - fn frequency(&self) -> Frequency { - Frequency::Daily - } - - fn query(&self) -> &'static str { - r#" -/* linkedmisspellings.rs SLOW_OK */ -SELECT - p1.page_title, - COUNT(*) -FROM - page AS p1 - JOIN categorylinks ON p1.page_id = cl_from - JOIN linktarget on p1.page_title = lt_title - AND lt_namespace = 0 - JOIN pagelinks ON pl_target_id = lt_id - JOIN page AS p2 ON pl_from = p2.page_id - AND p2.page_namespace = 0 -WHERE - p1.page_namespace = 0 - AND p1.page_is_redirect = 1 - AND cl_to = 'Redirects_from_misspellings' - AND NOT(p1.page_id = p2.page_id) -GROUP BY - 1 -LIMIT - 1000; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |(page_title, count)| Row { - page_title, - count, - }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Linked misspellings (limited to the first 1000 entries)" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Article", "Incoming links"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![dbr_link(&row.page_title), row.count] - } - - fn code(&self) -> &'static str { - include_str!("linkedmisspellings.rs") - } -} diff --git a/dbreps2/src/enwiki/longstubs.rs b/dbreps2/src/enwiki/longstubs.rs deleted file mode 100644 index 7c4d3ac..0000000 --- a/dbreps2/src/enwiki/longstubs.rs +++ /dev/null @@ -1,87 +0,0 @@ -/* -Copyright 2008, 2013 bjweeks, MZMcBride, CBM, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, - page_len: u64, -} - -pub struct LongStubs {} - -impl Report for LongStubs { - fn title(&self) -> &'static str { - "Long stubs" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* longstubs.rs SLOW_OK */ -SELECT - page_title, - page_len -FROM - page - JOIN categorylinks ON cl_from = page_id -WHERE - cl_to LIKE '%stubs' - AND page_namespace = 0 - AND page_len > 2000 -GROUP BY - page_title -ORDER BY - page_len DESC -LIMIT - 1000; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |(page_title, page_len)| Row { - page_title, - page_len, - }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Long pages in categories that end in \"stubs\" (limited to the first 1000 entries)" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Page", "Length"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[{}]]", row.page_title), row.page_len] - } - - fn code(&self) -> &'static str { - include_str!("longstubs.rs") - } -} diff --git a/dbreps2/src/enwiki/lotnonfree.rs b/dbreps2/src/enwiki/lotnonfree.rs deleted file mode 100644 index 7f45866..0000000 --- a/dbreps2/src/enwiki/lotnonfree.rs +++ /dev/null @@ -1,97 +0,0 @@ -/* -Copyright 2010, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{linker, str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_namespace: u32, - page_title: String, - count: u64, -} - -pub struct LotNonFree {} - -impl Report for LotNonFree { - fn title(&self) -> &'static str { - "Pages containing an unusually high number of non-free files" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* lotnonfree.rs SLOW_OK */ -SELECT - imgtmp.page_namespace, - imgtmp.page_title, - COUNT(cl_to) -FROM - page AS pg1 - JOIN categorylinks ON cl_from = pg1.page_id - JOIN ( - SELECT - pg2.page_namespace, - pg2.page_title, - il_to - FROM - page AS pg2 - JOIN imagelinks ON il_from = page_id - ) AS imgtmp ON il_to = pg1.page_title -WHERE - pg1.page_namespace = 6 - AND cl_to = 'All_non-free_media' -GROUP BY - imgtmp.page_namespace, - imgtmp.page_title -HAVING - COUNT(cl_to) > 6 -ORDER BY - COUNT(cl_to) DESC; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |(page_namespace, page_title, count)| { - Row { - page_namespace, - page_title, - count, - } - }) - .await?; - Ok(rows) - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Page", "Non-free files"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![linker(row.page_namespace, &row.page_title), row.count] - } - - fn code(&self) -> &'static str { - include_str!("lotnonfree.rs") - } -} diff --git a/dbreps2/src/enwiki/olddeletiondiscussions.rs b/dbreps2/src/enwiki/olddeletiondiscussions.rs deleted file mode 100644 index 52a012a..0000000 --- a/dbreps2/src/enwiki/olddeletiondiscussions.rs +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright 2010, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{linker, str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_namespace: u32, - page_title: String, - cl_timestamp: String, - cl_to: String, -} - -pub struct OldDeletionDiscussions {} - -impl Report for OldDeletionDiscussions { - fn title(&self) -> &'static str { - "Old deletion discussions" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* olddeletiondiscussions.rs SLOW_OK */ -SELECT - page_namespace, - page_title, - cl_timestamp, - cl_to -FROM - page - JOIN categorylinks ON cl_from = page_id -WHERE - cl_to IN ( - 'Articles_for_deletion', - 'Templates_for_deletion', - 'Wikipedia_files_for_deletion', - 'Categories_for_deletion', - 'Categories_for_merging', - 'Categories_for_renaming', - 'Redirects_for_discussion', - 'Miscellaneous_pages_for_deletion', - 'Stub_categories_for_deletion', - 'Stub_template_deletion_candidates' - ) - AND cl_timestamp < DATE_SUB(NOW(), INTERVAL 30 DAY) - AND NOT( - page_namespace <> 0 - AND cl_to = 'Articles_for_deletion' - ) -ORDER BY - page_namespace, - page_title ASC; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map( - self.query(), - |(page_namespace, page_title, cl_timestamp, cl_to)| Row { - page_namespace, - page_title, - cl_timestamp, - cl_to, - }, - ) - .await?; - Ok(rows) - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Page", "Timestamp", "Category"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![ - linker(row.page_namespace, &row.page_title), - row.cl_timestamp, - row.cl_to - ] - } - - fn code(&self) -> &'static str { - include_str!("olddeletiondiscussions.rs") - } -} diff --git a/dbreps2/src/enwiki/stickyprodblps.rs b/dbreps2/src/enwiki/stickyprodblps.rs index a571fc9..b25d08e 100644 --- a/dbreps2/src/enwiki/stickyprodblps.rs +++ b/dbreps2/src/enwiki/stickyprodblps.rs @@ -109,6 +109,6 @@ WHERE } fn code(&self) -> &'static str { - include_str!("linkedmisspellings.rs") + include_str!("stickyprodblps.rs") } } diff --git a/dbreps2/src/main.rs b/dbreps2/src/main.rs index 8b6e880..725c068 100644 --- a/dbreps2/src/main.rs +++ b/dbreps2/src/main.rs @@ -80,18 +80,7 @@ async fn main() -> Result<()> { }) .really_run(&enwiki_runner) .await; - (enwiki::LinkedMiscapitalizations {}) - .really_run(&enwiki_runner) - .await; - (enwiki::LinkedMisspellings {}) - .really_run(&enwiki_runner) - .await; - (enwiki::LongStubs {}).really_run(&enwiki_runner).await; - (enwiki::LotNonFree {}).really_run(&enwiki_runner).await; (enwiki::NewProjects {}).really_run(&enwiki_runner).await; - (enwiki::OldDeletionDiscussions {}) - .really_run(&enwiki_runner) - .await; (enwiki::OrphanedAfds {}).really_run(&enwiki_runner).await; (enwiki::OrphanedSubTalks {}) .really_run(&enwiki_runner) From 3f1da2bf991ff053818c8df162700faea102f0ae Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Tue, 3 Mar 2026 23:25:16 -0500 Subject: [PATCH 5/5] And a bunch more --- dbreps2/src/enwiki.rs | 16 +-- dbreps2/src/enwiki/overusednonfree.rs | 96 ----------------- dbreps2/src/enwiki/polltemps.rs | 83 -------------- dbreps2/src/enwiki/potenshbdps1.rs | 113 ------------------- dbreps2/src/enwiki/potenshbdps3.rs | 88 --------------- dbreps2/src/enwiki/potenshbdps4.rs | 41 ++++--- dbreps2/src/enwiki/potenshblps1.rs | 112 ------------------- dbreps2/src/enwiki/potenshblps2.rs | 44 ++++---- dbreps2/src/enwiki/potenshblps3.rs | 137 ------------------------ dbreps2/src/enwiki/shortestbios.rs | 86 --------------- dbreps2/src/enwiki/stickyprodblps.rs | 8 +- dbreps2/src/enwiki/templatedisambigs.rs | 129 ---------------------- dbreps2/src/main.rs | 12 --- 13 files changed, 56 insertions(+), 909 deletions(-) delete mode 100644 dbreps2/src/enwiki/overusednonfree.rs delete mode 100644 dbreps2/src/enwiki/polltemps.rs delete mode 100644 dbreps2/src/enwiki/potenshbdps1.rs delete mode 100644 dbreps2/src/enwiki/potenshbdps3.rs delete mode 100644 dbreps2/src/enwiki/potenshblps1.rs delete mode 100644 dbreps2/src/enwiki/potenshblps3.rs delete mode 100644 dbreps2/src/enwiki/shortestbios.rs delete mode 100644 dbreps2/src/enwiki/templatedisambigs.rs diff --git a/dbreps2/src/enwiki.rs b/dbreps2/src/enwiki.rs index f0cb651..e16ceb3 100644 --- a/dbreps2/src/enwiki.rs +++ b/dbreps2/src/enwiki.rs @@ -23,18 +23,10 @@ mod goodarticlesbysize; mod newprojects; mod orphanedafds; mod orphanedsubtalks; -mod overusednonfree; -mod polltemps; -mod potenshbdps1; -mod potenshbdps3; mod potenshbdps4; -mod potenshblps1; mod potenshblps2; -mod potenshblps3; mod projectchanges; -mod shortestbios; mod stickyprodblps; -mod templatedisambigs; mod templatesnonfree; mod unbelievablelifespans; mod uncatunrefblps; @@ -53,12 +45,8 @@ pub use { editcount::EditCount, emptycats::EmptyCats, featuredbysize::FeaturedBySize, goodarticlesbysize::GoodArticlesBySize, newprojects::NewProjects, orphanedafds::OrphanedAfds, orphanedsubtalks::OrphanedSubTalks, - overusednonfree::OverusedNonFree, polltemps::PollTemps, - potenshbdps1::Potenshbdps1, potenshbdps3::Potenshbdps3, - potenshbdps4::Potenshbdps4, potenshblps1::Potenshblps1, - potenshblps2::Potenshblps2, potenshblps3::Potenshblps3, - projectchanges::ProjectChanges, shortestbios::ShortestBios, - stickyprodblps::StickyProdBLPs, templatedisambigs::TemplateDisambigs, + potenshbdps4::Potenshbdps4, potenshblps2::Potenshblps2, + projectchanges::ProjectChanges, stickyprodblps::StickyProdBLPs, templatesnonfree::TemplatesNonFree, unbelievablelifespans::UnbelievableLifeSpans, uncatunrefblps::UncatUnrefBLPs, unsourcedblps::UnsourcedBLPs, diff --git a/dbreps2/src/enwiki/overusednonfree.rs b/dbreps2/src/enwiki/overusednonfree.rs deleted file mode 100644 index 2b8c46f..0000000 --- a/dbreps2/src/enwiki/overusednonfree.rs +++ /dev/null @@ -1,96 +0,0 @@ -/* -Copyright 2008, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, - count: u32, -} - -pub struct OverusedNonFree {} - -impl Report for OverusedNonFree { - fn title(&self) -> &'static str { - "Overused non-free files" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* overusednonfree.rs SLOW_OK */ -SELECT - page_title, - COUNT(*) -FROM - imagelinks - JOIN ( - SELECT - page_id, - page_title - FROM - page - JOIN categorylinks ON cl_from = page_id - WHERE - cl_to = 'All_non-free_media' - AND page_namespace = 6 - ) AS pgtmp ON pgtmp.page_title = il_to -GROUP BY - il_to -HAVING - COUNT(*) > 4 -ORDER BY - COUNT(*) DESC; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |(page_title, count)| Row { - page_title, - count, - }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Non-free files used on more than four pages" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["File", "Uses"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![ - format!("[[:File:{}|{}]]", row.page_title, row.page_title), - row.count - ] - } - - fn code(&self) -> &'static str { - include_str!("overusednonfree.rs") - } -} diff --git a/dbreps2/src/enwiki/polltemps.rs b/dbreps2/src/enwiki/polltemps.rs deleted file mode 100644 index 4eb98b0..0000000 --- a/dbreps2/src/enwiki/polltemps.rs +++ /dev/null @@ -1,83 +0,0 @@ -/* -Copyright 2011, 2013 bjweeks, MZMcBride, WOSlinker, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, -} - -pub struct PollTemps {} - -impl Report for PollTemps { - fn title(&self) -> &'static str { - "Template categories containing articles" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* polltemps.rs SLOW_OK */ -SELECT - page_title -FROM - page AS pg1 - JOIN templatelinks AS tl ON pg1.page_id = tl.tl_from - JOIN linktarget AS lt ON tl.tl_target_id = lt.lt_id -WHERE - pg1.page_namespace = 14 - AND lt.lt_namespace = 10 - AND lt.lt_title = 'Template_category' - AND EXISTS ( - SELECT - 1 - FROM - page AS pg2 - JOIN categorylinks AS cl ON pg2.page_id = cl.cl_from - WHERE - pg2.page_namespace = 0 - AND pg1.page_title = cl.cl_to - ); -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |page_title| Row { page_title }) - .await?; - Ok(rows) - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Category"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[:Category:{}]]", row.page_title)] - } - - fn code(&self) -> &'static str { - include_str!("polltemps.rs") - } -} diff --git a/dbreps2/src/enwiki/potenshbdps1.rs b/dbreps2/src/enwiki/potenshbdps1.rs deleted file mode 100644 index 346421a..0000000 --- a/dbreps2/src/enwiki/potenshbdps1.rs +++ /dev/null @@ -1,113 +0,0 @@ -/* -Copyright 2009, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, -} - -pub struct Potenshbdps1 {} - -impl Report for Potenshbdps1 { - fn title(&self) -> &'static str { - "Potential biographies of dead people (1)" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* potenshbdps1.rs SLOW_OK */ -SELECT - pg1.page_title -FROM - page AS pg1 - JOIN templatelinks ON pg1.page_id = tl_from - JOIN linktarget ON tl_target_id = lt_id -WHERE - pg1.page_namespace = 0 - AND lt_namespace = 10 - AND lt_title = 'BLP_unsourced' - AND NOT EXISTS ( - SELECT - 1 - FROM - page AS pg2 - JOIN categorylinks ON pg2.page_id = cl_from - WHERE - pg1.page_title = pg2.page_title - AND pg2.page_namespace = 0 - AND cl_to = 'Living_people' - ) - AND NOT EXISTS ( - SELECT - 1 - FROM - page AS pg3 - JOIN categorylinks ON pg3.page_id = cl_from - WHERE - pg1.page_title = pg3.page_title - AND pg3.page_namespace = 0 - AND cl_to = 'Possibly_living_people' - ) - AND EXISTS ( - SELECT - 1 - FROM - page AS pg4 - JOIN categorylinks ON pg4.page_id = cl_from - WHERE - pg1.page_title = pg4.page_title - AND pg4.page_namespace = 0 - AND cl_to RLIKE '^[0-9]{4}_deaths$' - ); -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |page_title| Row { page_title }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Articles that transclude {{tl|BLP unsourced}} that are not \ - categorized in [[:Category:Living people]] or \ - [[:Category:Possibly living people]] and are in a \"XXXX \ - deaths\" category" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Biography"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[{}]]", row.page_title)] - } - - fn code(&self) -> &'static str { - include_str!("potenshbdps1.rs") - } -} diff --git a/dbreps2/src/enwiki/potenshbdps3.rs b/dbreps2/src/enwiki/potenshbdps3.rs deleted file mode 100644 index ef63411..0000000 --- a/dbreps2/src/enwiki/potenshbdps3.rs +++ /dev/null @@ -1,88 +0,0 @@ -/* -Copyright 2009, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, -} - -pub struct Potenshbdps3 {} - -impl Report for Potenshbdps3 { - fn title(&self) -> &'static str { - "Potential biographies of dead people (3)" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* potenshbdps3.rs SLOW_OK */ -SELECT - pg1.page_title -FROM - page AS pg1 - JOIN templatelinks ON pg1.page_id = tl_from - JOIN linktarget ON tl_target_id = lt_id -WHERE - lt_namespace = 10 - AND lt_title = 'BLP' - AND pg1.page_namespace = 1 - AND EXISTS( - SELECT - 1 - FROM - page AS pg2 - JOIN categorylinks ON pg2.page_id = cl_from - WHERE - pg1.page_title = pg2.page_title - AND pg2.page_namespace = 0 - AND cl_to RLIKE '^[0-9]{1,4}_deaths$' - ); -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |page_title| Row { page_title }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Articles in a \"XXXX deaths\" category whose talk pages transclude {{tl|BLP}}" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Biography"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[{}]]", row.page_title)] - } - - fn code(&self) -> &'static str { - include_str!("potenshbdps3.rs") - } -} diff --git a/dbreps2/src/enwiki/potenshbdps4.rs b/dbreps2/src/enwiki/potenshbdps4.rs index b60e8ce..d4d13a0 100644 --- a/dbreps2/src/enwiki/potenshbdps4.rs +++ b/dbreps2/src/enwiki/potenshbdps4.rs @@ -40,27 +40,34 @@ impl Report for Potenshbdps4 { fn query(&self) -> &'static str { r#" /* potenshbdps4.rs SLOW_OK */ -select +SELECT page_title, - cl_to -from - page as p1 - join categorylinks on cl_from = p1.page_id -where + lt1.lt_title +FROM + page AS p1 + JOIN categorylinks AS cl1 ON cl1.cl_from = p1.page_id + JOIN linktarget AS lt1 ON cl1.cl_target_id = lt1.lt_id +WHERE p1.page_namespace = 0 - and cl_to like ? - and not exists ( - select - * - from + AND lt1.lt_namespace = 14 + AND lt1.lt_title LIKE ? + AND NOT EXISTS ( + SELECT + 1 + FROM page AS p2 - join categorylinks on p2.page_id = cl_from - where + JOIN categorylinks AS cl2 ON p2.page_id = cl2.cl_from + JOIN linktarget AS lt2 ON cl2.cl_target_id = lt2.lt_id + WHERE p2.page_title = p1.page_title - and p2.page_namespace = 0 - and (cl_to like "%_deaths" or cl_to = "Year_of_death_unknown" or cl_to = "Year_of_death_missing") - ); -"# + AND p2.page_namespace = 0 + AND lt2.lt_namespace = 14 + AND ( + lt2.lt_title LIKE '%_deaths' + OR lt2.lt_title = 'Year_of_death_unknown' + OR lt2.lt_title = 'Year_of_death_missing' + ) + );"# } async fn run_query(&self, conn: &mut Conn) -> Result> { diff --git a/dbreps2/src/enwiki/potenshblps1.rs b/dbreps2/src/enwiki/potenshblps1.rs deleted file mode 100644 index d143fcb..0000000 --- a/dbreps2/src/enwiki/potenshblps1.rs +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright 2009, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, -} - -pub struct Potenshblps1 {} - -impl Report for Potenshblps1 { - fn title(&self) -> &'static str { - "Potential biographies of living people (1)" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* potenshblps1.rs SLOW_OK */ -SELECT - pg1.page_title -FROM - page AS pg1 - JOIN templatelinks ON pg1.page_id = tl_from - JOIN linktarget ON tl_target_id = lt_id -WHERE - pg1.page_namespace = 0 - AND lt_namespace = 10 - AND lt_title = 'BLP_unsourced' - AND NOT EXISTS ( - SELECT - 1 - FROM - page AS pg2 - JOIN categorylinks ON pg2.page_id = cl_from - WHERE - pg1.page_title = pg2.page_title - AND pg2.page_namespace = 0 - AND cl_to = 'Living_people' - ) - AND NOT EXISTS ( - SELECT - 1 - FROM - page AS pg3 - JOIN categorylinks ON pg3.page_id = cl_from - WHERE - pg1.page_title = pg3.page_title - AND pg3.page_namespace = 0 - AND cl_to = 'Possibly_living_people' - ) - AND NOT EXISTS ( - SELECT - 1 - FROM - page AS pg4 - JOIN categorylinks ON pg4.page_id = cl_from - WHERE - pg1.page_title = pg4.page_title - AND pg4.page_namespace = 0 - AND cl_to RLIKE '^[0-9]{4}_deaths$' - ); -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |page_title| Row { page_title }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Articles that transclude {{tl|BLP unsourced}} that are not \ - categorized in [[:Category:Living people]] or \ - [[:Category:Possibly living people]]" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Biography"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[{}]]", row.page_title)] - } - - fn code(&self) -> &'static str { - include_str!("potenshblps1.rs") - } -} diff --git a/dbreps2/src/enwiki/potenshblps2.rs b/dbreps2/src/enwiki/potenshblps2.rs index b628c9a..d6c55c9 100644 --- a/dbreps2/src/enwiki/potenshblps2.rs +++ b/dbreps2/src/enwiki/potenshblps2.rs @@ -46,28 +46,32 @@ SELECT FROM page JOIN categorylinks AS c1 ON c1.cl_from = page_id - AND c1.cl_to = CONCAT(?, '_births') + JOIN linktarget AS lt1 ON c1.cl_target_id = lt1.lt_id + AND lt1.lt_namespace = 14 + AND lt1.lt_title = CONCAT(?, '_births') LEFT JOIN categorylinks AS c2 ON c2.cl_from = page_id - AND ( - c2.cl_to IN ( - 'Living_people', - 'Possibly_living_people', - 'Disappeared_people', - 'Missing_people', - 'Year_of_death_unknown', - 'Date_of_death_unknown', - 'Year_of_death_missing', - 'Date_of_death_missing', - '20th-century_deaths', - '21st-century_deaths', - '1900s_deaths', - '2000s_deaths', - 'People_declared_dead_in_absentia' + LEFT JOIN linktarget AS lt2 ON c2.cl_target_id = lt2.lt_id + AND lt2.lt_namespace = 14 + AND ( + lt2.lt_title IN ( + 'Living_people', + 'Possibly_living_people', + 'Disappeared_people', + 'Missing_people', + 'Year_of_death_unknown', + 'Date_of_death_unknown', + 'Year_of_death_missing', + 'Date_of_death_missing', + '20th-century_deaths', + '21st-century_deaths', + '1900s_deaths', + '2000s_deaths', + 'People_declared_dead_in_absentia' + ) + OR lt2.lt_title REGEXP '^[0-9]{4}_deaths$' + OR lt2.lt_title REGEXP '^[0-9]{4}_suicides$' + OR lt2.lt_title REGEXP '^[0-9]{3}0s_deaths$' ) - OR c2.cl_to REGEXP '^[0-9]{4}_deaths$' - OR c2.cl_to REGEXP '^[0-9]{4}_suicides$' - OR c2.cl_to REGEXP '^[0-9]{3}0s_deaths$' - ) WHERE page_namespace = 0 AND page_is_redirect = 0 diff --git a/dbreps2/src/enwiki/potenshblps3.rs b/dbreps2/src/enwiki/potenshblps3.rs deleted file mode 100644 index adac6f0..0000000 --- a/dbreps2/src/enwiki/potenshblps3.rs +++ /dev/null @@ -1,137 +0,0 @@ -/* -Copyright 2009, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, -} - -pub struct Potenshblps3 {} - -impl Report for Potenshblps3 { - fn title(&self) -> &'static str { - "Potential biographies of living people (3)" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r" -/* potenshblps3.rs SLOW_OK */ -SELECT - pg1.page_title -FROM - page AS pg1 - JOIN templatelinks ON pg1.page_id = tl_from - JOIN linktarget ON tl_target_id = lt_id -WHERE - lt_namespace = 10 - AND lt_title = 'BLP' - AND pg1.page_namespace = 1 - AND NOT EXISTS( - SELECT - 1 - FROM - page AS pg2 - JOIN categorylinks ON pg2.page_id = cl_from - WHERE - pg1.page_title = pg2.page_title - AND pg2.page_namespace = 0 - AND ( - cl_to IN ( - 'Living_people', - 'Possibly_living_people', - 'Human_name_disambiguation_pages', - 'Missing_people' - ) - OR cl_to LIKE 'Musical_groups%' - OR cl_to LIKE '%music_groups' - ) - ) - AND NOT EXISTS( - SELECT - 1 - FROM - page AS pg6 - JOIN categorylinks ON pg6.page_id = cl_from - WHERE - pg1.page_title = pg6.page_title - AND pg6.page_namespace = 1 - AND cl_to = 'Musicians_work_group_articles' - ) - AND NOT EXISTS( - SELECT - 1 - FROM - page AS pg7 - WHERE - pg1.page_title = pg7.page_title - AND pg7.page_namespace = 0 - AND pg7.page_is_redirect = 1 - ) - AND EXISTS( - SELECT - 1 - FROM - page AS pg8 - JOIN templatelinks ON pg8.page_id = tl_from - JOIN linktarget ON tl_target_id = lt_id - WHERE - lt_namespace = 10 - AND lt_title = 'WikiProject_Biography' - AND pg1.page_title = pg8.page_title - AND pg8.page_namespace = 1 - ) - AND REPLACE(pg1.page_title, '_', ' ') NOT REGEXP '(^List of|^Line of|\bcontroversy\b|\belection\b|\bmurder(s)?\b|\binvestigation\b|\bkidnapping\b|\baffair\b|\ballegation\b|\brape(s)?\b| v. |\bfamily\b| and |\bband\b| of |\barchive\b|recordholders| & |^The|^[0-9]|\bfiction\b|\bcharacter\b| the |\bincident(s)?\b|\bprinciples\b|\bmost\b)' -LIMIT - 1000; -" - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |page_title| Row { page_title }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Articles whose talk pages transclude {{tl|BLP}} that are likely to be \ - biographies of living people, but are not in [[:Category:Living people]], \ - [[:Category:Possibly living people]], or [[:Category:Missing people]] \ - (limited to the first 1000 entries)" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Biography"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[{}]]", row.page_title)] - } - - fn code(&self) -> &'static str { - include_str!("potenshblps3.rs") - } -} diff --git a/dbreps2/src/enwiki/shortestbios.rs b/dbreps2/src/enwiki/shortestbios.rs deleted file mode 100644 index 38bea44..0000000 --- a/dbreps2/src/enwiki/shortestbios.rs +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright 2010, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - page_title: String, - page_len: u32, -} - -pub struct ShortestBios {} - -impl Report for ShortestBios { - fn title(&self) -> &'static str { - "Shortest biographies of living people" - } - - fn frequency(&self) -> Frequency { - Frequency::Daily - } - - fn query(&self) -> &'static str { - r#" -/* shortestbios.rs SLOW_OK */ -SELECT - page_title, - page_len -FROM - page - JOIN categorylinks ON cl_from = page_id -WHERE - page_namespace = 0 - AND page_is_redirect = 0 - AND cl_to = 'Living_people' -ORDER BY - 2, - 1 -LIMIT - 1000; -"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map(self.query(), |(page_title, page_len)| Row { - page_title, - page_len, - }) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "The shortest [[:Category:Living people|biographies of living people]] by page length in bytes (limited to the first 1000 entries)" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Category", "Length"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![format!("[[{}]]", &row.page_title), row.page_len] - } - - fn code(&self) -> &'static str { - include_str!("shortestbios.rs") - } -} diff --git a/dbreps2/src/enwiki/stickyprodblps.rs b/dbreps2/src/enwiki/stickyprodblps.rs index b25d08e..e119e33 100644 --- a/dbreps2/src/enwiki/stickyprodblps.rs +++ b/dbreps2/src/enwiki/stickyprodblps.rs @@ -49,9 +49,11 @@ SELECT 1 FROM categorylinks + JOIN linktarget AS lt2 ON cl_target_id = lt2.lt_id WHERE cl_from = page_id - AND cl_to IN ( + AND lt2.lt_namespace = 14 + AND lt2.lt_title IN ( 'BLP_articles_proposed_for_deletion', 'Articles_for_deletion' ) @@ -60,8 +62,10 @@ FROM page JOIN revision ON rev_page = page_id JOIN categorylinks ON cl_from = page_id + JOIN linktarget ON cl_target_id = lt_id WHERE - cl_to = 'All_unreferenced_BLPs' + lt_namespace = 14 + AND lt_title = 'All_unreferenced_BLPs' AND page_namespace = 0 AND page_is_redirect = 0 AND rev_timestamp = ( diff --git a/dbreps2/src/enwiki/templatedisambigs.rs b/dbreps2/src/enwiki/templatedisambigs.rs deleted file mode 100644 index bdb513a..0000000 --- a/dbreps2/src/enwiki/templatedisambigs.rs +++ /dev/null @@ -1,129 +0,0 @@ -/* -Copyright 2010, 2013 bjweeks, MZMcBride, Tim Landscheidt -Copyright 2021 Kunal Mehta - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - */ - -use anyhow::Result; -use dbreps2::{str_vec, Frequency, Report}; -use mysql_async::prelude::*; -use mysql_async::Conn; - -pub struct Row { - template_title: String, - disambiguation_title: String, - transclusions_count: u64, -} - -pub struct TemplateDisambigs {} - -impl Report for TemplateDisambigs { - fn title(&self) -> &'static str { - "Templates containing links to disambiguation pages" - } - - fn frequency(&self) -> Frequency { - Frequency::Weekly - } - - fn query(&self) -> &'static str { - r#" -/* templatedisambigs.rs SLOW_OK */ -SELECT - pltmp.page_title AS template_title, - pltmp.lt_title AS disambiguation_title, - ( - SELECT - COUNT(*) - FROM - templatelinks - JOIN linktarget ON tl_target_id = lt_id - WHERE - lt_namespace = 10 - AND lt_title = pltmp.page_title - ) AS transclusions_count -FROM - ( - SELECT - page_namespace, - page_title, - lt_namespace, - lt_title - FROM - page - JOIN pagelinks ON pl_from = page_id - JOIN linktarget ON pl_target_id = lt_id - WHERE - page_namespace = 10 - AND lt_namespace = 0 - LIMIT - 1000000 - ) AS pltmp - JOIN page AS pg2 - /* removes red links */ - ON pltmp.lt_namespace = pg2.page_namespace - AND pltmp.lt_title = pg2.page_title -WHERE - EXISTS ( - SELECT - 1 - FROM - categorylinks - WHERE - pg2.page_id = cl_from - AND cl_to = 'All_disambiguation_pages' - ) -ORDER BY - transclusions_count DESC;"# - } - - async fn run_query(&self, conn: &mut Conn) -> Result> { - let rows = conn - .query_map( - self.query(), - |( - template_title, - disambiguation_title, - transclusions_count, - )| Row { - template_title, - disambiguation_title, - transclusions_count, - }, - ) - .await?; - Ok(rows) - } - - fn intro(&self) -> &'static str { - "Templates containing links to disambiguation pages (limited results)" - } - - fn headings(&self) -> Vec<&'static str> { - vec!["Template", "Disambiguation page", "Transclusions"] - } - - fn format_row(&self, row: &Row) -> Vec { - str_vec![ - format!("[[Template:{}|]]", row.template_title), - format!("[[{}]]", row.disambiguation_title), - row.transclusions_count - ] - } - - fn code(&self) -> &'static str { - include_str!("templatedisambigs.rs") - } -} diff --git a/dbreps2/src/main.rs b/dbreps2/src/main.rs index 725c068..a576892 100644 --- a/dbreps2/src/main.rs +++ b/dbreps2/src/main.rs @@ -85,22 +85,10 @@ async fn main() -> Result<()> { (enwiki::OrphanedSubTalks {}) .really_run(&enwiki_runner) .await; - (enwiki::OverusedNonFree {}) - .really_run(&enwiki_runner) - .await; - (enwiki::PollTemps {}).really_run(&enwiki_runner).await; - (enwiki::Potenshbdps1 {}).really_run(&enwiki_runner).await; - (enwiki::Potenshbdps3 {}).really_run(&enwiki_runner).await; (enwiki::Potenshbdps4 {}).really_run(&enwiki_runner).await; - (enwiki::Potenshblps1 {}).really_run(&enwiki_runner).await; (enwiki::Potenshblps2 {}).really_run(&enwiki_runner).await; - (enwiki::Potenshblps3 {}).really_run(&enwiki_runner).await; (enwiki::ProjectChanges {}).really_run(&enwiki_runner).await; - (enwiki::ShortestBios {}).really_run(&enwiki_runner).await; (enwiki::StickyProdBLPs {}).really_run(&enwiki_runner).await; - (enwiki::TemplateDisambigs {}) - .really_run(&enwiki_runner) - .await; (enwiki::TemplatesNonFree {}) .really_run(&enwiki_runner) .await;