|
1 |
| -from bs4 import BeautifulSoup |
| 1 | +import requests |
2 | 2 | from datetime import datetime
|
3 | 3 |
|
4 | 4 |
|
class ZennScraper:
    """Collect articles published under the 'midra_lab' publication from the Zenn API."""

    def __init__(self, url):
        """
        Initialize the scraper.

        Args:
            url (str): Zenn URL this scraper is associated with.
        """
        self.url = url
        # Accumulates dicts of the form {'title', 'name', 'url', 'created_at'}.
        self.articles = []

    def get_midra_lab_articles(self, usernames):
        """
        Fetch each user's latest articles from the Zenn API and append those
        belonging to the 'midra_lab' publication to ``self.articles``.

        Best-effort: non-200 responses and articles without a publication or
        a publish date are skipped silently.

        Args:
            usernames (Iterable[str]): Zenn usernames to query.
        """
        base_url = "https://zenn.dev/api/articles"

        for username in usernames:
            # timeout prevents an unresponsive server from hanging the scraper
            # indefinitely (requests has no default timeout).
            response = requests.get(
                base_url,
                params={'username': username, 'order': 'latest'},
                timeout=10,
            )
            if response.status_code != 200:
                continue  # best-effort: skip users whose listing failed

            data = response.json()
            # .get keeps the best-effort contract if the payload is malformed.
            for article in data.get("articles", []):
                # Keep only articles whose publication is 'midra_lab'.
                publication = article.get("publication")
                if not (publication and publication.get("name") == "midra_lab"):
                    continue

                published_at = article.get("published_at")
                if not published_at:
                    continue  # undated articles are ignored

                # Zenn timestamps look like '2023-01-01T12:34:56.789+09:00'
                # — assumes fractional seconds are always present; TODO confirm.
                date_obj = datetime.strptime(published_at, '%Y-%m-%dT%H:%M:%S.%f%z')
                self.articles.append({
                    'title': article["title"],
                    'name': article["user"]["username"],
                    'url': f"https://zenn.dev{article['path']}",
                    'created_at': date_obj.strftime('%Y-%m-%d'),
                })

    def is_articles_empty(self):
        """
        Check whether the article list is empty.

        Returns:
            bool: True if no articles have been collected, False otherwise.
        """
        return not self.articles
0 commit comments