import requests
from bs4 import BeautifulSoup
import csv
import time
def get_latest_naver_news(query, num_articles=10):
    """Fetch news search results for *query* from Naver.

    Args:
        query: Search keyword. It is passed as a query-string parameter so
            that non-ASCII text and reserved characters are URL-encoded.
        num_articles: Maximum number of result containers to inspect.

    Returns:
        A list of ``{'title': ..., 'link': ...}`` dicts, one per result that
        has a title anchor with an ``href``. The list may be shorter than
        *num_articles* (or empty) if the page contains fewer matches.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        "https://search.naver.com/search.naver",
        params={'where': 'news', 'sm': 'tab_jum', 'query': query},
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    # Fail loudly instead of parsing an error page as if it were results.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    news_items = soup.find_all('div', {'class': 'news_area'}, limit=num_articles)

    latest_news = []
    for item in news_items:
        anchor = item.find('a', {'class': 'news_tit'})
        # Skip containers without a usable title link rather than crashing
        # on a None lookup.
        if anchor is None:
            continue
        link = anchor.get('href')
        if not link:
            continue
        latest_news.append({'title': anchor.text, 'link': link})
    return latest_news
def save_news_to_csv(news_items, filename='latest_naver_news.csv'):
    """Append news rows to a UTF-8 CSV file.

    The ``title,link`` header is written only when the file is empty (newly
    created or zero-length), so repeated calls do not insert header rows in
    the middle of the data.

    Args:
        news_items: Iterable of dicts with ``'title'`` and ``'link'`` keys.
        filename: Path of the CSV file to append to.
    """
    with open(filename, 'a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['title', 'link'])
        # In append mode the stream starts at end-of-file, so position 0
        # means there is no existing content and the header is still needed.
        if file.tell() == 0:
            writer.writeheader()
        writer.writerows(news_items)
def main():
    """Crawl Naver news for a fixed keyword every 60 seconds, forever.

    Each cycle fetches the current results and appends them to the default
    CSV file. A failed request is reported and the loop keeps running, so a
    transient network error does not stop the crawler.
    """
    query = '인공지능'
    while True:
        print(f"{time.strftime('%Y-%m-%d %H:%M:%S')}에 크롤링 시작")
        try:
            latest_news_items = get_latest_naver_news(query)
        except requests.RequestException as exc:
            # Network problems are expected in a long-running poller; report
            # the failure and retry on the next cycle instead of exiting.
            print(f"{time.strftime('%Y-%m-%d %H:%M:%S')}에 크롤링 실패: {exc}, 다음 크롤링까지 60초 대기")
        else:
            save_news_to_csv(latest_news_items)
            print(f"{time.strftime('%Y-%m-%d %H:%M:%S')}에 크롤링 완료, 다음 크롤링까지 60초 대기")
        time.sleep(60)
# Start the crawler only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()