Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .idea/AIS-CJS.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions AIS-CJS/AIS_CJS.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
print("asd")
print("gg")
#��ȫ�� �ù߶��Ƥ�����
print("ee")
print("실행도잘됨")


28 changes: 28 additions & 0 deletions AIS-CJS/csvjson.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

{
"프로그래밍 언어": 100,
"자바": 62,
"자바 프로그래밍": 62,
"파이썬 프로그래밍": 41,
"파이썬": 40,
"프로그래밍 갤러리": 39,
"코딩": 34,
"프로그램": 34,
"게임 프로그래밍": 33,
"c++": 31,
"객체": 30,
"컴퓨터 프로그래밍": 28,
"c 언어 프로그래밍": 28,
"c 언어": 27,
"객체 지향": 25,
"객체 지향 프로그래밍": 25,
"함수형 프로그래밍": 21,
"안드로이드 프로그래밍": 21,
"소켓 프로그래밍": 20,
"시스템 프로그래밍": 20,
"java": 18,
"명품 자바 프로그래밍": 16,
"프로그래머": 15,
"알고리즘": 14,
"개발자": 13
}
21 changes: 21 additions & 0 deletions AIS-CJS/dataTrans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import socket

# Host and port of the server to connect to
HOST = '175.200.108.201'
PORT = 5000

# Create the TCP socket inside a `with` block so it is always closed, even
# when connect/sendall/recv raises (the original only closed it on success).
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
    # Connect to the server
    client_socket.connect((HOST, PORT))

    # Send the "Hello world" message to the server
    client_socket.sendall("Hello world".encode())
    print("데이터 전송")

    # Receive the message sent back by the server (echo); reads at most
    # 1024 bytes in a single recv call.
    data = client_socket.recv(1024)
    print('Received', repr(data.decode()))
51 changes: 51 additions & 0 deletions AIS-CJS/jobAPI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from urllib.request import urlopen
from urllib.parse import urlencode, unquote, quote_plus
import urllib
import requests
import json
from xml.etree.ElementTree import parse
import xmltodict
from json.decoder import JSONDecoder
from os import error
import glob

# Parsed per-page responses; merged into job.json at the end of the script.
data = []

page = 1
url = "http://openapi.work.go.kr/opi/opi/opia/wantedApi.do"
# NOTE(review): the API key is committed in source; consider loading it from
# an environment variable or an untracked config file instead.
key = "WNLB0BQ31I58AQ2YDZ4ET2VR1HK"
# Not referenced below: the same occupation codes are passed through the
# query parameters. Kept so the module-level name stays available.
occupation = "&occupation=023|024|025|026"

# Request result pages 1..10 and store each one as job<page>.json.
for page in range(1, 11):
    queryParams = '?' + urlencode({
        quote_plus('authKey'): key,
        quote_plus('callTp'): 'L',
        quote_plus('returnType'): 'XML',
        quote_plus('startPage'): page,
        quote_plus('display'): '200',
        quote_plus('occupation'): '023|024|025|026',
    })

    # The encoded query string is un-escaped again before the request is
    # built, exactly as the original code did.
    request = urllib.request.Request(url + unquote(queryParams))

    # Close the HTTP response as soon as the body has been read.
    with urlopen(request, timeout=60) as response:
        response_body = response.read()  # raw bytes

    decode_data = response_body.decode('utf-8')
    print(type(decode_data))

    xml_parse = xmltodict.parse(decode_data)  # parse the XML string
    # Round-trip through JSON text so the result is rebuilt from the plain
    # types produced by json.loads.
    xml_dict = json.loads(json.dumps(xml_parse))

    print(xml_dict)
    # Written with the same explicit encoding the merge step reads with.
    with open('job' + str(page) + '.json', 'w', encoding="utf-8") as f:
        json.dump(xml_dict, f)

# Merge the per-page files. The pattern requires a digit after "job" so the
# merged output file job.json (left over from an earlier run) is not read
# back in and nested inside the new result.
for f in glob.glob("job[0-9]*.json"):
    with open(f, encoding="utf-8") as infile:
        data.append(json.load(infile))

with open("job.json", 'w', encoding="utf-8") as outfile:
    json.dump(data, outfile, ensure_ascii=False, indent="\t")

#http post 통신 코드
# headers = {}
# headers = {'content-type': 'application/json'}
# postData = xml_dict
# response = requests.post("https://test.com", headers=headers, data=postData)
112 changes: 110 additions & 2 deletions AIS-CJS/jobSearch.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,110 @@
print("부끄럽다")
print("시발")

def main():
    """Crawl JobKorea search results for one keyword into a CSV file.

    For every listing on the search result page(s) the listing's detail page
    is fetched as well, and one row (company, title, experience, education,
    preferences, employment type, pay, region, application period, logo URL)
    is written to ``jobkorea_apply_urls.csv`` in the current directory.

    Relies on ``remove_blank`` defined in this module. Takes no arguments and
    returns ``None``; progress is reported with ``print``.
    """
    import csv
    import time

    import requests
    from bs4 import BeautifulSoup

    headers = {'User-Agent': 'Mozilla/5.0'}
    keyword = "모바일앱"  # search keyword

    # Selector prefixes shared by several fields on the detail page.
    summary = "#container > section > div.readSumWrap.clear > article > div.tbRow.clear"
    period = "#tab02 > div.divReadBx.clear.devMakeSameHeight > article.artReadPeriod > div > dl.date"

    def joined_text(page, selector):
        # Concatenated text of every element matching `selector` ("" if none).
        return "".join(tag.getText() for tag in page.select(selector))

    # JobKorea
    # `with` guarantees the CSV is flushed and closed even if a request or a
    # parse step raises part-way through the crawl.
    # NOTE(review): no encoding is passed, so the platform default is used,
    # as in the original; confirm which encoding the CSV consumers expect.
    with open('jobkorea_apply_urls.csv', 'w') as f:
        f.write("기업 이름,모집 제목,경력,학력,우대,고용 형태, 급여, 지역, 모집 기간, 이미지 주소" + '\n')
        # csv.writer quotes any field that contains a comma or quote, so
        # such values no longer shift the columns. '\n' keeps the same line
        # ending the hand-written rows used.
        writer = csv.writer(f, lineterminator='\n')

        # Result pages to crawl
        for n in range(1, 2):
            raw = requests.get(
                "https://www.jobkorea.co.kr/Search/?stext={}&tabType=recruit&Page_No=".format(keyword) + str(n),
                headers=headers)
            html = BeautifulSoup(raw.text, "html.parser")
            results = html.select("li.list-post")

            for ar in results[0:20]:
                name_tag = ar.select_one("a.name")
                title_tag = ar.select_one("a.title")
                link_tag = ar.find("a")
                # Skip list items that lack the essential elements instead of
                # crashing with AttributeError/TypeError on None.
                if name_tag is None or title_tag is None or link_tag is None:
                    continue

                company_name = name_tag.text.strip().replace(",", "")
                detail = title_tag.text.strip().replace(",", "")
                url = 'https://www.jobkorea.co.kr' + link_tag['href']
                exp_tag = ar.select_one("span.exp")
                exp = exp_tag.text.strip() if exp_tag is not None else ""

                # Detail page of this listing
                raw2 = requests.get(url, headers=headers)
                html2 = BeautifulSoup(raw2.text, "html.parser")

                # Logo URL: the src of the last "#cologo" match, prefixed
                # with "http:" exactly as the original did.
                # NOTE(review): the prefix presumably expects a
                # protocol-relative "//..." src; confirm against the page.
                image_tags = html2.select("#cologo")
                src = image_tags[-1].get("src", "") if image_tags else ""
                image = "http:" + src if src != "" else "이미지 없음"

                date = "".join(
                    "시작일 " + tag.getText() + " "
                    for tag in html2.select(period + " > dd:nth-child(2)")
                ) + "".join(
                    "마감일 " + tag.getText()
                    for tag in html2.select(period + " > dd:nth-child(4)")
                )
                prefer = joined_text(html2, "#dlPref > dd > span")
                region = joined_text(html2, summary + " > div:nth-child(2) > dl > dd:nth-child(6) > a")
                pay = joined_text(html2, summary + " > div:nth-child(2) > dl > dd:nth-child(4)")
                edu = joined_text(html2, summary + " > div:nth-child(1) > dl > dd:nth-child(4) > strong")
                pattern = joined_text(html2, summary + " > div:nth-child(2) > dl > dd:nth-child(2) > ul > li > strong")

                pay = remove_blank(pay).replace(",", "")
                region = remove_blank(region)
                prefer = remove_blank(prefer)
                pattern = remove_blank(pattern)
                edu = remove_blank(edu)
                date = remove_blank(date)
                if date == "":
                    date = "상시 채용"

                writer.writerow(
                    [company_name, detail, exp, edu, prefer, pattern, pay, region, date, image])
                time.sleep(1)  # wait 1 second between detail-page requests

            print(str(n) + "번째 페이지 내 " + str(keyword) + " 의 채용공고 크롤링을 완료했습니다.")
        print("최종 엑셀 작업 마무리중 입니다.")

    print("잡코리아 크롤링이 완료되었습니다!!")


def remove_blank(string):
    """Return *string* trimmed, with all line-feed and carriage-return characters removed.

    Leading and trailing whitespace is stripped first; afterwards every
    ``"\\n"`` and ``"\\r"`` still inside the text is deleted. Other interior
    whitespace (spaces, tabs) is left untouched.
    """
    trimmed = string.strip()
    line_breaks = str.maketrans("", "", "\n\r")
    return trimmed.translate(line_breaks)



# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

40 changes: 40 additions & 0 deletions AIS-CJS/main.ui
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>900</width>
<height>600</height>
</rect>
</property>
<property name="minimumSize">
<size>
<width>900</width>
<height>600</height>
</size>
</property>
<property name="mouseTracking">
<bool>false</bool>
</property>
<property name="windowTitle">
<string>MainWindow</string>
</property>
<widget class="QWidget" name="centralwidget"/>
<widget class="QMenuBar" name="menubar">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>900</width>
<height>21</height>
</rect>
</property>
</widget>
<widget class="QStatusBar" name="statusbar"/>
</widget>
<resources/>
<connections/>
</ui>
26 changes: 26 additions & 0 deletions AIS-CJS/text.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
����,��
프로그래밍 언어,100
자바,62
자바 프로그래밍,62
파이썬 프로그래밍,41
파이썬,40
프로그래밍 갤러리,39
코딩,34
프로그램,34
게임 프로그래밍,33
c++,31
객체,30
컴퓨터 프로그래밍,28
c 언어 프로그래밍,28
c 언어,27
객체 지향,25
객체 지향 프로그래밍,25
함수형 프로그래밍,21
안드로이드 프로그래밍,21
소켓 프로그래밍,20
시스템 프로그래밍,20
java,18
명품 자바 프로그래밍,16
프로그래머,15
알고리즘,14
개발자,13
20 changes: 20 additions & 0 deletions AIS-CJS/wordCloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

import json
import matplotlib.pyplot as plt

from wordcloud import WordCloud

inputFileName = 'csvjson'
# Load the word -> weight mapping; the `with` block closes the file handle,
# which the original open(...).read() never did explicitly.
with open(inputFileName + '.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# NOTE(review): absolute path into one user's Windows profile; the script
# only runs where this font file exists.
font_path = "C:/Users/dofury/AppData/Local/Microsoft/Windows/Fonts/MaruBuri-Bold.ttf"
wc = WordCloud(font_path=font_path, background_color='ivory', width=800, height=600)

cloud = wc.fit_words(data)
# plt.figure (lowercase) creates the figure and makes it current, so the
# requested size applies to the image drawn and saved below. The original
# plt.Figure(...) only built a detached Figure object that was never used.
plt.figure(figsize=(15, 20))
plt.imshow(cloud)
plt.axis('off')
plt.savefig("wordcloud.png")
plt.show()