Dev metacog/DevCaJournal
word cloud 개발일지
잘 배우고, 잘 익히기
2021. 9. 16. 00:41
ㅇ 핵심 기능 : 데이터 읽기, 데이터 클렌징(replace함수), WordCloud(wordcloud 패키지), masked word cloud
# 파일 쓰기
#
# f = open("test.txt", "w", encoding="utf-8")
# f.write("안녕, 스파르타!\n")
# for i in [1,2,3,4,5]:
#     f.write(f'{i} 번째 좋아요\n')
#
# f.close()
# 파일 읽기
# with open("test.txt", "r", encoding="utf-8") as f:
#     lines = f.readlines()
#     for line in lines:
#         print(line)
#
# # 파일 읽기
# text = ''
# with open("test.txt", "r", encoding="utf-8") as f:
#     lines = f.readlines()
#     for line in lines:
#         text += line
#
# print(text)
# # 이용 가능한 폰트 중 '고딕'만 선별
# import matplotlib.font_manager as fm
#
# for font in fm.fontManager.ttflist:
#     if 'Gothic' in font.name:
#         print(font.name, font.fname)
# 워드 클라우드
# text = ''
# with open("kakaotalk.txt", "r", encoding="utf-8") as f:
#     lines = f.readlines()
#     for line in lines:
#         text += line
#
#
# from wordcloud import WordCloud
#
# wc = WordCloud(font_path='C:/Windows/Fonts/malgunsl.ttf', background_color="white", width=600, height=400)
# wc.generate(text)
# wc.to_file("result.png")
# 클렌징
# Build the word-cloud input from the chat export: keep only the message body
# of each chat line and strip noise tokens from it.
text = ''
with open("kakaotalk.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Substrings removed from every message body, applied in this order.
_noise_tokens = ('ㅋ', 'ㅠ', 'ㅜ', '이모티콘\n', '사진\n', '삭제된 메시지입니다.')

_messages = []
# The first five lines of the file are skipped.
for line in lines[5:]:
    # Only lines containing '] [' are treated as chat messages
    # (presumably "[name] [time] message" -- confirm against the export format).
    if '] [' not in line:
        continue
    # maxsplit=2 keeps the entire message body in the third field, even when
    # the message itself contains '] '. An unbounded split would cut the
    # message at its first '] ' and silently drop the remainder.
    parts = line.split('] ', 2)
    if len(parts) < 3:
        # Two bracketed fields but no body: nothing to add (an unguarded
        # parts[2] would raise IndexError here).
        continue
    message = parts[2]
    for token in _noise_tokens:
        message = message.replace(token, '')
    _messages.append(message)

# Join once instead of growing the string with += on every line.
text = ''.join(_messages)
# print(text)
#
# from wordcloud import WordCloud
#
# wc = WordCloud(font_path='C:/Windows/Fonts/malgunsl.ttf', background_color="white", width=600, height=400)
# wc.generate(text)
# wc.to_file("result.png")
# 마스크드 word cloud
from wordcloud import WordCloud
from PIL import Image
import numpy as np

# Read the mask image into an array. The context manager closes the underlying
# file as soon as np.array() has copied the pixel data; a bare Image.open()
# would leave the file handle open.
with Image.open('cloud.png') as mask_image:
    mask = np.array(mask_image)

# Generate the word cloud from `text` using the mask, then save it as a PNG.
# NOTE(review): the font path is Windows-specific; adjust it on other platforms.
wc = WordCloud(font_path='C:/Windows/Fonts/malgunsl.ttf', background_color="white", mask=mask)
wc.generate(text)
wc.to_file("result_masked.png")