개발일지/Big Data

[Pandas] 가장 많은 리뷰를 받은 `n`개의 음식점

zzoo-ppaamm 2020. 9. 2. 22:40

json 파일 불러오기

# Load the raw restaurant data from the JSON file at `data_path`.
try:
    with open(data_path, encoding="utf-8") as f:
        # json.load parses straight from the file object.
        data = json.load(f)
except FileNotFoundError:
    print(f"`{data_path}` 가 존재하지 않습니다.")
    # Stop here: without the file, `data` is never bound, so any code that
    # runs after this snippet would fail with a NameError instead.
    raise SystemExit(1)

data.json

[{"id": 0,
"name": "",
"branch": "",
"area": "",
"tel": "",
"address": "",
"latitude": "",
"longitude": "",
"category_list": [{"category": ""}, ...],
"menu_list": [{"menu": "a", "price": 0}, ...],
"bhour_list": [],
"review_cnt": 2,
"review_list": [{
    "writer_info": {"id": 389728, "gender": "여", "born_year": "1993"}, 
    "review_info": {"id": 2, "score": 5, "content": "", "reg_time": "1970-01-01 00:00:00"}
}, ...]
}, ...]

json to DataFrame

# Row accumulators, one independent list per table:
# stores = restaurant table, reviews = review table, plus users and menus.
stores, reviews, users, menus = [], [], [], []

def import_data(data_path=DATA_FILE):
    """
    Req. 1-1-1: read the restaurant data file and store it as Pandas DataFrames.

    The file must contain a JSON list of store objects. Each store is
    flattened into four row lists (stores, reviews, users, menus), which are
    then turned into DataFrames.

    Args:
        data_path: Path of the JSON file to read.

    Returns:
        A dict with the keys "stores", "reviews", "users" and "menus", each
        mapping to the corresponding DataFrame.

    Raises:
        SystemExit: With status 1 when `data_path` does not exist.
    """

    try:
        with open(data_path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"`{data_path}` 가 존재하지 않습니다.")
        # Raise SystemExit directly instead of calling exit(): that helper is
        # injected by the `site` module for interactive use and is not
        # guaranteed to exist (e.g. when running with `python -S`).
        raise SystemExit(1)

    stores = []  # restaurant table rows
    reviews = []  # review table rows
    users = []  # review writer rows
    menus = []  # menu table rows

    # Menus carry no id in the source data, so number them sequentially
    # across all stores.
    menu_index = 0
    for d in data:
        # Collapse the category list into one "|"-separated string.
        categories = [c["category"] for c in d["category_list"]]
        stores.append(
            [
                d["id"],
                d["name"],
                d["branch"],
                d["area"],
                d["tel"],
                d["address"],
                d["latitude"],
                d["longitude"],
                "|".join(categories),
            ]
        )

        for review in d["review_list"]:
            r = review["review_info"]
            u = review["writer_info"]

            # Review row: review id, store id, writer id, score, content, time.
            reviews.append(
                [r["id"], d["id"], u["id"], r["score"], r["content"], r["reg_time"]]
            )

            # One user row is added per review, so a writer with several
            # reviews appears several times in the users table.
            users.append(
                [u["id"], u["gender"], u["born_year"]]
            )

        for m in d["menu_list"]:
            menus.append(
                [menu_index, d["id"], m["menu"], m["price"]]
            )
            menu_index += 1

    # NOTE(review): the *_columns lists are defined outside this block; they
    # presumably match the row layouts built above -- confirm.
    store_frame = pd.DataFrame(data=stores, columns=store_columns)
    review_frame = pd.DataFrame(data=reviews, columns=review_columns)
    user_frame = pd.DataFrame(data=users, columns=user_columns)
    menu_frame = pd.DataFrame(data=menus, columns=menu_columns)

    return {"stores": store_frame, "reviews": review_frame, "users": user_frame, "menus": menu_frame}

DataFrame 저장 & 불러오기

def dump_dataframes(dataframes):
    """Pickle `dataframes` to the path held in `DUMP_FILE`."""
    target = DUMP_FILE
    pd.to_pickle(dataframes, target)


def load_dataframes():
    """Return the object unpickled from the path held in `DUMP_FILE`."""
    # NOTE(review): unpickling can execute arbitrary code; only load dump
    # files that this program produced itself.
    restored = pd.read_pickle(DUMP_FILE)
    return restored

DataFrame 정제

def get_most_reviewed_stores(dataframes, n=20):
    """
    Req. 1-2-3: return the `n` stores with the most reviews, sorted.

    Args:
        dataframes: Mapping with a "stores" frame (needs an `id` column) and
            a "reviews" frame (needs a `store` column).
        n: How many of the most-reviewed stores to keep.

    Returns:
        A DataFrame with one row per kept store, most reviewed first. It has
        the columns `store` and `count` followed by every column of the
        stores frame (left join on `store` == `id`).
    """
    store_table = dataframes["stores"]
    review_table = dataframes["reviews"]

    # Count reviews per store, keep the n largest counts, and turn the
    # resulting Series into a two-column frame (`store`, `count`).
    top_counts = (
        review_table.groupby("store")
        .size()
        .sort_values(ascending=False)
        .head(n)
        .rename("count")
        .reset_index()
    )

    # Attach the store details; the left join keeps the ranking order.
    return top_counts.merge(store_table, how="left", left_on="store", right_on="id")

출력

def main():
    """Load the dumped DataFrames and print the most-reviewed stores as a ranking."""
    data = load_dataframes()

    # Separator line that is one column narrower than the terminal.
    rule = "-" * (shutil.get_terminal_size().columns - 1)

    # Stores with the most reviews
    top_stores = get_most_reviewed_stores(data)

    print("[최다 리뷰 음식점]")
    print(f"{rule}\n")
    for position, row in top_stores.iterrows():
        # The row label is zero-based, while the printed rank starts at 1.
        rank = position + 1
        print(' {}위 : {}({}개)'.format(rank, row['store_name'], row['count']))
    print(f"{rule}\n")

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

+Python 출력 format

# Three ways to build the same message from `temperature` and `measure`:
print('Water boils at %d degrees %s' % (temperature, measure))     # 1: printf-style % operator
print('Water boils at {} degrees {}'.format(temperature, measure)) # 2: str.format()
print(f'Water boils at {temperature} degrees {measure}')           # 3: f-string