본문 바로가기
기록/인프런

실전! 스프링 부트와 JPA 활용2 - 컬렉션 조회 최적

by 신발사야지 2023. 5. 10.

 

API 개발 고급 - 컬렉션 조회 최적화

OneToMany 를 조회하고 최적화 하는 방법

엔티티를 직접 노출하는 잘못된 API V1

/*
    엔티티를 직접 노출
    이렇게 하면 안 된다는 예시
     */
    @GetMapping("/api/v1/orders")
    public List<Order> ordersV1() {
        List<Order> all = orderRepository.findAllByString(new OrderSearch());
        for (Order order : all) {
            order.getMember().getUserName();
            order.getDelivery().getStatus();
            order.getOrderItems().stream().forEach(o -> o.getItem().getName());
        }

        return all;
    }

DTO를 사용해서 리턴하는 API V2 (fetch joinX)

/*
    DTO 를 사용해서 리턴
     */
    @GetMapping("/api/v2/orders")
    public List<OrderDto> ordersV2() {
        List<Order> orders = orderRepository.findAllByString(new OrderSearch());

        // 실무에서는 이걸 페이징을 하거나 뭘 하겠죠
        List<OrderDto> result = orders.stream()
                .map(OrderDto::new)
                .collect(Collectors.toList());

        return result;
    }

쿼리를 몇 개 호출할까?

-- Oder 조회 1번
select
        order0_.order_id as order_id1_6_,
        order0_.delivery_id as delivery4_6_,
        order0_.member_id as member_i5_6_,
        order0_.order_date as order_da2_6_,
        order0_.status as status3_6_ 
    from
        orders order0_ 
    inner join
        member member1_ 
            on order0_.member_id=member1_.member_id limit ?

-- Member 1번
select
        member0_.member_id as member_i1_4_0_,
        member0_.city as city2_4_0_,
        member0_.street as street3_4_0_,
        member0_.zipcode as zipcode4_4_0_,
        member0_.user_name as user_nam5_4_0_ 
    from
        member member0_ 
    where
        member0_.member_id=?

-- OrderItem 1번
select
        orderitems0_.order_id as order_id5_5_0_,
        orderitems0_.order_item_id as order_it1_5_0_,
        orderitems0_.order_item_id as order_it1_5_1_,
        orderitems0_.count as count2_5_1_,
        orderitems0_.item_id as item_id4_5_1_,
        orderitems0_.order_id as order_id5_5_1_,
        orderitems0_.order_price as order_pr3_5_1_ 
    from
        order_item orderitems0_ 
    where
        orderitems0_.order_id=?

-- Item 쿼리 2 (item 갯수에 비례)
select
        item0_.item_id as item_id2_3_0_,
        item0_.name as name3_3_0_,
        item0_.price as price4_3_0_,
        item0_.stock_quantity as stock_qu5_3_0_,
        item0_.artist as artist6_3_0_,
        item0_.etc as etc7_3_0_,
        item0_.author as author8_3_0_,
        item0_.isbn as isbn9_3_0_,
        item0_.actor as actor10_3_0_,
        item0_.director as directo11_3_0_,
        item0_.dtype as dtype1_3_0_ 
    from
        item item0_ 
    where
        item0_.item_id=?

select
        item0_.item_id as item_id2_3_0_,
        item0_.name as name3_3_0_,
        item0_.price as price4_3_0_,
        item0_.stock_quantity as stock_qu5_3_0_,
        item0_.artist as artist6_3_0_,
        item0_.etc as etc7_3_0_,
        item0_.author as author8_3_0_,
        item0_.isbn as isbn9_3_0_,
        item0_.actor as actor10_3_0_,
        item0_.director as directo11_3_0_,
        item0_.dtype as dtype1_3_0_ 
    from
        item item0_ 
    where
        item0_.item_id=?

딱봐도 이렇게 했다가는 성능이.. 다음 시간에는 이거를 fetch join 을 적용하는 방법(collection 에 대해서) 에 대해서 알아보겠습니다. collection 에 fetch join 할 때는 고민해야 할 부분이 더 있습니다.

기존처럼 fetch join 을 사용해서 최적화를 한 API V3

public List<Order> findAllWithMemberDelivery() {
        return em.createQuery(
                "select o from Order o" +
                        " join fetch o.member m" +
                        " join fetch o.delivery d",
                Order.class
        ).getResultList();
    }

// JPA 입장에서는 어떤 데이터가 뻥튀기 되면 안되는지 파악할 수 없다.
select * from orders o
join order_item oi on o.order_id = oi.order_id

/*
기존처럼 fetch join 을 쓰면 1:N 관계에서 join 하면 데이터가 뻥튀기 됩니다.

4,2023-05-10 20:50:30.746887,ORDER
4,2023-05-10 20:50:30.746887,ORDER
11,2023-05-10 20:50:30.788022,ORDER
11,2023-05-10 20:50:30.788022,ORDER

4번 Order가 2개로 뻥튀기 된걸 볼 수 있음
*/

-- 실제 JPA 에서 나가는 쿼
select
        order0_.order_id as order_id1_6_0_,
        member1_.member_id as member_i1_4_1_,
        delivery2_.delivery_id as delivery1_2_2_,
        orderitems3_.order_item_id as order_it1_5_3_,
        item4_.item_id as item_id2_3_4_,
        order0_.delivery_id as delivery4_6_0_,
        order0_.member_id as member_i5_6_0_,
        order0_.order_date as order_da2_6_0_,
        order0_.status as status3_6_0_,
        member1_.city as city2_4_1_,
        member1_.street as street3_4_1_,
        member1_.zipcode as zipcode4_4_1_,
        member1_.user_name as user_nam5_4_1_,
        delivery2_.city as city2_2_2_,
        delivery2_.street as street3_2_2_,
        delivery2_.zipcode as zipcode4_2_2_,
        delivery2_.status as status5_2_2_,
        orderitems3_.count as count2_5_3_,
        orderitems3_.item_id as item_id4_5_3_,
        orderitems3_.order_id as order_id5_5_3_,
        orderitems3_.order_price as order_pr3_5_3_,
        orderitems3_.order_id as order_id5_5_0__,
        orderitems3_.order_item_id as order_it1_5_0__,
        item4_.name as name3_3_4_,
        item4_.price as price4_3_4_,
        item4_.stock_quantity as stock_qu5_3_4_,
        item4_.artist as artist6_3_4_,
        item4_.etc as etc7_3_4_,
        item4_.author as author8_3_4_,
        item4_.isbn as isbn9_3_4_,
        item4_.actor as actor10_3_4_,
        item4_.director as directo11_3_4_,
        item4_.dtype as dtype1_3_4_
    from
        orders order0_
    inner join
        member member1_
            on order0_.member_id=member1_.member_id
    inner join
        delivery delivery2_
            on order0_.delivery_id=delivery2_.delivery_id
    inner join
        order_item orderitems3_
            on order0_.order_id=orderitems3_.order_id
    inner join
        item item4_
            on orderitems3_.item_id=item4_.item_id

뻥튀기 된걸 제거 하려면 ? distinct 를 추가해주면 된다.

public List<Order> findAllWithItem() {
        // 실무에서는 조금 복잡해지면 QueryDsl 쓰세요
        // 정말 쉽게할 수 있습니다.

        // 이러헥 하면 order 가 OrderItems 때문에 뻥튀기가 되버립니다.
        return em.createQuery(
                "select distinct o from Order o " +
                        " join fetch o.member m " +
                        " join fetch o.delivery d " +
                        " join fetch o.orderItems oi " +
                        " join fetch oi.item i", Order.class).getResultList();
    }

실제 날아가는 쿼리인데 distinct 가 추가된 걸 볼 수 있는데 실제로는 DB에서 이 쿼리를 실행해도 중복이 제거되지는 않는다.

JPA 가 distinct 를 보고 중복된 데이터를 제거해준 것이다.

select
        distinct order0_.order_id as order_id1_6_0_,
        member1_.member_id as member_i1_4_1_,
        delivery2_.delivery_id as delivery1_2_2_,
        orderitems3_.order_item_id as order_it1_5_3_,
        item4_.item_id as item_id2_3_4_,
        order0_.delivery_id as delivery4_6_0_,
        order0_.member_id as member_i5_6_0_,
        order0_.order_date as order_da2_6_0_,
        order0_.status as status3_6_0_,
        member1_.city as city2_4_1_,
        member1_.street as street3_4_1_,
        member1_.zipcode as zipcode4_4_1_,
        member1_.user_name as user_nam5_4_1_,
        delivery2_.city as city2_2_2_,
        delivery2_.street as street3_2_2_,
        delivery2_.zipcode as zipcode4_2_2_,
        delivery2_.status as status5_2_2_,
        orderitems3_.count as count2_5_3_,
        orderitems3_.item_id as item_id4_5_3_,
        orderitems3_.order_id as order_id5_5_3_,
        orderitems3_.order_price as order_pr3_5_3_,
        orderitems3_.order_id as order_id5_5_0__,
        orderitems3_.order_item_id as order_it1_5_0__,
        item4_.name as name3_3_4_,
        item4_.price as price4_3_4_,
        item4_.stock_quantity as stock_qu5_3_4_,
        item4_.artist as artist6_3_4_,
        item4_.etc as etc7_3_4_,
        item4_.author as author8_3_4_,
        item4_.isbn as isbn9_3_4_,
        item4_.actor as actor10_3_4_,
        item4_.director as directo11_3_4_,
        item4_.dtype as dtype1_3_4_
    from
        orders order0_
    inner join
        member member1_
            on order0_.member_id=member1_.member_id
    inner join
        delivery delivery2_
            on order0_.delivery_id=delivery2_.delivery_id
    inner join
        order_item orderitems3_
            on order0_.order_id=orderitems3_.order_id
    inner join
        item item4_
            on orderitems3_.item_id=item4_.item_id

JPA 에서 distinct 키워드의 두가지 기능이 있다.

  1. SQL 에 distinct 를 추가해준다
  2. 중복된 루트 데이터를 제거해준다. (보통 루트라고 표현해주는데 여기서는 Order o)

기존에 10번 가까이 나가던 쿼리가 1번으로 줄어든 것을 알 수 있다.

JPA 의 막강한 fetch join 을 써야겠죠?

그런데 fetch join 의 어마어마한 단점이 있다. → 페이징 불가능

컬렉션 페치 조인을 사용하면 페이징이 불가능하다. 하이버네이트는 경고 로그를 남기면서 모든 데이터를 DB에서 읽어오고, 메모리에서 페이징 해버린다.(매우 위험하다)

기존코드에 페이징을 추가하였는데

public List<Order> findAllWithItem() {
        // 실무에서는 조금 복잡해지면 QueryDsl 쓰세요
        // 정말 쉽게할 수 있습니다.

        // 이러헥 하면 order 가 OrderItems 때문에 뻥튀기가 되버립니다.
        return em.createQuery(
                        "select distinct o from Order o " +
                                " join fetch o.member m " +
                                " join fetch o.delivery d " +
                                " join fetch o.orderItems oi " +
                                " join fetch oi.item i", Order.class).setFirstResult(1)
                .setMaxResults(100).
                getResultList();
    }

실제 날라간 쿼리를 보면 페이징이 안된것을 볼 수 있다.

select
        distinct order0_.order_id as order_id1_6_0_,
        member1_.member_id as member_i1_4_1_,
        delivery2_.delivery_id as delivery1_2_2_,
        orderitems3_.order_item_id as order_it1_5_3_,
        item4_.item_id as item_id2_3_4_,
        order0_.delivery_id as delivery4_6_0_,
        order0_.member_id as member_i5_6_0_,
        order0_.order_date as order_da2_6_0_,
        order0_.status as status3_6_0_,
        member1_.city as city2_4_1_,
        member1_.street as street3_4_1_,
        member1_.zipcode as zipcode4_4_1_,
        member1_.user_name as user_nam5_4_1_,
        delivery2_.city as city2_2_2_,
        delivery2_.street as street3_2_2_,
        delivery2_.zipcode as zipcode4_2_2_,
        delivery2_.status as status5_2_2_,
        orderitems3_.count as count2_5_3_,
        orderitems3_.item_id as item_id4_5_3_,
        orderitems3_.order_id as order_id5_5_3_,
        orderitems3_.order_price as order_pr3_5_3_,
        orderitems3_.order_id as order_id5_5_0__,
        orderitems3_.order_item_id as order_it1_5_0__,
        item4_.name as name3_3_4_,
        item4_.price as price4_3_4_,
        item4_.stock_quantity as stock_qu5_3_4_,
        item4_.artist as artist6_3_4_,
        item4_.etc as etc7_3_4_,
        item4_.author as author8_3_4_,
        item4_.isbn as isbn9_3_4_,
        item4_.actor as actor10_3_4_,
        item4_.director as directo11_3_4_,
        item4_.dtype as dtype1_3_4_ 
    from
        orders order0_ 
    inner join
        member member1_ 
            on order0_.member_id=member1_.member_id 
    inner join
        delivery delivery2_ 
            on order0_.delivery_id=delivery2_.delivery_id 
    inner join
        order_item orderitems3_ 
            on order0_.order_id=orderitems3_.order_id 
    inner join
        item item4_ 
            on orderitems3_.item_id=item4_.item_id

하이버네이트의 경고를 보면

2023-05-10 21:13:08.955 WARN 10044 --- [nio-8080-exec-3] o.h.h.internal.ast.QueryTranslatorImpl : HHH000104: firstResult/maxResults specified with collection fetch; applying in memory!

collection fetch 여서 메모리에서 페이징 처리를 했다는 경고이다.

Order가 1000만건이 넘는다고 생각하면 out of memory 가 될 수 있다.

왜 이렇게 될까요?

1:N 쿼리를 fetch join 을 해버리면, 사실 쿼리 상으로는 중복제거가 안 됩니

그 상태에서 페이징을 해버리면 우리가 원하는 결과가 제대로 안 나옵니다.

그래서 JPA 에서 경고를 내고 메모리에서 이걸 해버립니다. 물론 이걸 쓰면 큰일납니다.

일대다가 아닌건 상관이 없어요, 일대다일때는 fetch join 을 하면 데이터가 뻥튀기가 되버리기 때문에 DB 에서 페이징이 불가능해집니다.

참고: 컬렉션 페치 조인은 1개만 사용할 수 있다.(물론 쓸 수 는 있는데 쓰면 안 된다는 뜻) 컬렉션 둘 이상에서 페치 조인을 사용하면 데이터가 부정합하게 조회될 수 있다.

← JPA가 뻥튀기된 데이터를 못 줄일 수가 있다.

페이징과 한계 돌파

어떻게 이 문제를 해결할건가.. 많은 개발자들이 이걸 잘 모르세요(나만 알고 있다)

코드도 단순하고 성능 최적화도 보장하는 굉장한 방법을 알려드리겠습니다

대부분의 페이징 + 컬렉션 엔티티 조회 문제는 이 방법으로 해결할 수 있다. (사실 다른 방법이 없다)

  1. 먼저 ToOne 관계를 모두 페치조인 하세요 (ToOne 관계는 조인을 해도 데이터가 늘어나지 않잖아요)
  2. 컬렉션은 지연 로딩으로 조회한다
  3. 지연 로딩 성능 최적화를 위해 hibernate.default_batch_fetch_size, @BatchSize 를 적용한다

이 옵션을 사용하면 컬렉션이나 프록시로 설정한 객체를 IN 을 통해서 가져온다 (묶어서 조회한다는 뜻 같다)

기존 코드에 ToOne 관계인 Member 와 Delivery 만 fetch join 한다.

public List<Order> findAllWithMemberDeliveryPagin(int offset, int limit) {
        return em.createQuery(
                "select o from Order o" +
                        " join fetch o.member m" +
                        " join fetch o.delivery d",
                Order.class
        ).setFirstResult(offset).setMaxResults(limit).getResultList();
    }
select
        order0_.order_id as order_id1_6_0_,
        member1_.member_id as member_i1_4_1_,
        delivery2_.delivery_id as delivery1_2_2_,
        order0_.delivery_id as delivery4_6_0_,
        order0_.member_id as member_i5_6_0_,
        order0_.order_date as order_da2_6_0_,
        order0_.status as status3_6_0_,
        member1_.city as city2_4_1_,
        member1_.street as street3_4_1_,
        member1_.zipcode as zipcode4_4_1_,
        member1_.user_name as user_nam5_4_1_,
        delivery2_.city as city2_2_2_,
        delivery2_.street as street3_2_2_,
        delivery2_.zipcode as zipcode4_2_2_,
        delivery2_.status as status5_2_2_ 
    from
        orders order0_ 
    inner join
        member member1_ 
            on order0_.member_id=member1_.member_id 
    inner join
        delivery delivery2_ 
            on order0_.delivery_id=delivery2_.delivery_id limit ? offset ?

페이징이 되는 것을 볼 수 있다.

하이버네이트의 default_batch_fetch_size 옵션을 추가해준다

properties:
      hibernate:
        format_sql: true
        default_batch_fetch_size: 100

IN 을 사용해서 쿼리가 2번만 나간 것을 확인 할 수 있다.

select
        orderitems0_.order_id as order_id5_5_1_,
        orderitems0_.order_item_id as order_it1_5_1_,
        orderitems0_.order_item_id as order_it1_5_0_,
        orderitems0_.count as count2_5_0_,
        orderitems0_.item_id as item_id4_5_0_,
        orderitems0_.order_id as order_id5_5_0_,
        orderitems0_.order_price as order_pr3_5_0_ 
    from
        order_item orderitems0_ 
    where
        orderitems0_.order_id in (
            ?, ?
        )

select
        item0_.item_id as item_id2_3_0_,
        item0_.name as name3_3_0_,
        item0_.price as price4_3_0_,
        item0_.stock_quantity as stock_qu5_3_0_,
        item0_.artist as artist6_3_0_,
        item0_.etc as etc7_3_0_,
        item0_.author as author8_3_0_,
        item0_.isbn as isbn9_3_0_,
        item0_.actor as actor10_3_0_,
        item0_.director as directo11_3_0_,
        item0_.dtype as dtype1_3_0_ 
    from
        item item0_ 
    where
        item0_.item_id in (
            ?, ?, ?, ?
        )

되게 놀랍죠?

 

PK 기반으로 IN 절이 날라가기 때문에 DB 입장에서는 최적화가 잘 되는 쿼리에요

이정도면 충분히 여러분이 원하는 성능이 나와요

 

V4가 V3 에 비해 쿼리가 3번 날라가서 더 느리다고 생각할 수 있는데, 생각해보면 V3는 중복데이터도 엄청 많고 그 데이터를 전부 DB에서 Application 으로 전송을 한 V4 는 페이징 처리가 되어 있다.

 

default_batch_fetch_size 옵션은 무조건 켜놓는게 좋아요

 

batch_fetch_size 의 Maximum 은 1000개 입니다. INQuery 가 1000 개 이상 넘어가면 오류가 나는 DB 들이 있습니다. + DB와 WAS 의 성능/부하를 고려해야 한다

 

여기까지 하면 JPA 조회 성능 최적화의 90% 정도는 해결이 됩니다.

 

 

JPA 에서 DTO 직접 조회 (collection 이 있을 때)

package jpabook.jpashop.repository.order.query;

import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;

import javax.persistence.EntityManager;
import java.util.List;

@Repository
@RequiredArgsConstructor
public class OrderQueryRepository {

    private final EntityManager em;

    // collection 을 바로 넣을 수가 없거든요
    public List<OrderQueryDto> findOrderQueryDtos() {
        List<OrderQueryDto> result = findOrders();
        result.forEach(o -> {
            o.setOrderItems(findOrderItems(o.getOrderId()));
        });
        return result;
    }

		private List<OrderQueryDto> findOrders() {
        return em.createQuery(
                "select new jpabook.jpashop.repository.order.query.OrderQueryDto(o.id, m.userName, o.orderDate, o.status, d.address) from Order o " +
                        " join o.member m " +
                        " join o.delivery d", OrderQueryDto.class
        ).getResultList();
    }

    private List<OrderItemQueryDto> findOrderItems(Long orderId) {
        // 재밌는 점 oi.order.id
        return em.createQuery(
                "select new jpabook.jpashop.repository.order.query.OrderItemQueryDto(oi.order.id" +
                        ", i.name, oi.orderPrice, oi.count)" +
                        " from OrderItem  oi" +
                        " join oi.item i" +
                        " where oi.order.id = :orderId", OrderItemQueryDto.class
        ).setParameter("orderId", orderId).getResultList();
    }

    
}

N+1 문제 발생 😮

 

이걸 최적화 하려면?

 

루프를 돌면서 Items 를 찾아주는게 아니라 In 절로 한 번에 찾아서 맵으로 바꿔서 값 할당

public List<OrderQueryDto> findAllOrderByDto_optimization() {
        List<OrderQueryDto> result = findOrders();

        // 이전꺼 단점이 Loop 를 돈단 말이에요
        List<Long> orderIds = result.stream().map(o -> o.getOrderId()).collect(Collectors.toList());

        Map<Long, List<OrderItemQueryDto>> orderItemsMap = em.createQuery("select new jpabook.jpashop.repository.order.query.OrderItemQueryDto(oi.order.id" +
                        ", i.name, oi.orderPrice, oi.count)" +
                        " from OrderItem  oi" +
                        " join oi.item i" +
                        " where oi.order.id in :orderIds", OrderItemQueryDto.class).setParameter("orderIds", orderIds)
                .getResultStream().collect(Collectors.groupingBy(OrderItemQueryDto::getOrderId));

        result.forEach(o -> {
            o.setOrderItems(orderItemsMap.getOrDefault(o.getOrderId(), new ArrayList<>()));
        });

        return result;
    }

코드 정리

public List<OrderQueryDto> findAllOrderByDto_optimization() {
        List<OrderQueryDto> result = findOrders();

        Map<Long, List<OrderItemQueryDto>> orderItemsMap = findOrderItemMap(toOrderIds(result));

        result.forEach(o -> {
            o.setOrderItems(orderItemsMap.getOrDefault(o.getOrderId(), new ArrayList<>()));
        });

        return result;
    }

    private static List<Long> toOrderIds(List<OrderQueryDto> result) {
        return result.stream().map(o -> o.getOrderId()).collect(Collectors.toList());
    }

    private Map<Long, List<OrderItemQueryDto>> findOrderItemMap(List<Long> orderIds) {
        return em.createQuery("select new jpabook.jpashop.repository.order.query.OrderItemQueryDto(oi.order.id" +
                        ", i.name, oi.orderPrice, oi.count)" +
                        " from OrderItem  oi" +
                        " join oi.item i" +
                        " where oi.order.id in :orderIds", OrderItemQueryDto.class).setParameter("orderIds", orderIds)
                .getResultStream().collect(Collectors.groupingBy(OrderItemQueryDto::getOrderId));
    }

 

 

쿼리 한 방으로 해결하는 방법도 있음

public List<OrderFlatDto> findAllOrderByDto_flat() {
        return em.createQuery("select new jpabook.jpashop.repository.order.query.OrderFlatDto(o.id, m.userName, o.orderDate,o.status, d.address, i.name, oi.orderPrice, oi.count)" +
                " from Order o " +
                " join o.member m " +
                " join o.delivery d " +
                " join o.orderItems oi " +
                " join oi.item i", OrderFlatDto.class).getResultList();
    }

Flat 하게 데이터를 펴서 중복 포함 데이터를 다 가져와서

@GetMapping("/api/v6/orders")
    public List<OrderQueryDto> ordersV6() {
        List<OrderFlatDto> flats = orderQueryRepository.findAllOrderByDto_flat();
        return flats.stream()
                .collect(groupingBy(o -> new OrderQueryDto(o.getOrderId(), o.getName(), o.getOrderDate(), o.getOrderStatus(), o.getAddress()),
                        mapping(o -> new OrderItemQueryDto(o.getOrderId(), o.getItemName(), o.getOrderPrice(), o.getOrderCount()), toList())
                )).entrySet().stream()
                .map(e -> new OrderQueryDto(e.getKey().getOrderId(), e.getKey().getName(), e.getKey().getOrderDate(), e.getKey().getOrderStatus(), e.getKey().getAddress(), e.getValue()))
                .collect(toList());
    }

API Spec 에 맞게 데이터를 만들어주면 됩니다.