API 개발 고급 - 컬렉션 조회 최적화
OneToMany 를 조회하고 최적화 하는 방법
엔티티를 직접 노출하는 잘못된 API V1
/*
엔티티를 직접 노출
이렇게 하면 안 된다는 예시
*/
@GetMapping("/api/v1/orders")
public List<Order> ordersV1() {
List<Order> all = orderRepository.findAllByString(new OrderSearch());
for (Order order : all) {
order.getMember().getUserName();
order.getDelivery().getStatus();
order.getOrderItems().stream().forEach(o -> o.getItem().getName());
}
return all;
}
DTO를 사용해서 리턴하는 API V2 (fetch joinX)
/*
DTO 를 사용해서 리턴
*/
@GetMapping("/api/v2/orders")
public List<OrderDto> ordersV2() {
List<Order> orders = orderRepository.findAllByString(new OrderSearch());
// 실무에서는 이걸 페이징을 하거나 뭘 하겠죠
List<OrderDto> result = orders.stream()
.map(OrderDto::new)
.collect(Collectors.toList());
return result;
}
쿼리를 몇 개 호출할까?
-- Oder 조회 1번
select
order0_.order_id as order_id1_6_,
order0_.delivery_id as delivery4_6_,
order0_.member_id as member_i5_6_,
order0_.order_date as order_da2_6_,
order0_.status as status3_6_
from
orders order0_
inner join
member member1_
on order0_.member_id=member1_.member_id limit ?
-- Member 1번
select
member0_.member_id as member_i1_4_0_,
member0_.city as city2_4_0_,
member0_.street as street3_4_0_,
member0_.zipcode as zipcode4_4_0_,
member0_.user_name as user_nam5_4_0_
from
member member0_
where
member0_.member_id=?
-- OrderItem 1번
select
orderitems0_.order_id as order_id5_5_0_,
orderitems0_.order_item_id as order_it1_5_0_,
orderitems0_.order_item_id as order_it1_5_1_,
orderitems0_.count as count2_5_1_,
orderitems0_.item_id as item_id4_5_1_,
orderitems0_.order_id as order_id5_5_1_,
orderitems0_.order_price as order_pr3_5_1_
from
order_item orderitems0_
where
orderitems0_.order_id=?
-- Item 쿼리 2 (item 갯수에 비례)
select
item0_.item_id as item_id2_3_0_,
item0_.name as name3_3_0_,
item0_.price as price4_3_0_,
item0_.stock_quantity as stock_qu5_3_0_,
item0_.artist as artist6_3_0_,
item0_.etc as etc7_3_0_,
item0_.author as author8_3_0_,
item0_.isbn as isbn9_3_0_,
item0_.actor as actor10_3_0_,
item0_.director as directo11_3_0_,
item0_.dtype as dtype1_3_0_
from
item item0_
where
item0_.item_id=?
select
item0_.item_id as item_id2_3_0_,
item0_.name as name3_3_0_,
item0_.price as price4_3_0_,
item0_.stock_quantity as stock_qu5_3_0_,
item0_.artist as artist6_3_0_,
item0_.etc as etc7_3_0_,
item0_.author as author8_3_0_,
item0_.isbn as isbn9_3_0_,
item0_.actor as actor10_3_0_,
item0_.director as directo11_3_0_,
item0_.dtype as dtype1_3_0_
from
item item0_
where
item0_.item_id=?
딱봐도 이렇게 했다가는 성능이.. 다음 시간에는 이거를 fetch join 을 적용하는 방법(collection 에 대해서) 에 대해서 알아보겠습니다. collection 에 fetch join 할 때는 고민해야 할 부분이 더 있습니다.
기존처럼 fetch join 을 사용해서 최적화를 한 API V3
public List<Order> findAllWithMemberDelivery() {
return em.createQuery(
"select o from Order o" +
" join fetch o.member m" +
" join fetch o.delivery d",
Order.class
).getResultList();
}
// JPA 입장에서는 어떤 데이터가 뻥튀기 되면 안되는지 파악할 수 없다.
select * from orders o
join order_item oi on o.order_id = oi.order_id
/*
기존처럼 fetch join 을 쓰면 1:N 관계에서 join 하면 데이터가 뻥튀기 됩니다.
4,2023-05-10 20:50:30.746887,ORDER
4,2023-05-10 20:50:30.746887,ORDER
11,2023-05-10 20:50:30.788022,ORDER
11,2023-05-10 20:50:30.788022,ORDER
4번 Order가 2개로 뻥튀기 된걸 볼 수 있음
*/
-- 실제 JPA 에서 나가는 쿼
select
order0_.order_id as order_id1_6_0_,
member1_.member_id as member_i1_4_1_,
delivery2_.delivery_id as delivery1_2_2_,
orderitems3_.order_item_id as order_it1_5_3_,
item4_.item_id as item_id2_3_4_,
order0_.delivery_id as delivery4_6_0_,
order0_.member_id as member_i5_6_0_,
order0_.order_date as order_da2_6_0_,
order0_.status as status3_6_0_,
member1_.city as city2_4_1_,
member1_.street as street3_4_1_,
member1_.zipcode as zipcode4_4_1_,
member1_.user_name as user_nam5_4_1_,
delivery2_.city as city2_2_2_,
delivery2_.street as street3_2_2_,
delivery2_.zipcode as zipcode4_2_2_,
delivery2_.status as status5_2_2_,
orderitems3_.count as count2_5_3_,
orderitems3_.item_id as item_id4_5_3_,
orderitems3_.order_id as order_id5_5_3_,
orderitems3_.order_price as order_pr3_5_3_,
orderitems3_.order_id as order_id5_5_0__,
orderitems3_.order_item_id as order_it1_5_0__,
item4_.name as name3_3_4_,
item4_.price as price4_3_4_,
item4_.stock_quantity as stock_qu5_3_4_,
item4_.artist as artist6_3_4_,
item4_.etc as etc7_3_4_,
item4_.author as author8_3_4_,
item4_.isbn as isbn9_3_4_,
item4_.actor as actor10_3_4_,
item4_.director as directo11_3_4_,
item4_.dtype as dtype1_3_4_
from
orders order0_
inner join
member member1_
on order0_.member_id=member1_.member_id
inner join
delivery delivery2_
on order0_.delivery_id=delivery2_.delivery_id
inner join
order_item orderitems3_
on order0_.order_id=orderitems3_.order_id
inner join
item item4_
on orderitems3_.item_id=item4_.item_id
뻥튀기 된걸 제거 하려면 ? distinct 를 추가해주면 된다.
public List<Order> findAllWithItem() {
// 실무에서는 조금 복잡해지면 QueryDsl 쓰세요
// 정말 쉽게할 수 있습니다.
// 이러헥 하면 order 가 OrderItems 때문에 뻥튀기가 되버립니다.
return em.createQuery(
"select distinct o from Order o " +
" join fetch o.member m " +
" join fetch o.delivery d " +
" join fetch o.orderItems oi " +
" join fetch oi.item i", Order.class).getResultList();
}
실제 날아가는 쿼리인데 distinct 가 추가된 걸 볼 수 있는데 실제로는 DB에서 이 쿼리를 실행해도 중복이 제거되지는 않는다.
JPA 가 distinct 를 보고 중복된 데이터를 제거해준 것이다.
select
distinct order0_.order_id as order_id1_6_0_,
member1_.member_id as member_i1_4_1_,
delivery2_.delivery_id as delivery1_2_2_,
orderitems3_.order_item_id as order_it1_5_3_,
item4_.item_id as item_id2_3_4_,
order0_.delivery_id as delivery4_6_0_,
order0_.member_id as member_i5_6_0_,
order0_.order_date as order_da2_6_0_,
order0_.status as status3_6_0_,
member1_.city as city2_4_1_,
member1_.street as street3_4_1_,
member1_.zipcode as zipcode4_4_1_,
member1_.user_name as user_nam5_4_1_,
delivery2_.city as city2_2_2_,
delivery2_.street as street3_2_2_,
delivery2_.zipcode as zipcode4_2_2_,
delivery2_.status as status5_2_2_,
orderitems3_.count as count2_5_3_,
orderitems3_.item_id as item_id4_5_3_,
orderitems3_.order_id as order_id5_5_3_,
orderitems3_.order_price as order_pr3_5_3_,
orderitems3_.order_id as order_id5_5_0__,
orderitems3_.order_item_id as order_it1_5_0__,
item4_.name as name3_3_4_,
item4_.price as price4_3_4_,
item4_.stock_quantity as stock_qu5_3_4_,
item4_.artist as artist6_3_4_,
item4_.etc as etc7_3_4_,
item4_.author as author8_3_4_,
item4_.isbn as isbn9_3_4_,
item4_.actor as actor10_3_4_,
item4_.director as directo11_3_4_,
item4_.dtype as dtype1_3_4_
from
orders order0_
inner join
member member1_
on order0_.member_id=member1_.member_id
inner join
delivery delivery2_
on order0_.delivery_id=delivery2_.delivery_id
inner join
order_item orderitems3_
on order0_.order_id=orderitems3_.order_id
inner join
item item4_
on orderitems3_.item_id=item4_.item_id
JPA 에서 distinct 키워드의 두가지 기능이 있다.
- SQL 에 distinct 를 추가해준다
- 중복된 루트 데이터를 제거해준다. (보통 루트라고 표현해주는데 여기서는 Order o)
기존에 10번 가까이 나가던 쿼리가 1번으로 줄어든 것을 알 수 있다.
JPA 의 막강한 fetch join 을 써야겠죠?
그런데 fetch join 의 어마어마한 단점이 있다. → 페이징 불가능
컬렉션 페치 조인을 사용하면 페이징이 불가능하다. 하이버네이트는 경고 로그를 남기면서 모든 데이터를 DB에서 읽어오고, 메모리에서 페이징 해버린다.(매우 위험하다)
기존코드에 페이징을 추가하였는데
public List<Order> findAllWithItem() {
// 실무에서는 조금 복잡해지면 QueryDsl 쓰세요
// 정말 쉽게할 수 있습니다.
// 이러헥 하면 order 가 OrderItems 때문에 뻥튀기가 되버립니다.
return em.createQuery(
"select distinct o from Order o " +
" join fetch o.member m " +
" join fetch o.delivery d " +
" join fetch o.orderItems oi " +
" join fetch oi.item i", Order.class).setFirstResult(1)
.setMaxResults(100).
getResultList();
}
실제 날라간 쿼리를 보면 페이징이 안된것을 볼 수 있다.
select
distinct order0_.order_id as order_id1_6_0_,
member1_.member_id as member_i1_4_1_,
delivery2_.delivery_id as delivery1_2_2_,
orderitems3_.order_item_id as order_it1_5_3_,
item4_.item_id as item_id2_3_4_,
order0_.delivery_id as delivery4_6_0_,
order0_.member_id as member_i5_6_0_,
order0_.order_date as order_da2_6_0_,
order0_.status as status3_6_0_,
member1_.city as city2_4_1_,
member1_.street as street3_4_1_,
member1_.zipcode as zipcode4_4_1_,
member1_.user_name as user_nam5_4_1_,
delivery2_.city as city2_2_2_,
delivery2_.street as street3_2_2_,
delivery2_.zipcode as zipcode4_2_2_,
delivery2_.status as status5_2_2_,
orderitems3_.count as count2_5_3_,
orderitems3_.item_id as item_id4_5_3_,
orderitems3_.order_id as order_id5_5_3_,
orderitems3_.order_price as order_pr3_5_3_,
orderitems3_.order_id as order_id5_5_0__,
orderitems3_.order_item_id as order_it1_5_0__,
item4_.name as name3_3_4_,
item4_.price as price4_3_4_,
item4_.stock_quantity as stock_qu5_3_4_,
item4_.artist as artist6_3_4_,
item4_.etc as etc7_3_4_,
item4_.author as author8_3_4_,
item4_.isbn as isbn9_3_4_,
item4_.actor as actor10_3_4_,
item4_.director as directo11_3_4_,
item4_.dtype as dtype1_3_4_
from
orders order0_
inner join
member member1_
on order0_.member_id=member1_.member_id
inner join
delivery delivery2_
on order0_.delivery_id=delivery2_.delivery_id
inner join
order_item orderitems3_
on order0_.order_id=orderitems3_.order_id
inner join
item item4_
on orderitems3_.item_id=item4_.item_id
하이버네이트의 경고를 보면
2023-05-10 21:13:08.955 WARN 10044 --- [nio-8080-exec-3] o.h.h.internal.ast.QueryTranslatorImpl : HHH000104: firstResult/maxResults specified with collection fetch; applying in memory!
collection fetch 여서 메모리에서 페이징 처리를 했다는 경고이다.
Order가 1000만건이 넘는다고 생각하면 out of memory 가 될 수 있다.
왜 이렇게 될까요?
1:N 쿼리를 fetch join 을 해버리면, 사실 쿼리 상으로는 중복제거가 안 됩니
그 상태에서 페이징을 해버리면 우리가 원하는 결과가 제대로 안 나옵니다.
그래서 JPA 에서 경고를 내고 메모리에서 이걸 해버립니다. 물론 이걸 쓰면 큰일납니다.
일대다가 아닌건 상관이 없어요, 일대다일때는 fetch join 을 하면 데이터가 뻥튀기가 되버리기 때문에 DB 에서 페이징이 불가능해집니다.
참고: 컬렉션 페치 조인은 1개만 사용할 수 있다.(물론 쓸 수 는 있는데 쓰면 안 된다는 뜻) 컬렉션 둘 이상에서 페치 조인을 사용하면 데이터가 부정합하게 조회될 수 있다.
← JPA가 뻥튀기된 데이터를 못 줄일 수가 있다.
페이징과 한계 돌파
어떻게 이 문제를 해결할건가.. 많은 개발자들이 이걸 잘 모르세요(나만 알고 있다)
코드도 단순하고 성능 최적화도 보장하는 굉장한 방법을 알려드리겠습니다
대부분의 페이징 + 컬렉션 엔티티 조회 문제는 이 방법으로 해결할 수 있다. (사실 다른 방법이 없다)
- 먼저 ToOne 관계를 모두 페치조인 하세요 (ToOne 관계는 조인을 해도 데이터가 늘어나지 않잖아요)
- 컬렉션은 지연 로딩으로 조회한다
- 지연 로딩 성능 최적화를 위해 hibernate.default_batch_fetch_size, @BatchSize 를 적용한다
이 옵션을 사용하면 컬렉션이나 프록시로 설정한 객체를 IN 을 통해서 가져온다 (묶어서 조회한다는 뜻 같다)
기존 코드에 ToOne 관계인 Member 와 Delivery 만 fetch join 한다.
public List<Order> findAllWithMemberDeliveryPagin(int offset, int limit) {
return em.createQuery(
"select o from Order o" +
" join fetch o.member m" +
" join fetch o.delivery d",
Order.class
).setFirstResult(offset).setMaxResults(limit).getResultList();
}
select
order0_.order_id as order_id1_6_0_,
member1_.member_id as member_i1_4_1_,
delivery2_.delivery_id as delivery1_2_2_,
order0_.delivery_id as delivery4_6_0_,
order0_.member_id as member_i5_6_0_,
order0_.order_date as order_da2_6_0_,
order0_.status as status3_6_0_,
member1_.city as city2_4_1_,
member1_.street as street3_4_1_,
member1_.zipcode as zipcode4_4_1_,
member1_.user_name as user_nam5_4_1_,
delivery2_.city as city2_2_2_,
delivery2_.street as street3_2_2_,
delivery2_.zipcode as zipcode4_2_2_,
delivery2_.status as status5_2_2_
from
orders order0_
inner join
member member1_
on order0_.member_id=member1_.member_id
inner join
delivery delivery2_
on order0_.delivery_id=delivery2_.delivery_id limit ? offset ?
페이징이 되는 것을 볼 수 있다.
하이버네이트의 default_batch_fetch_size 옵션을 추가해준다
properties:
hibernate:
format_sql: true
default_batch_fetch_size: 100
IN 을 사용해서 쿼리가 2번만 나간 것을 확인 할 수 있다.
select
orderitems0_.order_id as order_id5_5_1_,
orderitems0_.order_item_id as order_it1_5_1_,
orderitems0_.order_item_id as order_it1_5_0_,
orderitems0_.count as count2_5_0_,
orderitems0_.item_id as item_id4_5_0_,
orderitems0_.order_id as order_id5_5_0_,
orderitems0_.order_price as order_pr3_5_0_
from
order_item orderitems0_
where
orderitems0_.order_id in (
?, ?
)
select
item0_.item_id as item_id2_3_0_,
item0_.name as name3_3_0_,
item0_.price as price4_3_0_,
item0_.stock_quantity as stock_qu5_3_0_,
item0_.artist as artist6_3_0_,
item0_.etc as etc7_3_0_,
item0_.author as author8_3_0_,
item0_.isbn as isbn9_3_0_,
item0_.actor as actor10_3_0_,
item0_.director as directo11_3_0_,
item0_.dtype as dtype1_3_0_
from
item item0_
where
item0_.item_id in (
?, ?, ?, ?
)
되게 놀랍죠?
PK 기반으로 IN 절이 날라가기 때문에 DB 입장에서는 최적화가 잘 되는 쿼리에요
이정도면 충분히 여러분이 원하는 성능이 나와요
V4가 V3 에 비해 쿼리가 3번 날라가서 더 느리다고 생각할 수 있는데, 생각해보면 V3는 중복데이터도 엄청 많고 그 데이터를 전부 DB에서 Application 으로 전송을 한 V4 는 페이징 처리가 되어 있다.
default_batch_fetch_size 옵션은 무조건 켜놓는게 좋아요
batch_fetch_size 의 Maximum 은 1000개 입니다. INQuery 가 1000 개 이상 넘어가면 오류가 나는 DB 들이 있습니다. + DB와 WAS 의 성능/부하를 고려해야 한다
여기까지 하면 JPA 조회 성능 최적화의 90% 정도는 해결이 됩니다.
JPA 에서 DTO 직접 조회 (collection 이 있을 때)
package jpabook.jpashop.repository.order.query;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;
import javax.persistence.EntityManager;
import java.util.List;
@Repository
@RequiredArgsConstructor
public class OrderQueryRepository {
private final EntityManager em;
// collection 을 바로 넣을 수가 없거든요
public List<OrderQueryDto> findOrderQueryDtos() {
List<OrderQueryDto> result = findOrders();
result.forEach(o -> {
o.setOrderItems(findOrderItems(o.getOrderId()));
});
return result;
}
private List<OrderQueryDto> findOrders() {
return em.createQuery(
"select new jpabook.jpashop.repository.order.query.OrderQueryDto(o.id, m.userName, o.orderDate, o.status, d.address) from Order o " +
" join o.member m " +
" join o.delivery d", OrderQueryDto.class
).getResultList();
}
private List<OrderItemQueryDto> findOrderItems(Long orderId) {
// 재밌는 점 oi.order.id
return em.createQuery(
"select new jpabook.jpashop.repository.order.query.OrderItemQueryDto(oi.order.id" +
", i.name, oi.orderPrice, oi.count)" +
" from OrderItem oi" +
" join oi.item i" +
" where oi.order.id = :orderId", OrderItemQueryDto.class
).setParameter("orderId", orderId).getResultList();
}
}
N+1 문제 발생 😮
이걸 최적화 하려면?
루프를 돌면서 Items 를 찾아주는게 아니라 In 절로 한 번에 찾아서 맵으로 바꿔서 값 할당
public List<OrderQueryDto> findAllOrderByDto_optimization() {
List<OrderQueryDto> result = findOrders();
// 이전꺼 단점이 Loop 를 돈단 말이에요
List<Long> orderIds = result.stream().map(o -> o.getOrderId()).collect(Collectors.toList());
Map<Long, List<OrderItemQueryDto>> orderItemsMap = em.createQuery("select new jpabook.jpashop.repository.order.query.OrderItemQueryDto(oi.order.id" +
", i.name, oi.orderPrice, oi.count)" +
" from OrderItem oi" +
" join oi.item i" +
" where oi.order.id in :orderIds", OrderItemQueryDto.class).setParameter("orderIds", orderIds)
.getResultStream().collect(Collectors.groupingBy(OrderItemQueryDto::getOrderId));
result.forEach(o -> {
o.setOrderItems(orderItemsMap.getOrDefault(o.getOrderId(), new ArrayList<>()));
});
return result;
}
코드 정리
public List<OrderQueryDto> findAllOrderByDto_optimization() {
List<OrderQueryDto> result = findOrders();
Map<Long, List<OrderItemQueryDto>> orderItemsMap = findOrderItemMap(toOrderIds(result));
result.forEach(o -> {
o.setOrderItems(orderItemsMap.getOrDefault(o.getOrderId(), new ArrayList<>()));
});
return result;
}
private static List<Long> toOrderIds(List<OrderQueryDto> result) {
return result.stream().map(o -> o.getOrderId()).collect(Collectors.toList());
}
private Map<Long, List<OrderItemQueryDto>> findOrderItemMap(List<Long> orderIds) {
return em.createQuery("select new jpabook.jpashop.repository.order.query.OrderItemQueryDto(oi.order.id" +
", i.name, oi.orderPrice, oi.count)" +
" from OrderItem oi" +
" join oi.item i" +
" where oi.order.id in :orderIds", OrderItemQueryDto.class).setParameter("orderIds", orderIds)
.getResultStream().collect(Collectors.groupingBy(OrderItemQueryDto::getOrderId));
}
쿼리 한 방으로 해결하는 방법도 있음
public List<OrderFlatDto> findAllOrderByDto_flat() {
return em.createQuery("select new jpabook.jpashop.repository.order.query.OrderFlatDto(o.id, m.userName, o.orderDate,o.status, d.address, i.name, oi.orderPrice, oi.count)" +
" from Order o " +
" join o.member m " +
" join o.delivery d " +
" join o.orderItems oi " +
" join oi.item i", OrderFlatDto.class).getResultList();
}
Flat 하게 데이터를 펴서 중복 포함 데이터를 다 가져와서
@GetMapping("/api/v6/orders")
public List<OrderQueryDto> ordersV6() {
List<OrderFlatDto> flats = orderQueryRepository.findAllOrderByDto_flat();
return flats.stream()
.collect(groupingBy(o -> new OrderQueryDto(o.getOrderId(), o.getName(), o.getOrderDate(), o.getOrderStatus(), o.getAddress()),
mapping(o -> new OrderItemQueryDto(o.getOrderId(), o.getItemName(), o.getOrderPrice(), o.getOrderCount()), toList())
)).entrySet().stream()
.map(e -> new OrderQueryDto(e.getKey().getOrderId(), e.getKey().getName(), e.getKey().getOrderDate(), e.getKey().getOrderStatus(), e.getKey().getAddress(), e.getValue()))
.collect(toList());
}
API Spec 에 맞게 데이터를 만들어주면 됩니다.
'기록 > 인프런' 카테고리의 다른 글
실전! 스프링 부트와 JPA 활용2 - 지연 로딩과 조회 성능 최적화 (2) | 2023.05.08 |
---|---|
실전! 스프링 부트와 JPA 활용1 - 동적 쿼리 및 변경 감지와 병합 (0) | 2023.04.29 |
실전! 스프링 부트와 JPA 활용1 - 도메인 분석 설계 (0) | 2023.04.27 |
스프링 핵심 원리 - 고급편 | 쓰레드 로컬 및 디자인 패턴 (0) | 2023.03.15 |
스프링 핵심 원리 - 고급편 | 로그 추적 (0) | 2023.03.14 |