Compare commits

...

1 Commits

Author SHA1 Message Date
Backend Agent 198bd4a859 feat: add Open Food Facts API client for supplement collection
Replace CSV-only collection with Open Food Facts API integration
(world.openfoodfacts.org). Adds paginated search client, scheduled
collector, V004 migration (collector_runs.status varchar(50)),
and 6 unit tests for the new collector.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 14:19:23 +00:00
10 changed files with 462 additions and 0 deletions

View File

@ -60,6 +60,12 @@
<artifactId>liquibase-core</artifactId> <artifactId>liquibase-core</artifactId>
</dependency> </dependency>
<!-- WebFlux (WebClient for API calls) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<!-- OpenCSV --> <!-- OpenCSV -->
<dependency> <dependency>
<groupId>com.opencsv</groupId> <groupId>com.opencsv</groupId>

View File

@ -0,0 +1,48 @@
package ru.oa2.mvp.nutricollector.client;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.reactive.function.client.WebClient;
import ru.oa2.mvp.nutricollector.dto.OpenFoodFactsResponse;
import java.time.Duration;
/**
 * Blocking HTTP client for the Open Food Facts search endpoint ({@code /cgi/search.pl}).
 *
 * <p>The base URL, search terms and page size are injected from the
 * {@code collector.openfoodfacts.*} properties; each has a default so the bean can be
 * created without any configuration.
 */
@Component
@Slf4j
public class OpenFoodFactsClient {

    /**
     * Upper bound for one search call. Without it a stalled connection would block the
     * calling thread forever, because {@code block()} with no argument waits indefinitely.
     */
    private static final Duration REQUEST_TIMEOUT = Duration.ofSeconds(60);

    /** Maximum number of response bytes buffered in memory while decoding (5 MiB). */
    private static final int MAX_RESPONSE_BYTES = 5 * 1024 * 1024;

    private final WebClient webClient;
    private final String searchTerms;
    private final int pageSize;

    /**
     * Builds the underlying {@link WebClient}.
     *
     * @param webClientBuilder builder used to create the client
     * @param baseUrl          root URL every request path is resolved against
     * @param searchTerms      value sent as the {@code search_terms} query parameter
     * @param pageSize         value sent as the {@code page_size} query parameter
     */
    public OpenFoodFactsClient(
            WebClient.Builder webClientBuilder,
            @Value("${collector.openfoodfacts.base-url:https://world.openfoodfacts.org}") String baseUrl,
            @Value("${collector.openfoodfacts.search-terms:supplement}") String searchTerms,
            @Value("${collector.openfoodfacts.page-size:100}") int pageSize
    ) {
        this.searchTerms = searchTerms;
        this.pageSize = pageSize;
        // The base URL is only needed to configure the WebClient, so it is not kept as a field.
        this.webClient = webClientBuilder
                .baseUrl(baseUrl)
                .codecs(configurer -> configurer.defaultCodecs().maxInMemorySize(MAX_RESPONSE_BYTES))
                .build();
    }

    /**
     * Fetches one page of search results and blocks until it has been decoded.
     *
     * @param page page number sent as the {@code page} query parameter
     * @return the decoded response, or {@code null} if the response completes without a body
     * @throws IllegalStateException if no response arrives within {@link #REQUEST_TIMEOUT}
     */
    public OpenFoodFactsResponse search(int page) {
        log.debug("Fetching Open Food Facts page {} (size={}, terms='{}')", page, pageSize, searchTerms);
        return webClient.get()
                .uri(uriBuilder -> uriBuilder
                        .path("/cgi/search.pl")
                        .queryParam("search_terms", searchTerms)
                        .queryParam("json", "1")
                        .queryParam("page_size", pageSize)
                        .queryParam("page", page)
                        .build())
                .retrieve()
                .bodyToMono(OpenFoodFactsResponse.class)
                .block(REQUEST_TIMEOUT);
    }
}

View File

@ -0,0 +1,27 @@
package ru.oa2.mvp.nutricollector.dto;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.List;
/**
 * JSON binding for one page of an Open Food Facts search response.
 *
 * <p>JSON properties that are not listed here are ignored during deserialization
 * ({@code ignoreUnknown = true}). Every component is a reference type, so callers
 * should be prepared for {@code null} values.
 *
 * @param count    value of the {@code count} property (presumably the total number of
 *                 matches; confirm against the API documentation)
 * @param page     value of the {@code page} property
 * @param pageSize value of the {@code page_size} property
 * @param products value of the {@code products} array
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public record OpenFoodFactsResponse(
@JsonProperty("count") Integer count,
@JsonProperty("page") Integer page,
@JsonProperty("page_size") Integer pageSize,
@JsonProperty("products") List<Product> products
) {
/**
 * One entry of the {@code products} array. Unknown JSON properties are ignored.
 *
 * @param productName     value of {@code product_name}
 * @param brands          value of {@code brands}, bound as a single string
 * @param categoriesTags  value of {@code categories_tags}
 * @param ingredientsText value of {@code ingredients_text}
 * @param countriesTags   value of {@code countries_tags}
 * @param imageUrl        value of {@code image_url}
 * @param code            value of {@code code} (presumably the product barcode; confirm
 *                        against the API documentation)
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public record Product(
@JsonProperty("product_name") String productName,
@JsonProperty("brands") String brands,
@JsonProperty("categories_tags") List<String> categoriesTags,
@JsonProperty("ingredients_text") String ingredientsText,
@JsonProperty("countries_tags") List<String> countriesTags,
@JsonProperty("image_url") String imageUrl,
@JsonProperty("code") String code
) {
}
}

View File

@ -0,0 +1,189 @@
package ru.oa2.mvp.nutricollector.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import ru.oa2.mvp.nutricollector.client.OpenFoodFactsClient;
import ru.oa2.mvp.nutricollector.dto.IngredientData;
import ru.oa2.mvp.nutricollector.dto.OpenFoodFactsResponse;
import ru.oa2.mvp.nutricollector.entity.CollectorRun;
import ru.oa2.mvp.nutricollector.entity.Ingredient;
import ru.oa2.mvp.nutricollector.entity.Supplement;
import ru.oa2.mvp.nutricollector.repository.CollectorRunRepository;
import ru.oa2.mvp.nutricollector.repository.IngredientRepository;
import ru.oa2.mvp.nutricollector.repository.SupplementRepository;
import java.time.Instant;
import java.util.List;
@Component
@RequiredArgsConstructor
@Slf4j
public class OpenFoodFactsCollector {
public static final String SOURCE = "openfoodfacts";
private final OpenFoodFactsClient client;
private final SupplementRepository supplementRepository;
private final IngredientRepository ingredientRepository;
private final CollectorRunRepository collectorRunRepository;
private final IngredientParser ingredientParser;
@Value("${collector.openfoodfacts.max-pages:10}")
private int maxPages;
@Value("${collector.openfoodfacts.enabled:true}")
private boolean enabled;
@Scheduled(cron = "${collector.openfoodfacts.schedule:0 0 3 * * *}")
public void collect() {
if (!enabled) {
log.debug("Open Food Facts collector is disabled");
return;
}
run();
}
public CollectorRun run() {
log.info("Starting Open Food Facts collection (max {} pages)", maxPages);
int added = 0;
int updated = 0;
int errors = 0;
try {
for (int page = 1; page <= maxPages; page++) {
OpenFoodFactsResponse response = client.search(page);
if (response == null || response.products() == null || response.products().isEmpty()) {
log.info("No more products at page {}, stopping", page);
break;
}
log.info("Processing page {}: {} products", page, response.products().size());
for (OpenFoodFactsResponse.Product product : response.products()) {
try {
if (product.productName() == null || product.productName().isBlank()) {
errors++;
continue;
}
boolean isUpdate = processProduct(product);
if (isUpdate) {
updated++;
} else {
added++;
}
} catch (Exception e) {
log.error("Error processing product '{}': {}", product.productName(), e.getMessage());
errors++;
}
}
if (response.products().size() < response.pageSize()) {
log.info("Last page reached (got {} < page_size {})", response.products().size(), response.pageSize());
break;
}
}
} catch (Exception e) {
log.error("Failed during Open Food Facts collection: {}", e.getMessage());
return saveRun(added, updated, errors, "FAILED");
}
String status = errors > 0 ? "COMPLETED_WITH_ERRORS" : "SUCCESS";
log.info("Open Food Facts collection complete: added={}, updated={}, errors={}, status={}",
added, updated, errors, status);
return saveRun(added, updated, errors, status);
}
private boolean processProduct(OpenFoodFactsResponse.Product product) {
String sourceUrl = "https://world.openfoodfacts.org/product/" + product.code();
var existing = supplementRepository.findBySourceUrl(sourceUrl);
Supplement supplement;
boolean isUpdate;
if (existing.isPresent()) {
supplement = existing.get();
isUpdate = true;
} else {
supplement = Supplement.builder().build();
isUpdate = false;
}
supplement.setName(product.productName());
supplement.setBrand(product.brands());
supplement.setCategory(extractCategory(product.categoriesTags()));
supplement.setDescription(joinTags(product.categoriesTags()));
supplement.setCountry(extractCountry(product.countriesTags()));
supplement.setImageUrl(product.imageUrl());
supplement.setSourceUrl(sourceUrl);
supplement = supplementRepository.save(supplement);
if (product.ingredientsText() != null && !product.ingredientsText().isBlank()) {
updateIngredients(supplement, product.ingredientsText());
}
return isUpdate;
}
private void updateIngredients(Supplement supplement, String ingredientsText) {
ingredientRepository.deleteBySupplementId(supplement.getId());
ingredientRepository.flush();
String normalized = ingredientsText.replace(",", ";");
List<IngredientData> ingredientDataList = ingredientParser.parse(normalized);
for (IngredientData data : ingredientDataList) {
Ingredient ingredient = Ingredient.builder()
.name(data.name())
.amount(data.amount())
.unit(data.unit())
.dailyValuePercent(data.dailyValuePercent())
.build();
supplement.addIngredient(ingredient);
ingredientRepository.save(ingredient);
}
}
private String extractCategory(List<String> tags) {
if (tags == null || tags.isEmpty()) return null;
return tags.stream()
.filter(t -> t.startsWith("en:"))
.map(t -> t.substring(3).replace("-", " "))
.findFirst()
.orElse(tags.getFirst().contains(":") ? tags.getFirst().substring(tags.getFirst().indexOf(':') + 1) : tags.getFirst());
}
private String extractCountry(List<String> tags) {
if (tags == null || tags.isEmpty()) return null;
return tags.stream()
.filter(t -> t.startsWith("en:"))
.map(t -> t.substring(3).replace("-", " "))
.findFirst()
.orElse(null);
}
private String joinTags(List<String> tags) {
if (tags == null || tags.isEmpty()) return null;
return String.join(", ", tags.stream()
.map(t -> t.contains(":") ? t.substring(t.indexOf(':') + 1).replace("-", " ") : t)
.toList());
}
private CollectorRun saveRun(int added, int updated, int errors, String status) {
CollectorRun run = CollectorRun.builder()
.runAt(Instant.now())
.source(SOURCE)
.added(added)
.updated(updated)
.errors(errors)
.status(status)
.build();
return collectorRunRepository.save(run);
}
}

View File

@ -41,6 +41,13 @@ server:
collector: collector:
input-dir: ${COLLECTOR_INPUT_DIR:} input-dir: ${COLLECTOR_INPUT_DIR:}
schedule: ${COLLECTOR_SCHEDULE:0 0 */6 * * *} schedule: ${COLLECTOR_SCHEDULE:0 0 */6 * * *}
openfoodfacts:
enabled: ${OFF_ENABLED:true}
base-url: ${OFF_BASE_URL:https://world.openfoodfacts.org}
search-terms: ${OFF_SEARCH_TERMS:supplement}
page-size: ${OFF_PAGE_SIZE:100}
max-pages: ${OFF_MAX_PAGES:10}
schedule: ${OFF_SCHEDULE:0 0 3 * * *}
management: management:
endpoints: endpoints:

View File

@ -0,0 +1,3 @@
-- liquibase formatted sql
-- changeset orchestrator:4
--comment: Set collector_runs.status to varchar(50) so it can hold collector statuses such as COMPLETED_WITH_ERRORS
ALTER TABLE collector_runs ALTER COLUMN status TYPE varchar(50);

View File

@ -5,3 +5,5 @@ databaseChangeLog:
file: db/changelog/V002_create_ingredients_table.yaml file: db/changelog/V002_create_ingredients_table.yaml
- include: - include:
file: db/changelog/V003_create_collector_runs_table.yaml file: db/changelog/V003_create_collector_runs_table.yaml
- include:
file: db/changelog/V004__alter_status_varchar50.sql

View File

@ -1,15 +1,18 @@
package ru.oa2.mvp.nutricollector; package ru.oa2.mvp.nutricollector;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIf;
import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.DynamicPropertyRegistry; import org.springframework.test.context.DynamicPropertyRegistry;
import org.springframework.test.context.DynamicPropertySource; import org.springframework.test.context.DynamicPropertySource;
import org.testcontainers.DockerClientFactory;
import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.containers.PostgreSQLContainer;
import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.junit.jupiter.Testcontainers;
@SpringBootTest @SpringBootTest
@Testcontainers @Testcontainers
@EnabledIf("isDockerAvailable")
class NutriCollectorApplicationTests { class NutriCollectorApplicationTests {
@Container @Container
@ -25,6 +28,15 @@ class NutriCollectorApplicationTests {
registry.add("spring.datasource.password", postgres::getPassword); registry.add("spring.datasource.password", postgres::getPassword);
} }
/**
 * Condition method referenced by {@code @EnabledIf("isDockerAvailable")}.
 *
 * @return {@code true} if a Docker client can be obtained, {@code false} if the attempt throws
 */
static boolean isDockerAvailable() {
    boolean dockerReachable;
    try {
        DockerClientFactory.instance().client();
        dockerReachable = true;
    } catch (Exception ignored) {
        // Any failure to obtain a client is treated as "Docker not available".
        dockerReachable = false;
    }
    return dockerReachable;
}
@Test @Test
void contextLoads() { void contextLoads() {
} }

View File

@ -0,0 +1,20 @@
package ru.oa2.mvp.nutricollector.client;
import org.junit.jupiter.api.Test;
import org.springframework.web.reactive.function.client.WebClient;
import static org.assertj.core.api.Assertions.assertThat;
class OpenFoodFactsClientTest {
@Test
void clientCreatesSuccessfully() {
OpenFoodFactsClient client = new OpenFoodFactsClient(
WebClient.builder(),
"https://world.openfoodfacts.org",
"supplement",
100
);
assertThat(client).isNotNull();
}
}

View File

@ -0,0 +1,148 @@
package ru.oa2.mvp.nutricollector.service;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.test.util.ReflectionTestUtils;
import ru.oa2.mvp.nutricollector.client.OpenFoodFactsClient;
import ru.oa2.mvp.nutricollector.dto.OpenFoodFactsResponse;
import ru.oa2.mvp.nutricollector.entity.CollectorRun;
import ru.oa2.mvp.nutricollector.entity.Supplement;
import ru.oa2.mvp.nutricollector.repository.CollectorRunRepository;
import ru.oa2.mvp.nutricollector.repository.IngredientRepository;
import ru.oa2.mvp.nutricollector.repository.SupplementRepository;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.*;
/**
 * Unit tests for {@link OpenFoodFactsCollector#run()} with all collaborators mocked.
 */
@ExtendWith(MockitoExtension.class)
class OpenFoodFactsCollectorTest {

    @Mock
    private OpenFoodFactsClient client;

    @Mock
    private SupplementRepository supplementRepository;

    @Mock
    private IngredientRepository ingredientRepository;

    @Mock
    private CollectorRunRepository collectorRunRepository;

    @Mock
    private IngredientParser ingredientParser;

    private OpenFoodFactsCollector collector;

    /** Creates the collector by hand and sets the fields Spring would otherwise inject. */
    @BeforeEach
    void setUp() {
        collector = new OpenFoodFactsCollector(
                client, supplementRepository, ingredientRepository,
                collectorRunRepository, ingredientParser
        );
        ReflectionTestUtils.setField(collector, "maxPages", 10);
        ReflectionTestUtils.setField(collector, "enabled", true);
    }

    /** A product unknown to the repository is saved and counted as added. */
    @Test
    void run_processesNewProducts() {
        OpenFoodFactsResponse.Product vitaminC = new OpenFoodFactsResponse.Product(
                "Vitamin C 1000mg", "Brand A",
                List.of("en:supplements"), "vitamin c - 1000 mg",
                List.of("en:united-states"), "https://images.off.org/1.jpg", "12345"
        );
        stubFirstPage(1, List.of(vitaminC));
        when(supplementRepository.findBySourceUrl(anyString())).thenReturn(Optional.empty());
        when(supplementRepository.save(any(Supplement.class))).thenAnswer(invocation -> {
            Supplement toSave = invocation.getArgument(0);
            return Supplement.builder()
                    .id(UUID.randomUUID())
                    .name(toSave.getName())
                    .brand(toSave.getBrand())
                    .build();
        });
        stubRunSaveReturnsItsArgument();
        when(ingredientParser.parse(anyString())).thenReturn(List.of());

        CollectorRun outcome = collector.run();

        assertThat(outcome).isNotNull();
        assertThat(outcome.getSource()).isEqualTo("openfoodfacts");
        assertThat(outcome.getStatus()).isEqualTo("SUCCESS");
        assertThat(outcome.getAdded()).isEqualTo(1);
        verify(supplementRepository).save(any(Supplement.class));
    }

    /** A product whose source URL already exists is counted as updated, not added. */
    @Test
    void run_updatesExistingProducts() {
        String knownUrl = "https://world.openfoodfacts.org/product/99999";
        OpenFoodFactsResponse.Product incoming = new OpenFoodFactsResponse.Product(
                "Existing Supplement", "Brand B",
                List.of("en:vitamins"), null,
                List.of("en:france"), null, "99999"
        );
        Supplement stored = Supplement.builder()
                .id(UUID.randomUUID())
                .name("Existing Supplement")
                .sourceUrl(knownUrl)
                .build();
        stubFirstPage(1, List.of(incoming));
        when(supplementRepository.findBySourceUrl(knownUrl)).thenReturn(Optional.of(stored));
        when(supplementRepository.save(any(Supplement.class)))
                .thenAnswer(invocation -> invocation.getArgument(0));
        stubRunSaveReturnsItsArgument();

        CollectorRun outcome = collector.run();

        assertThat(outcome.getUpdated()).isEqualTo(1);
        assertThat(outcome.getAdded()).isZero();
    }

    /** A product without a name is counted as an error and never saved. */
    @Test
    void run_skipsProductsWithEmptyName() {
        OpenFoodFactsResponse.Product nameless = new OpenFoodFactsResponse.Product(
                null, "Brand", List.of(), null, List.of(), null, "11111"
        );
        stubFirstPage(1, List.of(nameless));
        stubRunSaveReturnsItsArgument();

        CollectorRun outcome = collector.run();

        assertThat(outcome.getErrors()).isEqualTo(1);
        assertThat(outcome.getStatus()).isEqualTo("COMPLETED_WITH_ERRORS");
        verify(supplementRepository, never()).save(any());
    }

    /** An empty first page ends the run successfully after a single request. */
    @Test
    void run_stopsOnEmptyPage() {
        stubFirstPage(0, List.of());
        stubRunSaveReturnsItsArgument();

        CollectorRun outcome = collector.run();

        assertThat(outcome.getStatus()).isEqualTo("SUCCESS");
        verify(client).search(anyInt());
    }

    /** An exception from the client produces a run with status FAILED. */
    @Test
    void run_handlesApiFailure() {
        when(client.search(1)).thenThrow(new RuntimeException("Connection refused"));
        stubRunSaveReturnsItsArgument();

        CollectorRun outcome = collector.run();

        assertThat(outcome.getStatus()).isEqualTo("FAILED");
    }

    /** Makes the client return page 1 with page size 100, the given count and products. */
    private void stubFirstPage(int count, List<OpenFoodFactsResponse.Product> products) {
        OpenFoodFactsResponse firstPage = new OpenFoodFactsResponse(count, 1, 100, products);
        when(client.search(1)).thenReturn(firstPage);
    }

    /** Makes the run repository hand back whatever run it is asked to save. */
    private void stubRunSaveReturnsItsArgument() {
        when(collectorRunRepository.save(any(CollectorRun.class)))
                .thenAnswer(invocation -> invocation.getArgument(0));
    }
}