From 95e26e287a88d55ac558fd94023ba0f10023503c Mon Sep 17 00:00:00 2001 From: Dmitry Isaenko Date: Wed, 17 Jan 2024 18:39:51 +0300 Subject: [PATCH] Faster INPX parse --- .../ThreadPoolTaskExecutorSettings.java | 19 ++ .../marinesco/data/AuthorRepository.java | 8 +- .../marinesco/data/BookRepository.java | 4 +- .../marinesco/data/GenreRepository.java | 4 +- .../ru/redrise/marinesco/library/Author.java | 9 +- .../ru/redrise/marinesco/library/Book.java | 23 +- .../library/InpxLibraryMetadataScanner.java | 47 ++++ .../marinesco/library/InpxScanner.java | 239 ++++++++++-------- .../settings/SettingsController.java | 4 +- src/main/resources/application.yml | 6 +- 10 files changed, 219 insertions(+), 144 deletions(-) create mode 100644 src/main/java/ru/redrise/marinesco/ThreadPoolTaskExecutorSettings.java create mode 100644 src/main/java/ru/redrise/marinesco/library/InpxLibraryMetadataScanner.java diff --git a/src/main/java/ru/redrise/marinesco/ThreadPoolTaskExecutorSettings.java b/src/main/java/ru/redrise/marinesco/ThreadPoolTaskExecutorSettings.java new file mode 100644 index 0000000..eee0cf4 --- /dev/null +++ b/src/main/java/ru/redrise/marinesco/ThreadPoolTaskExecutorSettings.java @@ -0,0 +1,19 @@ +package ru.redrise.marinesco; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +@Configuration +public class ThreadPoolTaskExecutorSettings { + @Bean + public TaskExecutor configTaskExecutor(){ + final ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); + executor.setCorePoolSize(8); + executor.setMaxPoolSize(16); + //executor.setQueueCapacity(50); + + return executor; + } +} diff --git a/src/main/java/ru/redrise/marinesco/data/AuthorRepository.java b/src/main/java/ru/redrise/marinesco/data/AuthorRepository.java index 4e220b3..f29a4d6 100644 --- a/src/main/java/ru/redrise/marinesco/data/AuthorRepository.java +++ b/src/main/java/ru/redrise/marinesco/data/AuthorRepository.java @@ -2,18 +2,14 @@ package ru.redrise.marinesco.data; import java.util.Optional; -import org.springframework.data.repository.CrudRepository; +import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; import ru.redrise.marinesco.library.Author; import java.util.List; - - - - @Repository -public interface AuthorRepository extends CrudRepository{ +public interface AuthorRepository extends JpaRepository{ Optional findByAuthorName(String authorName); List findByAuthorNameContainingIgnoreCase(String authorName); } diff --git a/src/main/java/ru/redrise/marinesco/data/BookRepository.java b/src/main/java/ru/redrise/marinesco/data/BookRepository.java index b7e6309..00d4da0 100644 --- a/src/main/java/ru/redrise/marinesco/data/BookRepository.java +++ b/src/main/java/ru/redrise/marinesco/data/BookRepository.java @@ -2,7 +2,7 @@ package ru.redrise.marinesco.data; import java.util.List; -import org.springframework.data.repository.CrudRepository; +import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; import ru.redrise.marinesco.library.Author; @@ -12,7 +12,7 @@ import ru.redrise.marinesco.library.Book; @Repository -public interface BookRepository extends CrudRepository{ +public interface BookRepository extends JpaRepository{ List findBySeriesContainingIgnoreCase(String title); List findByTitleContainingIgnoreCase(String title); diff --git a/src/main/java/ru/redrise/marinesco/data/GenreRepository.java b/src/main/java/ru/redrise/marinesco/data/GenreRepository.java index 27bcc3b..160383d 100644 --- a/src/main/java/ru/redrise/marinesco/data/GenreRepository.java +++ b/src/main/java/ru/redrise/marinesco/data/GenreRepository.java @@ -1,11 +1,11 @@ package ru.redrise.marinesco.data; -import org.springframework.data.repository.CrudRepository; +import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; import ru.redrise.marinesco.library.Genre; @Repository -public interface GenreRepository extends CrudRepository{ +public interface GenreRepository extends JpaRepository{ } diff --git a/src/main/java/ru/redrise/marinesco/library/Author.java b/src/main/java/ru/redrise/marinesco/library/Author.java index de61dc2..f36e673 100644 --- a/src/main/java/ru/redrise/marinesco/library/Author.java +++ b/src/main/java/ru/redrise/marinesco/library/Author.java @@ -1,9 +1,6 @@ package ru.redrise.marinesco.library; -import jakarta.persistence.Column; import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.GenerationType; import jakarta.persistence.Id; import lombok.AccessLevel; import lombok.Data; @@ -13,16 +10,12 @@ import lombok.NoArgsConstructor; @Entity @NoArgsConstructor(access = AccessLevel.PRIVATE, force = true) public class Author { -// private static final long serialVersionUID = 1L; - @Id - @GeneratedValue(strategy = GenerationType.AUTO) private Long id; - - @Column(unique=true) private String authorName; public Author(String name){ this.authorName = name; + this.id = (long) name.hashCode(); } } diff --git a/src/main/java/ru/redrise/marinesco/library/Book.java b/src/main/java/ru/redrise/marinesco/library/Book.java index e324b48..b6398a1 100644 --- a/src/main/java/ru/redrise/marinesco/library/Book.java +++ b/src/main/java/ru/redrise/marinesco/library/Book.java @@ -4,6 +4,7 @@ import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.util.ArrayList; import java.util.List; +import java.util.Set; import jakarta.persistence.Entity; import jakarta.persistence.Id; @@ -14,8 +15,6 @@ import lombok.Data; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import ru.redrise.marinesco.RainbowDump; -import ru.redrise.marinesco.data.AuthorRepository; -import ru.redrise.marinesco.data.GenreRepository; @Slf4j @Entity @@ -50,8 +49,8 @@ public class Book { public Book(byte[] line, String container, - AuthorRepository authorRepository, - GenreRepository genreRepository, + Set authorsCollection, + Set genresCollection, Long libraryId, String libraryVersion) throws Exception { // AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE; @@ -62,8 +61,8 @@ public class Book { this.container = container + ".zip"; this.authors = new ArrayList<>(); this.genres = new ArrayList<>(); - parseAuthors(authorRepository); - parseGenere(genreRepository); + parseAuthors(authorsCollection); + parseGenere(genresCollection); this.title = parseNextString(); this.series = parseNextString(); this.serNo = parseNextString(); @@ -96,7 +95,7 @@ public class Book { */ } - private void parseAuthors(AuthorRepository authorRepository) throws Exception { + private void parseAuthors(Set authorsCollection) throws Exception { for (; position < line.length; position++) { if (line[position] == 0x04) { String allAuthors = new String(line, 0, position, StandardCharsets.UTF_8); @@ -104,8 +103,9 @@ public class Book { for (String authorName : allAuthors.split(":")) { authorName = authorName.replaceAll(",", " ").trim(); if (!authorName.equals("")) { - Author author = authorRepository.findByAuthorName(authorName).orElse(new Author(authorName)); - authors.add(authorRepository.save(author)); + Author author = new Author(authorName); + authorsCollection.add(author); + authors.add(author); } } @@ -117,14 +117,15 @@ public class Book { throw new Exception("Invalid 'inp' file format (parse Authors)"); } - private void parseGenere(GenreRepository genreRepository) throws Exception { + private void parseGenere(Set genresCollection) throws Exception { for (int i = position; i < line.length; i++) { if (line[i] == 0x04) { String allGenres = new String(line, position, i - position, StandardCharsets.UTF_8); for (String genreName : allGenres.split(":")) { Genre genre = new Genre(genreName); - genres.add(genreRepository.save(genre)); + genresCollection.add(genre); + genres.add(genre); } position = i + 1; diff --git a/src/main/java/ru/redrise/marinesco/library/InpxLibraryMetadataScanner.java b/src/main/java/ru/redrise/marinesco/library/InpxLibraryMetadataScanner.java new file mode 100644 index 0000000..aa60ce7 --- /dev/null +++ b/src/main/java/ru/redrise/marinesco/library/InpxLibraryMetadataScanner.java @@ -0,0 +1,47 @@ +package ru.redrise.marinesco.library; + +import java.io.File; +import java.io.FileInputStream; +import java.nio.charset.StandardCharsets; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import ru.redrise.marinesco.data.LibraryMetadataRepository; + +public class InpxLibraryMetadataScanner { + private InpxLibraryMetadataScanner() { } + + public static LibraryMetadata saveFromFile(File inpxFile, LibraryMetadataRepository repository) throws Exception { + LibraryMetadata libraryMetadata = new LibraryMetadata(); + + try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) { + ZipEntry zipEntry; + + while ((zipEntry = zipInputStream.getNextEntry()) != null) { + if (isCollection(zipEntry)) + libraryMetadata.setCollectionInfo(readPlainText(zipInputStream)); + else if (isVersion(zipEntry)) + libraryMetadata.setVersionInfo(readPlainText(zipInputStream)); + } + } + + return repository.save(libraryMetadata); + } + + private static boolean isCollection(ZipEntry zipEntry) { + return zipEntry.getName().toLowerCase().contains("collection.info"); + } + + private static boolean isVersion(ZipEntry zipEntry){ + return zipEntry.getName().toLowerCase().contains("version.info"); + } + + private static String readPlainText(ZipInputStream zipInputStream) throws Exception { + byte[] content = new byte[1024]; + StringBuilder stringBuilder = new StringBuilder(); + while (zipInputStream.read(content) > 0) + stringBuilder.append(new String(content, StandardCharsets.UTF_8)); + + return stringBuilder.toString(); + } +} diff --git a/src/main/java/ru/redrise/marinesco/library/InpxScanner.java b/src/main/java/ru/redrise/marinesco/library/InpxScanner.java index 479fbde..66764e0 100644 --- a/src/main/java/ru/redrise/marinesco/library/InpxScanner.java +++ b/src/main/java/ru/redrise/marinesco/library/InpxScanner.java @@ -3,12 +3,21 @@ package ru.redrise.marinesco.library; import java.io.File; import java.io.FileInputStream; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.stream.Stream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.springframework.core.io.FileSystemResource; +import org.springframework.core.task.TaskExecutor; import org.springframework.stereotype.Component; import lombok.extern.slf4j.Slf4j; @@ -20,12 +29,11 @@ import ru.redrise.marinesco.settings.ApplicationSettings; @Slf4j @Component -public class InpxScanner implements Runnable { +public class InpxScanner { + private static volatile String lastRunErrors = ""; + private static LocalDateTime lastRunTime = LocalDateTime.of(1970, 01, 01, 0, 0, 0); - private static volatile Thread parser; - private static volatile String lastRunErrors; - - private LibraryMetadata libraryMetadata; + private TaskExecutor executor; private LibraryMetadataRepository libraryMetadataRepository; private AuthorRepository authorRepository; private GenreRepository genreRepository; @@ -33,11 +41,13 @@ public class InpxScanner implements Runnable { private String filesLocation; - public InpxScanner(ApplicationSettings applicationSettings, + public InpxScanner(TaskExecutor executor, + ApplicationSettings applicationSettings, AuthorRepository authorRepository, GenreRepository genreRepository, BookRepository bookRepository, LibraryMetadataRepository libraryMetadataRepository) { + this.executor = executor; this.filesLocation = applicationSettings.getFilesLocation(); this.authorRepository = authorRepository; this.genreRepository = genreRepository; @@ -46,67 +56,70 @@ public class InpxScanner implements Runnable { } /* - * @return true if executed, false if already running + * @return true if executed, false otherwise */ public boolean reScan() { - if (parser == null || !parser.isAlive()) { - parser = new Thread(this); - parser.start(); - return true; + + LocalDateTime currentDateTime = LocalDateTime.now(); + + if (ChronoUnit.MINUTES.between(lastRunTime, currentDateTime) < 5) { + lastRunErrors = "Too frequent requests. Please whait 5 min. Last attmpt: " + + lastRunTime.format(DateTimeFormatter.ofPattern("DD.MM.YYYY HH:mm:ss")); + return false; } - return false; + lastRunTime = currentDateTime; + lastRunErrors = ""; + + Thread scanThread = new Thread(() -> { + try { + File inpxFile = getInpxFile(); + log.debug("INPX file found: " + inpxFile.getName()); + + LibraryMetadata libMetadata = InpxLibraryMetadataScanner.saveFromFile(inpxFile, + libraryMetadataRepository); + + Long libId = libMetadata.getId(); + String libVersion = libMetadata.getVersion(); + HashMap inpEntries = collectInp(inpxFile); + + for (Map.Entry entry : inpEntries.entrySet()) + executor.execute(new InpxWorker(entry, libId, libVersion)); + } catch (Exception e) { + log.error("{}", e); + lastRunErrors = lastRunErrors + " " + e.getMessage(); + } + }); + + scanThread.start(); + + return true; } - @Override - public void run() { - try { - final FileSystemResource libraryLocation = new FileSystemResource(filesLocation); - - final File inpxFile = Stream.of(libraryLocation.getFile().listFiles()) - .filter(file -> file.getName().endsWith(".inpx")) - .findFirst() - .get(); - - log.debug("INPX file found as " + inpxFile.getName()); - - getLibraryMetadata(inpxFile); - parseInp(inpxFile); - // Once multiple libraries imlemented, add here 'delete recrodds with old - // version of the library' - // TODO: fix lirary ID changes on every update: add selector on the front - } catch (Exception e) { - log.error("{}", e); - InpxScanner.lastRunErrors = e.getMessage(); - } + private File getInpxFile() throws Exception { + final FileSystemResource libraryLocation = new FileSystemResource(filesLocation); + return Stream.of(libraryLocation.getFile().listFiles()) + .filter(file -> file.getName().endsWith(".inpx")) + .findFirst() + .get(); } - private void getLibraryMetadata(File inpxFile) throws Exception { - libraryMetadata = new LibraryMetadata(); - + private HashMap collectInp(File inpxFile) throws Exception { + final HashMap inpEntries = new HashMap<>(); try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) { - ZipEntry zipEntry = zipInputStream.getNextEntry(); - - while (zipEntry != null) { - if (zipEntry.getName().toLowerCase().contains("collection.info")) - libraryMetadata.setCollectionInfo(readPlainText(zipInputStream)); - - else if (zipEntry.getName().toLowerCase().contains("version.info")) - libraryMetadata.setVersionInfo(readPlainText(zipInputStream)); - - zipEntry = zipInputStream.getNextEntry(); + ZipEntry zipEntry; + while ((zipEntry = zipInputStream.getNextEntry()) != null) { + if (isInp(zipEntry)) { + String zipEntryName = zipEntry.getName(); + zipEntryName = zipEntryName.substring(0, zipEntryName.lastIndexOf('.')); + inpEntries.put(zipEntryName, inpToByteArray(zipInputStream, zipEntry.getSize())); + } } } - - libraryMetadata = libraryMetadataRepository.save(libraryMetadata); + return inpEntries; } - private String readPlainText(ZipInputStream zipInputStream) throws Exception { - byte[] content = new byte[1024]; - StringBuilder stringBuilder = new StringBuilder(); - while (zipInputStream.read(content) > 0) - stringBuilder.append(new String(content, StandardCharsets.UTF_8)); - - return stringBuilder.toString(); + private boolean isInp(ZipEntry zipEntry) { + return zipEntry.getName().toLowerCase().endsWith(".inp"); } private byte[] inpToByteArray(ZipInputStream stream, long fileSize) throws Exception { @@ -133,61 +146,6 @@ public class InpxScanner implements Runnable { return inpByteBuffer.array(); } - private void parseInp(File inpxFile) throws Exception { - /* - log.warn("REMOVE TEMPORARY SOLUTION - BREAKER"); - log.warn("REMOVE TEMPORARY SOLUTION - BREAKER"); - log.warn("REMOVE TEMPORARY SOLUTION - BREAKER"); - boolean breaker = false; - */ - try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) { - ZipEntry zipEntry = zipInputStream.getNextEntry(); - - while (zipEntry != null) { - if (zipEntry.getName().toLowerCase().endsWith(".inp")) { - /* - if (breaker) { - zipEntry = zipInputStream.getNextEntry(); - continue; - } - breaker = true; - // */ - byte[] content = inpToByteArray(zipInputStream, zipEntry.getSize()); - parseInpContent(content, zipEntry.getName()); - } - zipEntry = zipInputStream.getNextEntry(); - } - } - } - - private void parseInpContent(byte[] content, String name) throws Exception { - name = name.substring(0, name.lastIndexOf('.')); - - log.info("FILE RELATED " + name); - int lastIndex = 0; - for (int i = 0; i < content.length; i++) { - if (content[i] == '\n') { - byte[] line = new byte[i - lastIndex]; - System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1); - - Book book = new Book(line, - name, - authorRepository, - genreRepository, - libraryMetadata.getId(), - libraryMetadata.getVersion()); - - bookRepository.save(book); - - if (isNextCarriageReturn(i, content)) { - i += 2; - lastIndex = i; - } else - lastIndex = ++i; - } - } - } - private boolean isNextCarriageReturn(int i, byte[] content) { return i + 1 < content.length && (content[i + 1] == '\r'); } @@ -195,4 +153,63 @@ public class InpxScanner implements Runnable { public static String getLastRunErrors() { return lastRunErrors; } + + private class InpxWorker implements Runnable { + + private Long libraryId; + private String libraryVersion; + private String name; + private byte[] content; + + private InpxWorker(Map.Entry entry, + Long libraryId, + String libraryVersion) { + this.libraryId = libraryId; + this.libraryVersion = libraryVersion; + this.name = entry.getKey(); + this.content = entry.getValue(); + } + + @Override + public void run() { + final List books = new ArrayList<>(); + final Set authors = new HashSet<>(); + final Set genres = new HashSet<>(); + try { + log.info("FILE RELATED " + name); + + int lastIndex = 0; + for (int i = 0; i < content.length; i++) { + if (content[i] == '\n') { + byte[] line = new byte[i - lastIndex]; + System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1); + + books.add(new Book(line, + name, + authors, + genres, + libraryId, + libraryVersion)); + + if (isNextCarriageReturn(i, content)) { + i += 2; + lastIndex = i; + } else + lastIndex = ++i; + } + } + saveAll(books, authors, genres); + } catch (Exception e) { + log.error("{}", e); + lastRunErrors = lastRunErrors + " " + e.getMessage(); + } + } + } + + /* REMINDER: DO NOT PUT THIS SHIT INTO THREAD */ + private synchronized void saveAll(List books, Set authors, Set genres) { + authorRepository.saveAll(authors); + genreRepository.saveAll(genres); + bookRepository.saveAll(books); + } } diff --git a/src/main/java/ru/redrise/marinesco/settings/SettingsController.java b/src/main/java/ru/redrise/marinesco/settings/SettingsController.java index 1270d4a..7f23383 100644 --- a/src/main/java/ru/redrise/marinesco/settings/SettingsController.java +++ b/src/main/java/ru/redrise/marinesco/settings/SettingsController.java @@ -37,7 +37,7 @@ public class SettingsController { @ModelAttribute(name = "lastScanErrors") public String setLastRunErrors(){ - if (InpxScanner.getLastRunErrors() != null) + if (InpxScanner.getLastRunErrors() != "") return "Last run attempt failed: "+InpxScanner.getLastRunErrors(); return null; } @@ -56,7 +56,7 @@ public class SettingsController { if (inpxScanner.reScan()) redirectAttributes.addAttribute("rescanOk", "Rescan started"); else - redirectAttributes.addAttribute("rescanError", "Rescan is currently in progress"); + redirectAttributes.addAttribute("rescanError", "Rescan could be currently in progress"); return redirectView; } diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 63d08f9..e69dd91 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -5,16 +5,18 @@ spring: driver-class-name: org.h2.Driver generate-unique-name: false name: marinesco -# url: jdbc:h2:mem:marinesco - url: jdbc:h2:file:/tmp/h2 + url: jdbc:h2:mem:marinesco +# url: jdbc:h2:file:/tmp/h2 username: sa password: jpa: properties: hibernate: database-platform: org.hibernate.dialect.H2Dialect +# format_sql: true hibernate: ddl-auto: update +# show-sql: true h2: console: enabled: true