Faster INPX parse

This commit is contained in:
Dmitry Isaenko 2024-01-17 18:39:51 +03:00
parent b879181bd9
commit 95e26e287a
10 changed files with 219 additions and 144 deletions

View file

@ -0,0 +1,19 @@
package ru.redrise.marinesco;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.task.TaskExecutor;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
@Configuration
public class ThreadPoolTaskExecutorSettings {
@Bean
public TaskExecutor configTaskExecutor(){
final ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
executor.setCorePoolSize(8);
executor.setMaxPoolSize(16);
//executor.setQueueCapacity(50);
return executor;
}
}

View file

@ -2,18 +2,14 @@ package ru.redrise.marinesco.data;
import java.util.Optional; import java.util.Optional;
import org.springframework.data.repository.CrudRepository; import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository; import org.springframework.stereotype.Repository;
import ru.redrise.marinesco.library.Author; import ru.redrise.marinesco.library.Author;
import java.util.List; import java.util.List;
@Repository @Repository
public interface AuthorRepository extends CrudRepository<Author, Long>{ public interface AuthorRepository extends JpaRepository<Author, Long>{
Optional<Author> findByAuthorName(String authorName); Optional<Author> findByAuthorName(String authorName);
List<Author> findByAuthorNameContainingIgnoreCase(String authorName); List<Author> findByAuthorNameContainingIgnoreCase(String authorName);
} }

View file

@ -2,7 +2,7 @@ package ru.redrise.marinesco.data;
import java.util.List; import java.util.List;
import org.springframework.data.repository.CrudRepository; import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository; import org.springframework.stereotype.Repository;
import ru.redrise.marinesco.library.Author; import ru.redrise.marinesco.library.Author;
@ -12,7 +12,7 @@ import ru.redrise.marinesco.library.Book;
@Repository @Repository
public interface BookRepository extends CrudRepository<Book, Integer>{ public interface BookRepository extends JpaRepository<Book, Integer>{
List<Book> findBySeriesContainingIgnoreCase(String title); List<Book> findBySeriesContainingIgnoreCase(String title);
List<Book> findByTitleContainingIgnoreCase(String title); List<Book> findByTitleContainingIgnoreCase(String title);

View file

@ -1,11 +1,11 @@
package ru.redrise.marinesco.data; package ru.redrise.marinesco.data;
import org.springframework.data.repository.CrudRepository; import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository; import org.springframework.stereotype.Repository;
import ru.redrise.marinesco.library.Genre; import ru.redrise.marinesco.library.Genre;
@Repository @Repository
public interface GenreRepository extends CrudRepository<Genre, String>{ public interface GenreRepository extends JpaRepository<Genre, String>{
} }

View file

@ -1,9 +1,6 @@
package ru.redrise.marinesco.library; package ru.redrise.marinesco.library;
import jakarta.persistence.Column;
import jakarta.persistence.Entity; import jakarta.persistence.Entity;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id; import jakarta.persistence.Id;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.Data; import lombok.Data;
@ -13,16 +10,12 @@ import lombok.NoArgsConstructor;
@Entity @Entity
@NoArgsConstructor(access = AccessLevel.PRIVATE, force = true) @NoArgsConstructor(access = AccessLevel.PRIVATE, force = true)
public class Author { public class Author {
// private static final long serialVersionUID = 1L;
@Id @Id
@GeneratedValue(strategy = GenerationType.AUTO)
private Long id; private Long id;
@Column(unique=true)
private String authorName; private String authorName;
public Author(String name){ public Author(String name){
this.authorName = name; this.authorName = name;
this.id = (long) name.hashCode();
} }
} }

View file

@ -4,6 +4,7 @@ import java.nio.charset.StandardCharsets;
import java.time.LocalDate; import java.time.LocalDate;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Set;
import jakarta.persistence.Entity; import jakarta.persistence.Entity;
import jakarta.persistence.Id; import jakarta.persistence.Id;
@ -14,8 +15,6 @@ import lombok.Data;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import ru.redrise.marinesco.RainbowDump; import ru.redrise.marinesco.RainbowDump;
import ru.redrise.marinesco.data.AuthorRepository;
import ru.redrise.marinesco.data.GenreRepository;
@Slf4j @Slf4j
@Entity @Entity
@ -50,8 +49,8 @@ public class Book {
public Book(byte[] line, public Book(byte[] line,
String container, String container,
AuthorRepository authorRepository, Set<Author> authorsCollection,
GenreRepository genreRepository, Set<Genre> genresCollection,
Long libraryId, Long libraryId,
String libraryVersion) throws Exception { String libraryVersion) throws Exception {
// AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE; // AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;
@ -62,8 +61,8 @@ public class Book {
this.container = container + ".zip"; this.container = container + ".zip";
this.authors = new ArrayList<>(); this.authors = new ArrayList<>();
this.genres = new ArrayList<>(); this.genres = new ArrayList<>();
parseAuthors(authorRepository); parseAuthors(authorsCollection);
parseGenere(genreRepository); parseGenere(genresCollection);
this.title = parseNextString(); this.title = parseNextString();
this.series = parseNextString(); this.series = parseNextString();
this.serNo = parseNextString(); this.serNo = parseNextString();
@ -96,7 +95,7 @@ public class Book {
*/ */
} }
private void parseAuthors(AuthorRepository authorRepository) throws Exception { private void parseAuthors(Set<Author> authorsCollection) throws Exception {
for (; position < line.length; position++) { for (; position < line.length; position++) {
if (line[position] == 0x04) { if (line[position] == 0x04) {
String allAuthors = new String(line, 0, position, StandardCharsets.UTF_8); String allAuthors = new String(line, 0, position, StandardCharsets.UTF_8);
@ -104,8 +103,9 @@ public class Book {
for (String authorName : allAuthors.split(":")) { for (String authorName : allAuthors.split(":")) {
authorName = authorName.replaceAll(",", " ").trim(); authorName = authorName.replaceAll(",", " ").trim();
if (!authorName.equals("")) { if (!authorName.equals("")) {
Author author = authorRepository.findByAuthorName(authorName).orElse(new Author(authorName)); Author author = new Author(authorName);
authors.add(authorRepository.save(author)); authorsCollection.add(author);
authors.add(author);
} }
} }
@ -117,14 +117,15 @@ public class Book {
throw new Exception("Invalid 'inp' file format (parse Authors)"); throw new Exception("Invalid 'inp' file format (parse Authors)");
} }
private void parseGenere(GenreRepository genreRepository) throws Exception { private void parseGenere(Set<Genre> genresCollection) throws Exception {
for (int i = position; i < line.length; i++) { for (int i = position; i < line.length; i++) {
if (line[i] == 0x04) { if (line[i] == 0x04) {
String allGenres = new String(line, position, i - position, StandardCharsets.UTF_8); String allGenres = new String(line, position, i - position, StandardCharsets.UTF_8);
for (String genreName : allGenres.split(":")) { for (String genreName : allGenres.split(":")) {
Genre genre = new Genre(genreName); Genre genre = new Genre(genreName);
genres.add(genreRepository.save(genre)); genresCollection.add(genre);
genres.add(genre);
} }
position = i + 1; position = i + 1;

View file

@ -0,0 +1,47 @@
package ru.redrise.marinesco.library;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import ru.redrise.marinesco.data.LibraryMetadataRepository;
public class InpxLibraryMetadataScanner {
private InpxLibraryMetadataScanner() { }
public static LibraryMetadata saveFromFile(File inpxFile, LibraryMetadataRepository repository) throws Exception {
LibraryMetadata libraryMetadata = new LibraryMetadata();
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
ZipEntry zipEntry;
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
if (isCollection(zipEntry))
libraryMetadata.setCollectionInfo(readPlainText(zipInputStream));
else if (isVersion(zipEntry))
libraryMetadata.setVersionInfo(readPlainText(zipInputStream));
}
}
return repository.save(libraryMetadata);
}
private static boolean isCollection(ZipEntry zipEntry) {
return zipEntry.getName().toLowerCase().contains("collection.info");
}
private static boolean isVersion(ZipEntry zipEntry){
return zipEntry.getName().toLowerCase().contains("version.info");
}
private static String readPlainText(ZipInputStream zipInputStream) throws Exception {
byte[] content = new byte[1024];
StringBuilder stringBuilder = new StringBuilder();
while (zipInputStream.read(content) > 0)
stringBuilder.append(new String(content, StandardCharsets.UTF_8));
return stringBuilder.toString();
}
}

View file

@ -3,12 +3,21 @@ package ru.redrise.marinesco.library;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets; import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream; import java.util.stream.Stream;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream; import java.util.zip.ZipInputStream;
import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.FileSystemResource;
import org.springframework.core.task.TaskExecutor;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@ -20,12 +29,11 @@ import ru.redrise.marinesco.settings.ApplicationSettings;
@Slf4j @Slf4j
@Component @Component
public class InpxScanner implements Runnable { public class InpxScanner {
private static volatile String lastRunErrors = "";
private static LocalDateTime lastRunTime = LocalDateTime.of(1970, 01, 01, 0, 0, 0);
private static volatile Thread parser; private TaskExecutor executor;
private static volatile String lastRunErrors;
private LibraryMetadata libraryMetadata;
private LibraryMetadataRepository libraryMetadataRepository; private LibraryMetadataRepository libraryMetadataRepository;
private AuthorRepository authorRepository; private AuthorRepository authorRepository;
private GenreRepository genreRepository; private GenreRepository genreRepository;
@ -33,11 +41,13 @@ public class InpxScanner implements Runnable {
private String filesLocation; private String filesLocation;
public InpxScanner(ApplicationSettings applicationSettings, public InpxScanner(TaskExecutor executor,
ApplicationSettings applicationSettings,
AuthorRepository authorRepository, AuthorRepository authorRepository,
GenreRepository genreRepository, GenreRepository genreRepository,
BookRepository bookRepository, BookRepository bookRepository,
LibraryMetadataRepository libraryMetadataRepository) { LibraryMetadataRepository libraryMetadataRepository) {
this.executor = executor;
this.filesLocation = applicationSettings.getFilesLocation(); this.filesLocation = applicationSettings.getFilesLocation();
this.authorRepository = authorRepository; this.authorRepository = authorRepository;
this.genreRepository = genreRepository; this.genreRepository = genreRepository;
@ -46,67 +56,70 @@ public class InpxScanner implements Runnable {
} }
/* /*
* @return true if executed, false if already running * @return true if executed, false otherwise
*/ */
public boolean reScan() { public boolean reScan() {
if (parser == null || !parser.isAlive()) {
parser = new Thread(this); LocalDateTime currentDateTime = LocalDateTime.now();
parser.start();
return true; if (ChronoUnit.MINUTES.between(lastRunTime, currentDateTime) < 5) {
} lastRunErrors = "Too frequent requests. Please whait 5 min. Last attmpt: "
+ lastRunTime.format(DateTimeFormatter.ofPattern("DD.MM.YYYY HH:mm:ss"));
return false; return false;
} }
lastRunTime = currentDateTime;
lastRunErrors = "";
@Override Thread scanThread = new Thread(() -> {
public void run() {
try { try {
final FileSystemResource libraryLocation = new FileSystemResource(filesLocation); File inpxFile = getInpxFile();
log.debug("INPX file found: " + inpxFile.getName());
final File inpxFile = Stream.of(libraryLocation.getFile().listFiles()) LibraryMetadata libMetadata = InpxLibraryMetadataScanner.saveFromFile(inpxFile,
libraryMetadataRepository);
Long libId = libMetadata.getId();
String libVersion = libMetadata.getVersion();
HashMap<String, byte[]> inpEntries = collectInp(inpxFile);
for (Map.Entry<String, byte[]> entry : inpEntries.entrySet())
executor.execute(new InpxWorker(entry, libId, libVersion));
} catch (Exception e) {
log.error("{}", e);
lastRunErrors = lastRunErrors + " " + e.getMessage();
}
});
scanThread.start();
return true;
}
private File getInpxFile() throws Exception {
final FileSystemResource libraryLocation = new FileSystemResource(filesLocation);
return Stream.of(libraryLocation.getFile().listFiles())
.filter(file -> file.getName().endsWith(".inpx")) .filter(file -> file.getName().endsWith(".inpx"))
.findFirst() .findFirst()
.get(); .get();
log.debug("INPX file found as " + inpxFile.getName());
getLibraryMetadata(inpxFile);
parseInp(inpxFile);
// Once multiple libraries imlemented, add here 'delete recrodds with old
// version of the library'
// TODO: fix lirary ID changes on every update: add selector on the front
} catch (Exception e) {
log.error("{}", e);
InpxScanner.lastRunErrors = e.getMessage();
}
} }
private void getLibraryMetadata(File inpxFile) throws Exception { private HashMap<String, byte[]> collectInp(File inpxFile) throws Exception {
libraryMetadata = new LibraryMetadata(); final HashMap<String, byte[]> inpEntries = new HashMap<>();
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) { try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
ZipEntry zipEntry = zipInputStream.getNextEntry(); ZipEntry zipEntry;
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
while (zipEntry != null) { if (isInp(zipEntry)) {
if (zipEntry.getName().toLowerCase().contains("collection.info")) String zipEntryName = zipEntry.getName();
libraryMetadata.setCollectionInfo(readPlainText(zipInputStream)); zipEntryName = zipEntryName.substring(0, zipEntryName.lastIndexOf('.'));
inpEntries.put(zipEntryName, inpToByteArray(zipInputStream, zipEntry.getSize()));
else if (zipEntry.getName().toLowerCase().contains("version.info"))
libraryMetadata.setVersionInfo(readPlainText(zipInputStream));
zipEntry = zipInputStream.getNextEntry();
} }
} }
}
libraryMetadata = libraryMetadataRepository.save(libraryMetadata); return inpEntries;
} }
private String readPlainText(ZipInputStream zipInputStream) throws Exception { private boolean isInp(ZipEntry zipEntry) {
byte[] content = new byte[1024]; return zipEntry.getName().toLowerCase().endsWith(".inp");
StringBuilder stringBuilder = new StringBuilder();
while (zipInputStream.read(content) > 0)
stringBuilder.append(new String(content, StandardCharsets.UTF_8));
return stringBuilder.toString();
} }
private byte[] inpToByteArray(ZipInputStream stream, long fileSize) throws Exception { private byte[] inpToByteArray(ZipInputStream stream, long fileSize) throws Exception {
@ -133,51 +146,50 @@ public class InpxScanner implements Runnable {
return inpByteBuffer.array(); return inpByteBuffer.array();
} }
private void parseInp(File inpxFile) throws Exception { private boolean isNextCarriageReturn(int i, byte[] content) {
/* return i + 1 < content.length && (content[i + 1] == '\r');
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
boolean breaker = false;
*/
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
ZipEntry zipEntry = zipInputStream.getNextEntry();
while (zipEntry != null) {
if (zipEntry.getName().toLowerCase().endsWith(".inp")) {
/*
if (breaker) {
zipEntry = zipInputStream.getNextEntry();
continue;
}
breaker = true;
// */
byte[] content = inpToByteArray(zipInputStream, zipEntry.getSize());
parseInpContent(content, zipEntry.getName());
}
zipEntry = zipInputStream.getNextEntry();
}
}
} }
private void parseInpContent(byte[] content, String name) throws Exception { public static String getLastRunErrors() {
name = name.substring(0, name.lastIndexOf('.')); return lastRunErrors;
}
private class InpxWorker implements Runnable {
private Long libraryId;
private String libraryVersion;
private String name;
private byte[] content;
private InpxWorker(Map.Entry<String, byte[]> entry,
Long libraryId,
String libraryVersion) {
this.libraryId = libraryId;
this.libraryVersion = libraryVersion;
this.name = entry.getKey();
this.content = entry.getValue();
}
@Override
public void run() {
final List<Book> books = new ArrayList<>();
final Set<Author> authors = new HashSet<>();
final Set<Genre> genres = new HashSet<>();
try {
log.info("FILE RELATED " + name); log.info("FILE RELATED " + name);
int lastIndex = 0; int lastIndex = 0;
for (int i = 0; i < content.length; i++) { for (int i = 0; i < content.length; i++) {
if (content[i] == '\n') { if (content[i] == '\n') {
byte[] line = new byte[i - lastIndex]; byte[] line = new byte[i - lastIndex];
System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1); System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1);
Book book = new Book(line, books.add(new Book(line,
name, name,
authorRepository, authors,
genreRepository, genres,
libraryMetadata.getId(), libraryId,
libraryMetadata.getVersion()); libraryVersion));
bookRepository.save(book);
if (isNextCarriageReturn(i, content)) { if (isNextCarriageReturn(i, content)) {
i += 2; i += 2;
@ -186,13 +198,18 @@ public class InpxScanner implements Runnable {
lastIndex = ++i; lastIndex = ++i;
} }
} }
saveAll(books, authors, genres);
} catch (Exception e) {
log.error("{}", e);
lastRunErrors = lastRunErrors + " " + e.getMessage();
}
}
} }
private boolean isNextCarriageReturn(int i, byte[] content) { /* REMINDER: DO NOT PUT THIS SHIT INTO THREAD */
return i + 1 < content.length && (content[i + 1] == '\r'); private synchronized void saveAll(List<Book> books, Set<Author> authors, Set<Genre> genres) {
} authorRepository.saveAll(authors);
genreRepository.saveAll(genres);
public static String getLastRunErrors() { bookRepository.saveAll(books);
return lastRunErrors;
} }
} }

View file

@ -37,7 +37,7 @@ public class SettingsController {
@ModelAttribute(name = "lastScanErrors") @ModelAttribute(name = "lastScanErrors")
public String setLastRunErrors(){ public String setLastRunErrors(){
if (InpxScanner.getLastRunErrors() != null) if (InpxScanner.getLastRunErrors() != "")
return "Last run attempt failed: "+InpxScanner.getLastRunErrors(); return "Last run attempt failed: "+InpxScanner.getLastRunErrors();
return null; return null;
} }
@ -56,7 +56,7 @@ public class SettingsController {
if (inpxScanner.reScan()) if (inpxScanner.reScan())
redirectAttributes.addAttribute("rescanOk", "Rescan started"); redirectAttributes.addAttribute("rescanOk", "Rescan started");
else else
redirectAttributes.addAttribute("rescanError", "Rescan is currently in progress"); redirectAttributes.addAttribute("rescanError", "Rescan could be currently in progress");
return redirectView; return redirectView;
} }

View file

@ -5,16 +5,18 @@ spring:
driver-class-name: org.h2.Driver driver-class-name: org.h2.Driver
generate-unique-name: false generate-unique-name: false
name: marinesco name: marinesco
# url: jdbc:h2:mem:marinesco url: jdbc:h2:mem:marinesco
url: jdbc:h2:file:/tmp/h2 # url: jdbc:h2:file:/tmp/h2
username: sa username: sa
password: password:
jpa: jpa:
properties: properties:
hibernate: hibernate:
database-platform: org.hibernate.dialect.H2Dialect database-platform: org.hibernate.dialect.H2Dialect
# format_sql: true
hibernate: hibernate:
ddl-auto: update ddl-auto: update
# show-sql: true
h2: h2:
console: console:
enabled: true enabled: true