Faster INPX parse

master
Dmitry Isaenko 2024-01-17 18:39:51 +03:00
parent b879181bd9
commit 95e26e287a
10 changed files with 219 additions and 144 deletions

View File

@ -0,0 +1,19 @@
package ru.redrise.marinesco;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.task.TaskExecutor;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
/**
 * Spring configuration that exposes the application's shared {@link TaskExecutor}.
 */
@Configuration
public class ThreadPoolTaskExecutorSettings {

    /** Number of threads the pool keeps as its core size. */
    private static final int CORE_POOL_SIZE = 8;

    /** Upper bound on the number of threads in the pool. */
    private static final int MAX_POOL_SIZE = 16;

    /**
     * Builds the thread-pool backed executor bean.
     *
     * @return a {@link ThreadPoolTaskExecutor} configured with the pool sizes above
     */
    @Bean
    public TaskExecutor configTaskExecutor() {
        final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        // Queue capacity is intentionally left at the framework default (a cap of 50 was tried and disabled).
        // NOTE(review): with an unbounded default queue the pool would never grow past the core size,
        // which would make MAX_POOL_SIZE ineffective - confirm against the Spring documentation.
        return pool;
    }
}

View File

@ -2,18 +2,14 @@ package ru.redrise.marinesco.data;
import java.util.Optional;
import org.springframework.data.repository.CrudRepository;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import ru.redrise.marinesco.library.Author;
import java.util.List;
@Repository
public interface AuthorRepository extends CrudRepository<Author, Long>{
public interface AuthorRepository extends JpaRepository<Author, Long>{
Optional<Author> findByAuthorName(String authorName);
List<Author> findByAuthorNameContainingIgnoreCase(String authorName);
}

View File

@ -2,7 +2,7 @@ package ru.redrise.marinesco.data;
import java.util.List;
import org.springframework.data.repository.CrudRepository;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import ru.redrise.marinesco.library.Author;
@ -12,7 +12,7 @@ import ru.redrise.marinesco.library.Book;
@Repository
public interface BookRepository extends CrudRepository<Book, Integer>{
public interface BookRepository extends JpaRepository<Book, Integer>{
List<Book> findBySeriesContainingIgnoreCase(String title);
List<Book> findByTitleContainingIgnoreCase(String title);

View File

@ -1,11 +1,11 @@
package ru.redrise.marinesco.data;
import org.springframework.data.repository.CrudRepository;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import ru.redrise.marinesco.library.Genre;
@Repository
public interface GenreRepository extends CrudRepository<Genre, String>{
public interface GenreRepository extends JpaRepository<Genre, String>{
}

View File

@ -1,9 +1,6 @@
package ru.redrise.marinesco.library;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import lombok.AccessLevel;
import lombok.Data;
@ -13,16 +10,12 @@ import lombok.NoArgsConstructor;
@Entity
@NoArgsConstructor(access = AccessLevel.PRIVATE, force = true)
public class Author {
// private static final long serialVersionUID = 1L;
@Id
@GeneratedValue(strategy = GenerationType.AUTO)
private Long id;
@Column(unique=true)
private String authorName;
public Author(String name){
this.authorName = name;
this.id = (long) name.hashCode();
}
}

View File

@ -4,6 +4,7 @@ import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import jakarta.persistence.Entity;
import jakarta.persistence.Id;
@ -14,8 +15,6 @@ import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import ru.redrise.marinesco.RainbowDump;
import ru.redrise.marinesco.data.AuthorRepository;
import ru.redrise.marinesco.data.GenreRepository;
@Slf4j
@Entity
@ -50,8 +49,8 @@ public class Book {
public Book(byte[] line,
String container,
AuthorRepository authorRepository,
GenreRepository genreRepository,
Set<Author> authorsCollection,
Set<Genre> genresCollection,
Long libraryId,
String libraryVersion) throws Exception {
// AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;
@ -62,8 +61,8 @@ public class Book {
this.container = container + ".zip";
this.authors = new ArrayList<>();
this.genres = new ArrayList<>();
parseAuthors(authorRepository);
parseGenere(genreRepository);
parseAuthors(authorsCollection);
parseGenere(genresCollection);
this.title = parseNextString();
this.series = parseNextString();
this.serNo = parseNextString();
@ -96,7 +95,7 @@ public class Book {
*/
}
private void parseAuthors(AuthorRepository authorRepository) throws Exception {
private void parseAuthors(Set<Author> authorsCollection) throws Exception {
for (; position < line.length; position++) {
if (line[position] == 0x04) {
String allAuthors = new String(line, 0, position, StandardCharsets.UTF_8);
@ -104,8 +103,9 @@ public class Book {
for (String authorName : allAuthors.split(":")) {
authorName = authorName.replaceAll(",", " ").trim();
if (!authorName.equals("")) {
Author author = authorRepository.findByAuthorName(authorName).orElse(new Author(authorName));
authors.add(authorRepository.save(author));
Author author = new Author(authorName);
authorsCollection.add(author);
authors.add(author);
}
}
@ -117,14 +117,15 @@ public class Book {
throw new Exception("Invalid 'inp' file format (parse Authors)");
}
private void parseGenere(GenreRepository genreRepository) throws Exception {
private void parseGenere(Set<Genre> genresCollection) throws Exception {
for (int i = position; i < line.length; i++) {
if (line[i] == 0x04) {
String allGenres = new String(line, position, i - position, StandardCharsets.UTF_8);
for (String genreName : allGenres.split(":")) {
Genre genre = new Genre(genreName);
genres.add(genreRepository.save(genre));
genresCollection.add(genre);
genres.add(genre);
}
position = i + 1;

View File

@ -0,0 +1,47 @@
package ru.redrise.marinesco.library;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import ru.redrise.marinesco.data.LibraryMetadataRepository;
/**
 * Extracts library-level metadata (the {@code collection.info} and {@code version.info}
 * entries) from an INPX archive and persists it.
 */
public class InpxLibraryMetadataScanner {

    /** Static utility holder; not meant to be instantiated. */
    private InpxLibraryMetadataScanner() { }

    /**
     * Walks every entry of the INPX (zip) file, copies the text of the collection and
     * version entries into a new {@link LibraryMetadata} and saves it.
     *
     * @param inpxFile   INPX archive to read
     * @param repository repository used to persist the metadata
     * @return the entity returned by {@code repository.save(...)}
     * @throws Exception if the archive cannot be opened or read
     */
    public static LibraryMetadata saveFromFile(File inpxFile, LibraryMetadataRepository repository) throws Exception {
        LibraryMetadata libraryMetadata = new LibraryMetadata();

        try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
            ZipEntry zipEntry;
            while ((zipEntry = zipInputStream.getNextEntry()) != null) {
                if (isCollection(zipEntry)) {
                    libraryMetadata.setCollectionInfo(readPlainText(zipInputStream));
                } else if (isVersion(zipEntry)) {
                    libraryMetadata.setVersionInfo(readPlainText(zipInputStream));
                }
            }
        }

        return repository.save(libraryMetadata);
    }

    /** @return true when the entry name contains {@code collection.info} (case-insensitive) */
    private static boolean isCollection(ZipEntry zipEntry) {
        return zipEntry.getName().toLowerCase().contains("collection.info");
    }

    /** @return true when the entry name contains {@code version.info} (case-insensitive) */
    private static boolean isVersion(ZipEntry zipEntry) {
        return zipEntry.getName().toLowerCase().contains("version.info");
    }

    /**
     * Reads the remainder of the current zip entry and decodes it as UTF-8.
     *
     * The whole entry is read before decoding. The previous chunked loop decoded the
     * full 1024-byte buffer on every pass regardless of how many bytes were actually
     * read, which appended stale/zero bytes to the text and could split a multi-byte
     * UTF-8 sequence across two chunks.
     *
     * @param zipInputStream stream positioned at the start of the entry to read
     * @return the entry content as text
     * @throws Exception if reading from the stream fails
     */
    private static String readPlainText(ZipInputStream zipInputStream) throws Exception {
        return new String(zipInputStream.readAllBytes(), StandardCharsets.UTF_8);
    }
}

View File

@ -3,12 +3,21 @@ package ru.redrise.marinesco.library;
import java.io.File;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.task.TaskExecutor;
import org.springframework.stereotype.Component;
import lombok.extern.slf4j.Slf4j;
@ -20,12 +29,11 @@ import ru.redrise.marinesco.settings.ApplicationSettings;
@Slf4j
@Component
public class InpxScanner implements Runnable {
public class InpxScanner {
private static volatile String lastRunErrors = "";
private static LocalDateTime lastRunTime = LocalDateTime.of(1970, 01, 01, 0, 0, 0);
private static volatile Thread parser;
private static volatile String lastRunErrors;
private LibraryMetadata libraryMetadata;
private TaskExecutor executor;
private LibraryMetadataRepository libraryMetadataRepository;
private AuthorRepository authorRepository;
private GenreRepository genreRepository;
@ -33,11 +41,13 @@ public class InpxScanner implements Runnable {
private String filesLocation;
public InpxScanner(ApplicationSettings applicationSettings,
public InpxScanner(TaskExecutor executor,
ApplicationSettings applicationSettings,
AuthorRepository authorRepository,
GenreRepository genreRepository,
BookRepository bookRepository,
LibraryMetadataRepository libraryMetadataRepository) {
this.executor = executor;
this.filesLocation = applicationSettings.getFilesLocation();
this.authorRepository = authorRepository;
this.genreRepository = genreRepository;
@ -46,67 +56,70 @@ public class InpxScanner implements Runnable {
}
/*
* @return true if executed, false if already running
* @return true if executed, false otherwise
*/
public boolean reScan() {
if (parser == null || !parser.isAlive()) {
parser = new Thread(this);
parser.start();
return true;
// Throttle: refuse to start a new scan if the previous one was requested less than 5 minutes ago.
LocalDateTime currentDateTime = LocalDateTime.now();
if (ChronoUnit.MINUTES.between(lastRunTime, currentDateTime) < 5) {
    // Pattern letters are case-sensitive: 'dd' is day-of-month and 'yyyy' is year.
    // The former 'DD'/'YYYY' mean day-of-year and week-based-year and printed a wrong date.
    lastRunErrors = "Too frequent requests. Please wait 5 min. Last attempt: "
            + lastRunTime.format(DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm:ss"));
    return false;
}
return false;
lastRunTime = currentDateTime;
lastRunErrors = "";
Thread scanThread = new Thread(() -> {
try {
File inpxFile = getInpxFile();
log.debug("INPX file found: " + inpxFile.getName());
LibraryMetadata libMetadata = InpxLibraryMetadataScanner.saveFromFile(inpxFile,
libraryMetadataRepository);
Long libId = libMetadata.getId();
String libVersion = libMetadata.getVersion();
HashMap<String, byte[]> inpEntries = collectInp(inpxFile);
for (Map.Entry<String, byte[]> entry : inpEntries.entrySet())
executor.execute(new InpxWorker(entry, libId, libVersion));
} catch (Exception e) {
log.error("{}", e);
lastRunErrors = lastRunErrors + " " + e.getMessage();
}
});
scanThread.start();
return true;
}
@Override
public void run() {
try {
final FileSystemResource libraryLocation = new FileSystemResource(filesLocation);
final File inpxFile = Stream.of(libraryLocation.getFile().listFiles())
.filter(file -> file.getName().endsWith(".inpx"))
.findFirst()
.get();
log.debug("INPX file found as " + inpxFile.getName());
getLibraryMetadata(inpxFile);
parseInp(inpxFile);
// Once multiple libraries are implemented, add here 'delete records with old
// version of the library'
// TODO: fix library ID changes on every update: add selector on the front
} catch (Exception e) {
log.error("{}", e);
InpxScanner.lastRunErrors = e.getMessage();
}
private File getInpxFile() throws Exception {
final FileSystemResource libraryLocation = new FileSystemResource(filesLocation);
return Stream.of(libraryLocation.getFile().listFiles())
.filter(file -> file.getName().endsWith(".inpx"))
.findFirst()
.get();
}
private void getLibraryMetadata(File inpxFile) throws Exception {
libraryMetadata = new LibraryMetadata();
private HashMap<String, byte[]> collectInp(File inpxFile) throws Exception {
final HashMap<String, byte[]> inpEntries = new HashMap<>();
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
ZipEntry zipEntry = zipInputStream.getNextEntry();
while (zipEntry != null) {
if (zipEntry.getName().toLowerCase().contains("collection.info"))
libraryMetadata.setCollectionInfo(readPlainText(zipInputStream));
else if (zipEntry.getName().toLowerCase().contains("version.info"))
libraryMetadata.setVersionInfo(readPlainText(zipInputStream));
zipEntry = zipInputStream.getNextEntry();
ZipEntry zipEntry;
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
if (isInp(zipEntry)) {
String zipEntryName = zipEntry.getName();
zipEntryName = zipEntryName.substring(0, zipEntryName.lastIndexOf('.'));
inpEntries.put(zipEntryName, inpToByteArray(zipInputStream, zipEntry.getSize()));
}
}
}
libraryMetadata = libraryMetadataRepository.save(libraryMetadata);
return inpEntries;
}
private String readPlainText(ZipInputStream zipInputStream) throws Exception {
byte[] content = new byte[1024];
StringBuilder stringBuilder = new StringBuilder();
while (zipInputStream.read(content) > 0)
stringBuilder.append(new String(content, StandardCharsets.UTF_8));
return stringBuilder.toString();
private boolean isInp(ZipEntry zipEntry) {
return zipEntry.getName().toLowerCase().endsWith(".inp");
}
private byte[] inpToByteArray(ZipInputStream stream, long fileSize) throws Exception {
@ -133,61 +146,6 @@ public class InpxScanner implements Runnable {
return inpByteBuffer.array();
}
private void parseInp(File inpxFile) throws Exception {
/*
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
boolean breaker = false;
*/
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
ZipEntry zipEntry = zipInputStream.getNextEntry();
while (zipEntry != null) {
if (zipEntry.getName().toLowerCase().endsWith(".inp")) {
/*
if (breaker) {
zipEntry = zipInputStream.getNextEntry();
continue;
}
breaker = true;
// */
byte[] content = inpToByteArray(zipInputStream, zipEntry.getSize());
parseInpContent(content, zipEntry.getName());
}
zipEntry = zipInputStream.getNextEntry();
}
}
}
private void parseInpContent(byte[] content, String name) throws Exception {
name = name.substring(0, name.lastIndexOf('.'));
log.info("FILE RELATED " + name);
int lastIndex = 0;
for (int i = 0; i < content.length; i++) {
if (content[i] == '\n') {
byte[] line = new byte[i - lastIndex];
System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1);
Book book = new Book(line,
name,
authorRepository,
genreRepository,
libraryMetadata.getId(),
libraryMetadata.getVersion());
bookRepository.save(book);
if (isNextCarriageReturn(i, content)) {
i += 2;
lastIndex = i;
} else
lastIndex = ++i;
}
}
}
private boolean isNextCarriageReturn(int i, byte[] content) {
return i + 1 < content.length && (content[i + 1] == '\r');
}
@ -195,4 +153,63 @@ public class InpxScanner implements Runnable {
public static String getLastRunErrors() {
return lastRunErrors;
}
private class InpxWorker implements Runnable {
private Long libraryId;
private String libraryVersion;
private String name;
private byte[] content;
private InpxWorker(Map.Entry<String, byte[]> entry,
Long libraryId,
String libraryVersion) {
this.libraryId = libraryId;
this.libraryVersion = libraryVersion;
this.name = entry.getKey();
this.content = entry.getValue();
}
@Override
public void run() {
final List<Book> books = new ArrayList<>();
final Set<Author> authors = new HashSet<>();
final Set<Genre> genres = new HashSet<>();
try {
log.info("FILE RELATED " + name);
int lastIndex = 0;
for (int i = 0; i < content.length; i++) {
if (content[i] == '\n') {
byte[] line = new byte[i - lastIndex];
System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1);
books.add(new Book(line,
name,
authors,
genres,
libraryId,
libraryVersion));
if (isNextCarriageReturn(i, content)) {
i += 2;
lastIndex = i;
} else
lastIndex = ++i;
}
}
saveAll(books, authors, genres);
} catch (Exception e) {
log.error("{}", e);
lastRunErrors = lastRunErrors + " " + e.getMessage();
}
}
}
/*
 * Persists the results of one parsed entry: authors first, then genres, then books.
 * NOTE(review): presumably authors and genres go first so that rows referenced by
 * books already exist - confirm against the entity mappings.
 *
 * The method is synchronized on the enclosing scanner instance, so worker tasks that
 * finish at the same time perform their saves one after another.
 *
 * REMINDER (from the original author): do not move this into the worker thread.
 * NOTE(review): presumably because that would drop the shared lock - confirm.
 */
private synchronized void saveAll(List<Book> books, Set<Author> authors, Set<Genre> genres) {
authorRepository.saveAll(authors);
genreRepository.saveAll(genres);
bookRepository.saveAll(books);
}
}

View File

@ -37,7 +37,7 @@ public class SettingsController {
@ModelAttribute(name = "lastScanErrors")
public String setLastRunErrors(){
if (InpxScanner.getLastRunErrors() != null)
if (InpxScanner.getLastRunErrors() != "")
return "Last run attempt failed: "+InpxScanner.getLastRunErrors();
return null;
}
@ -56,7 +56,7 @@ public class SettingsController {
if (inpxScanner.reScan())
redirectAttributes.addAttribute("rescanOk", "Rescan started");
else
redirectAttributes.addAttribute("rescanError", "Rescan is currently in progress");
redirectAttributes.addAttribute("rescanError", "Rescan could be currently in progress");
return redirectView;
}

View File

@ -5,16 +5,18 @@ spring:
driver-class-name: org.h2.Driver
generate-unique-name: false
name: marinesco
# url: jdbc:h2:mem:marinesco
url: jdbc:h2:file:/tmp/h2
url: jdbc:h2:mem:marinesco
# url: jdbc:h2:file:/tmp/h2
username: sa
password:
jpa:
properties:
hibernate:
database-platform: org.hibernate.dialect.H2Dialect
# format_sql: true
hibernate:
ddl-auto: update
# show-sql: true
h2:
console:
enabled: true