Faster INPX parse
This commit is contained in:
parent
b879181bd9
commit
95e26e287a
10 changed files with 219 additions and 144 deletions
|
@ -0,0 +1,19 @@
|
|||
package ru.redrise.marinesco;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.core.task.TaskExecutor;
|
||||
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
|
||||
|
||||
@Configuration
|
||||
public class ThreadPoolTaskExecutorSettings {
|
||||
@Bean
|
||||
public TaskExecutor configTaskExecutor(){
|
||||
final ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
|
||||
executor.setCorePoolSize(8);
|
||||
executor.setMaxPoolSize(16);
|
||||
//executor.setQueueCapacity(50);
|
||||
|
||||
return executor;
|
||||
}
|
||||
}
|
|
@ -2,18 +2,14 @@ package ru.redrise.marinesco.data;
|
|||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.springframework.data.repository.CrudRepository;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import ru.redrise.marinesco.library.Author;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@Repository
|
||||
public interface AuthorRepository extends CrudRepository<Author, Long>{
|
||||
public interface AuthorRepository extends JpaRepository<Author, Long>{
|
||||
Optional<Author> findByAuthorName(String authorName);
|
||||
List<Author> findByAuthorNameContainingIgnoreCase(String authorName);
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ package ru.redrise.marinesco.data;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.data.repository.CrudRepository;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import ru.redrise.marinesco.library.Author;
|
||||
|
@ -12,7 +12,7 @@ import ru.redrise.marinesco.library.Book;
|
|||
|
||||
|
||||
@Repository
|
||||
public interface BookRepository extends CrudRepository<Book, Integer>{
|
||||
public interface BookRepository extends JpaRepository<Book, Integer>{
|
||||
List<Book> findBySeriesContainingIgnoreCase(String title);
|
||||
List<Book> findByTitleContainingIgnoreCase(String title);
|
||||
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
package ru.redrise.marinesco.data;
|
||||
|
||||
import org.springframework.data.repository.CrudRepository;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import ru.redrise.marinesco.library.Genre;
|
||||
|
||||
|
||||
@Repository
|
||||
public interface GenreRepository extends CrudRepository<Genre, String>{
|
||||
public interface GenreRepository extends JpaRepository<Genre, String>{
|
||||
}
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
package ru.redrise.marinesco.library;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.GenerationType;
|
||||
import jakarta.persistence.Id;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Data;
|
||||
|
@ -13,16 +10,12 @@ import lombok.NoArgsConstructor;
|
|||
@Entity
|
||||
@NoArgsConstructor(access = AccessLevel.PRIVATE, force = true)
|
||||
public class Author {
|
||||
// private static final long serialVersionUID = 1L;
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.AUTO)
|
||||
private Long id;
|
||||
|
||||
@Column(unique=true)
|
||||
private String authorName;
|
||||
|
||||
public Author(String name){
|
||||
this.authorName = name;
|
||||
this.id = (long) name.hashCode();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.Id;
|
||||
|
@ -14,8 +15,6 @@ import lombok.Data;
|
|||
import lombok.NoArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import ru.redrise.marinesco.RainbowDump;
|
||||
import ru.redrise.marinesco.data.AuthorRepository;
|
||||
import ru.redrise.marinesco.data.GenreRepository;
|
||||
|
||||
@Slf4j
|
||||
@Entity
|
||||
|
@ -50,8 +49,8 @@ public class Book {
|
|||
|
||||
public Book(byte[] line,
|
||||
String container,
|
||||
AuthorRepository authorRepository,
|
||||
GenreRepository genreRepository,
|
||||
Set<Author> authorsCollection,
|
||||
Set<Genre> genresCollection,
|
||||
Long libraryId,
|
||||
String libraryVersion) throws Exception {
|
||||
// AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;
|
||||
|
@ -62,8 +61,8 @@ public class Book {
|
|||
this.container = container + ".zip";
|
||||
this.authors = new ArrayList<>();
|
||||
this.genres = new ArrayList<>();
|
||||
parseAuthors(authorRepository);
|
||||
parseGenere(genreRepository);
|
||||
parseAuthors(authorsCollection);
|
||||
parseGenere(genresCollection);
|
||||
this.title = parseNextString();
|
||||
this.series = parseNextString();
|
||||
this.serNo = parseNextString();
|
||||
|
@ -96,7 +95,7 @@ public class Book {
|
|||
*/
|
||||
}
|
||||
|
||||
private void parseAuthors(AuthorRepository authorRepository) throws Exception {
|
||||
private void parseAuthors(Set<Author> authorsCollection) throws Exception {
|
||||
for (; position < line.length; position++) {
|
||||
if (line[position] == 0x04) {
|
||||
String allAuthors = new String(line, 0, position, StandardCharsets.UTF_8);
|
||||
|
@ -104,8 +103,9 @@ public class Book {
|
|||
for (String authorName : allAuthors.split(":")) {
|
||||
authorName = authorName.replaceAll(",", " ").trim();
|
||||
if (!authorName.equals("")) {
|
||||
Author author = authorRepository.findByAuthorName(authorName).orElse(new Author(authorName));
|
||||
authors.add(authorRepository.save(author));
|
||||
Author author = new Author(authorName);
|
||||
authorsCollection.add(author);
|
||||
authors.add(author);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,14 +117,15 @@ public class Book {
|
|||
throw new Exception("Invalid 'inp' file format (parse Authors)");
|
||||
}
|
||||
|
||||
private void parseGenere(GenreRepository genreRepository) throws Exception {
|
||||
private void parseGenere(Set<Genre> genresCollection) throws Exception {
|
||||
for (int i = position; i < line.length; i++) {
|
||||
if (line[i] == 0x04) {
|
||||
String allGenres = new String(line, position, i - position, StandardCharsets.UTF_8);
|
||||
|
||||
for (String genreName : allGenres.split(":")) {
|
||||
Genre genre = new Genre(genreName);
|
||||
genres.add(genreRepository.save(genre));
|
||||
genresCollection.add(genre);
|
||||
genres.add(genre);
|
||||
}
|
||||
|
||||
position = i + 1;
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
package ru.redrise.marinesco.library;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import ru.redrise.marinesco.data.LibraryMetadataRepository;
|
||||
|
||||
public class InpxLibraryMetadataScanner {
|
||||
private InpxLibraryMetadataScanner() { }
|
||||
|
||||
public static LibraryMetadata saveFromFile(File inpxFile, LibraryMetadataRepository repository) throws Exception {
|
||||
LibraryMetadata libraryMetadata = new LibraryMetadata();
|
||||
|
||||
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
|
||||
ZipEntry zipEntry;
|
||||
|
||||
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
|
||||
if (isCollection(zipEntry))
|
||||
libraryMetadata.setCollectionInfo(readPlainText(zipInputStream));
|
||||
else if (isVersion(zipEntry))
|
||||
libraryMetadata.setVersionInfo(readPlainText(zipInputStream));
|
||||
}
|
||||
}
|
||||
|
||||
return repository.save(libraryMetadata);
|
||||
}
|
||||
|
||||
private static boolean isCollection(ZipEntry zipEntry) {
|
||||
return zipEntry.getName().toLowerCase().contains("collection.info");
|
||||
}
|
||||
|
||||
private static boolean isVersion(ZipEntry zipEntry){
|
||||
return zipEntry.getName().toLowerCase().contains("version.info");
|
||||
}
|
||||
|
||||
private static String readPlainText(ZipInputStream zipInputStream) throws Exception {
|
||||
byte[] content = new byte[1024];
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
while (zipInputStream.read(content) > 0)
|
||||
stringBuilder.append(new String(content, StandardCharsets.UTF_8));
|
||||
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
}
|
|
@ -3,12 +3,21 @@ package ru.redrise.marinesco.library;
|
|||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.springframework.core.io.FileSystemResource;
|
||||
import org.springframework.core.task.TaskExecutor;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
@ -20,12 +29,11 @@ import ru.redrise.marinesco.settings.ApplicationSettings;
|
|||
|
||||
@Slf4j
|
||||
@Component
|
||||
public class InpxScanner implements Runnable {
|
||||
public class InpxScanner {
|
||||
private static volatile String lastRunErrors = "";
|
||||
private static LocalDateTime lastRunTime = LocalDateTime.of(1970, 01, 01, 0, 0, 0);
|
||||
|
||||
private static volatile Thread parser;
|
||||
private static volatile String lastRunErrors;
|
||||
|
||||
private LibraryMetadata libraryMetadata;
|
||||
private TaskExecutor executor;
|
||||
private LibraryMetadataRepository libraryMetadataRepository;
|
||||
private AuthorRepository authorRepository;
|
||||
private GenreRepository genreRepository;
|
||||
|
@ -33,11 +41,13 @@ public class InpxScanner implements Runnable {
|
|||
|
||||
private String filesLocation;
|
||||
|
||||
public InpxScanner(ApplicationSettings applicationSettings,
|
||||
public InpxScanner(TaskExecutor executor,
|
||||
ApplicationSettings applicationSettings,
|
||||
AuthorRepository authorRepository,
|
||||
GenreRepository genreRepository,
|
||||
BookRepository bookRepository,
|
||||
LibraryMetadataRepository libraryMetadataRepository) {
|
||||
this.executor = executor;
|
||||
this.filesLocation = applicationSettings.getFilesLocation();
|
||||
this.authorRepository = authorRepository;
|
||||
this.genreRepository = genreRepository;
|
||||
|
@ -46,67 +56,70 @@ public class InpxScanner implements Runnable {
|
|||
}
|
||||
|
||||
/*
|
||||
* @return true if executed, false if already running
|
||||
* @return true if executed, false otherwise
|
||||
*/
|
||||
public boolean reScan() {
|
||||
if (parser == null || !parser.isAlive()) {
|
||||
parser = new Thread(this);
|
||||
parser.start();
|
||||
return true;
|
||||
}
|
||||
|
||||
LocalDateTime currentDateTime = LocalDateTime.now();
|
||||
|
||||
if (ChronoUnit.MINUTES.between(lastRunTime, currentDateTime) < 5) {
|
||||
lastRunErrors = "Too frequent requests. Please whait 5 min. Last attmpt: "
|
||||
+ lastRunTime.format(DateTimeFormatter.ofPattern("DD.MM.YYYY HH:mm:ss"));
|
||||
return false;
|
||||
}
|
||||
lastRunTime = currentDateTime;
|
||||
lastRunErrors = "";
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
Thread scanThread = new Thread(() -> {
|
||||
try {
|
||||
final FileSystemResource libraryLocation = new FileSystemResource(filesLocation);
|
||||
File inpxFile = getInpxFile();
|
||||
log.debug("INPX file found: " + inpxFile.getName());
|
||||
|
||||
final File inpxFile = Stream.of(libraryLocation.getFile().listFiles())
|
||||
LibraryMetadata libMetadata = InpxLibraryMetadataScanner.saveFromFile(inpxFile,
|
||||
libraryMetadataRepository);
|
||||
|
||||
Long libId = libMetadata.getId();
|
||||
String libVersion = libMetadata.getVersion();
|
||||
HashMap<String, byte[]> inpEntries = collectInp(inpxFile);
|
||||
|
||||
for (Map.Entry<String, byte[]> entry : inpEntries.entrySet())
|
||||
executor.execute(new InpxWorker(entry, libId, libVersion));
|
||||
} catch (Exception e) {
|
||||
log.error("{}", e);
|
||||
lastRunErrors = lastRunErrors + " " + e.getMessage();
|
||||
}
|
||||
});
|
||||
|
||||
scanThread.start();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private File getInpxFile() throws Exception {
|
||||
final FileSystemResource libraryLocation = new FileSystemResource(filesLocation);
|
||||
return Stream.of(libraryLocation.getFile().listFiles())
|
||||
.filter(file -> file.getName().endsWith(".inpx"))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
log.debug("INPX file found as " + inpxFile.getName());
|
||||
|
||||
getLibraryMetadata(inpxFile);
|
||||
parseInp(inpxFile);
|
||||
// Once multiple libraries are implemented, add here 'delete records with old
|
||||
// version of the library'
|
||||
// TODO: fix library ID changes on every update: add selector on the front
|
||||
} catch (Exception e) {
|
||||
log.error("{}", e);
|
||||
InpxScanner.lastRunErrors = e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
private void getLibraryMetadata(File inpxFile) throws Exception {
|
||||
libraryMetadata = new LibraryMetadata();
|
||||
|
||||
private HashMap<String, byte[]> collectInp(File inpxFile) throws Exception {
|
||||
final HashMap<String, byte[]> inpEntries = new HashMap<>();
|
||||
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
|
||||
ZipEntry zipEntry = zipInputStream.getNextEntry();
|
||||
|
||||
while (zipEntry != null) {
|
||||
if (zipEntry.getName().toLowerCase().contains("collection.info"))
|
||||
libraryMetadata.setCollectionInfo(readPlainText(zipInputStream));
|
||||
|
||||
else if (zipEntry.getName().toLowerCase().contains("version.info"))
|
||||
libraryMetadata.setVersionInfo(readPlainText(zipInputStream));
|
||||
|
||||
zipEntry = zipInputStream.getNextEntry();
|
||||
ZipEntry zipEntry;
|
||||
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
|
||||
if (isInp(zipEntry)) {
|
||||
String zipEntryName = zipEntry.getName();
|
||||
zipEntryName = zipEntryName.substring(0, zipEntryName.lastIndexOf('.'));
|
||||
inpEntries.put(zipEntryName, inpToByteArray(zipInputStream, zipEntry.getSize()));
|
||||
}
|
||||
}
|
||||
|
||||
libraryMetadata = libraryMetadataRepository.save(libraryMetadata);
|
||||
}
|
||||
return inpEntries;
|
||||
}
|
||||
|
||||
private String readPlainText(ZipInputStream zipInputStream) throws Exception {
|
||||
byte[] content = new byte[1024];
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
while (zipInputStream.read(content) > 0)
|
||||
stringBuilder.append(new String(content, StandardCharsets.UTF_8));
|
||||
|
||||
return stringBuilder.toString();
|
||||
private boolean isInp(ZipEntry zipEntry) {
|
||||
return zipEntry.getName().toLowerCase().endsWith(".inp");
|
||||
}
|
||||
|
||||
private byte[] inpToByteArray(ZipInputStream stream, long fileSize) throws Exception {
|
||||
|
@ -133,51 +146,50 @@ public class InpxScanner implements Runnable {
|
|||
return inpByteBuffer.array();
|
||||
}
|
||||
|
||||
private void parseInp(File inpxFile) throws Exception {
|
||||
/*
|
||||
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
|
||||
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
|
||||
log.warn("REMOVE TEMPORARY SOLUTION - BREAKER");
|
||||
boolean breaker = false;
|
||||
*/
|
||||
try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(inpxFile))) {
|
||||
ZipEntry zipEntry = zipInputStream.getNextEntry();
|
||||
|
||||
while (zipEntry != null) {
|
||||
if (zipEntry.getName().toLowerCase().endsWith(".inp")) {
|
||||
/*
|
||||
if (breaker) {
|
||||
zipEntry = zipInputStream.getNextEntry();
|
||||
continue;
|
||||
}
|
||||
breaker = true;
|
||||
// */
|
||||
byte[] content = inpToByteArray(zipInputStream, zipEntry.getSize());
|
||||
parseInpContent(content, zipEntry.getName());
|
||||
}
|
||||
zipEntry = zipInputStream.getNextEntry();
|
||||
}
|
||||
}
|
||||
private boolean isNextCarriageReturn(int i, byte[] content) {
|
||||
return i + 1 < content.length && (content[i + 1] == '\r');
|
||||
}
|
||||
|
||||
private void parseInpContent(byte[] content, String name) throws Exception {
|
||||
name = name.substring(0, name.lastIndexOf('.'));
|
||||
public static String getLastRunErrors() {
|
||||
return lastRunErrors;
|
||||
}
|
||||
|
||||
private class InpxWorker implements Runnable {
|
||||
|
||||
private Long libraryId;
|
||||
private String libraryVersion;
|
||||
private String name;
|
||||
private byte[] content;
|
||||
|
||||
private InpxWorker(Map.Entry<String, byte[]> entry,
|
||||
Long libraryId,
|
||||
String libraryVersion) {
|
||||
this.libraryId = libraryId;
|
||||
this.libraryVersion = libraryVersion;
|
||||
this.name = entry.getKey();
|
||||
this.content = entry.getValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
final List<Book> books = new ArrayList<>();
|
||||
final Set<Author> authors = new HashSet<>();
|
||||
final Set<Genre> genres = new HashSet<>();
|
||||
try {
|
||||
log.info("FILE RELATED " + name);
|
||||
|
||||
int lastIndex = 0;
|
||||
for (int i = 0; i < content.length; i++) {
|
||||
if (content[i] == '\n') {
|
||||
byte[] line = new byte[i - lastIndex];
|
||||
System.arraycopy(content, lastIndex, line, 0, i - lastIndex - 1);
|
||||
|
||||
Book book = new Book(line,
|
||||
books.add(new Book(line,
|
||||
name,
|
||||
authorRepository,
|
||||
genreRepository,
|
||||
libraryMetadata.getId(),
|
||||
libraryMetadata.getVersion());
|
||||
|
||||
bookRepository.save(book);
|
||||
authors,
|
||||
genres,
|
||||
libraryId,
|
||||
libraryVersion));
|
||||
|
||||
if (isNextCarriageReturn(i, content)) {
|
||||
i += 2;
|
||||
|
@ -186,13 +198,18 @@ public class InpxScanner implements Runnable {
|
|||
lastIndex = ++i;
|
||||
}
|
||||
}
|
||||
saveAll(books, authors, genres);
|
||||
} catch (Exception e) {
|
||||
log.error("{}", e);
|
||||
lastRunErrors = lastRunErrors + " " + e.getMessage();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isNextCarriageReturn(int i, byte[] content) {
|
||||
return i + 1 < content.length && (content[i + 1] == '\r');
|
||||
}
|
||||
|
||||
public static String getLastRunErrors() {
|
||||
return lastRunErrors;
|
||||
/* REMINDER: DO NOT PUT THIS SHIT INTO THREAD */
|
||||
private synchronized void saveAll(List<Book> books, Set<Author> authors, Set<Genre> genres) {
|
||||
authorRepository.saveAll(authors);
|
||||
genreRepository.saveAll(genres);
|
||||
bookRepository.saveAll(books);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ public class SettingsController {
|
|||
|
||||
@ModelAttribute(name = "lastScanErrors")
|
||||
public String setLastRunErrors(){
|
||||
if (InpxScanner.getLastRunErrors() != null)
|
||||
if (InpxScanner.getLastRunErrors() != "")
|
||||
return "Last run attempt failed: "+InpxScanner.getLastRunErrors();
|
||||
return null;
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ public class SettingsController {
|
|||
if (inpxScanner.reScan())
|
||||
redirectAttributes.addAttribute("rescanOk", "Rescan started");
|
||||
else
|
||||
redirectAttributes.addAttribute("rescanError", "Rescan is currently in progress");
|
||||
redirectAttributes.addAttribute("rescanError", "Rescan could be currently in progress");
|
||||
|
||||
return redirectView;
|
||||
}
|
||||
|
|
|
@ -5,16 +5,18 @@ spring:
|
|||
driver-class-name: org.h2.Driver
|
||||
generate-unique-name: false
|
||||
name: marinesco
|
||||
# url: jdbc:h2:mem:marinesco
|
||||
url: jdbc:h2:file:/tmp/h2
|
||||
url: jdbc:h2:mem:marinesco
|
||||
# url: jdbc:h2:file:/tmp/h2
|
||||
username: sa
|
||||
password:
|
||||
jpa:
|
||||
properties:
|
||||
hibernate:
|
||||
database-platform: org.hibernate.dialect.H2Dialect
|
||||
# format_sql: true
|
||||
hibernate:
|
||||
ddl-auto: update
|
||||
# show-sql: true
|
||||
h2:
|
||||
console:
|
||||
enabled: true
|
||||
|
|
Loading…
Reference in a new issue