diff options
author | Max Magorsch <arzano@gentoo.org> | 2020-06-22 19:32:48 +0200 |
---|---|---|
committer | Max Magorsch <arzano@gentoo.org> | 2020-06-22 19:32:48 +0200 |
commit | 0f5a1f528fe4f5453f315564b448cfb1f9fea711 (patch) | |
tree | 32d8e114de3681499bac8eaba3212be5ba81ceaf | |
parent | Rework the data model to improve the performance (diff) | |
download | archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.tar.gz archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.tar.bz2 archives-0f5a1f528fe4f5453f315564b448cfb1f9fea711.zip |
Improve the performance of the importer
Signed-off-by: Max Magorsch <arzano@gentoo.org>
-rw-r--r-- | archives.go | 8 |
-rw-r--r-- | docker-compose.develop.yml | 3 |
-rw-r--r-- | docker-compose.yml | 2 |
-rw-r--r-- | pkg/importer/importer.go | 80 |
-rw-r--r-- | pkg/importer/utils.go | 91 |
5 files changed, 141 insertions, 43 deletions
diff --git a/archives.go b/archives.go index b6b2465..dd517ed 100644 --- a/archives.go +++ b/archives.go @@ -21,14 +21,20 @@ func main() { // main part - fullImport := flag.Bool("fullimport", false, "Start a full import, importing all mails") + fullImport := flag.Bool("full-import", false, "Start a full import, importing all mails") + incrementalImport := flag.Bool("incremental-import", false, "Start a incremental import, importing only new mails") serve := flag.Bool("serve", false, "Start serving the web application") + flag.Parse() if *fullImport { importer.FullImport() } + if *incrementalImport { + importer.IncrementalImport() + } + if *serve { app.Serve() } diff --git a/docker-compose.develop.yml b/docker-compose.develop.yml index 0c24a04..7acdd90 100644 --- a/docker-compose.develop.yml +++ b/docker-compose.develop.yml @@ -13,7 +13,8 @@ services: LANG: en_US.UTF-8 LANGUAGE: en_US:en LC_ALL: en_US.UTF-8 - shm_size: 512mb + shm_size: 1024mb + command: postgres -c 'shared_buffers=512MB' -c 'max_connections=1000' volumes: - pgdata:/var/lib/postgresql/data pgadmin: diff --git a/docker-compose.yml b/docker-compose.yml index f95201e..a606797 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -31,7 +31,7 @@ services: com.centurylinklabs.watchtower.enable: "true" environment: ARCHIVES_LOG_FILE: '/var/log/archives/updater.log' - command: '--fullimport' + command: '--full-import' depends_on: - db db: diff --git a/pkg/importer/importer.go b/pkg/importer/importer.go index cdb278d..76ba8e7 100644 --- a/pkg/importer/importer.go +++ b/pkg/importer/importer.go @@ -2,14 +2,88 @@ package importer import ( "archives/pkg/config" + "archives/pkg/database" + "archives/pkg/models" "fmt" + "os" "path/filepath" + "strconv" + "strings" + "time" ) +var overAllcounter int +var importedCounter int +var startTime time.Time + + func FullImport() { + + fmt.Println("Init import...") + filepath.Walk(config.MailDirPath(), initImport) + + overAllcounter = 0 + importedCounter = 0 + 
startTime = time.Now() + filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error { + if overAllcounter % 1000 == 0 { + fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Now().Sub(startTime).String()) + } + overAllcounter++ + if err != nil { + return err + } + if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) { + importedCounter++ + importMail(path, info.Name()) + } + return nil + }) + + fmt.Println("Finished full import. Imported " + strconv.Itoa(importedCounter) + " messages.") +} + +func IncrementalImport() { + var messages []*models.Message + err := database.DBCon.Model(&messages). + Column("filename"). + Select() + + if err != nil { + fmt.Println("Problem during import, aborting:") + fmt.Println(err) + return + } + fmt.Println("Init import...") filepath.Walk(config.MailDirPath(), initImport) - fmt.Println("Start import...") - filepath.Walk(config.MailDirPath(), importMail) - fmt.Println("Finished import.") + + overAllcounter = 0 + importedCounter = 0 + startTime = time.Now() + filepath.Walk(config.MailDirPath(), func(path string, info os.FileInfo, err error) error { + if overAllcounter % 1000 == 0 { + fmt.Println(strconv.Itoa(overAllcounter) + ": " + time.Now().Sub(startTime).String()) + } + overAllcounter++ + if err != nil { + return err + } + if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) && !fileIsAlreadyPresent(path, messages) { + importedCounter++ + importMail(path, info.Name()) + } + return nil + }) + + fmt.Println("Finished incremental import. Imported " + strconv.Itoa(importedCounter) + " new messages.") } + +func fileIsAlreadyPresent(path string, messages []*models.Message) bool { + for _, message := range messages { + if strings.Contains(strings.TrimRight(path, ",S"), strings.TrimRight(message.Filename, ",S")){ + return true + } + } + return false +}
\ No newline at end of file diff --git a/pkg/importer/utils.go b/pkg/importer/utils.go index 8383ad0..de5b27c 100644 --- a/pkg/importer/utils.go +++ b/pkg/importer/utils.go @@ -4,9 +4,11 @@ import ( "archives/pkg/config" "archives/pkg/database" "archives/pkg/models" + "bytes" "fmt" "io" "io/ioutil" + "log" "mime/multipart" "net/mail" "os" @@ -31,8 +33,16 @@ func initImport(path string, info os.FileInfo, err error) error { } if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) { - file, _ := os.Open(path) - m, _ := mail.ReadMessage(file) + file, err := os.Open(path) + defer file.Close() + if err != nil { + return err + } + + m, err := mail.ReadMessage(file) + if err != nil { + return err + } mails = append(mails, &models.Message{ Id: m.Header.Get("X-Archives-Hash"), @@ -47,49 +57,56 @@ func initImport(path string, info os.FileInfo, err error) error { } // TODO -func importMail(path string, info os.FileInfo, err error) error { +func importMail(path, filename string) error { + content, err := ioutil.ReadFile(path) + if err != nil { + log.Fatal(err) + } + + r := bytes.NewReader(content) + m, err := mail.ReadMessage(r) if err != nil { return err } - if !info.IsDir() && getDepth(path, config.MailDirPath()) >= 1 && isPublicList(path) { - file, _ := os.Open(path) - m, _ := mail.ReadMessage(file) - - msg := models.Message{ - Id: m.Header.Get("X-Archives-Hash"), - MessageId: m.Header.Get("Message-Id"), - Filename: info.Name(), - From: m.Header.Get("From"), - To: strings.Split(m.Header.Get("To"), ","), - Cc: strings.Split(m.Header.Get("Cc"), ","), - Subject: m.Header.Get("Subject"), - - List: getListName(path), - - // TODO - Date: getDate(m.Header), - InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")), - //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")), - Body: getBody(m.Header, m.Body), - Attachments: getAttachments(m.Header, m.Body), - - StartsThread: 
m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "", - - Comment: "", - Hidden: false, - } - err := insertMessage(msg) + go importIntoDatabase(path, filename, m) - if err != nil { - fmt.Println("Error during importing Mail") - fmt.Println(err) - } + return nil +} - insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From")) +func importIntoDatabase(path, filename string, m *mail.Message) { + msg := models.Message{ + Id: m.Header.Get("X-Archives-Hash"), + MessageId: m.Header.Get("Message-Id"), + Filename: filename, + From: m.Header.Get("From"), + To: strings.Split(m.Header.Get("To"), ","), + Cc: strings.Split(m.Header.Get("Cc"), ","), + Subject: m.Header.Get("Subject"), + + List: getListName(path), + + // TODO + Date: getDate(m.Header), + InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")), + //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")), + Body: getBody(m.Header, m.Body), + Attachments: getAttachments(m.Header, m.Body), + + StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "", + + Comment: "", + Hidden: false, + } + err := insertMessage(msg) + if err != nil { + fmt.Println("Error during importing Mail") + fmt.Println(err) } - return nil + + insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("X-Archives-Hash"), m.Header.Get("From")) + } func getInReplyToMail(messageId, from string) string { |