In [None]:
# Paths used by the conversion: the CSV export read as input and the
# plain-text file the converted records are written to.
infilename, outfilename = "my_scopus_out.csv", "my_wos_format.txt"

from copy import deepcopy
import pandas as pd
from pybliometrics.scopus import AbstractRetrieval
import os, calendar

# Load the CSV export into a DataFrame; the conversion below reads its
# columns (e.g. "EID", "Authors", "Title") row by row.
scopus = pd.read_csv(filepath_or_buffer=infilename)

### document type
# Maps the value of the CSV "Document Type" column to the one-letter code
# written to the "PT" field of the output:
#   "J" = Journal, "B" = Book, "S" = Series, "P" = Patent
dic_docu_type = dict([
    ('Article', "J"),
    ('Review', "J"),
    ('Conference Paper', "J"),
    ('Conference Review', "J"),
    ('Book Chapter', "B"),
    ('Erratum', "J"),
    ('Letter', "J"),
    ('[No source information available]', "J"),
    ('Note', "J"),
    ('Short Survey', "J"),
    ('Book', "B"),
    ('Retracted', "B"),
    ('Editorial', "S"),
])

### language
# Maps a three-letter language code to the language name written to the
# "LA" field of the output.
dic_language = dict(eng="English", kor="Korean")


### File Conversion
# Converts rows of the ``scopus`` DataFrame into Web of Science style
# plain-text records appended to ``outfilename``.  For every row an
# ``AbstractRetrieval`` object is fetched (view="FULL") to supply the fields
# the CSV does not contain.  Each record is built in memory and written in a
# single call, so a failure part-way through a record never leaves a
# truncated record in the output file.

# Index of the first CSV row to convert.
# NOTE(review): 398 looks like the resume point of an interrupted run -- set
# it to 0 to convert the whole export.
START_ROW = 398

# Value of the "Authors" column that marks rows to be skipped.
NO_AUTHOR_MARKER = '[No author name available]'


def _tagged_lines(tag, values):
    """Return the output lines for a multi-valued field.

    The first value is prefixed with ``tag`` and a space; every further value
    is a continuation line indented by three spaces.  An empty ``values``
    yields no lines at all.
    """
    return [
        (f"{tag} " if position == 0 else "   ") + f"{value}\n"
        for position, value in enumerate(values)
    ]


def _cell_text(cell):
    """Render a CSV cell as text, printing whole-number floats without '.0'.

    pandas stores a numeric column that contains blanks as floats, which
    would otherwise turn a volume of 260 into "260.0".
    """
    if isinstance(cell, float) and cell.is_integer():
        return str(int(cell))
    return str(cell)


def _build_record(row, ab):
    """Build the lines of one output record.

    Args:
        row: one row of ``scopus`` (a pandas Series indexed by column name).
        ab: the ``AbstractRetrieval`` object fetched for that row's EID.

    Returns:
        A list of newline-terminated strings, ending with the "ER" terminator
        followed by a blank line.
    """
    lines = []

    # 1. PT: publication type (unmapped document types become "unknown")
    docu_type = row["Document Type"]
    lines.append(f"PT {dic_docu_type.get(docu_type, 'unknown')}\n")

    # 2. AU: author names
    authors = ab.authors or []
    lines += _tagged_lines("AU", [author.indexed_name for author in authors])

    # 3. AF: author names, full
    lines += _tagged_lines(
        "AF", [f"{author.surname}, {author.given_name}" for author in authors]
    )

    # 4. TI: document title
    lines.append(f"TI {row['Title']}\n")

    # 5. SO: publication name
    lines.append(f"SO {row['Source title']}\n")

    # 6. LA: language (unmapped codes become "unknown")
    lines.append(f"LA {dic_language.get(ab.language, 'unknown')}\n")

    # 7. DT: document type
    lines.append(f"DT {docu_type}\n")

    # 8. DE: author keywords
    keywords = ab.authkeywords
    if keywords is None:
        lines.append("DE None\n")
    else:
        lines.append(f"DE {'; '.join(keywords)}\n")

    # 9. ID: Keyword Plus (not available from this source)
    lines.append("ID None\n")

    # 10. AB: abstract
    lines.append(f"AB {ab.abstract}\n")

    # 11. C1: author address -- one line per organization, in order of first
    # appearance, listing the authors that belong to it.  Entries without an
    # organization are skipped.
    members_by_org = {}
    for member in ab.authorgroup or []:
        if member.organization is not None:
            members_by_org.setdefault(member.organization, []).append(member)

    addresses = []
    for organization, members in members_by_org.items():
        names = '; '.join(f"{m.surname}, {m.given_name}" for m in members)
        # The address is taken from the first author of the organization;
        # missing parts are left out rather than printed as "None".
        head = members[0]
        parts = [organization, head.city, head.postalcode,
                 head.addresspart, head.country]
        address = ', '.join(str(part) for part in parts if part is not None)
        addresses.append(f"[{names}] {address}")
    lines += _tagged_lines("C1", addresses)

    # 12. RP: reprint address (not available from this source)
    lines.append("RP None\n")

    # 13. EM: e-mail address (not available from this source)
    lines.append("EM None\n")

    # 14. CR: cited references -- iterate over the references actually
    # returned instead of trusting the reported count, which may differ.
    references = ab.references or []
    cited = []
    for reference in references:
        first_field = reference.authors
        if first_field is None:
            first_field = "[Anonymous]"
        fields = [str(first_field)]
        for item in (reference.publicationyear, reference.sourcetitle,
                     reference.volume, reference.first):
            if item is not None:
                fields.append(str(item))
        if reference.doi is not None:
            fields.append(f"DOI {reference.doi}")
        cited.append(', '.join(fields))
    lines += _tagged_lines("CR", cited)

    # 15. NR: cited reference count -- always computed for this article, so a
    # document without references no longer reuses the previous count.
    if ab.refcount is not None:
        refcount = int(ab.refcount)
    else:
        refcount = len(references)
    lines.append(f"NR {refcount}\n")

    # 16. TC: Web of Science Core Collection times cited count
    citecount = ab.citedby_count
    lines.append(f"TC {citecount}\n")

    # 17. Z9: total times cited count
    lines.append(f"Z9 {citecount}\n")

    # Not written:
    # 18. U1 : Usage Count (Last 180 Days)
    # 19. U2 : Usage Count (Since 2013)
    # 20. PU : Publisher = ELSEVIER SCI LTD
    # 21. PI : Publisher City = OXFORD
    # 22. PA : Publisher Address = THE BOULEVARD, LANGFORD LANE, KIDLINGTON, OXFORD OX5 1GB, OXON, ENGLAND

    # 23. SN: International Standard Serial Number (ISSN) = 0959-6526
    if ab.issn is not None:
        lines.append(f"SN {ab.issn}\n")

    # Not written:
    # 24. EI : Electronic International Standard Serial Number (eISSN) = 1879-1786

    if ab.sourcetitle_abbreviation is not None:
        # 25. J9: 29-character source abbreviation = J CLEAN PROD
        lines.append(f"J9 {ab.sourcetitle_abbreviation.upper()}\n")
        # 26. JI: ISO source abbreviation = J. Clean Prod.
        lines.append(f"JI {ab.sourcetitle_abbreviation}\n")

    # 27. PD: publication date = JUL 1 (coverDate is split on "-" and the
    # second and third parts are used as month and day)
    if ab.coverDate is not None:
        date_parts = ab.coverDate.split('-')
        month_label = calendar.month_name[int(date_parts[1])][:3].upper()
        lines.append(f"PD {month_label} {int(date_parts[2])}\n")

    # 28. PY: publication year = 2020
    # 29. VL: volume = 260
    # 30. AR: article number = 121059
    # 31. DI: digital object identifier = 10.1016/j.jclepro.2020.121059
    # Blank CSV cells are NaN (never None), so they are detected with
    # pd.notna and the field is omitted instead of being written as "nan".
    for tag, column in (("PY", "Year"), ("VL", "Volume"),
                        ("AR", "Art. No."), ("DI", "DOI")):
        cell = row[column]
        if pd.notna(cell):
            lines.append(f"{tag} {_cell_text(cell)}\n")

    # Not written:
    # 32. PG : Page Count = 14
    # 33. WC : Web of Science Categories = Green & Sustainable Science & Technology; Engineering, Environmental; Environmental Sciences

    # 34. SC: research areas = Science & Technology - Other Topics; Engineering; Environmental Sciences & Ecology
    if ab.subject_areas:
        lines.append(
            "SC " + "; ".join(area.area for area in ab.subject_areas) + "\n"
        )

    # Not written:
    # 35. GA : Document Delivery Number = LL4XH
    # 36. UT : Accession Number = WOS:000531559900003
    # 37. DA : Date this report was generated. = 2020-06-14

    # End of record, followed by a blank separator line.
    lines.append("ER\n\n")
    return lines


# A new output file starts with the file header; an existing file is kept and
# appended to.
if not os.path.exists(outfilename):
    with open(outfilename, "w", encoding="utf-8") as datafile:
        datafile.write("FN Clarivate Analytics Web of Science\nVR 1.0\n")

for artno in range(START_ROW, scopus.shape[0]):
    row = scopus.iloc[artno]

    # Rows without author information are skipped before any retrieval, so
    # no API request is spent on them.
    if row["Authors"] == NO_AUTHOR_MARKER:
        continue

    ab = AbstractRetrieval(row["EID"], view="FULL")
    record = _build_record(row, ab)

    # Re-open per record so everything converted so far is on disk if a later
    # retrieval fails.
    with open(outfilename, "a", encoding="utf-8") as datafile:
        datafile.writelines(record)

# End-of-file marker; always appended so the records above are never
# truncated.
with open(outfilename, "a", encoding="utf-8") as datafile:
    datafile.write("EF")