{ "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from copy import deepcopy\n", "import numpy as np\n", "import pandas as pd\n", "import calendar\n", "from pybliometrics.scopus import ScopusSearch\n", "from pybliometrics.scopus import AbstractRetrieval\n", "\n", "from tqdm import tqdm\n", "\n", "pd.options.display.max_columns = 30\n", "\n", "filename = \"BIPV_ML.txt\"" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eiddoipiipubmed_idtitlesubtypesubtypeDescriptioncreatorafidaffilnameaffiliation_cityaffiliation_countryauthor_countauthor_namesauthor_ids...issnsource_ideIssnaggregationTypevolumeissueIdentifierarticle_numberpageRangedescriptionauthkeywordscitedby_countopenaccessfund_acrfund_nofund_sponsor
02-s2.0-8512514579610.1016/j.egyr.2022.02.088S2352484722003353NoneData driven approach to forecast the next day ...arArticleFentis A.60017798;127756529;116607338Faculté des Sciences et Techniques de Mohammed...Mohammedia;Mohammedia;CasablancaMorocco;Morocco;Morocco5Fentis, Ayoub;Rafik, Mohamed;Bahatti, Lhoussai...57195404384;57208745688;55837734100;6506839198......None2110038951123524847Journal8NoneNone3221-3233Photovoltaic (PV) power has became an attracti...Machine learning | PV power forecasting | Rene...01NoneundefinedNone
12-s2.0-8512367614710.1016/j.egyr.2022.01.105S2352484722001056NoneEnergy modeling and photovoltaics integration ...arArticleAl Huneidi D.I.60113885Hamad Bin Khalifa University, College of Scien...DohaQatar3Al Huneidi, Dana I.;Tahir, Furqan;Al-Ghamdi, S...57389610600;57201605786;56439247500...None2110038951123524847Journal8NoneNone166-171Due to Qatar's increasing population and elect...Climate change | Demand side management | Desi...11QNRFNPRP12S-0212-190073Qatar National Research Fund
22-s2.0-8512440987410.1016/j.engappai.2022.104707S0952197622000288NoneMulti-quantile recurrent neural network for fe...arArticleZhang X.Y.60020595;60001455Royal Holloway, University of London;Anhui Uni...Egham;HefeiUnited Kingdom;China3Zhang, Xiao Yu;Watkins, Chris;Kuenzel, Stefanie57239241400;57447667900;55875409300...0952197624182NoneJournal110None104707NoneThe purpose of feeder-level energy disaggregat...Behind-the-meter PV generation | Deep neural n...00NoneundefinedNone
\n", "

3 rows × 34 columns

\n", "
" ], "text/plain": [ " eid doi pii \\\n", "0 2-s2.0-85125145796 10.1016/j.egyr.2022.02.088 S2352484722003353 \n", "1 2-s2.0-85123676147 10.1016/j.egyr.2022.01.105 S2352484722001056 \n", "2 2-s2.0-85124409874 10.1016/j.engappai.2022.104707 S0952197622000288 \n", "\n", " pubmed_id title subtype \\\n", "0 None Data driven approach to forecast the next day ... ar \n", "1 None Energy modeling and photovoltaics integration ... ar \n", "2 None Multi-quantile recurrent neural network for fe... ar \n", "\n", " subtypeDescription creator afid \\\n", "0 Article Fentis A. 60017798;127756529;116607338 \n", "1 Article Al Huneidi D.I. 60113885 \n", "2 Article Zhang X.Y. 60020595;60001455 \n", "\n", " affilname \\\n", "0 Faculté des Sciences et Techniques de Mohammed... \n", "1 Hamad Bin Khalifa University, College of Scien... \n", "2 Royal Holloway, University of London;Anhui Uni... \n", "\n", " affiliation_city affiliation_country author_count \\\n", "0 Mohammedia;Mohammedia;Casablanca Morocco;Morocco;Morocco 5 \n", "1 Doha Qatar 3 \n", "2 Egham;Hefei United Kingdom;China 3 \n", "\n", " author_names \\\n", "0 Fentis, Ayoub;Rafik, Mohamed;Bahatti, Lhoussai... \n", "1 Al Huneidi, Dana I.;Tahir, Furqan;Al-Ghamdi, S... \n", "2 Zhang, Xiao Yu;Watkins, Chris;Kuenzel, Stefanie \n", "\n", " author_ids ... issn \\\n", "0 57195404384;57208745688;55837734100;6506839198... ... None \n", "1 57389610600;57201605786;56439247500 ... None \n", "2 57239241400;57447667900;55875409300 ... 09521976 \n", "\n", " source_id eIssn aggregationType volume issueIdentifier \\\n", "0 21100389511 23524847 Journal 8 None \n", "1 21100389511 23524847 Journal 8 None \n", "2 24182 None Journal 110 None \n", "\n", " article_number pageRange \\\n", "0 None 3221-3233 \n", "1 None 166-171 \n", "2 104707 None \n", "\n", " description \\\n", "0 Photovoltaic (PV) power has became an attracti... \n", "1 Due to Qatar's increasing population and elect... \n", "2 The purpose of feeder-level energy disaggregat... \n", "\n", " authkeywords citedby_count openaccess \\\n", "0 Machine learning | PV power forecasting | Rene... 0 1 \n", "1 Climate change | Demand side management | Desi... 1 1 \n", "2 Behind-the-meter PV generation | Deep neural n... 0 0 \n", "\n", " fund_acr fund_no fund_sponsor \n", "0 None undefined None \n", "1 QNRF NPRP12S-0212-190073 Qatar National Research Fund \n", "2 None undefined None \n", "\n", "[3 rows x 34 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sample = pd.read_pickle(\"df_sample.pkl\")\n", "df_sample.head(3)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1028, 28)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_ab = pd.read_pickle(\"df_ab.pkl\")\n", "df_ab.shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# publication type\n", "# \"J\" = Journal\n", "# \"B\" = Book\n", "# \"S\" = Series\n", "# \"P\" = Patent\n", "dict_pubtype = {'ar': 'J', # 'Article',\n", " 'ab': 'J', # 'Abstract Report',\n", " 'bk': 'B', # 'Book',\n", " 'ch': 'B', # 'Book Chapter',\n", " 'bz': 'J', # 'Business Article',\n", " 'cp': 'J', # 'Conference Paper',\n", " 'cr': 'J', # 'Conference Review',\n", " 'dp': 'J', # 'Data Paper ',\n", " 'ed': 'S', # 'Editorial',\n", " 'er': 'J', # 'Erratum',\n", " 'le': 'J', # 'Letter',\n", " 'no': 'S', # 'Note',\n", " 'pr': 'S', # 'Press Release',\n", " 'rp': 'J', # 'Report',\n", " 'tb': 'J', # 'Retracted',\n", " 're': 'J', # 'Review',\n", " 'sh': 'J', # 'Short Survey'\n", " 'ip': 'P' # 'Patent' \n", " }\n", "\n", "# documentation type\n", "dict_docutype = {'ar': 'Article',\n", " 'ab': 'Abstract Report',\n", " 'bk': 'Book',\n", " 'ch': 'Book Chapter',\n", " 'bz': 'Business Article',\n", " 'cp': 'Conference Paper',\n", " 'cr': 'Conference Review',\n", " 'dp': 'Data Paper ',\n", " 'ed': 'Editorial',\n", " 'er': 'Erratum',\n", " 'le': 'Letter',\n", " 'no': 'Note',\n", " 'pr': 'Press Release',\n", " 'rp': 'Report',\n", " 'tb': 'Retracted',\n", " 're': 'Review',\n", " 'sh': 'Short Survey',\n", " 'ip': 'Patent'\n", " }\n", "\n", "# text cleaning\n", "dict_clean_text = {\"&\": \"&\", \n", " \" \": \" \", \n", " \"<\": \"<\",\n", " \">\": \">\"\n", " }\n", "def get_clean_text(text):\n", " for k, v in dict_clean_text.items():\n", " text = text.replace(k, v)\n", " return text" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# all\n", "\n", "import os, calendar\n", "\n", "with open(filename, \"w\") as datafile:\n", " datafile.write(\"FN Clarivate Analytics Web of Science\\nVR 1.0\")\n", " \n", " for art in df_ab.index:\n", " df_row = df_ab.loc[art]\n", " \n", " # 1. PT: publication type\n", " pubtype_ = dict_pubtype[df_row['PT']]\n", " datafile.write(f\"\\nPT {pubtype_}\")\n", " \n", " # 2. AU: author names\n", " author_name_ = df_row['AU']\n", " author_name = \"\\nAU \"\n", " if all(author_name_):\n", " author_name += \"\\n \".join(author_name_)\n", " \n", " datafile.write(author_name)\n", " \n", " # 3. AF: affiliations\n", " affiliation_ = df_row['AF']\n", " affiliation = \"\\nAF \"\n", " if all(affiliation_):\n", " affiliation += \"\\n \".join(affiliation_)\n", " datafile.write(affiliation)\n", " \n", " # 4. TI: document title\n", " title = '\\nTI '\n", " title_ = df_row['TI']\n", " if title_:\n", " title += get_clean_text(title_)\n", " datafile.write(title)\n", " \n", " # 5. SO: publication name\n", " so = '\\nSO '\n", " so_ = df_row['SO']\n", " if so_:\n", " so += get_clean_text(so_)\n", " datafile.write(so)\n", " \n", " # 6. LA : Language\n", " datafile.write(\"\\nLA \" + df_row[\"LA\"])\n", " \n", " # 7. DT : Document Type\n", " docutype_ = dict_docutype[df_row['DT']]\n", " datafile.write(f\"\\nDT {docutype_}\")\n", " \n", " # 8. DE : Author Keywords\n", " de_ = df_row['DE']\n", " datafile.write(f\"\\nDE {de_}\")\n", " \n", " # 9. ID : Keyword Plus\n", " id_ = df_row['ID']\n", " datafile.write(f\"\\nID {id_}\")\n", " \n", " # 10. AB: Abstract\n", " ab_ = df_row['AB']\n", " datafile.write(f\"\\nAB {ab_}\")\n", " \n", " # 11. C1 : Author Address\n", " c1_ = df_row['C1']\n", " c1 = [f\"[{'; '.join(c[1])}] {', '.join(c[2].split(', ')[1:])}\" for c in c1_]\n", " c1 = \".\\n \".join(c1) + \".\"\n", " datafile.write(\"\\nC1 \" + c1)\n", " \n", " # 12. RP : Reprint Address\n", " datafile.write(\"\\nRP None\")\n", "\n", " # 13. EM : E-mail Address\n", " datafile.write(\"\\nEM None\")\n", " \n", " # 14. CR : Cited References\n", " cr_ = df_row[\"CR\"]\n", " if len(cr_) > 0:\n", " cr_[0] = \" \" + cr_[0][2:]\n", " cr_ = [c for c in cr_ if len(c) > 5]\n", " cr_ = [c.lstrip(\" \") for c in cr_ if (c[3] != ',') or (\"DOI\" in c)]\n", " datafile.write(\"\\nCR \"+\"\\n \".join(cr_))\n", " \n", " # 15. NR : Cited Reference Count\n", " datafile.write(f\"\\nNR {df_row['NR']}\")\n", " \n", " # 16. TC : Web of Science Core Collection Times Cited Count\n", " datafile.write(f\"\\nTC {df_row['TC']}\")\n", " \n", " # 17. Z9 : Total Times Cited Count\n", " z9_ = df_row['Z9']\n", " datafile.write(f\"\\nZ9 {z9_}\")\n", " \n", " # 18. U1 : Usage Count (Last 180 Days)\n", " # 19. U2 : Usage Count (Since 2013)\n", " # 20. PU : Publisher = ELSEVIER SCI LTD\n", " # 21. PI : Publisher City = OXFORD\n", " # 22. PA : Publisher Address = THE BOULEVARD, LANGFORD LANE, KIDLINGTON, OXFORD OX5 1GB, OXON, ENGLAND\n", " # 23. SN : International Standard Serial Number (ISSN) = 0959-6526\n", " sn_ = df_row[\"SN\"]\n", " datafile.write(f\"\\nSN {sn_}\")\n", " \n", " # 24. EI : Electronic International Standard Serial Number (eISSN) = 1879-1786\n", " # 25. J9 : 29-Character Source Abbreviation = J CLEAN PROD\n", " j9_ = df_row[\"J9\"]\n", " datafile.write(f\"\\nJ9 {j9_}\")\n", " \n", " # 26. JI : ISO Source Abbreviation = J. Clean Prod.\n", " ji_ = df_row[\"JI\"]\n", " datafile.write(f\"\\nJI {ji_}\")\n", " \n", " # 27. PD : Publication Date = JUL 1\n", " pd_ = df_row[\"PD\"]\n", " datafile.write(f\"\\nPD {pd_}\")\n", " \n", " # 28. PY : Publication Year = 2020\n", " py_ = df_row[\"PY\"]\n", " datafile.write(f\"\\nPY {py_}\")\n", " \n", " # 29. VL : Volumn = 260\n", " vl_ = df_row[\"VL\"]\n", " datafile.write(f\"\\nVL {vl_}\")\n", " \n", " # 30. AR : Article Number = 121059\n", " ar_ = df_row[\"AR\"]\n", " datafile.write(f\"\\nAR {ar_}\")\n", " \n", " # 31. DI : Digital Object Identifier = 10.1016/j.jclepro.2020.121059\n", " doi_ = df_row[\"DI\"]\n", " datafile.write(f\"\\nDI {doi_}\")\n", " \n", " # 32. PG : Page Count = 14\n", " # 33. WC : Web of Science Categories = Green & Sustainable Science & Technology; Engineering, Environmental; Environmental Sciences\n", " # 34. SC : Research Areas = Science & Technology - Other Topics; Engineering; Environmental Sciences & Ecology\n", " sc_ = df_row[\"SC\"]\n", " datafile.write(f\"\\nSC {sc_}\")\n", " \n", " # end \n", " datafile.write(f\"\\nER\\n\")\n", " \n", " datafile.write(\"\\nEF\\n\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }