python [papers-anno2md] macOS Papers.app脚本:将论文PDF注释转换为Markdown注释并添加到常规注释部分
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python [papers-anno2md] macOS Papers.app脚本:将论文PDF注释转换为Markdown注释并添加到常规注释部分相关的知识,希望对你有一定的参考价值。
#!/usr/local/bin/python
# encoding: utf-8
"""
*Copy the general notes from selected papers in Papers.app to the clipboard*
:Author:
David Young
:Date Created:
September 4, 2017
Usage:
papers-copy-selected-papers-notes
Options:
-h, --help show this help message
"""
################# GLOBAL IMPORTS ####################
import sys
import os
import json
from os.path import expanduser
import sqlite3
import psutil
import base64
from polyglot.markdown import translate
from subprocess import Popen, PIPE, STDOUT
from operator import itemgetter
from fundamentals import tools
# Path of the Papers 3 library database that main() opens with sqlite3; the
# leading "~/" is replaced there with the current user's home directory.
# NOTE(review): the library folder name looks machine-specific -- confirm it
# matches the local Papers 3 installation before running.
PAPERS_DB_PATH = "~/Library/Application Support/Papers3/Library_Data/C02N63X6G3QP__D741EA9B-BEF1-44A5-8087-4481F4EABAF3/Library.papers3/Database.papersdb"
def dict_factory(cursor, row):
    """*sqlite3 row factory that returns each result row as a dictionary keyed by column name*

    **Key Arguments:**
        - ``cursor`` -- the cursor that produced the row; the first item of each ``cursor.description`` entry is used as the key
        - ``row`` -- the sequence of values for one result row

    **Return:**
        - ``record`` -- dictionary mapping column name to the matching value in ``row``
    """
    record = dict(
        (column[0], row[position])
        for position, column in enumerate(cursor.description)
    )
    return record
def _papers_app_is_open():
    """*report whether a running process looks like Papers.app*

    **Return:**
        - ``isOpen`` -- True if some process has "papers" in its name and ``Papers.app`` in its executable path
    """
    for proc in psutil.process_iter():
        try:
            if "papers" in proc.name().lower() and "Papers.app" in proc.exe():
                return True
        except psutil.Error:
            # PROCESSES CAN EXIT OR REFUSE INSPECTION WHILE BEING SCANNED;
            # SKIP THEM INSTEAD OF ABORTING THE WHOLE SCRIPT
            continue
    return False


def main(arguments=None):
    """
    *The main function used when ``papers-copy-selected-papers-notes`` is run as a single script from the cl*

    Looks up the ``notes`` column of the publications currently selected in
    Papers.app and writes them, UTF-8 encoded and separated by blank lines,
    to stdout.

    **Key Arguments:**
        - ``arguments`` -- command-line arguments handed to ``fundamentals.tools`` (default *None*)

    **Return:**
        - ``allNotes`` -- the notes joined with blank lines, or ``None`` if Papers.app is not running
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # LOG THE PARSED CL ARGUMENTS. THE ORIGINAL `EXEC` CALLS THAT TURNED EACH
    # ARGUMENT INTO A LOCAL VARIABLE WERE DROPPED: NOTHING READ THOSE
    # VARIABLES AND A VALUE CONTAINING A QUOTE BROKE (OR INJECTED INTO) THE
    # EXECUTED CODE
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        log.debug('%s = %s' % (varname, val,))

    papersDB = PAPERS_DB_PATH.replace("~/", expanduser("~") + "/")

    # DON'T CONTINUE IF PAPERS APP IS NOT OPEN
    if not _papers_app_is_open():
        log.warning(
            "Papers.app is not open, open the app before running this script")
        return

    # EMPTY STRINGS MEAN "NOTHING SELECTED" - DROP THEM
    paperUuids = [
        u for u in grab_selected_paper_uuids_with_applescript(log) if u]

    allNotes = []
    if paperUuids:
        # CONNECT TO PAPERS DATABASE
        conn = sqlite3.connect(papersDB)
        try:
            conn.row_factory = dict_factory
            cursor = conn.cursor()
            # ONE `?` PLACEHOLDER PER UUID SO SQLITE DOES THE QUOTING
            placeholders = ", ".join("?" * len(paperUuids))
            query = "select notes from Publication where UUID in (%s);" % (
                placeholders,)
            cursor.execute(query, paperUuids)
            # PAPERS WITHOUT NOTES HAVE A NULL COLUMN - SKIP THEM SO THE JOIN
            # BELOW DOES NOT FAIL ON `None`
            allNotes = [r["notes"]
                        for r in cursor.fetchall() if r["notes"] is not None]
            cursor.close()
        finally:
            conn.close()

    allNotes = (u"\n\n").join(allNotes)

    # WRITE UTF-8 BYTES DIRECTLY (`sys.stdout.buffer` ON PYTHON 3, PLAIN
    # `sys.stdout` ON PYTHON 2) SO THE OUTPUT MATCHES THE OLD PRINT STATEMENT
    output = allNotes.encode("utf-8") + b"\n"
    getattr(sys.stdout, "buffer", sys.stdout).write(output)

    return allNotes
def grab_selected_paper_uuids_with_applescript(
        log):
    """*grab a list of the UUIDs of the selected papers via applescript*

    The AppleScript is piped to ``osascript`` on stdin and asks Papers for
    the ``id`` of every selected publication in the front library window.

    **Key Arguments:**
        - ``log`` -- logger

    **Return:**
        - ``paperUuids`` -- list of uuid strings of the selected publications (empty if nothing is selected or osascript fails)
    """
    log.info('starting the ``grab_selected_paper_uuids_with_applescript`` function')

    applescript = """
    tell application "Papers"
        set uuidList to {}
        set allSelected to selected publications of front library window
        repeat with i from 1 to count of allSelected
            set end of uuidList to (id of item i of allSelected)
        end repeat
        return uuidList
    end tell
    """

    # OSASCRIPT READS THE SCRIPT FROM STDIN WHEN NO FILE IS GIVEN, SO NO SHELL
    # (AND NO HEREDOC) IS NEEDED
    try:
        p = Popen(["osascript"], stdin=PIPE, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate(applescript.encode("utf-8"))
    except OSError as e:
        log.warning('could not run osascript: %s' % (e,))
        return []

    if p.returncode != 0:
        log.warning('osascript failed to read the selection from Papers.app: %s' % (
            stderr.decode("utf-8", "replace").strip(),))

    # SPLIT UUIDS INTO LIST - OSASCRIPT PRINTS LIST ITEMS SEPARATED BY ", "
    paperUuids = stdout.decode("utf-8").strip()
    paperUuids = [u for u in paperUuids.split(", ") if u]

    log.info('completed the ``grab_selected_paper_uuids_with_applescript`` function')
    return paperUuids
# RUN THE COMMAND-LINE ENTRY POINT ONLY WHEN EXECUTED AS A SCRIPT, NOT ON IMPORT
if __name__ == "__main__":
    main()
#!/usr/local/bin/python
# encoding: utf-8
"""
*Convert Papers PDF Annotations to Markdown Notes and Add to the General Notes Section for the Publication*
:Author:
David Young
:Date Created:
August 30, 2017
Usage:
papers-anno2md
Options:
-h, --help show this help message
"""
################# GLOBAL IMPORTS ####################
import sys
import os
import json
from os.path import expanduser
import sqlite3
import psutil
import base64
from polyglot.markdown import translate
from subprocess import Popen, PIPE, STDOUT
from operator import itemgetter
from fundamentals import tools
# Path of the Papers 3 library database that main() opens with sqlite3; the
# leading "~/" is replaced there with the current user's home directory.
# NOTE(review): the library folder name looks machine-specific -- confirm it
# matches the local Papers 3 installation before running.
PAPERS_DB_PATH = "~/Library/Application Support/Papers3/Library_Data/C02N63X6G3QP__D741EA9B-BEF1-44A5-8087-4481F4EABAF3/Library.papers3/Database.papersdb"
def dict_factory(cursor, row):
    """*build a ``{column name: value}`` dictionary for one sqlite3 result row*

    **Key Arguments:**
        - ``cursor`` -- the cursor the row came from; column names are read from ``cursor.description``
        - ``row`` -- the values of the row, in column order

    **Return:**
        - ``asDict`` -- the row as a dictionary keyed by column name
    """
    return {meta[0]: row[i] for i, meta in enumerate(cursor.description)}
def _papers_app_is_open():
    """*report whether a running process looks like Papers.app*

    **Return:**
        - ``isOpen`` -- True if some process has "papers" in its name and ``Papers.app`` in its executable path
    """
    for proc in psutil.process_iter():
        try:
            if "papers" in proc.name().lower() and "Papers.app" in proc.exe():
                return True
        except psutil.Error:
            # PROCESSES CAN EXIT OR REFUSE INSPECTION WHILE BEING SCANNED;
            # SKIP THEM INSTEAD OF ABORTING THE WHOLE SCRIPT
            continue
    return False


def main(arguments=None):
    """
    *The main function used when ``papers-anno2md`` is run as a single script from the cl*

    Collects every annotation from Papers.app, converts them to markdown
    notes per publication and writes notes plus a citation into the
    ``notes`` column of the Papers database.

    **Key Arguments:**
        - ``arguments`` -- command-line arguments handed to ``fundamentals.tools`` (default *None*)

    **Return:**
        - ``None``
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # LOG THE PARSED CL ARGUMENTS. THE ORIGINAL `EXEC` CALLS THAT TURNED EACH
    # ARGUMENT INTO A LOCAL VARIABLE WERE DROPPED: NOTHING READ THOSE
    # VARIABLES AND A VALUE CONTAINING A QUOTE BROKE (OR INJECTED INTO) THE
    # EXECUTED CODE
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        log.debug('%s = %s' % (varname, val,))

    papersDB = PAPERS_DB_PATH.replace("~/", expanduser("~") + "/")

    # DON'T CONTINUE IF PAPERS APP IS NOT OPEN
    if not _papers_app_is_open():
        log.warning(
            "Papers.app is not open, open the app before running this script")
        return

    papersJson, paperUuids = grab_papers_annotations_with_applescript(log)
    noteDict = compile_markdown_notes_from_annotations(
        papersJson, paperUuids, log)

    # CONNECT TO PAPERS DATABASE
    conn = sqlite3.connect(papersDB)
    try:
        conn.row_factory = dict_factory
        cursor = conn.cursor()
        add_notes_and_citations_to_papers(cursor, noteDict, log)
        # COMMIT EXPLICITLY - WITHOUT THIS THE UPDATES ONLY SURVIVED THANKS TO
        # PYTHON 2's IMPLICIT COMMIT BEFORE DDL STATEMENTS AND ARE ROLLED BACK
        # ON PYTHON 3.6+
        conn.commit()
        cursor.close()
    finally:
        conn.close()

    return
def grab_papers_annotations_with_applescript(
        log):
    """*grab all annotations from the papers database via applescript - there's no easy way to order the annotations correctly when taken directly from the database*

    The AppleScript returns one flat list: the json string of every
    annotation, the marker ``||||||``, then the id of the publication each
    annotation belongs to. ``osascript`` prints that list comma-separated,
    which is what gets parsed here.

    **Key Arguments:**
        - ``log`` -- logger

    **Return:**
        - ``papersJson`` -- list of json properties for every annotation
        - ``paperUuids`` -- list of uuids the pdf/publication annotation belong to (same length as ``papersJson``)

    **Raises:**
        - ``ValueError`` -- if the osascript output does not contain the ``||||||`` marker (e.g. the script failed)
    """
    log.info('starting the ``grab_papers_annotations_with_applescript`` function')

    applescript = """
    tell application "Papers"
        set thisList to {}
        set sourceList to {}
        set allAnnotations to every annotation item
        repeat with i from 1 to count of allAnnotations
            set end of thisList to json string of item i of allAnnotations
            set end of sourceList to (id of publication item of (source file of item i of allAnnotations))
        end repeat
        return thisList & "||||||" & sourceList
    end tell
    """

    # OSASCRIPT READS THE SCRIPT FROM STDIN WHEN NO FILE IS GIVEN, SO NO SHELL
    # (AND NO HEREDOC) IS NEEDED
    p = Popen(["osascript"], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    stdout, stderr = p.communicate(applescript.encode("utf-8"))
    stdout = stdout.decode("utf-8")

    separator = "||||||"
    if separator not in stdout:
        # FAIL LOUDLY: CARRYING ON WITH NO ANNOTATIONS WOULD LET THE CALLER
        # OVERWRITE EXISTING NOTES WITH CITATION-ONLY TEXT
        raise ValueError(
            'osascript did not return the annotation list from Papers.app: %s' % (
                stderr.decode("utf-8", "replace").strip(),))

    # JSON PROPERTIES FOR EVERY ANNOTATION AND THE UUID OF THE PDF/PUBLICATION
    # IT BELONGS TO. SPLIT ON THE *LAST* MARKER SO ANNOTATION TEXT THAT
    # HAPPENS TO CONTAIN THE MARKER CANNOT BREAK THE UNPACKING
    papersJson, paperUuids = stdout.rsplit(separator, 1)

    # THE JSON HALF ENDS WITH THE LIST COMMA THAT PRECEDED THE MARKER
    papersJson = papersJson.strip().rstrip(",")
    papersJson = json.loads("[%s]" % (papersJson,))

    # THE UUID HALF STARTS WITH A LIST COMMA AND ENDS WITH OSASCRIPT'S
    # NEWLINE; STRIP BOTH SO THE LAST UUID IS NOT LEFT WITH A TRAILING "\n"
    # (WHICH KEPT ITS ANNOTATION FROM MATCHING ANY PUBLICATION)
    paperUuids = [
        u for u in paperUuids.replace(" ", "").strip().split(",") if u]

    log.info('completed the ``grab_papers_annotations_with_applescript`` function')
    return papersJson, paperUuids
def compile_markdown_notes_from_annotations(
        papersJson,
        paperUuids,
        log):
    """*iterate through all annotations and compile markdown style notes for each publication*

    Each annotation dictionary is updated in place with ``uuid``,
    ``annIndex`` and (when a conversion applies) ``md`` keys. Annotations are
    then ordered by page number and by position on the page before being
    grouped per publication.

    **Key Arguments:**
        - ``papersJson`` -- list of json properties for every annotation
        - ``paperUuids`` -- list of uuids the pdf/publication annotation belong to (same length as ``papersJson``)
        - ``log`` -- logger

    **Return:**
        - ``noteDict`` -- a dictionary of markdown notes with publication uuids as keys
    """
    log.info('starting the ``compile_markdown_notes_from_annotations`` function')

    md = translate(
        log=log,
        settings=False
    )

    if len(paperUuids) != len(papersJson):
        # `zip` BELOW STOPS AT THE SHORTER LIST; SAY SO RATHER THAN FAILING
        # LATER ON ANNOTATIONS THAT NEVER RECEIVED A UUID
        log.warning('%s annotations but %s publication uuids - unmatched annotations are ignored' % (
            len(papersJson), len(paperUuids),))

    # HIGHLIGHT COLOUR CODE -> MARKDOWN CONVERTER (CODES AS USED BY THE
    # ORIGINAL IF/ELIF CHAIN)
    colourConverters = {
        1: md.blockquote,
        2: lambda text: md.codeblock(text, "xxx"),
        3: lambda text: text.strip(),
        4: md.bold,
        5: lambda text: md.header(text, level=2),
        6: md.em,
    }

    # ASSIGN CORRECT PDF UUID & CONVERT ANNOTATIONS TO MD
    keyed = []
    for u, j in zip(paperUuids, papersJson):
        j["uuid"] = u

        # SMALLEST SECOND COORDINATE OVER ALL RECTANGLES, ELSE THE `top` VALUE
        # NOTE(review): rectangle strings are assumed to look like
        # "{{x, y}, {w, h}}" - confirm against the Papers json
        top = 1000000.
        if "rectangles" in j:
            for r in j["rectangles"]:
                this = float(r.split("}, {")[0].split(", ")[1])
                if this < top:
                    top = this
        elif "top" in j:
            top = j["top"]
        page_nr = j["page_nr"]
        # INVERT SO THAT LARGER ORIGINAL VALUES SORT FIRST
        top = 100000000. - top
        j["annIndex"] = "%s-%s" % (page_nr, top)

        # SORT ON NUMBERS, NOT ON THE `annIndex` STRING: AS TEXT, PAGE "10"
        # SORTS BEFORE PAGE "2"
        try:
            pageOrder = float(page_nr)
        except (TypeError, ValueError):
            # NON-NUMERIC PAGE VALUES GO TO THE END
            pageOrder = float("inf")
        keyed.append(((pageOrder, top), j))

        annType = j.get('type')
        if annType == 2:
            text = j.get("text", "")
            if len(text.strip()) == 0:
                j["md"] = ""
            else:
                # UNKNOWN COLOURS GET NO `md` KEY AND ARE LEFT OUT OF THE NOTES
                convert = colourConverters.get(j.get('color'))
                if convert is not None:
                    j["md"] = convert(text)
        elif annType == 0:
            if "contents" not in j:
                j["md"] = ""
            else:
                j["md"] = j['contents'].strip()
        elif annType == 53:
            j["md"] = "**!!!TAKE A SCREENGRAB OF ANNOTATED IMAGE(S) IN PDF, CONVERT TO MD LINK AND PASTE INTO NOTE AT IMAGE LOCATION!!!**"

    # STABLE SORT ON THE NUMERIC KEY ONLY (DICTS THEMSELVES ARE NEVER COMPARED)
    keyed.sort(key=itemgetter(0))

    noteDict = {}
    for _, j in keyed:
        notes = noteDict.setdefault(j["uuid"], [])
        if "md" in j:
            notes.append(j["md"])

    log.info('completed the ``compile_markdown_notes_from_annotations`` function')
    return noteDict
def add_notes_and_citations_to_papers(
        cursor,
        noteDict,
        log):
    """*add markdown notes and citations to the general notes section of papers (via sqlite database)*

    For every publication with a non-null subtitle (other than "MISSING")
    and a non-null author/year string, the ``notes`` column is overwritten
    with the compiled markdown notes (if any) followed by a markdown
    citation. The ``_upd_log_Publication_notes`` trigger is dropped for the
    duration of the updates and recreated afterwards.

    **Key Arguments:**
        - ``cursor`` -- the database cursor (rows must be returned as dictionaries keyed by column name)
        - ``noteDict`` -- a dictionary of markdown notes with publication uuids as keys
        - ``log`` -- logger

    **Return:**
        - ``None``
    """
    log.info('starting the ``add_notes_and_citations_to_papers`` function')

    def _execute_logged(sql, params=()):
        # BEST-EFFORT EXECUTION: A FAILING STATEMENT IS LOGGED AND SKIPPED
        try:
            cursor.execute(sql, params)
        except sqlite3.Error as e:
            log.warning('%s' % (e,))

    ratingStars = {
        5: u"★★★★★",
        4: u"★★★★☆",
        3: u"★★★☆☆",
        2: u"★★☆☆☆",
        1: u"★☆☆☆☆",
    }

    # GRAB ALL UUIDs OF ALL PAPERS
    sqlQuery = """select uuid from Publication where subtitle is not null and subtitle != 'MISSING' and author_year_string is not null;"""
    cursor.execute(sqlQuery)
    paperMeta = cursor.fetchall()

    # FOR EACH PAPER, GRAB METADATA TO FORM CITATION AND
    # ADD THE PRE-GENERATED NOTES TO THE METADATA DICTS
    for p in paperMeta:
        puid = p["uuid"]

        # VALUES ARE BOUND WITH `?` PLACEHOLDERS SO SQLITE DOES THE QUOTING
        sqlQuery = """select remote_id as url from (select * from SyncEvent where device_ID = ? and remote_id like 'http%' order by updated_at) group by device_ID;"""
        cursor.execute(sqlQuery, (puid,))
        results = cursor.fetchall()
        p["url"] = results[0]["url"] if results else None

        sqlQuery = """select title, author_string, author_year_string, type, subtype, publisher, label, rating, subtitle, summary, tag_string, kind from Publication where uuid = ?;"""
        cursor.execute(sqlQuery, (puid,))
        results = cursor.fetchall()
        if results:
            p.update(results[0])

        p["author_year_string"] = p["author_year_string"].replace(
            "(", "").replace(")", "")
        # `author_string` IS NOT FILTERED FOR NULL BY THE QUERY ABOVE
        p["author_string"] = (p["author_string"] or u"").title()
        p["citeKey"] = ("#%(title)s%(author_string)s" % p).lower().replace(" ", "")
        # NOTE(review): with the brackets already removed this slice drops the
        # final character - kept as in the original; the value is not used in
        # this function
        p["year"] = p["author_year_string"][-5:-1]

        # RATINGS 1-5 BECOME STARS, OTHER TRUTHY VALUES ARE LEFT AS THEY ARE
        if p["rating"]:
            p["rating"] = ratingStars.get(p["rating"], p["rating"])
        else:
            p["rating"] = ""

        if p["url"]:
            p["link"] = "*[%(title)s](%(url)s)*" % p
        else:
            p["link"] = "*%(title)s*" % p

        # THE THREE CITATION STYLES ONLY DIFFER IN WHAT FOLLOWS THE SUBTITLE
        if "Journal" in p["subtitle"] or p["subtitle"] in ["Draft Paper", "Book", "Book Chapter"]:
            source = u"%(author_year_string)s, %(link)s, %(publisher)s."
        elif p["subtitle"] in ["Podcast", "Video"]:
            source = u"%(author_string)s, %(link)s, %(publisher)s."
        else:
            source = u"%(link)s, %(publisher)s."
        template = u"\n[Not Cited][%(citeKey)s]\n[%(citeKey)s]: %(rating)s **%(subtitle)s**: " + \
            source + u"\n"
        p["cite"] = template % p

        p["cite"] = p["cite"].replace(", None.", "").replace(
            "..", ".").replace("Journal Abstract", "Journal Article").replace(" Et Al.", " et al.").replace(" , ", " ")

        p["mdNote"] = ""
        if p["uuid"] in noteDict:
            notes = ("\n\n").join(noteDict[p["uuid"]])
            # COLLAPSE ANY RUN OF BLANK LINES DOWN TO A SINGLE BLANK LINE
            while "\n\n\n" in notes:
                notes = notes.replace("\n\n\n", "\n\n")
            p["mdNote"] = notes + "\n\n"
        # NO MANUAL QUOTE ESCAPING: THE NOTE IS BOUND AS A PARAMETER BELOW
        p["mdNote"] = (p["mdNote"] + p["cite"]).strip()

    # NOW UPDATE EACH PAPER WITH THE NOTES AND CITATION INTO THE GENERAL
    # NOTE SECTION IN PAPERS APP. THE TRIGGER IS REMOVED FIRST AND ALWAYS
    # RECREATED, EVEN IF AN UPDATE RAISES SOMETHING UNEXPECTED
    _execute_logged("DROP TRIGGER IF EXISTS _upd_log_Publication_notes;")
    try:
        for p in paperMeta:
            _execute_logged(
                """UPDATE Publication SET notes = ? where UUID = ?;""", (p["mdNote"], p["uuid"]))
    finally:
        # SINGLE `%` IN THE STRFTIME FORMATS: THIS STRING IS NOT %-FORMATTED
        # BY PYTHON, AND SQLITE READS `%%` AS A LITERAL PERCENT SIGN, SO THE
        # FORMER `%%s` PRODUCED THE TEXT "%s" INSTEAD OF A TIMESTAMP
        _execute_logged("""CREATE TRIGGER _upd_log_Publication_notes AFTER UPDATE OF notes ON Publication
WHEN enableTransactionLog() AND (NEW."searchresult" = 0) AND (OLD."notes" IS NULL AND NEW."notes" IS NOT NULL OR OLD."notes" IS NOT NULL AND NEW."notes" IS NULL OR OLD."notes" <> NEW."notes") BEGIN
DELETE FROM changeLog WHERE modUUID = NEW."uuid" AND modColumn="notes" AND modType=2; -- delete old changes immediately, for cleanup
INSERT INTO changeLog (modifiedDate, modTable, modUUID, modType, modColumn, modValue, device, dbRevision) VALUES ((strftime("%s", "now") + strftime("%f", "now") - strftime("%S", "now")), "Publication", NEW."uuid", 2, "notes", NEW."notes", device(), dbRevision());END""")

    # MAKE THE WRITES PERMANENT; PYTHON 3's SQLITE3 NO LONGER COMMITS
    # IMPLICITLY WHEN A DDL STATEMENT FOLLOWS THE UPDATES
    cursor.connection.commit()

    log.info('completed the ``add_notes_and_citations_to_papers`` function')
    return None
# RUN THE COMMAND-LINE ENTRY POINT ONLY WHEN EXECUTED AS A SCRIPT, NOT ON IMPORT
if __name__ == "__main__":
    main()
以上是关于python [papers-anno2md] macOS Papers.app脚本:将论文PDF注释转换为Markdown注释并添加到常规注释部分的主要内容,如果未能解决你的问题,请参考以下文章