Commit 0ba09d102565cc5e2871cfef8bafdb1cb86b88c7

Don't use dashes ("-") in Python file names
timApp/documentprinter.py
(3 / 3)
  
266266 # TODO: getting the path could probably be done with more finesse
267267 cwd = os.getcwd()
268268 filters = [
269 os.path.join(cwd, "pandoc-inlinestylesfilter.py"),
270 os.path.join(cwd, "pandoc-imagefilepathsfilter.py"),
271 # os.path.join(cwd, "pandoc-headernumberingfilter.py") # handled allready when making md
269 os.path.join(cwd, "pandoc_inlinestylesfilter.py"),
270 os.path.join(cwd, "pandoc_imagefilepathsfilter.py"),
271 # os.path.join(cwd, "pandoc_headernumberingfilter.py") # handled allready when making md
272272 ]
273273
274274 src = self.get_content(plugins_user_print=plugins_user_print, target_format=target_format.value)
timApp/pandoc-headernumberingfilter.py
(0 / 23)
  
1#!/usr/bin/env python3
2
3"""
4Pandoc filter to remove numbering from header elements that have the .nonumber class.
5"""
6
7from pandocfilters import toJSONFilter, Header, RawInline
8
9
def remove_header_numbering(key, value, fmt, meta):
    """Replace TIM's ``nonumber`` header class with pandoc's ``unnumbered``.

    Pandoc filter action. Only ``Header`` elements are touched, and only when
    the output format is ``latex``; for everything else ``None`` is returned,
    which leaves the element unchanged.

    :param key: type name of the pandoc element being visited
    :param value: element payload; for a header ``(level, [ident, classes, kvs], contents)``
    :param fmt: output format pandoc is writing
    :param meta: document metadata (unused)
    :return: a rebuilt ``Header`` element, or ``None`` when the element is not handled
    """
    if key != 'Header' or fmt != 'latex':
        return None

    (level, [ident, classes, kvs], contents) = value

    if 'nonumber' in classes:
        # Build a new list instead of mutating the one inside ``value``, and
        # drop every occurrence of the TIM specific class (list.remove would
        # only drop the first one).
        classes = [c for c in classes if c != 'nonumber']
        if 'unnumbered' not in classes:
            classes.append('unnumbered')  # the class that pandoc understands

    return Header(level, [ident, classes, kvs], contents)
20
21
if __name__ == '__main__':
    # Executed as a script (i.e. as a pandoc filter): hand the action over to
    # pandocfilters.
    toJSONFilter(remove_header_numbering)
timApp/pandoc-imagefilepathsfilter.py
(0 / 213)
  
1#!/usr/bin/env python3
2
3"""
4Pandoc filter to convert image sources to latex graphics source paths considering
5the images location according to the set of following rules:
6
7- If an image has an absolute path that points to the TIM machine, e.g. "http://<TIM-domain>/imagepath"
8 or "<tim-domain>/imagepath", then....
9- If an image has a relative path, e.g. "/images/1239854102", then....
10- If an image points to a resource that resides at another host, simply convert the image
11 to a simple link at the output. This is due to possible copyright infringements, as the images
12 would otherwise be wrongfully copied to the output document.
13
14TODO: BETTER DOCUMENTATION
15
16"""
17import os
18import re
19import tempfile
20import urllib.request
21
22from defaultconfig import FILES_PATH
23from documentmodel.randutils import hashfunc
24
25from pandocfilters import toJSONFilter, RawInline, Image, Link, Str
26
# NOTE: the paths below are derived from the location of this file, so the
# module has to reside in the timApp root folder.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
IMAGE_ROOT = os.path.join(APP_ROOT, FILES_PATH, 'blocks')

# Protocol + hostname of this machine; None when TIM_HOST is not set.
CURRENT_HOST_MACHINE = os.environ.get('TIM_HOST')

# Replaced with the contents of the whitelist file when run as a script.
ALLOWED_EXTERNAL_HOSTS = []

PRINTING_WHITELIST_FILE = os.path.join(APP_ROOT, '.printing_whitelist.config')

# URL path prefixes and the local directories handle_images maps them to.
urlmaps = [
    {'url': '/csstatic/', 'dir': '/service/timApp/modules/cs/static/'},
    {'url': '/csgenerated/', 'dir': '/service/timApp/modules/cs/generated/'},
    {'url': '/static/', 'dir': '/service/timApp/static/'},
    {'url': '/images/', 'dir': '/tim_files/blocks/images/'},
]
46
47
48
def init_whitelist():
    """Load the whitelist of trusted external image sources.

    Makes a best-effort attempt to create the whitelist file (and its
    directory) when it does not exist yet, then reads it.

    Each entry is later used by ``handle_images`` as a pattern for
    ``re.match`` against the image URL. Blank lines are therefore skipped: an
    empty pattern matches every URL and would whitelist all hosts.

    :return: list of the stripped, non-empty lines of the whitelist file;
             an empty list when the file cannot be read
    """
    if not os.path.exists(PRINTING_WHITELIST_FILE):
        try:
            os.makedirs(os.path.dirname(PRINTING_WHITELIST_FILE))
        except OSError:
            # The directory already exists or cannot be created.
            pass

        try:
            with open(PRINTING_WHITELIST_FILE, 'a'):
                pass  # only create the empty file
        except IOError:
            pass

    try:
        with open(PRINTING_WHITELIST_FILE, 'r') as f:
            lines = f.readlines()
    except IOError:
        return []

    stripped = (line.strip() for line in lines)
    return [entry for entry in stripped if entry]
76
# Downloaded external images are stored below the OS temp directory.
TEMP_DIR_PATH = tempfile.gettempdir()
DOWNLOADED_IMAGES_ROOT = os.path.join(TEMP_DIR_PATH, 'tim-img-dls')

# Name of the per-document download folder; filled in by handle_images from
# the 'texdocid' metadata entry the first time it is needed.
texdocid = None
82
83
def _map_url_path_to_file(url_path):
    """Translate a TIM URL path to a local file path using ``urlmaps`` ('' when unmapped)."""
    for urlmap in urlmaps:
        prefix = urlmap['url']
        if not url_path.startswith(prefix):
            continue
        base_dir = os.path.normpath(urlmap['dir'])
        # Swap only the leading prefix (str.replace would also rewrite later
        # occurrences of it) and collapse any ".." segments.
        candidate = os.path.normpath(urlmap['dir'] + url_path[len(prefix):])
        # The URL comes from document content: refuse paths that climb out of
        # the mapped directory.
        if candidate.startswith(base_dir + os.sep):
            return candidate
        return ''
    return ''


def _download_image(url, url_path, meta):
    """Fetch *url* into the per-document download folder; return the local path or None on failure."""
    global texdocid  # cached folder name, shared by all images of the document
    if not texdocid:
        m = meta.get('texdocid', None)
        if m:
            texdocid = str(m.get('c', 'xx'))

    # Fall back to the same default the metadata lookup uses, instead of
    # passing None to os.path.join when the document has no 'texdocid'.
    images_root = os.path.join(DOWNLOADED_IMAGES_ROOT, texdocid or 'xx')

    # Unique file name: hash of the whole URL plus the extension of the URL
    # path (taking it from the path keeps query strings out of the file name).
    _, ext = os.path.splitext(url_path)
    img_dl_path = os.path.join(images_root, str(hashfunc(url)) + ext)

    try:
        os.makedirs(images_root, exist_ok=True)
        if not os.path.exists(img_dl_path):
            urllib.request.urlretrieve(url, img_dl_path)
    except Exception:
        # Best effort: whatever went wrong, the caller falls back to a link.
        return None

    return img_dl_path.replace('\\', '/')  # UNIX form for pandoc/LaTeX


def handle_images(key, value, fmt, meta):
    """Pandoc filter action that resolves image sources for LaTeX output.

    Only ``Image`` elements are handled, and only when the output format is
    ``latex``; for everything else ``None`` is returned, which leaves the
    element unchanged. For a handled image, the first rule that applies wins:

    1. The URL has no host, or its host equals the host of ``TIM_HOST``, and
       its path maps (via ``urlmaps``) to an existing local file: the image is
       pointed at that file.
    2. The URL matches an entry of ``ALLOWED_EXTERNAL_HOSTS``: the image is
       downloaded to a temp folder and pointed at the downloaded copy.
    3. Otherwise the image is replaced with a link to the URL wrapped in the
       ``\\externalimagelink`` LaTeX command.

    :param key: type name of the pandoc element being visited
    :param value: element payload; for an image ``(attrs, alt_text_inlines, [url, title])``
    :param fmt: output format pandoc is writing
    :param meta: document metadata; ``texdocid`` names the download folder
    :return: an ``Image`` element, a list of inline elements, or ``None``
    """
    if key != 'Image' or fmt != 'latex':
        return None

    # Imported here so the function does not depend on the import done in the
    # ``__main__`` guard.
    from urllib.parse import urlparse

    (attrs, alt_text_inlines, target) = value
    (url, title) = target

    parsed_url = urlparse(url)
    host = parsed_url.hostname or ''
    path = parsed_url.path or ''
    # TIM_HOST may be unset, in which case every URL with a host is external.
    curhost = urlparse(CURRENT_HOST_MACHINE or '').hostname or ''

    is_external = host != '' and host != curhost
    image_path = '' if is_external else _map_url_path_to_file(path)

    if image_path != '' and os.path.exists(image_path):
        # LaTeX uses UNIX style paths even on Windows.
        return Image(attrs, alt_text_inlines, [image_path.replace('\\', '/'), title])

    # Download images from allowed external urls to be attached to the
    # document. Empty patterns are skipped: they would match every URL.
    if any(h and re.match(h, url) for h in ALLOWED_EXTERNAL_HOSTS):
        img_dl_path = _download_image(url, path, meta)
        if img_dl_path is not None:
            return Image(attrs, alt_text_inlines, [img_dl_path, title])

    # For other external images (and failed downloads), transform the element
    # to appear as a link to the image resource in the LaTeX output.
    return [
        RawInline('latex', "\\externalimagelink{"),
        Link(attrs, [Str(url)], [url, title]),
        RawInline('latex', "}")
    ]
200
201
if __name__ == '__main__':
    # urlparse moved from the urlparse module (Python 2.7) to urllib.parse
    # (Python 3); import whichever exists so that the name is available at
    # module level while the filter runs.
    try:
        from urllib.parse import urlparse
    except ImportError:
        from urlparse import urlparse

    # Read the trusted image sources before any element is processed.
    ALLOWED_EXTERNAL_HOSTS = init_whitelist()

    toJSONFilter(handle_images)
timApp/pandoc-inlinestylesfilter.py
(0 / 78)
  
1#!/usr/bin/env python3
2
3"""
4Pandoc filter to convert class values to commands of same name in latex. Leaves
5(should leave...) ids, other, predefined classes and key-values intact.
6"""
7
8from pandocfilters import toJSONFilter, Span, Str, RawInline
9
10
def classes_to_latex_cmds(key, value, fmt, meta):
    """Pandoc filter action that turns classes and RAWTEX markers into markup.

    * ``Str`` elements, formats ``latex`` and ``html``: a string starting with
      ``RAWTEX`` opens a wrapper named by the rest of the string
      (``\\<name>{`` in latex, ``<div class="<name>">`` in html) and the
      string ``ENDRAWTEX`` closes it (``}`` / ``</div>``).
    * ``Span`` elements, format ``latex``: a span with the class
      ``hidden-print`` is removed. Otherwise every class except
      ``visible-print`` and the ``csl-no-*`` classes is turned into a LaTeX
      command of the same name wrapped around the contents, and only the
      excepted classes stay on the span.

    Any other element yields ``None``, which leaves it unchanged.

    :param key: type name of the pandoc element being visited
    :param value: element payload
    :param fmt: output format pandoc is writing
    :param meta: document metadata (unused)
    :return: replacement element(s), ``[]`` to delete the element, or ``None``
    """
    if key == 'Str' and fmt in ('latex', 'html'):
        if value.startswith("RAWTEX"):
            cls = value[6:]
            if fmt == 'latex':
                return RawInline("latex", "\\" + cls + "{")
            # The name ends up inside an HTML attribute, so escape it.
            import html
            return RawInline("html", '<div class="' + html.escape(cls, quote=True) + '">')
        if value == "ENDRAWTEX":
            return RawInline(fmt, "}" if fmt == 'latex' else "</div>")
        return None

    if key == 'Span' and fmt == 'latex':
        ([ident, classes, kvs], contents) = value

        if 'hidden-print' in classes:
            return []

        # TODO: should preserve also the predefined styles below
        not_wrapped = ('visible-print', "csl-no-emph", "csl-no-strong", "csl-no-smallcaps")
        classes_to_wrap = [c for c in classes if c not in not_wrapped]

        # Classes left on the span: unique, and in their original order (a set
        # difference would return them in an arbitrary order).
        remaining = []
        for c in classes:
            if c in not_wrapped and c not in remaining:
                remaining.append(c)

        content = wrap_with_latex_cmds(contents, classes_to_wrap)

        return Span([ident, remaining, kvs], content)

    return None
53
54
def wrap_with_latex_cmds(content, classes_to_wrap):
    """Nest *content* inside one LaTeX command per entry of *classes_to_wrap*.

    The first class becomes the outermost command. With no classes the
    content is returned as it is.

    :param content: list of inline elements to wrap
    :param classes_to_wrap: names of the LaTeX commands to apply
    :return: list of inline elements
    """
    wrapped = content
    # Apply the last class first so that the first one ends up outermost.
    for cls in reversed(classes_to_wrap):
        opening = latex("\\%s{" % cls)
        closing = latex("}")
        wrapped = [opening] + wrapped + [closing]
    return wrapped
70
71
def latex(content):
    """Return *content* as a pandoc ``RawInline`` element of format ``latex``."""
    raw_format = 'latex'
    return RawInline(raw_format, content)
74
75
if __name__ == '__main__':
    # Executed as a script (i.e. as a pandoc filter): hand the action over to
    # pandocfilters.
    toJSONFilter(classes_to_latex_cmds)
timApp/pandoc_headernumberingfilter.py
(23 / 0)
  
1#!/usr/bin/env python3
2
3"""
4Pandoc filter to remove numbering from header elements that have the .nonumber class.
5"""
6
7from pandocfilters import toJSONFilter, Header, RawInline
8
9
def remove_header_numbering(key, value, fmt, meta):
    """Replace TIM's ``nonumber`` header class with pandoc's ``unnumbered``.

    Pandoc filter action. Only ``Header`` elements are touched, and only when
    the output format is ``latex``; for everything else ``None`` is returned,
    which leaves the element unchanged.

    :param key: type name of the pandoc element being visited
    :param value: element payload; for a header ``(level, [ident, classes, kvs], contents)``
    :param fmt: output format pandoc is writing
    :param meta: document metadata (unused)
    :return: a rebuilt ``Header`` element, or ``None`` when the element is not handled
    """
    if key != 'Header' or fmt != 'latex':
        return None

    (level, [ident, classes, kvs], contents) = value

    if 'nonumber' in classes:
        # Build a new list instead of mutating the one inside ``value``, and
        # drop every occurrence of the TIM specific class (list.remove would
        # only drop the first one).
        classes = [c for c in classes if c != 'nonumber']
        if 'unnumbered' not in classes:
            classes.append('unnumbered')  # the class that pandoc understands

    return Header(level, [ident, classes, kvs], contents)
20
21
if __name__ == '__main__':
    # Executed as a script (i.e. as a pandoc filter): hand the action over to
    # pandocfilters.
    toJSONFilter(remove_header_numbering)
timApp/pandoc_imagefilepathsfilter.py
(213 / 0)
  
1#!/usr/bin/env python3
2
3"""
4Pandoc filter to convert image sources to latex graphics source paths considering
5the images location according to the set of following rules:
6
7- If an image has an absolute path that points to the TIM machine, e.g. "http://<TIM-domain>/imagepath"
8 or "<tim-domain>/imagepath", then....
9- If an image has a relative path, e.g. "/images/1239854102", then....
10- If an image points to a resource that resides at another host, simply convert the image
11 to a simple link at the output. This is due to possible copyright infringements, as the images
12 would otherwise be wrongfully copied to the output document.
13
14TODO: BETTER DOCUMENTATION
15
16"""
17import os
18import re
19import tempfile
20import urllib.request
21
22from timApp.defaultconfig import FILES_PATH
23from timApp.documentmodel.randutils import hashfunc
24
25from pandocfilters import toJSONFilter, RawInline, Image, Link, Str
26
# NOTE: the paths below are derived from the location of this file, so the
# module has to reside in the timApp root folder.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
IMAGE_ROOT = os.path.join(APP_ROOT, FILES_PATH, 'blocks')

# Protocol + hostname of this machine; None when TIM_HOST is not set.
CURRENT_HOST_MACHINE = os.environ.get('TIM_HOST')

# Replaced with the contents of the whitelist file when run as a script.
ALLOWED_EXTERNAL_HOSTS = []

PRINTING_WHITELIST_FILE = os.path.join(APP_ROOT, '.printing_whitelist.config')

# URL path prefixes and the local directories handle_images maps them to.
urlmaps = [
    {'url': '/csstatic/', 'dir': '/service/timApp/modules/cs/static/'},
    {'url': '/csgenerated/', 'dir': '/service/timApp/modules/cs/generated/'},
    {'url': '/static/', 'dir': '/service/timApp/static/'},
    {'url': '/images/', 'dir': '/tim_files/blocks/images/'},
]
46
47
48
def init_whitelist():
    """Load the whitelist of trusted external image sources.

    Makes a best-effort attempt to create the whitelist file (and its
    directory) when it does not exist yet, then reads it.

    Each entry is later used by ``handle_images`` as a pattern for
    ``re.match`` against the image URL. Blank lines are therefore skipped: an
    empty pattern matches every URL and would whitelist all hosts.

    :return: list of the stripped, non-empty lines of the whitelist file;
             an empty list when the file cannot be read
    """
    if not os.path.exists(PRINTING_WHITELIST_FILE):
        try:
            os.makedirs(os.path.dirname(PRINTING_WHITELIST_FILE))
        except OSError:
            # The directory already exists or cannot be created.
            pass

        try:
            with open(PRINTING_WHITELIST_FILE, 'a'):
                pass  # only create the empty file
        except IOError:
            pass

    try:
        with open(PRINTING_WHITELIST_FILE, 'r') as f:
            lines = f.readlines()
    except IOError:
        return []

    stripped = (line.strip() for line in lines)
    return [entry for entry in stripped if entry]
76
# Downloaded external images are stored below the OS temp directory.
TEMP_DIR_PATH = tempfile.gettempdir()
DOWNLOADED_IMAGES_ROOT = os.path.join(TEMP_DIR_PATH, 'tim-img-dls')

# Name of the per-document download folder; filled in by handle_images from
# the 'texdocid' metadata entry the first time it is needed.
texdocid = None
82
83
def _map_url_path_to_file(url_path):
    """Translate a TIM URL path to a local file path using ``urlmaps`` ('' when unmapped)."""
    for urlmap in urlmaps:
        prefix = urlmap['url']
        if not url_path.startswith(prefix):
            continue
        base_dir = os.path.normpath(urlmap['dir'])
        # Swap only the leading prefix (str.replace would also rewrite later
        # occurrences of it) and collapse any ".." segments.
        candidate = os.path.normpath(urlmap['dir'] + url_path[len(prefix):])
        # The URL comes from document content: refuse paths that climb out of
        # the mapped directory.
        if candidate.startswith(base_dir + os.sep):
            return candidate
        return ''
    return ''


def _download_image(url, url_path, meta):
    """Fetch *url* into the per-document download folder; return the local path or None on failure."""
    global texdocid  # cached folder name, shared by all images of the document
    if not texdocid:
        m = meta.get('texdocid', None)
        if m:
            texdocid = str(m.get('c', 'xx'))

    # Fall back to the same default the metadata lookup uses, instead of
    # passing None to os.path.join when the document has no 'texdocid'.
    images_root = os.path.join(DOWNLOADED_IMAGES_ROOT, texdocid or 'xx')

    # Unique file name: hash of the whole URL plus the extension of the URL
    # path (taking it from the path keeps query strings out of the file name).
    _, ext = os.path.splitext(url_path)
    img_dl_path = os.path.join(images_root, str(hashfunc(url)) + ext)

    try:
        os.makedirs(images_root, exist_ok=True)
        if not os.path.exists(img_dl_path):
            urllib.request.urlretrieve(url, img_dl_path)
    except Exception:
        # Best effort: whatever went wrong, the caller falls back to a link.
        return None

    return img_dl_path.replace('\\', '/')  # UNIX form for pandoc/LaTeX


def handle_images(key, value, fmt, meta):
    """Pandoc filter action that resolves image sources for LaTeX output.

    Only ``Image`` elements are handled, and only when the output format is
    ``latex``; for everything else ``None`` is returned, which leaves the
    element unchanged. For a handled image, the first rule that applies wins:

    1. The URL has no host, or its host equals the host of ``TIM_HOST``, and
       its path maps (via ``urlmaps``) to an existing local file: the image is
       pointed at that file.
    2. The URL matches an entry of ``ALLOWED_EXTERNAL_HOSTS``: the image is
       downloaded to a temp folder and pointed at the downloaded copy.
    3. Otherwise the image is replaced with a link to the URL wrapped in the
       ``\\externalimagelink`` LaTeX command.

    :param key: type name of the pandoc element being visited
    :param value: element payload; for an image ``(attrs, alt_text_inlines, [url, title])``
    :param fmt: output format pandoc is writing
    :param meta: document metadata; ``texdocid`` names the download folder
    :return: an ``Image`` element, a list of inline elements, or ``None``
    """
    if key != 'Image' or fmt != 'latex':
        return None

    # Imported here so the function does not depend on the import done in the
    # ``__main__`` guard.
    from urllib.parse import urlparse

    (attrs, alt_text_inlines, target) = value
    (url, title) = target

    parsed_url = urlparse(url)
    host = parsed_url.hostname or ''
    path = parsed_url.path or ''
    # TIM_HOST may be unset, in which case every URL with a host is external.
    curhost = urlparse(CURRENT_HOST_MACHINE or '').hostname or ''

    is_external = host != '' and host != curhost
    image_path = '' if is_external else _map_url_path_to_file(path)

    if image_path != '' and os.path.exists(image_path):
        # LaTeX uses UNIX style paths even on Windows.
        return Image(attrs, alt_text_inlines, [image_path.replace('\\', '/'), title])

    # Download images from allowed external urls to be attached to the
    # document. Empty patterns are skipped: they would match every URL.
    if any(h and re.match(h, url) for h in ALLOWED_EXTERNAL_HOSTS):
        img_dl_path = _download_image(url, path, meta)
        if img_dl_path is not None:
            return Image(attrs, alt_text_inlines, [img_dl_path, title])

    # For other external images (and failed downloads), transform the element
    # to appear as a link to the image resource in the LaTeX output.
    return [
        RawInline('latex', "\\externalimagelink{"),
        Link(attrs, [Str(url)], [url, title]),
        RawInline('latex', "}")
    ]
200
201
if __name__ == '__main__':
    # urlparse moved from the urlparse module (Python 2.7) to urllib.parse
    # (Python 3); import whichever exists so that the name is available at
    # module level while the filter runs.
    try:
        from urllib.parse import urlparse
    except ImportError:
        from urlparse import urlparse

    # Read the trusted image sources before any element is processed.
    ALLOWED_EXTERNAL_HOSTS = init_whitelist()

    toJSONFilter(handle_images)
timApp/pandoc_inlinestylesfilter.py
(78 / 0)
  
1#!/usr/bin/env python3
2
3"""
4Pandoc filter to convert class values to commands of same name in latex. Leaves
5(should leave...) ids, other, predefined classes and key-values intact.
6"""
7
8from pandocfilters import toJSONFilter, Span, Str, RawInline
9
10
def classes_to_latex_cmds(key, value, fmt, meta):
    """Pandoc filter action that turns classes and RAWTEX markers into markup.

    * ``Str`` elements, formats ``latex`` and ``html``: a string starting with
      ``RAWTEX`` opens a wrapper named by the rest of the string
      (``\\<name>{`` in latex, ``<div class="<name>">`` in html) and the
      string ``ENDRAWTEX`` closes it (``}`` / ``</div>``).
    * ``Span`` elements, format ``latex``: a span with the class
      ``hidden-print`` is removed. Otherwise every class except
      ``visible-print`` and the ``csl-no-*`` classes is turned into a LaTeX
      command of the same name wrapped around the contents, and only the
      excepted classes stay on the span.

    Any other element yields ``None``, which leaves it unchanged.

    :param key: type name of the pandoc element being visited
    :param value: element payload
    :param fmt: output format pandoc is writing
    :param meta: document metadata (unused)
    :return: replacement element(s), ``[]`` to delete the element, or ``None``
    """
    if key == 'Str' and fmt in ('latex', 'html'):
        if value.startswith("RAWTEX"):
            cls = value[6:]
            if fmt == 'latex':
                return RawInline("latex", "\\" + cls + "{")
            # The name ends up inside an HTML attribute, so escape it.
            import html
            return RawInline("html", '<div class="' + html.escape(cls, quote=True) + '">')
        if value == "ENDRAWTEX":
            return RawInline(fmt, "}" if fmt == 'latex' else "</div>")
        return None

    if key == 'Span' and fmt == 'latex':
        ([ident, classes, kvs], contents) = value

        if 'hidden-print' in classes:
            return []

        # TODO: should preserve also the predefined styles below
        not_wrapped = ('visible-print', "csl-no-emph", "csl-no-strong", "csl-no-smallcaps")
        classes_to_wrap = [c for c in classes if c not in not_wrapped]

        # Classes left on the span: unique, and in their original order (a set
        # difference would return them in an arbitrary order).
        remaining = []
        for c in classes:
            if c in not_wrapped and c not in remaining:
                remaining.append(c)

        content = wrap_with_latex_cmds(contents, classes_to_wrap)

        return Span([ident, remaining, kvs], content)

    return None
53
54
def wrap_with_latex_cmds(content, classes_to_wrap):
    """Nest *content* inside one LaTeX command per entry of *classes_to_wrap*.

    The first class becomes the outermost command. With no classes the
    content is returned as it is.

    :param content: list of inline elements to wrap
    :param classes_to_wrap: names of the LaTeX commands to apply
    :return: list of inline elements
    """
    wrapped = content
    # Apply the last class first so that the first one ends up outermost.
    for cls in reversed(classes_to_wrap):
        opening = latex("\\%s{" % cls)
        closing = latex("}")
        wrapped = [opening] + wrapped + [closing]
    return wrapped
70
71
def latex(content):
    """Return *content* as a pandoc ``RawInline`` element of format ``latex``."""
    raw_format = 'latex'
    return RawInline(raw_format, content)
74
75
if __name__ == '__main__':
    # Executed as a script (i.e. as a pandoc filter): hand the action over to
    # pandocfilters.
    toJSONFilter(classes_to_latex_cmds)