36
37:- module(rdf_turtle_write,
38 [ rdf_save_turtle/2, 39 rdf_save_canonical_turtle/2, 40 rdf_save_trig/2, 41 rdf_save_canonical_trig/2, 42 rdf_save_ntriples/2 43 ]). 44:- use_module(library(record),[(record)/1, op(_,_,record)]). 45:- use_module(library(semweb/turtle), []). 46
47:- use_module(library(semweb/rdf_prefixes),
48 [ rdf_current_prefix/2, rdf_global_id/2
49 ]). 50
51:- if(exists_source(library(semweb/rdf_db))). 52:- use_module(library(semweb/rdf_db),
53 [ rdf_graph/1, rdf_graph_prefixes/3,
54 rdf_is_bnode/1, rdf_equal/2, rdf_graph_property/2,
55 rdf_statistics/1, rdf/4, rdf_resource/1, rdf_subject/1,
56 rdf/3
57 ]). 58have_rdf_db.
59:- else. 60have_rdf_db :- fail.
61:- endif. 62
63:- autoload(library(apply),[maplist/3,include/3,partition/4]). 64:- autoload(library(debug),[assertion/1]). 65:- autoload(library(error),[must_be/2,existence_error/2,type_error/2]). 66:- autoload(library(lists),
67 [append/2,reverse/2,delete/3,append/3,select/3,member/2]). 68:- autoload(library(option),[meta_options/3]). 69:- autoload(library(pairs),
70 [ transpose_pairs/2,
71 map_list_to_pairs/3,
72 pairs_values/2,
73 group_pairs_by_key/2
74 ]). 75:- autoload(library(rbtrees),
76 [ ord_list_to_rbtree/2,
77 rb_lookup/3,
78 rb_insert/4,
79 rb_empty/1,
80 rb_update/5
81 ]). 82:- autoload(library(sgml),
83 [xml_name/1,xml_is_dom/1,xsd_number_string/2]). 84:- autoload(library(sgml_write),[xml_write/2]). 85:- autoload(library(url),[file_name_to_url/2,parse_url/2]). 86
87:- predicate_options(rdf_save_turtle/2, 2,
88 [ graph(atom),
89 base(atom),
90 encoding(oneof([utf8])),
91 indent(nonneg),
92 tab_distance(nonneg),
93 silent(boolean),
94 subject_white_lines(nonneg),
95 align_prefixes(boolean),
96 user_prefixes(boolean),
97 prefixes(list),
98 only_known_prefixes(boolean),
99 comment(boolean),
100 group(boolean),
101 inline_bnodes(boolean),
102 single_line_bnodes(boolean),
103 abbreviate_literals(boolean),
104 canonize_numbers(boolean),
105 canonical(boolean),
106 a(boolean),
107 expand(any)
108 ]). 109:- predicate_options(rdf_save_canonical_turtle/2, 2,
110 [ pass_to(rdf_save_turtle/2, 2)
111 ]). 112
113/** <module> Turtle - Terse RDF Triple Language writer
114
115This module implements the Turtle language for representing the RDF
116triple model as defined by Dave Beckett from the Institute for Learning
117and Research Technology University of Bristol in the document:
118
119 * http://www.w3.org/TeamSubmission/turtle/
120 * http://www.w3.org/TeamSubmission/2008/SUBM-turtle-20080114/#sec-conformance
121
122The Turtle format is designed as an RDF serialization that is easy to
123read and write by both machines and humans. Due to the latter property,
124this library goes a long way in trying to produce human-readable output.
125
126In addition to the human-readable format, this library can write a
127_canonical_ representation of RDF graphs. The canonical representation
128has the following properties:
129
130 * Equivalent graphs result in the same document. Graphs are
131 considered equivalent iff they contain the same _set_ of
132 triples, regardless of the labeling of blank nodes in the
133 graph.
134
135 * Changes to the graph are diff-friendly. This means
136
137 - Prefixes are combined in the header and thus changes
138 to the namespaces only result in changes in the header.
139 - Blank nodes that are used only once (including collections)
140 are written in-line with the object they belong to.
141 - For other blank nodes we to realise stable labeling that
142 is based on property-values.
143
144@tbd Low-level string output takes 28% of the time. Move to C?
145*/
146
147:- record
148 tw_state(graph, % graph being saved
149 graphs:list(atom), % TriG graphs being saved
150 base, % The base-URI
151 encoding=utf8, % Desired encoding
152 indent:nonneg=8, % Indent for ; and ,-lists
153 tab_distance:nonneg=8, % Tab distance
154 silent:boolean=false, % If true, do not print a message
155 subject_white_lines:nonneg=1,%Extra lines between subjects
156 a:boolean=true, % Use 'a' for rdf:type
157 align_prefixes:boolean=true,%Align prefix declarations
158 prefixes:list, % Provide prefixes
159 user_prefixes:boolean=true,% Use rdf_current_ns/2?
160 only_known_prefixes:boolean=false,% Only use known prefixes
161 comment:boolean=true, % write some comments into the file
162 group:boolean=true, % Group using ; and ,
163 inline_bnodes:boolean=true, % Inline single-used bnodes
164 single_line_bnodes:boolean=false, % No newline after ;
165 abbreviate_literals:boolean=true, % Abbreviate known types
166 canonize_numbers:boolean=false, % How to write numbers
167 canonical:boolean=false,
168 expand:any=lookup, % Access to the triples
169 % Private fields
170 bnode_id=0, % Incrementing bnode-id
171 nodeid_map, % RBTree mapping NodeIDs to Refs
172 bnode_hash, % RBTree holding reuse-count of hashes
173 subject_count=0, % # subjects saved
174 triple_count=0, % # triples saved
175 base_root, % Root URL of base
176 base_dir, % Directory
177 base_path, % Path of base
178 prefix_map). 179
180
181:- meta_predicate
182 rdf_save_turtle(+, :),
183 rdf_save_canonical_turtle(+, :),
184 rdf_save_canonical_trig(+, :),
185 rdf_save_trig(+, :). 186
259
260rdf_save_turtle(Spec, QOptions) :-
261 meta_options(is_meta, QOptions, Options),
262 statistics(cputime, T0),
263 must_be(list, Options),
264 make_tw_state(Options, State0, _Rest),
265 init_base(State0, State1),
266 init_prefix_map(State1, State),
267 tw_state_encoding(State, Enc),
268 setup_call_cleanup(
269 open_output(Spec, Enc, Stream, Cleanup),
270 ( tw_prefix_map(State, Stream),
271 tw_graph(State, Stream)
272 ),
273 Cleanup),
274 statistics(cputime, T1),
275 Time is T1-T0,
276 tw_state_triple_count(State, SavedTriples),
277 tw_state_subject_count(State, SavedSubjects),
278 ( tw_state_silent(State, true)
279 -> true
280 ; print_message(informational,
281 rdf(saved(Spec, Time, SavedSubjects, SavedTriples)))
282 ).
283
284is_meta(expand).
285
303
304rdf_save_canonical_turtle(Spec, M:Options) :-
305 canonical_options(CannonicalOptions, Options),
306 rdf_save_turtle(Spec, M:CannonicalOptions).
307
308canonical_options([ encoding(utf8),
309 indent(0),
310 tab_distance(0),
311 subject_white_lines(1),
312 align_prefixes(false),
313 user_prefixes(false),
314 comment(false),
315 group(false),
316 single_line_bnodes(true),
317 canonical(true)
318 | Options
319 ],
320 Options).
321
322
327
328rdf_save_ntriples(File, Options):-
329 rdf_save_turtle(File,
330 [ comment(false),
331 encoding(utf8),
332 group(false),
333 prefixes([]),
334 subject_white_lines(0),
335 a(false),
336 inline_bnodes(false),
337 abbreviate_literals(false)
338 | Options
339 ]).
340
341
352
353rdf_save_trig(Spec, QOptions) :-
354 meta_options(is_meta, QOptions, Options),
355 thread_self(Me),
356 thread_statistics(Me, cputime, T0),
357 must_be(list, Options),
358 make_tw_state(Options, State0, _Rest),
359 init_base(State0, State1),
360 trig_graphs(State1, Graphs),
361 init_prefix_map(State1, Graphs, State2),
362 tw_state_encoding(State2, Enc),
363 setup_call_cleanup(
364 open_output(Spec, Enc, Stream, Cleanup),
365 ( tw_prefix_map(State2, Stream),
366 tw_trig_graphs(Graphs, Stream, State2, State)
367 ),
368 Cleanup),
369 thread_statistics(Me, cputime, T1),
370 Time is T1-T0,
371 tw_state_triple_count(State, SavedTriples),
372 tw_state_subject_count(State, SavedSubjects),
373 length(Graphs, SavedGraphs),
374 ( tw_state_silent(State, true)
375 -> true
376 ; print_message(informational,
377 rdf(saved(Spec, Time, SavedSubjects, SavedTriples, SavedGraphs)))
378 ).
379
384
385
386rdf_save_canonical_trig(Spec, M:Options) :-
387 canonical_options(CannonicalOptions, Options),
388 rdf_save_trig(Spec, M:CannonicalOptions).
389
390tw_trig_graphs([], _, State, State).
391tw_trig_graphs([H|T], Stream, State0, State) :-
392 set_graph_of_tw_state(H, State0, State1),
393 nl(Stream),
394 tw_resource(H, State1, Stream),
395 format(Stream, ' {~n', []),
396 tw_graph(State1, Stream),
397 format(Stream, '~N}~n', []),
398 set_bnode_id_of_tw_state(0, State1, State2),
399 set_nodeid_map_of_tw_state(_, State2, State3),
400 set_bnode_hash_of_tw_state(_, State3, State4),
401 tw_trig_graphs(T, Stream, State4, State).
402
403
409
410trig_graphs(State, Graphs) :-
411 tw_state_graphs(State, Graphs),
412 ( nonvar(Graphs)
413 -> true
414 ; tw_state_expand(State, Expand),
415 graphs(Expand, Graphs0),
416 sort(Graphs0, Graphs)
417 ).
418
419:- if(have_rdf_db). 420graphs(lookup, Graphs) :-
421 findall(G, rdf_graph(G), Graphs).
422:- endif. 423graphs(Expand, Graphs) :-
424 findall(G, distinct(G, call(Expand,_S,_P,_O,G)), Graphs).
425
426
433
434open_output(stream(Out), Encoding, Out, Cleanup) :-
435 !,
436 stream_property(Out, encoding(Old)),
437 ( ( Old == Encoding
438 ; Old == wchar_t 439 )
440 -> Cleanup = true
441 ; set_stream(Out, encoding(Encoding)),
442 Cleanup = set_stream(Out, encoding(Old))
443 ).
444open_output(Stream, Encoding, Out, Cleanup) :-
445 \+ atom(Stream),
446 is_stream(Stream),
447 !,
448 open_output(stream(Stream), Encoding, Out, Cleanup).
449open_output(Spec, Encoding, Out,
450 close(Out)) :-
451 out_to_file(Spec, File),
452 open(File, write, Out, [encoding(Encoding)]).
453
454out_to_file(URL, File) :-
455 atom(URL),
456 file_name_to_url(File, URL),
457 !.
458out_to_file(File, File).
459
460
461 464
471
472init_prefix_map(State0, State) :-
473 tw_state_prefixes(State0, Prefixes),
474 nonvar(Prefixes),
475 !,
476 user_prefix_map(Prefixes, PrefixMap),
477 set_prefix_map_of_tw_state(PrefixMap, State0, State).
478init_prefix_map(State0, State) :-
479 tw_state_graph(State0, Graph),
480 graph_prefix_map(State0, Graph, PrefixMap),
481 set_prefix_map_of_tw_state(PrefixMap, State0, State).
482
483init_prefix_map(State0, _Graphs, State) :- 484 tw_state_prefixes(State0, Prefixes),
485 nonvar(Prefixes),
486 !,
487 user_prefix_map(Prefixes, PrefixMap),
488 set_prefix_map_of_tw_state(PrefixMap, State0, State).
489init_prefix_map(State0, Graphs, State) :- 490 maplist(graph_prefixes(State0), Graphs, NestedPrefixes),
491 append(NestedPrefixes, Prefixes0),
492 sort(Prefixes0, Prefixes),
493 prefix_map(State0, Prefixes, PrefixMap),
494 set_prefix_map_of_tw_state(PrefixMap, State0, State).
495
496graph_prefix_map(State, Graph, PrefixMap) :-
497 graph_prefixes(State, Graph, Prefixes),
498 prefix_map(State, Prefixes, PrefixMap).
499
500graph_prefixes(State0, Graph, Prefixes) :-
501 tw_state_expand(State0, Expand),
502 tw_state_only_known_prefixes(State0, OnlyKnown),
503 rdf_graph_prefixes(Graph, Prefixes,
504 [ filter(turtle_prefix(OnlyKnown)),
505 expand(Expand),
506 min_count(2),
507 get_prefix(turtle:iri_turtle_prefix)
508 ]).
509
510prefix_map(State, Prefixes, PrefixMap) :-
511 remove_base(State, Prefixes, Prefixes2),
512 prefix_names(Prefixes2, State, Pairs),
513 transpose_pairs(Pairs, URI_Abrevs),
514 reverse(URI_Abrevs, RURI_Abrevs),
515 flip_pairs(RURI_Abrevs, PrefixMap).
516
521
522user_prefix_map(Prefixes, PrefixMap) :-
523 must_be(list, Prefixes),
524 maplist(prefix_pair, Prefixes, Pairs),
525 map_list_to_pairs(prefix_length, Pairs, LenPairs),
526 sort(LenPairs, LenPairs1),
527 pairs_values(LenPairs1, RevPrefixMap),
528 reverse(RevPrefixMap, PrefixMap).
529
530prefix_pair(Prefix-URI, Prefix-URI) :-
531 !,
532 must_be(atom, Prefix),
533 must_be(atom, URI).
534prefix_pair(Prefix, Prefix-URI) :-
535 must_be(atom, Prefix),
536 ( rdf_current_prefix(Prefix, URI)
537 -> true
538 ; existence_error(prefix, Prefix)
539 ).
540
541prefix_length(_-URI, Len) :- atom_length(URI, Len).
542
547
548:- public turtle_prefix/4. 549
550turtle_prefix(true, _, Prefix, _) :-
551 !,
552 rdf_current_prefix(_, Prefix),
553 !.
554turtle_prefix(_, _, Prefix, URI) :-
555 sub_atom(Prefix, _, 1, 0, Last),
556 turtle_prefix_char(Last),
557 atom_concat(Prefix, Local, URI),
558 \+ sub_atom(Local, _, _, _, '.').
559
560turtle_prefix_char('#').
561turtle_prefix_char('/').
562
563
564remove_base(State, Prefixes, PrefixesNoBase) :-
565 tw_state_base_dir(State, BaseDir),
566 atom(BaseDir),
567 !,
568 delete(Prefixes, BaseDir, PrefixesNoBase).
569remove_base(_State, Prefixes, Prefixes).
570
571flip_pairs([], []).
572flip_pairs([Key-Val|Pairs], [Val-Key|Flipped]) :-
573 flip_pairs(Pairs, Flipped).
574
575prefix_names(URIs, State, Prefixes) :-
576 prefix_names(URIs, State, 1, Prefixes, []).
577
578prefix_names([], _, _, List, List) :- !.
579prefix_names(URIs, State, Len, Prefixes, Tail) :-
580 prefix_names(URIs, State, Len, Prefixes, PTail, Rest),
581 Len1 is Len + 1,
582 prefix_names(Rest, State, Len1, PTail, Tail).
583
584prefix_names(URIs, State, Len, Prefixes, PTail, Rest) :-
585 map_list_to_pairs(propose_abbrev(State, Len), URIs, Pairs),
586 !,
587 keysort(Pairs, Sorted),
588 unique(Sorted, Prefixes, PTail, Rest).
589prefix_names(URIs, _, _, Prefixes, PTail, []) :-
590 number_prefixes(URIs, 1, Prefixes, PTail).
591
592number_prefixes([], _, PL, PL).
593number_prefixes([H|T0], N, [P-H|PL], T) :-
594 atomic_concat(ns, N, P),
595 succ(N, N1),
596 number_prefixes(T0, N1, PL, T).
597
598unique([], L, L, []).
599unique([A-U|T0], [A-U|T], L, Rest) :-
600 T0 \= [A-_|_],
601 !,
602 unique(T0, T, L, Rest).
603unique([A-U|T0], Prefixes, L, [U|Rest0]) :-
604 strip_keys(T0, A, T1, Rest0, Rest),
605 unique(T1, Prefixes, L, Rest).
606
607strip_keys([A-U|T0], A, T, [U|R0], R) :-
608 !,
609 strip_keys(T0, A, T, R0, R).
610strip_keys(L, _, L, R, R).
611
612
617
618propose_abbrev(_, _, URI, Abbrev) :-
619 well_known_ns(Abbrev, URI),
620 !.
621propose_abbrev(State, _, URI, Abbrev) :-
622 tw_state_user_prefixes(State, true),
623 rdf_current_prefix(Abbrev, URI),
624 !.
625propose_abbrev(_, Len, URI, Abbrev) :-
626 namespace_parts(URI, Parts),
627 include(abbrev_part, Parts, Names),
628 reverse(Names, RevNames),
629 length(Use, Len),
630 append(Use, _, RevNames),
631 atomic_list_concat(Use, -, Abbrev).
632
633abbrev_part(X) :-
634 xml_name(X),
635 \+ well_known_ns(X, _),
636 \+ well_known_extension(X).
637
638well_known_ns(rdf, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
639well_known_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
640well_known_ns(owl, 'http://www.w3.org/2002/07/owl#').
641well_known_ns(xsd, 'http://www.w3.org/2001/XMLSchema#').
642well_known_ns(dc, 'http://purl.org/dc/elements/1.1/').
643
644well_known_extension(ttl).
645well_known_extension(nt).
646well_known_extension(n3).
647well_known_extension(xml).
648well_known_extension(rdf).
649well_known_extension(owl).
650
652
653namespace_parts(URL, Parts) :-
654 atom_codes(URL, Codes),
655 phrase(parts(Parts), Codes),
656 !.
657namespace_parts(URL, _) :-
658 format(user_error, 'Couldn\'t split ~q~n', [URL]),
659 fail.
660
661parts(List) --> sep2, parts2(List).
662
663parts2([H|T]) -->
664 string(Codes), {Codes \== []},
665 sep,
666 !,
667 {atom_codes(H, Codes)},
668 parts2(T).
669parts2([]) --> [].
670
671string([]) --> [].
672string([H|T]) --> [H], string(T).
673
674sep --> sep_char, sep2.
675sep([], []).
676
677sep2 --> sep_char, !, sep2.
678sep2 --> [].
679
680sep_char --> "/".
681sep_char --> ":".
682sep_char --> ".".
683sep_char --> "?".
684sep_char --> "#".
685
686
691
692init_base(State0, State) :-
693 tw_state_base(State0, BaseURI),
694 atom(BaseURI),
695 !,
696 parse_url(BaseURI, Attributes),
697 include(root_part, Attributes, RootAttrs),
698 parse_url(BaseRoot, RootAttrs),
699 memberchk(path(BasePath), Attributes),
700 file_directory_name(BasePath, BaseDir),
701 atomic_list_concat([BaseRoot, BaseDir, /], BaseDirURI),
702 set_base_root_of_tw_state(BaseRoot, State0, State1),
703 set_base_path_of_tw_state(BasePath, State1, State2),
704 set_base_dir_of_tw_state(BaseDirURI, State2, State).
705init_base(State, State).
706
707root_part(protocol(_)).
708root_part(host(_)).
709root_part(port(_)).
710
711
712 715
721
722tw_graph(State, Out) :-
723 subjects(State, Subjects),
724 length(Subjects, SubjectCount),
725 inc_subject_count(State, SubjectCount),
726 partition(rdf_is_bnode, Subjects, BNodes, ProperSubjects),
727 maplist(pair_var, BNodes, Pairs),
728 ord_list_to_rbtree(Pairs, BNTree),
729 tw_state_nodeid_map(State, BNTree),
730 ( ProperSubjects == []
731 -> true
732 ; length(ProperSubjects, PSCount),
733 comment(State, 'Named toplevel resources (~D)', [PSCount], Out),
734 tw_proper_subjects(ProperSubjects, State, Out)
735 ),
736 tw_bnodes(Pairs, State, Out).
737
738pair_var(BNode, BNode-_).
739
740tw_prefix_map(State, Out) :-
741 tw_state_prefix_map(State, PrefixMap),
742 tw_prefix_map(PrefixMap, State, Out).
743
747
748tw_prefix_map(PrefixMap, State, Out) :-
749 tw_state_align_prefixes(State, true),
750 !,
751 longest_prefix(PrefixMap, 0, Length),
752 PrefixCol is Length+10,
753 tw_base(PrefixCol, State, Out),
754 tw_prefix_map(PrefixMap, PrefixCol, State, Out).
755tw_prefix_map(PrefixMap, State, Out) :-
756 tw_base(0, State, Out),
757 tw_prefix_map(PrefixMap, 0, State, Out).
758
759longest_prefix([], L, L).
760longest_prefix([Prefix-_|T], L0, L) :-
761 atom_length(Prefix, L1),
762 L2 is max(L0, L1),
763 longest_prefix(T, L2, L).
764
765
766tw_base(Col, State, Out) :-
767 tw_state_base(State, Base),
768 atom(Base),
769 !,
770 format(Out, '@base ~t~*|', [Col]),
771 turtle:turtle_write_uri(Out, Base),
772 format(Out, ' .~n', []).
773tw_base(_, _, _).
774
775
776tw_prefix_map([], _, _, _).
777tw_prefix_map([Prefix-URI|T], Col, State, Out) :-
778 format(Out, '@prefix ~t~w: ~*|', [Prefix, Col]),
779 tw_relative_uri(URI, State, Out),
780 format(Out, ' .~n', []),
781 ( T == []
782 -> true
783 ; tw_prefix_map(T, Col, State, Out)
784 ).
785
786
790
791tw_proper_subjects([], _, _).
792tw_proper_subjects([H|T], State, Out) :-
793 separate_subjects(State, Out),
794 tw_subject(H, H, State, Out),
795 tw_proper_subjects(T, State, Out).
796
797
798separate_subjects(State, Out) :-
799 tw_state_subject_white_lines(State, ExtraLines),
800 put_n(ExtraLines, '\n', Out).
801
805
806tw_subject(URI, Ref, State, Out) :-
807 subject_triples(URI, State, Pairs),
808 length(Pairs, Count),
809 inc_triple_count(State, Count),
810 group_po(Pairs, Grouped),
811 tw_subject_triples(Grouped, Ref, State, Out).
812
813group_po(Pairs, Grouped) :-
814 group_pairs_by_key(Pairs, Grouped0),
815 rdf_equal(rdf:type, RDFType),
816 ( select(RDFType-Types, Grouped0, Grouped1)
817 -> Grouped = [RDFType-Types|Grouped1]
818 ; Grouped = Grouped0
819 ).
820
835
836tw_bnodes(Pairs, State, Out) :-
837 tw_top_bnodes(Pairs, State, Out, Rest1),
838 tw_numbered_bnodes(Rest1, State, Out, 1, Rest2),
839 tw_cyclic_bnodes(Rest2, State, Out, 0).
840
841
842tw_numbered_bnodes([], _, _, _, []) :- !.
843tw_numbered_bnodes(Pairs, State, Out, Level, Rest) :-
844 multi_referenced(Pairs, RefPairs, Rest0),
845 ( RefPairs == []
846 -> Rest = Rest0
847 ; length(RefPairs, Count),
848 comment(State, 'Level ~D multi-referenced blank-nodes (~D)',
849 [ Level, Count ], Out),
850 tw_ref_bnodes(RefPairs, State, Out),
851 Level1 is Level + 1,
852 tw_numbered_bnodes(Rest0, State, Out, Level1, Rest)
853 ).
854
855multi_referenced([], [], []).
856multi_referenced([H|T], RefPairs, Rest) :-
857 H = _-Ref,
858 ( Ref == written
859 -> multi_referenced(T, RefPairs, Rest)
860 ; var(Ref)
861 -> Rest = [H|TR],
862 multi_referenced(T, RefPairs, TR)
863 ; assertion(Ref = bnode(_)),
864 RefPairs = [H|TRP], 865 multi_referenced(T, TRP, Rest)
866 ).
867
868tw_ref_bnodes([], _, _).
869tw_ref_bnodes([BNode-Ref|T], State, Out) :-
870 separate_subjects(State, Out),
871 tw_subject(BNode, Ref, State, Out),
872 tw_ref_bnodes(T, State, Out).
873
874
879
880tw_top_bnodes(Pairs, State, Out, Rest) :-
881 unreferenced(Pairs, State, TopBNodes, Rest),
882 ( TopBNodes == []
883 -> true
884 ; length(TopBNodes, Count),
885 comment(State, 'Toplevel blank-nodes (~D)', [Count], Out),
886 sort_bnodes(TopBNodes, SortedTopBNodes, State),
887 tw_top_bnodes(SortedTopBNodes, State, Out)
888 ).
889
890unreferenced([], _, [], []).
891unreferenced([H|T], State, UnrefPairs, Rest) :-
892 H = BNode-Ref,
893 ( Ref == written
894 -> unreferenced(T, State, UnrefPairs, Rest)
895 ; var(Ref),
896 object_link_count(BNode, State, 0)
897 -> UnrefPairs = [H|URT],
898 unreferenced(T, State, URT, Rest)
899 ; Rest = [H|TR],
900 unreferenced(T, State, UnrefPairs, TR)
901 ).
902
903tw_top_bnodes([], _, _).
904tw_top_bnodes([BNode-_|T], State, Out) :-
905 tw_bnode(BNode, State, Out),
906 tw_top_bnodes(T, State, Out).
907
908
909tw_bnode(BNode, State, Out) :-
910 subject_triples(BNode, State, Pairs),
911 length(Pairs, Count),
912 inc_triple_count(State, Count),
913 ( tw_state_inline_bnodes(State, true)
914 -> tw_bnode_triples(Pairs, State, Out),
915 format(Out, ' .~n', [])
916 ; next_bnode_id(State, BNode, Ref),
917 tw_bnode_ntriples(Pairs, Ref, State, Out)
918 ).
919
920tw_bnode_triples(Pairs, State, Out) :-
921 group_po(Pairs, Grouped),
922 ( tw_state_single_line_bnodes(State, true)
923 -> format(Out, '[ ', []),
924 tw_triples(Grouped, -1, State, Out),
925 format(Out, ' ]', [])
926 ; line_position(Out, Indent),
927 format(Out, '[ ', []),
928 line_position(Out, AIndent),
929 tw_triples(Grouped, AIndent, State, Out),
930 nl_indent(Out, State, Indent),
931 format(Out, ']', [])
932 ).
933
934tw_bnode_ntriples([], _, _, _).
935tw_bnode_ntriples([P-O|T], Ref, State, Out) :-
936 tw_bnode_ref(Ref, Out),
937 format(Out, ' ', []),
938 tw_predicate(P, State, Out),
939 format(Out, ' ', []),
940 tw_object(O, State, Out),
941 format(Out, ' .~n', []),
942 tw_bnode_ntriples(T, Ref, State, Out).
943
944
951
952tw_cyclic_bnodes([], _State, _Out, _) :- !.
953tw_cyclic_bnodes(Pairs, State, Out, Cycle0) :-
954 ( tw_state_canonical(State, true)
955 -> sort_bnode_pairs(Pairs, BNodes, State)
956 ; BNodes = Pairs
957 ),
958 succ(Cycle0, Cycle),
959 BNodes = [BNode-Ref|_],
960 next_bnode_id(State, BNode, Ref),
961 comment(State, 'Breaking cycle ~D', [Cycle], Out),
962 tw_numbered_bnodes(Pairs, State, Out, 1, Rest),
963 tw_cyclic_bnodes(Rest, State, Out, Cycle).
964
965
973
974tw_subject_triples([], _, _, _) :- !.
975tw_subject_triples(Grouped, URI, State, Out) :-
976 tw_state_group(State, false),
977 !,
978 tw_ungrouped_triples(Grouped, URI, State, Out).
979tw_subject_triples(Grouped, URI, State, Out) :-
980 tw_resource(URI, State, Out),
981 ( tw_state_indent(State, Indent),
982 Indent > 0
983 -> nl_indent(Out, State, Indent)
984 ; put_char(Out, ' '),
985 line_position(Out, Indent)
986 ),
987 tw_triples(Grouped, Indent, State, Out),
988 format(Out, ' .~n', []).
989
994
995tw_ungrouped_triples([], _, _, _).
996tw_ungrouped_triples([P-Vs|Groups], URI, State, Out) :-
997 partition(rdf_is_bnode, Vs, BNVs, ProperVs),
998 tw_ungrouped_values(ProperVs, P, URI, State, Out),
999 sort_bnodes(BNVs, SortedBNVs, State),
1000 tw_ungrouped_values(SortedBNVs, P, URI, State, Out),
1001 tw_ungrouped_triples(Groups, URI, State, Out).
1002
1003tw_ungrouped_values([], _, _, _, _).
1004tw_ungrouped_values([V|T], P, URI, State, Out) :-
1005 tw_resource(URI, State, Out),
1006 put_char(Out, ' '),
1007 tw_predicate(P, State, Out),
1008 put_char(Out, ' '),
1009 tw_object(V, State, Out),
1010 format(Out, ' .~n', []),
1011 tw_ungrouped_values(T, P, URI, State, Out).
1012
1013
1017
1018tw_triples([P-Vs|MoreGroups], Indent, State, Out) :-
1019 tw_write_pvs(Vs, P, State, Out),
1020 ( MoreGroups == []
1021 -> true
1022 ; format(Out, ' ;', []),
1023 nl_indent(Out, State, Indent),
1024 tw_triples(MoreGroups, Indent, State, Out)
1025 ).
1026
1027tw_write_pvs(Values, P, State, Out) :-
1028 tw_predicate(P, State, Out),
1029 put_char(Out, ' '),
1030 line_position(Out, Indent),
1031 tw_write_vs(Values, Indent, State, Out).
1032
1033tw_predicate(P, State, Out) :-
1034 ( rdf_equal(P, rdf:type),
1035 tw_state_a(State, true)
1036 -> format(Out, 'a', [])
1037 ; tw_resource(P, State, Out)
1038 ).
1039
1040tw_write_vs([H|T], Indent, State, Out) :-
1041 tw_object(H, State, Out),
1042 ( T == []
1043 -> true
1044 ; format(Out, ' ,', []),
1045 nl_indent(Out, State, Indent),
1046 tw_write_vs(T, Indent, State, Out)
1047 ).
1048
1052
1053tw_object(Value, State, Out) :-
1054 rdf_is_bnode(Value),
1055 !,
1056 tw_bnode_object(Value, State, Out).
1057tw_object(Value, State, Out) :-
1058 atom(Value),
1059 !,
1060 tw_resource(Value, State, Out).
1061tw_object(Literal, State, Out) :-
1062 tw_literal(Literal, State, Out).
1063
1074
1075tw_bnode_object(BNode, State, Out) :-
1076 tw_state_nodeid_map(State, BNTree),
1077 rb_lookup(BNode, Ref, BNTree),
1078 !,
1079 ( var(Ref)
1080 -> ( tw_state_inline_bnodes(State, true),
1081 tw_unshared_bnode(BNode, State, Out)
1082 -> Ref = written
1083 ; next_bnode_id(State, BNode, Ref),
1084 tw_bnode_ref(Ref, Out)
1085 )
1086 ; tw_bnode_ref(Ref, Out)
1087 ).
1088tw_bnode_object(BNode, State, Out) :-
1089 object_link_count(BNode, State, N),
1090 N > 1,
1091 !,
1092 tw_state_nodeid_map(State, BNTree0),
1093 rb_insert(BNTree0, BNode, Ref, BNTree),
1094 set_nodeid_map_of_tw_state(BNTree, State),
1095 next_bnode_id(State, BNode, Ref),
1096 tw_bnode_ref(Ref, Out).
1097tw_bnode_object(BNode, State, Out) :-
1098 next_bnode_id(State, BNode, Ref),
1099 tw_bnode_ref(Ref, Out).
1100
1101tw_bnode_ref(bnode(Ref), Out) :-
1102 ( integer(Ref)
1103 -> format(Out, '_:bn~w', [Ref])
1104 ; format(Out, '_:~w', [Ref])
1105 ).
1106
1110
1111tw_unshared_bnode(BNode, State, Out) :-
1112 object_link_count(BNode, State, 1),
1113 subject_triples(BNode, State, Pairs),
1114 ( Pairs == []
1115 -> format(Out, '[]', [])
1116 ; pairs_unshared_collection(Pairs, State, Collection)
1117 -> ( Collection == []
1118 -> format(Out, '()', [])
1119 ; tw_state_nodeid_map(State, BNTree),
1120 rb_lookup(BNode, written, BNTree),
1121 length(Collection, NMembers),
1122 Triples is 2*NMembers,
1123 inc_triple_count(State, Triples),
1124 ( tw_state_single_line_bnodes(State, true)
1125 -> format(Out, '( ', []),
1126 tw_collection(Collection, -1, State, Out),
1127 format(Out, ' )', [])
1128 ; line_position(Out, Indent),
1129 format(Out, '( ', []),
1130 line_position(Out, AIndent),
1131 tw_collection(Collection, AIndent, State, Out),
1132 nl_indent(Out, State, Indent),
1133 format(Out, ')', [])
1134 )
1135 )
1136 ; tw_bnode_triples(Pairs, State, Out)
1137 ).
1138
1139tw_collection([H|T], Indent, State, Out) :-
1140 tw_object(H, State, Out),
1141 ( T \== []
1142 -> nl_indent(Out, State, Indent),
1143 tw_collection(T, Indent, State, Out)
1144 ; true
1145 ).
1146
1152
1153unshared_collection(C, _, []) :-
1154 rdf_equal(C, rdf:nil),
1155 !.
1156unshared_collection(C, State, List) :-
1157 rdf_is_bnode(C),
1158 object_link_count(C, State, 1),
1159 tw_state_nodeid_map(State, BNTree),
1160 rb_lookup(C, written, BNTree),
1161 subject_triples(C, State, Pairs),
1162 pairs_unshared_collection(Pairs, State, List).
1163
1164pairs_unshared_collection(Pairs, State, [H|T]) :-
1165 rdf_equal(rdf:first, RDFFirst),
1166 rdf_equal(rdf:rest, RDFRest),
1167 Pairs = [ RDFFirst-H,
1168 RDFRest-Rest
1169 | More
1170 ],
1171 ( More == []
1172 ; rdf_equal(rdf:type, RDFType),
1173 rdf_equal(rdf:'List', RDFList),
1174 More == [RDFType-RDFList]
1175 ),
1176 unshared_collection(Rest, State, T).
1177
1178
1182
1183object_link_count(BNode, State, Count) :-
1184 tw_state_graph(State, Graph),
1185 tw_state_expand(State, Expand),
1186 findall(S-P, call(Expand,S,P,BNode,Graph), Pairs0),
1187 sort(Pairs0, Pairs), 1188 length(Pairs, Count).
1189
1193
1194nl_indent(Out, _, -1) :-
1195 !,
1196 put_char(Out, ' ').
1197nl_indent(Out, State, Indent) :-
1198 nl(Out),
1199 tw_state_tab_distance(State, TD),
1200 ( TD == 0
1201 -> tab(Out, Indent)
1202 ; Tabs is Indent//TD,
1203 Spaces is Indent mod TD,
1204 put_n(Tabs, '\t', Out),
1205 put_n(Spaces, ' ', Out)
1206 ).
1207
1208put_n(N, Char, Out) :-
1209 N > 0,
1210 !,
1211 put_char(Out, Char),
1212 N2 is N - 1,
1213 put_n(N2, Char, Out).
1214put_n(_, _, _).
1215
1216
1221
1222subject_triples(URI, State, Pairs) :-
1223 tw_state_graph(State, Graph),
1224 tw_state_expand(State, Expand),
1225 findall(P-O, call(Expand, URI, P, O, Graph), Pairs0),
1226 sort(Pairs0, Pairs).
1227
1228
1229 1232
1237
1238subjects(State, Subjects) :-
1239 tw_state_expand(State, Expand),
1240 tw_state_graph(State, Graph),
1241 ( Expand == lookup,
1242 atom(Graph),
1243 ( rdf_graph_property(Graph, triples(Count))
1244 -> true
1245 ; Count = 0 1246 ),
1247 rdf_statistics(triples(Total)),
1248 Count * 10 < Total
1249 -> findall(S, rdf(S,_,_,Graph), List),
1250 sort(List, Subjects)
1251 ; Expand \== lookup
1252 -> findall(S, call(Expand, S,_,_,Graph), List),
1253 sort(List, Subjects)
1254 ; findall(Subject, subject(State, Subject), AllSubjects),
1255 sort(AllSubjects, Subjects)
1256 ).
1257
1258
1259subject(State, Subject) :-
1260 tw_state_graph(State, Graph),
1261 ( atom(Graph)
1262 -> rdf_resource(Subject),
1263 ( rdf(Subject, _, _, Graph)
1264 -> true
1265 )
1266 ; rdf_subject(Subject)
1267 ).
1268
1269
1270:- if(have_rdf_db). 1271:- public lookup/4. 1272
1273lookup(S,P,O,G) :-
1274 ( var(G)
1275 -> rdf(S,P,O)
1276 ; rdf(S,P,O,G)
1277 ).
1278:- else. 1279lookup(_S,_P,_O,_G) :-
1280 print_message(error, turtle_write(no_rdf_db)),
1281 fail.
1282:- endif. 1283
1284
1285 1288
1298
1302
1303sort_bnodes(BNodes, Sorted, _State) :-
1304 sort(BNodes, Sorted).
1305
1309
1310sort_bnode_pairs(Pairs, Sorted, _State) :-
1311 sort(Pairs, Sorted).
1312
1323
1324
1332
1333next_bnode_id(State, _BNode, bnode(Ref)) :-
1334 tw_state_canonical(State, false),
1335 !,
1336 tw_state_bnode_id(State, Ref0),
1337 Ref is Ref0+1,
1338 nb_set_bnode_id_of_tw_state(Ref, State).
1339next_bnode_id(State, BNode, bnode(Ref)) :-
1340 bnode_hash(BNode, Hash),
1341 tw_state_bnode_hash(State, BNHash),
1342 ( var(BNHash)
1343 -> rb_empty(BNHash)
1344 ; true
1345 ),
1346 ( rb_update(BNHash, Hash, C0, C, BNHash1)
1347 -> C is C0+1
1348 ; C = 0,
1349 rb_insert(BNHash, Hash, C, BNHash1)
1350 ),
1351 set_bnode_hash_of_tw_state(BNHash1, State),
1352 format(atom(Ref), 'bn_~w_~d', [Hash, C]).
1353
1359
1360bnode_hash(BNode, Hash) :-
1361 term_hash(BNode, Hash).
1362
1363
1364 1367
1371
1372tw_resource(BNodeID, _, Out) :-
1373 BNodeID = bnode(_),
1374 !,
1375 tw_bnode_ref(BNodeID, Out).
1376tw_resource(Resource, State, Out) :-
1377 tw_state_prefix_map(State, PrefixMap),
1378 member(Prefix-Full, PrefixMap),
1379 atom_concat(Full, Name, Resource),
1380 ( turtle:turtle_pn_local(Name)
1381 -> true
1382 ; Name == ''
1383 ),
1384 !,
1385 format(Out, '~w:', [Prefix]),
1386 turtle:turtle_write_pn_local(Out, Name).
1387tw_resource(Resource, State, Out) :-
1388 tw_relative_uri(Resource, State, Out).
1389
1390
1391tw_relative_uri(Resource, State, Out) :-
1392 tw_state_base_root(State, Root),
1393 atom(Root),
1394 atom_concat(Root, ResPath, Resource),
1395 sub_atom(ResPath, 0, _, _, /),
1396 tw_state_base_path(State, BasePath),
1397 relative_path(ResPath, BasePath, RelPath),
1398 !,
1399 turtle:turtle_write_uri(Out, RelPath).
1400tw_relative_uri(Resource, _, Out) :-
1401 turtle:turtle_write_uri(Out, Resource).
1402
1403relative_path(Path, RelTo, RelPath) :-
1404 atomic_list_concat(PL, /, Path),
1405 atomic_list_concat(RL, /, RelTo),
1406 delete_common_prefix(PL, RL, PL1, PL2),
1407 to_dot_dot(PL2, DotDot, PL1),
1408 atomic_list_concat(DotDot, /, RelPath).
1409
1410delete_common_prefix([H|T01], [H|T02], T1, T2) :-
1411 !,
1412 delete_common_prefix(T01, T02, T1, T2).
1413delete_common_prefix(T1, T2, T1, T2).
1414
1415to_dot_dot([], Tail, Tail).
1416to_dot_dot([_], Tail, Tail) :- !.
1417to_dot_dot([_|T0], ['..'|T], Tail) :-
1418 to_dot_dot(T0, T, Tail).
1419
1420
1424
1425tw_literal(^^(Value, Type), State, Out) :-
1426 !,
1427 tw_typed_literal(Type, Value, State, Out).
1428tw_literal(literal(type(Type, Value)), State, Out) :-
1429 !,
1430 tw_typed_literal(Type, Value, State, Out).
1431tw_literal(@(Value, Lang), State, Out) :-
1432 !,
1433 tw_quoted_string(Value, State, Out),
1434 downcase_atom(Lang, TurtleLang), 1435 format(Out, '@~w', [TurtleLang]).
1436tw_literal(literal(lang(Lang, Value)), State, Out) :-
1437 !,
1438 tw_quoted_string(Value, State, Out),
1439 downcase_atom(Lang, TurtleLang), 1440 format(Out, '@~w', [TurtleLang]).
1441tw_literal(literal(Value), State, Out) :-
1442 atom(Value),
1443 !,
1444 rdf_equal(xsd:string, TypeString),
1445 tw_typed_literal(TypeString, Value, State, Out).
1446 1447tw_literal(literal(Value), State, Out) :-
1448 integer(Value),
1449 !,
1450 rdf_equal(Type, xsd:integer),
1451 tw_typed_literal(Type, Value, State, Out).
1452tw_literal(literal(Value), State, Out) :-
1453 float(Value),
1454 !,
1455 rdf_equal(Type, xsd:double),
1456 tw_typed_literal(Type, Value, State, Out).
1457tw_literal(literal(Value), State, Out) :-
1458 xml_is_dom(Value),
1459 !,
1460 rdf_equal(Type, rdf:'XMLLiteral'),
1461 tw_typed_literal(Type, Value, State, Out).
1462tw_literal(Literal, _State, _Out) :-
1463 type_error(rdf_literal, Literal).
1464
1465
1466tw_typed_literal(Type, Value, State, Out) :-
1467 tw_state_abbreviate_literals(State, true),
1468 tw_abbreviated_literal(Type, Value, State, Out),
1469 !.
1470tw_typed_literal(Type, Value, State, Out) :-
1471 (atom(Value) ; string(Value)),
1472 !,
1473 tw_quoted_string(Value, State, Out),
1474 write(Out, '^^'),
1475 tw_resource(Type, State, Out).
1476tw_typed_literal(Type, Value, State, Out) :-
1477 rdf_equal(Type, rdf:'XMLLiteral'),
1478 !,
1479 with_output_to(string(Tmp),
1480 xml_write(Value, [header(false)])),
1481 tw_quoted_string(Tmp, State, Out),
1482 write(Out, '^^'),
1483 tw_resource(Type, State, Out).
1484tw_typed_literal(Type, Value, State, Out) :-
1485 format(string(Tmp), '~q', [Value]),
1486 tw_quoted_string(Tmp, State, Out),
1487 write(Out, '^^'),
1488 tw_resource(Type, State, Out).
1489
1490
1498
1499term_expansion((tw_abbreviated_literal(NS:Local, Value, State, Out) :- Body),
1500 (tw_abbreviated_literal(Type, Value, State, Out) :- Body)) :-
1501 atom(NS),
1502 rdf_global_id(NS:Local, Type).
1503
1504tw_abbreviated_literal(xsd:integer, Value, State, Out) :-
1505 ( tw_state_canonize_numbers(State, false)
1506 -> write(Out, Value)
1507 ; atom_number(Value, Int),
1508 format(Out, '~d', [Int])
1509 ).
1510tw_abbreviated_literal(xsd:double, Value, State, Out) :-
1511 ( tw_state_canonize_numbers(State, false)
1512 -> write(Out, Value)
1513 ; ValueF is float(Value),
1514 xsd_number_string(ValueF, FloatS),
1515 format(Out, '~s', [FloatS])
1516 ).
1517tw_abbreviated_literal(xsd:string, Value, State, Out) :-
1518 tw_quoted_string(Value, State, Out).
1519tw_abbreviated_literal(xsd:decimal, Value, _, Out) :-
1520 format(Out, '~w', [Value]).
1521tw_abbreviated_literal(xsd:boolean, Value, _, Out) :-
1522 format(Out, '~w', [Value]).
1523
1524
1529
1530tw_quoted_string(Atom, _, Out) :-
1531 turtle:turtle_write_quoted_string(Out, Atom).
1532
1533
1534 1537
(State, Format, Args, Out) :-
1539 tw_state_comment(State, true),
1540 !,
1541 format(Out, '~n# ', []),
1542 format(Out, Format, Args),
1543 format(Out, '~n', []).
1544comment(_, _, _, _).
1545
1546
1547
1548 1551
1552inc_triple_count(State, Count) :-
1553 tw_state_triple_count(State, C0),
1554 C1 is C0+Count,
1555 nb_set_triple_count_of_tw_state(C1, State).
1556
1557inc_subject_count(State, Count) :-
1558 tw_state_subject_count(State, C0),
1559 C1 is C0+Count,
1560 nb_set_subject_count_of_tw_state(C1, State).
1561
1562:- multifile
1563 prolog:message//1. 1564
1565prolog:message(rdf(saved(File, Time, SavedSubjects, SavedTriples))) -->
1566 [ 'Saved ~D triples about ~D subjects into '-[SavedTriples, SavedSubjects] ],
1567 rdf_output(File),
1568 [ ' (~3f sec)'-[Time] ].
1569prolog:message(rdf(saved(File, Time, SavedSubjects, SavedTriples,
1570 SavedGraphs))) -->
1571 [ 'Saved ~D graphs, ~D triples about ~D subjects into '-
1572 [SavedGraphs, SavedTriples, SavedSubjects] ],
1573 rdf_output(File),
1574 [ ' (~3f sec)'-[Time] ].
1575
1576rdf_output(StreamSpec) -->
1577 { ( StreamSpec = stream(Stream)
1578 -> true
1579 ; Stream = StreamSpec
1580 ),
1581 is_stream(Stream),
1582 stream_property(Stream, file_name(File))
1583 },
1584 !,
1585 [ '~p'-[File] ].
1586rdf_output(File) -->
1587 [ '~p'-[File] ]